Bug 1497580 - Save text/html as UTF-8 to the clipboard. r=karlt
☠☠ backed out by 96a0fd705c2a ☠ ☠
author: Tom Schuster <evilpies@gmail.com>
Tue, 16 Oct 2018 21:08:59 +0000
changeset 499983 bb2acd046eae1fbedfe11348dc3f31530a645fd8
parent 499982 d4fe026dee75521ac39478591cb84d782eb0b189
child 499984 8fee7620570b941294f056eab7fa220cc531669c
push id: 1864
push user: ffxbld-merge
push date: Mon, 03 Dec 2018 15:51:40 +0000
treeherder: mozilla-release@f040763d99ad [default view] [failures only]
perfherder: [talos] [build metrics] [platform microbench] (compared to previous push)
reviewers: karlt
bugs: 1497580
milestone: 64.0a1
first release with
nightly linux32
nightly linux64
nightly mac
nightly win32
nightly win64
last release without
nightly linux32
nightly linux64
nightly mac
nightly win32
nightly win64
Bug 1497580 - Save text/html as UTF-8 to the clipboard. r=karlt

I verified that we can still copy from Firefox to an older version of Firefox without this patch. LibreOffice also still works. Talking to some GTK people on IRC they are also happy about UTF-8 instead of wrongly declared UCS2.

Differential Revision: https://phabricator.services.mozilla.com/D8467
widget/gtk/nsClipboard.cpp
--- a/widget/gtk/nsClipboard.cpp
+++ b/widget/gtk/nsClipboard.cpp
@@ -29,22 +29,26 @@
 
 #include "imgIContainer.h"
 
 #include <gtk/gtk.h>
 #include <gtk/gtkx.h>
 
 #include "mozilla/Encoding.h"
 
-
 using namespace mozilla;
 
 // Idle timeout for receiving selection and property notify events (microsec)
 const int kClipboardTimeout = 500000;
 
+// We add this prefix to HTML markup, so that GetHTMLCharset can correctly
+// detect the HTML as UTF-8 encoded.
+static const char kHTMLMarkupPrefix[] =
+    R"(<meta http-equiv="content-type" content="text/html; charset=utf-8">)";
+
 // Callback when someone asks us for the data
 void
 clipboard_get_cb(GtkClipboard *aGtkClipboard,
                  GtkSelectionData *aSelectionData,
                  guint info,
                  gpointer user_data);
 
 // Callback when someone asks us to clear a clipboard
@@ -522,16 +526,41 @@ nsClipboard::SelectionGetEvent(GtkClipbo
         if (!pixbuf)
             return;
 
         gtk_selection_data_set_pixbuf(aSelectionData, pixbuf);
         g_object_unref(pixbuf);
         return;
     }
 
+    if (selectionTarget == gdk_atom_intern(kHTMLMime, FALSE)) {
+        rv = trans->GetTransferData(kHTMLMime, getter_AddRefs(item), &len);
+        if (!item || NS_FAILED(rv)) {
+            return;
+        }
+
+        nsCOMPtr<nsISupportsString> wideString;
+        wideString = do_QueryInterface(item);
+        if (!wideString) {
+            return;
+        }
+
+        nsAutoString ucs2string;
+        wideString->GetData(ucs2string);
+
+        nsAutoCString html;
+        // Add the prefix so the encoding is correctly detected.
+        html.AppendLiteral(kHTMLMarkupPrefix);
+        AppendUTF16toUTF8(ucs2string, html);
+
+        gtk_selection_data_set(aSelectionData, selectionTarget, 8,
+                               (const guchar*)html.get(), html.Length());
+        return;
+    }
+
     // Try to match up the selection data target to something our
     // transferable provides.
     gchar *target_name = gdk_atom_name(selectionTarget);
     if (!target_name)
         return;
 
     rv = trans->GetTransferData(target_name, getter_AddRefs(item), &len);
     // nothing found?
@@ -540,41 +569,20 @@ nsClipboard::SelectionGetEvent(GtkClipbo
         return;
     }
 
     void *primitive_data = nullptr;
     nsPrimitiveHelpers::CreateDataFromPrimitive(nsDependentCString(target_name),
                                                 item, &primitive_data, len);
 
     if (primitive_data) {
-        // Check to see if the selection data is text/html
-        if (selectionTarget == gdk_atom_intern (kHTMLMime, FALSE)) {
-            /*
-             * "text/html" can be encoded UCS2. It is recommended that
-             * documents transmitted as UCS2 always begin with a ZERO-WIDTH
-             * NON-BREAKING SPACE character (hexadecimal FEFF, also called
-             * Byte Order Mark (BOM)). Adding BOM can help other app to
-             * detect mozilla use UCS2 encoding when copy-paste.
-             */
-            guchar *buffer = (guchar *)
-                    g_malloc((len * sizeof(guchar)) + sizeof(char16_t));
-            if (!buffer)
-                return;
-            char16_t prefix = 0xFEFF;
-            memcpy(buffer, &prefix, sizeof(prefix));
-            memcpy(buffer + sizeof(prefix), primitive_data, len);
-            g_free((guchar *)primitive_data);
-            primitive_data = (guchar *)buffer;
-            len += sizeof(prefix);
-        }
-
         gtk_selection_data_set(aSelectionData, selectionTarget,
                                8, /* 8 bits in a unit */
                                (const guchar *)primitive_data, len);
-        g_free(primitive_data);
+        free(primitive_data);
     }
 
     g_free(target_name);
 
 }
 
 void
 nsClipboard::SelectionClearEvent(GtkClipboard *aGtkClipboard)
@@ -651,43 +659,50 @@ void ConvertHTMLtoUCS2(const char* data,
         auto encoding = Encoding::ForLabelNoReplacement(charset);
         if (!encoding) {
 #ifdef DEBUG_CLIPBOARD
             g_print("        get unicode decoder error\n");
 #endif
             outUnicodeLen = 0;
             return;
         }
+
+        auto dataSpan = MakeSpan(data, dataLength);
+        // Remove kHTMLMarkupPrefix again, it won't necessarily cause any
+        // issues, but might confuse other users.
+        const size_t prefixLen = ArrayLength(kHTMLMarkupPrefix) - 1;
+        if (dataSpan.Length() >= prefixLen &&
+            !strncmp(data, kHTMLMarkupPrefix, prefixLen)) {
+          dataSpan = dataSpan.From(prefixLen);
+        }
+
         auto decoder = encoding->NewDecoder();
-        CheckedInt<size_t> needed = decoder->MaxUTF16BufferLength(dataLength);
+        CheckedInt<size_t> needed =
+            decoder->MaxUTF16BufferLength(dataSpan.Length());
         if (!needed.isValid() || needed.value() > INT32_MAX) {
           outUnicodeLen = 0;
           return;
         }
 
         outUnicodeLen = 0;
         if (needed.value()) {
           *unicodeData = reinterpret_cast<char16_t*>(
             moz_xmalloc((needed.value() + 1) * sizeof(char16_t)));
           uint32_t result;
           size_t read;
           size_t written;
           bool hadErrors;
           Tie(result, read, written, hadErrors) =
-            decoder->DecodeToUTF16(AsBytes(MakeSpan(data, dataLength)),
+            decoder->DecodeToUTF16(AsBytes(dataSpan),
                                    MakeSpan(*unicodeData, needed.value()),
                                    true);
           MOZ_ASSERT(result == kInputEmpty);
-          MOZ_ASSERT(read == size_t(dataLength));
+          MOZ_ASSERT(read == size_t(dataSpan.Length()));
           MOZ_ASSERT(written <= needed.value());
           Unused << hadErrors;
-#ifdef DEBUG_CLIPBOARD
-          if (read != dataLength)
-            printf("didn't consume all the bytes\n");
-#endif
           outUnicodeLen = written;
           // null terminate.
           (*unicodeData)[outUnicodeLen] = '\0';
         } // if valid length
     }
 }
 
 /*