Bug 1372230 part 1 - Move the code behind mozilla::Encoding::ForName() from encoding_rs to encoding_glue. r=emk
author Henri Sivonen <hsivonen@hsivonen.fi>
Fri, 25 Aug 2017 12:37:28 +0300
changeset 429203 e4fe008d0265d4075f7b230706095cd463699d7b
parent 429202 d17398b0dcfcc79cda31971d792bdf15046023ed
child 429204 72887ce9eaba5f8c0831fbc2b6026690dc8f060d
push id 1567
push user jlorenzo@mozilla.com
push date Thu, 02 Nov 2017 12:36:05 +0000
treeherder mozilla-release@e512c14a0406 [default view] [failures only]
perfherder [talos] [build metrics] [platform microbench] (compared to previous push)
reviewers emk
bugs 1372230
milestone 57.0a1
first release with
nightly linux32
nightly linux64
nightly mac
nightly win32
nightly win64
last release without
nightly linux32
nightly linux64
nightly mac
nightly win32
nightly win64
Bug 1372230 part 1 - Move the code behind mozilla::Encoding::ForName() from encoding_rs to encoding_glue. r=emk MozReview-Commit-ID: 6cFRuyWoPhR
intl/Encoding.h
intl/encoding_glue/src/lib.rs
intl/gtest/TestEncoding.cpp
--- a/intl/Encoding.h
+++ b/intl/Encoding.h
@@ -30,16 +30,19 @@ class Encoder;
 #define ENCODING_RS_NOT_NULL_CONST_ENCODING_PTR mozilla::NotNull<const mozilla::Encoding*>
 #define ENCODING_RS_ENCODER mozilla::Encoder
 #define ENCODING_RS_DECODER mozilla::Decoder
 
 #include "encoding_rs.h"
 
 extern "C" {
 
+mozilla::Encoding const*
+mozilla_encoding_for_name(uint8_t const* name, size_t name_len);
+
 nsresult
 mozilla_encoding_decode_to_nsstring(mozilla::Encoding const** encoding,
                                     uint8_t const* src,
                                     size_t src_len,
                                     nsAString* dst);
 
 nsresult
 mozilla_encoding_decode_to_nsstring_with_bom_removal(
@@ -255,17 +258,17 @@ public:
    * The motivating use case for this method is interoperability with
    * legacy Gecko code that represents encodings as name string instead of
    * type-safe `Encoding` objects. Using this method for other purposes is
    * most likely the wrong thing to do.
    */
   static inline NotNull<const mozilla::Encoding*> ForName(
     Span<const char> aName)
   {
-    return WrapNotNull(encoding_for_name(
+    return WrapNotNull(mozilla_encoding_for_name(
       reinterpret_cast<const uint8_t*>(aName.Elements()), aName.Length()));
   }
 
   /**
    * Writes the name of this encoding into `aName`.
    *
    * This name is appropriate to return as-is from the DOM
    * `document.characterSet` property.
--- a/intl/encoding_glue/src/lib.rs
+++ b/intl/encoding_glue/src/lib.rs
@@ -11,16 +11,17 @@
 // "top-level directory" in the above notice refers to
 // third_party/rust/encoding_rs/.
 
 extern crate encoding_rs;
 extern crate nsstring;
 extern crate nserror;
 
 use std::slice;
+use std::cmp::Ordering;
 use encoding_rs::*;
 use nsstring::*;
 use nserror::*;
 
 // nsStringBuffer's internal bookkeeping takes 8 bytes from
 // the allocation. Plus one for termination.
 const NS_CSTRING_OVERHEAD: usize = 9;
 
@@ -47,16 +48,123 @@ macro_rules! try_dst_set_len {
     unsafe {
         if $dst.fallible_set_length(needed).is_err() {
             return $ret;
         }
     }
      )
 }
 
+static ENCODINGS_SORTED_BY_NAME: [&'static Encoding; 39] = [&GBK_INIT,
+                                                            &BIG5_INIT,
+                                                            &IBM866_INIT,
+                                                            &EUC_JP_INIT,
+                                                            &KOI8_R_INIT,
+                                                            &EUC_KR_INIT,
+                                                            &KOI8_U_INIT,
+                                                            &GB18030_INIT,
+                                                            &UTF_16BE_INIT,
+                                                            &UTF_16LE_INIT,
+                                                            &SHIFT_JIS_INIT,
+                                                            &MACINTOSH_INIT,
+                                                            &ISO_8859_2_INIT,
+                                                            &ISO_8859_3_INIT,
+                                                            &ISO_8859_4_INIT,
+                                                            &ISO_8859_5_INIT,
+                                                            &ISO_8859_6_INIT,
+                                                            &ISO_8859_7_INIT,
+                                                            &ISO_8859_8_INIT,
+                                                            &ISO_8859_10_INIT,
+                                                            &ISO_8859_13_INIT,
+                                                            &ISO_8859_14_INIT,
+                                                            &WINDOWS_874_INIT,
+                                                            &ISO_8859_15_INIT,
+                                                            &ISO_8859_16_INIT,
+                                                            &ISO_2022_JP_INIT,
+                                                            &REPLACEMENT_INIT,
+                                                            &WINDOWS_1250_INIT,
+                                                            &WINDOWS_1251_INIT,
+                                                            &WINDOWS_1252_INIT,
+                                                            &WINDOWS_1253_INIT,
+                                                            &WINDOWS_1254_INIT,
+                                                            &WINDOWS_1255_INIT,
+                                                            &WINDOWS_1256_INIT,
+                                                            &WINDOWS_1257_INIT,
+                                                            &WINDOWS_1258_INIT,
+                                                            &ISO_8859_8_I_INIT,
+                                                            &X_MAC_CYRILLIC_INIT,
+                                                            &X_USER_DEFINED_INIT];
+
+/// If the argument matches exactly (case-sensitively; no whitespace
+/// removal performed) the name of an encoding, returns
+/// `const Encoding*` representing that encoding. Otherwise panics.
+///
+/// The motivating use case for this function is interoperability with
+/// legacy Gecko code that represents encodings as name string instead of
+/// type-safe `Encoding` objects. Using this function for other purposes is
+/// most likely the wrong thing to do.
+///
+/// `name` must be non-`NULL` even if `name_len` is zero. When `name_len`
+/// is zero, it is OK for `name` to be something non-dereferencable,
+/// such as `0x1`. This is required due to Rust's optimization for slices
+/// within `Option`.
+///
+/// # Panics
+///
+/// Panics if the argument is not the name of an encoding.
+///
+/// # Undefined behavior
+///
+/// UB ensues if `name` and `name_len` don't designate a valid memory block
+/// or if `name` is `NULL`.
+#[no_mangle]
+pub unsafe extern "C" fn mozilla_encoding_for_name(name: *const u8, name_len: usize) -> *const Encoding {
+    let name_slice = ::std::slice::from_raw_parts(name, name_len);
+    encoding_for_name(name_slice)
+}
+
+/// If the argument matches exactly (case-sensitively; no whitespace
+/// removal performed) the name of an encoding, returns
+/// `&'static Encoding` representing that encoding. Otherwise panics.
+///
+/// The motivating use case for this method is interoperability with
+/// legacy Gecko code that represents encodings as name string instead of
+/// type-safe `Encoding` objects. Using this method for other purposes is
+/// most likely the wrong thing to do.
+///
+/// Available via the C wrapper.
+///
+/// # Panics
+///
+/// Panics if the argument is not the name of an encoding.
+#[cfg_attr(feature = "cargo-clippy", allow(match_wild_err_arm))]
+pub fn encoding_for_name(name: &[u8]) -> &'static Encoding {
+    // The length of `"UTF-8"` is unique, so it's easy to check the most
+    // common case first.
+    if name.len() == 5 {
+        assert_eq!(name, b"UTF-8", "Bogus encoding name");
+        return UTF_8;
+    }
+    match ENCODINGS_SORTED_BY_NAME.binary_search_by(
+        |probe| {
+            let bytes = probe.name().as_bytes();
+            let c = bytes.len().cmp(&name.len());
+            if c != Ordering::Equal {
+                return c;
+            }
+            let probe_iter = bytes.iter().rev();
+            let candidate_iter = name.iter().rev();
+            probe_iter.cmp(candidate_iter)
+        }
+    ) {
+        Ok(i) => ENCODINGS_SORTED_BY_NAME[i],
+        Err(_) => panic!("Bogus encoding name"),
+    }
+}
+
 #[no_mangle]
 pub unsafe extern "C" fn mozilla_encoding_decode_to_nsstring(encoding: *mut *const Encoding,
                                                              src: *const u8,
                                                              src_len: usize,
                                                              dst: *mut nsAString)
                                                              -> nsresult {
     let (rv, enc) = decode_to_nsstring(&**encoding, slice::from_raw_parts(src, src_len), &mut *dst);
     *encoding = enc as *const Encoding;
--- a/intl/gtest/TestEncoding.cpp
+++ b/intl/gtest/TestEncoding.cpp
@@ -22,22 +22,113 @@ ENCODING_TEST(ForLabel)
   nsAutoCString label("  uTf-8   ");
   ASSERT_EQ(Encoding::ForLabel(label), UTF_8_ENCODING);
   label.AssignLiteral("   cseucpkdfmTjapanese  ");
   ASSERT_EQ(Encoding::ForLabel(label), EUC_JP_ENCODING);
 }
 
 ENCODING_TEST(ForName)
 {
-  nsAutoCString encoding("UTF-8");
+  nsAutoCString encoding("GBK");
+  ASSERT_EQ(Encoding::ForName(encoding), GBK_ENCODING);
+  encoding.AssignLiteral("Big5");
+  ASSERT_EQ(Encoding::ForName(encoding), BIG5_ENCODING);
+  encoding.AssignLiteral("UTF-8");
   ASSERT_EQ(Encoding::ForName(encoding), UTF_8_ENCODING);
+  encoding.AssignLiteral("IBM866");
+  ASSERT_EQ(Encoding::ForName(encoding), IBM866_ENCODING);
   encoding.AssignLiteral("EUC-JP");
   ASSERT_EQ(Encoding::ForName(encoding), EUC_JP_ENCODING);
+  encoding.AssignLiteral("KOI8-R");
+  ASSERT_EQ(Encoding::ForName(encoding), KOI8_R_ENCODING);
+  encoding.AssignLiteral("EUC-KR");
+  ASSERT_EQ(Encoding::ForName(encoding), EUC_KR_ENCODING);
+  encoding.AssignLiteral("KOI8-U");
+  ASSERT_EQ(Encoding::ForName(encoding), KOI8_U_ENCODING);
+  encoding.AssignLiteral("gb18030");
+  ASSERT_EQ(Encoding::ForName(encoding), GB18030_ENCODING);
+  encoding.AssignLiteral("UTF-16BE");
+  ASSERT_EQ(Encoding::ForName(encoding), UTF_16BE_ENCODING);
+  encoding.AssignLiteral("UTF-16LE");
+  ASSERT_EQ(Encoding::ForName(encoding), UTF_16LE_ENCODING);
+  encoding.AssignLiteral("Shift_JIS");
+  ASSERT_EQ(Encoding::ForName(encoding), SHIFT_JIS_ENCODING);
+  encoding.AssignLiteral("macintosh");
+  ASSERT_EQ(Encoding::ForName(encoding), MACINTOSH_ENCODING);
+  encoding.AssignLiteral("ISO-8859-2");
+  ASSERT_EQ(Encoding::ForName(encoding), ISO_8859_2_ENCODING);
+  encoding.AssignLiteral("ISO-8859-3");
+  ASSERT_EQ(Encoding::ForName(encoding), ISO_8859_3_ENCODING);
+  encoding.AssignLiteral("ISO-8859-4");
+  ASSERT_EQ(Encoding::ForName(encoding), ISO_8859_4_ENCODING);
+  encoding.AssignLiteral("ISO-8859-5");
+  ASSERT_EQ(Encoding::ForName(encoding), ISO_8859_5_ENCODING);
+  encoding.AssignLiteral("ISO-8859-6");
+  ASSERT_EQ(Encoding::ForName(encoding), ISO_8859_6_ENCODING);
+  encoding.AssignLiteral("ISO-8859-7");
+  ASSERT_EQ(Encoding::ForName(encoding), ISO_8859_7_ENCODING);
+  encoding.AssignLiteral("ISO-8859-8");
+  ASSERT_EQ(Encoding::ForName(encoding), ISO_8859_8_ENCODING);
+  encoding.AssignLiteral("ISO-8859-10");
+  ASSERT_EQ(Encoding::ForName(encoding), ISO_8859_10_ENCODING);
+  encoding.AssignLiteral("ISO-8859-13");
+  ASSERT_EQ(Encoding::ForName(encoding), ISO_8859_13_ENCODING);
+  encoding.AssignLiteral("ISO-8859-14");
+  ASSERT_EQ(Encoding::ForName(encoding), ISO_8859_14_ENCODING);
+  encoding.AssignLiteral("windows-874");
+  ASSERT_EQ(Encoding::ForName(encoding), WINDOWS_874_ENCODING);
+  encoding.AssignLiteral("ISO-8859-15");
+  ASSERT_EQ(Encoding::ForName(encoding), ISO_8859_15_ENCODING);
+  encoding.AssignLiteral("ISO-8859-16");
+  ASSERT_EQ(Encoding::ForName(encoding), ISO_8859_16_ENCODING);
+  encoding.AssignLiteral("ISO-2022-JP");
+  ASSERT_EQ(Encoding::ForName(encoding), ISO_2022_JP_ENCODING);
+  encoding.AssignLiteral("replacement");
+  ASSERT_EQ(Encoding::ForName(encoding), REPLACEMENT_ENCODING);
+  encoding.AssignLiteral("windows-1250");
+  ASSERT_EQ(Encoding::ForName(encoding), WINDOWS_1250_ENCODING);
+  encoding.AssignLiteral("windows-1251");
+  ASSERT_EQ(Encoding::ForName(encoding), WINDOWS_1251_ENCODING);
+  encoding.AssignLiteral("windows-1252");
+  ASSERT_EQ(Encoding::ForName(encoding), WINDOWS_1252_ENCODING);
+  encoding.AssignLiteral("windows-1253");
+  ASSERT_EQ(Encoding::ForName(encoding), WINDOWS_1253_ENCODING);
+  encoding.AssignLiteral("windows-1254");
+  ASSERT_EQ(Encoding::ForName(encoding), WINDOWS_1254_ENCODING);
+  encoding.AssignLiteral("windows-1255");
+  ASSERT_EQ(Encoding::ForName(encoding), WINDOWS_1255_ENCODING);
+  encoding.AssignLiteral("windows-1256");
+  ASSERT_EQ(Encoding::ForName(encoding), WINDOWS_1256_ENCODING);
+  encoding.AssignLiteral("windows-1257");
+  ASSERT_EQ(Encoding::ForName(encoding), WINDOWS_1257_ENCODING);
+  encoding.AssignLiteral("windows-1258");
+  ASSERT_EQ(Encoding::ForName(encoding), WINDOWS_1258_ENCODING);
+  encoding.AssignLiteral("ISO-8859-8-I");
+  ASSERT_EQ(Encoding::ForName(encoding), ISO_8859_8_I_ENCODING);
+  encoding.AssignLiteral("x-mac-cyrillic");
+  ASSERT_EQ(Encoding::ForName(encoding), X_MAC_CYRILLIC_ENCODING);
+  encoding.AssignLiteral("x-user-defined");
+  ASSERT_EQ(Encoding::ForName(encoding), X_USER_DEFINED_ENCODING);
 }
 
+// Test disabled pending bug 1393711
+#if 0
+ENCODING_TEST(BogusName)
+{
+  nsAutoCString encoding("utf-8");
+  ASSERT_DEATH_IF_SUPPORTED(Encoding::ForName(encoding), "Bogus encoding name");
+  encoding.AssignLiteral("ISO-8859-1");
+  ASSERT_DEATH_IF_SUPPORTED(Encoding::ForName(encoding), "Bogus encoding name");
+  encoding.AssignLiteral("gbk");
+  ASSERT_DEATH_IF_SUPPORTED(Encoding::ForName(encoding), "Bogus encoding name");
+  encoding.AssignLiteral(" UTF-8 ");
+  ASSERT_DEATH_IF_SUPPORTED(Encoding::ForName(encoding), "Bogus encoding name");
+}
+#endif
+
 ENCODING_TEST(ForBOM)
 {
   nsAutoCString data("\xEF\xBB\xBF\x61");
   const Encoding* encoding;
   size_t bomLength;
   Tie(encoding, bomLength) = Encoding::ForBOM(data);
   ASSERT_EQ(encoding, UTF_8_ENCODING);
   ASSERT_EQ(bomLength, 3U);