Bug 1034627 part 7 - Cleanup and expose UTF8 conversion functions. r=terrence
author Jan de Mooij <jdemooij@mozilla.com>
Fri, 11 Jul 2014 16:22:37 +0200
changeset 215535 52291e750b098ebacd926875e5d4977e9ce948b1
parent 215534 4dfa43eb6862a1d795cdbb7a9e7ffb2ce1dbed51
child 215536 6f10bdf152448668e5f7157673bbaee3bce2aae7
push id 515
push user raliiev@mozilla.com
push date Mon, 06 Oct 2014 12:51:51 +0000
treeherder mozilla-release@267c7a481bef [default view] [failures only]
perfherder [talos] [build metrics] [platform microbench] (compared to previous push)
reviewers terrence
bugs 1034627
milestone 33.0a1
first release with
nightly linux32
nightly linux64
nightly mac
nightly win32
nightly win64
last release without
nightly linux32
nightly linux64
nightly mac
nightly win32
nightly win64
Bug 1034627 part 7 - Cleanup and expose UTF8 conversion functions. r=terrence
js/public/CharacterEncoding.h
js/src/vm/CharacterEncoding.cpp
--- a/js/public/CharacterEncoding.h
+++ b/js/public/CharacterEncoding.h
@@ -12,16 +12,18 @@
 
 #include "js/TypeDecls.h"
 #include "js/Utility.h"
 
 namespace js {
 struct ThreadSafeContext;
 }
 
+class JSFlatString;
+
 namespace JS {
 
 /*
  * By default, all C/C++ 1-byte-per-character strings passed into the JSAPI
  * are treated as ISO/IEC 8859-1, also known as Latin-1. That is, each
  * byte is treated as a 2-byte character, and there is no way to pass in a
  * string containing characters beyond U+00FF.
  */
@@ -197,14 +199,28 @@ UTF8CharsToNewTwoByteCharsZ(JSContext *c
 /*
  * The same as UTF8CharsToNewTwoByteCharsZ(), except that any malformed UTF-8 characters
  * will be replaced by \uFFFD. No exception will be thrown for malformed UTF-8
  * input.
  */
 extern TwoByteCharsZ
 LossyUTF8CharsToNewTwoByteCharsZ(JSContext *cx, const UTF8Chars utf8, size_t *outlen);
 
+/*
+ * Returns the length of the char buffer required to encode |s| as UTF8.
+ * Does not include the null-terminator.
+ */
+JS_PUBLIC_API(size_t)
+GetDeflatedUTF8StringLength(JSFlatString *s);
+
+/*
+ * Encode |src| as UTF8. The caller must ensure |dst| has enough space.
+ * Does not write the null terminator.
+ */
+JS_PUBLIC_API(void)
+DeflateStringToUTF8Buffer(JSFlatString *src, mozilla::RangedPtr<char> dst);
+
 } // namespace JS
 
 inline void JS_free(JS::Latin1CharsZ &ptr) { js_free((void*)ptr.get()); }
 inline void JS_free(JS::UTF8CharsZ &ptr) { js_free((void*)ptr.get()); }
 
 #endif /* js_CharacterEncoding_h */
--- a/js/src/vm/CharacterEncoding.cpp
+++ b/js/src/vm/CharacterEncoding.cpp
@@ -60,112 +60,100 @@ GetDeflatedUTF8StringLength(const CharT 
         while (v) {
             v >>= 5;
             nbytes++;
         }
     }
     return nbytes;
 }
 
-static bool
-PutUTF8ReplacementCharacter(char **dst, size_t *dstlenp) {
-    if (*dstlenp < 3)
-        return false;
-    *(*dst)++ = (char) 0xEF;
-    *(*dst)++ = (char) 0xBF;
-    *(*dst)++ = (char) 0xBD;
-    *dstlenp -= 3;
-    return true;
+JS_PUBLIC_API(size_t)
+JS::GetDeflatedUTF8StringLength(JSFlatString *s)
+{
+    JS::AutoCheckCannotGC nogc;
+    return s->hasLatin1Chars()
+           ? ::GetDeflatedUTF8StringLength(s->latin1Chars(nogc), s->length())
+           : ::GetDeflatedUTF8StringLength(s->twoByteChars(nogc), s->length());
 }
 
-/*
- * Write up to |*dstlenp| bytes into |dst|.  Writes the number of bytes used
- * into |*dstlenp| on success.  Returns false on failure.
- */
+static void
+PutUTF8ReplacementCharacter(mozilla::RangedPtr<char> &dst)
+{
+    *dst++ = char(0xEF);
+    *dst++ = char(0xBF);
+    *dst++ = char(0xBD);
+}
+
 template <typename CharT>
-static bool
-DeflateStringToUTF8Buffer(js::ThreadSafeContext *cx, const CharT *src, size_t srclen,
-                          char *dst, size_t *dstlenp)
+static void
+DeflateStringToUTF8Buffer(const CharT *src, size_t srclen, mozilla::RangedPtr<char> dst)
 {
-    size_t dstlen = *dstlenp;
-    size_t origDstlen = dstlen;
-
     while (srclen) {
         uint32_t v;
         jschar c = *src++;
         srclen--;
         if (c >= 0xDC00 && c <= 0xDFFF) {
-            if (!PutUTF8ReplacementCharacter(&dst, &dstlen))
-                goto bufferTooSmall;
+            PutUTF8ReplacementCharacter(dst);
             continue;
         } else if (c < 0xD800 || c > 0xDBFF) {
             v = c;
         } else {
             if (srclen < 1) {
-                if (!PutUTF8ReplacementCharacter(&dst, &dstlen))
-                    goto bufferTooSmall;
+                PutUTF8ReplacementCharacter(dst);
                 continue;
             }
             jschar c2 = *src;
             if ((c2 < 0xDC00) || (c2 > 0xDFFF)) {
-                if (!PutUTF8ReplacementCharacter(&dst, &dstlen))
-                    goto bufferTooSmall;
+                PutUTF8ReplacementCharacter(dst);
                 continue;
             }
             src++;
             srclen--;
             v = ((c - 0xD800) << 10) + (c2 - 0xDC00) + 0x10000;
         }
         size_t utf8Len;
         if (v < 0x0080) {
             /* no encoding necessary - performance hack */
-            if (dstlen == 0)
-                goto bufferTooSmall;
-            *dst++ = (char) v;
+            *dst++ = char(v);
             utf8Len = 1;
         } else {
             uint8_t utf8buf[4];
             utf8Len = js_OneUcs4ToUtf8Char(utf8buf, v);
-            if (utf8Len > dstlen)
-                goto bufferTooSmall;
             for (size_t i = 0; i < utf8Len; i++)
-                *dst++ = (char) utf8buf[i];
+                *dst++ = char(utf8buf[i]);
         }
-        dstlen -= utf8Len;
     }
-    *dstlenp = (origDstlen - dstlen);
-    return true;
+}
 
-bufferTooSmall:
-    *dstlenp = (origDstlen - dstlen);
-    if (cx->isJSContext()) {
-        js::gc::AutoSuppressGC suppress(cx->asJSContext());
-        JS_ReportErrorNumber(cx->asJSContext(), js_GetErrorMessage, nullptr,
-                             JSMSG_BUFFER_TOO_SMALL);
-    }
-    return false;
+JS_PUBLIC_API(void)
+JS::DeflateStringToUTF8Buffer(JSFlatString *src, mozilla::RangedPtr<char> dst)
+{
+    JS::AutoCheckCannotGC nogc;
+    return src->hasLatin1Chars()
+           ? ::DeflateStringToUTF8Buffer(src->latin1Chars(nogc), src->length(), dst)
+           : ::DeflateStringToUTF8Buffer(src->twoByteChars(nogc), src->length(), dst);
 }
 
 template <typename CharT>
 UTF8CharsZ
 JS::CharsToNewUTF8CharsZ(js::ThreadSafeContext *cx, const mozilla::Range<const CharT> chars)
 {
     JS_ASSERT(cx);
 
     /* Get required buffer size. */
     const CharT *str = chars.start().get();
-    size_t len = GetDeflatedUTF8StringLength(str, chars.length());
+    size_t len = ::GetDeflatedUTF8StringLength(str, chars.length());
 
     /* Allocate buffer. */
     char *utf8 = cx->pod_malloc<char>(len + 1);
     if (!utf8)
         return UTF8CharsZ();
 
     /* Encode to UTF8. */
-    JS_ALWAYS_TRUE(DeflateStringToUTF8Buffer(cx, str, chars.length(), utf8, &len));
+    ::DeflateStringToUTF8Buffer(str, chars.length(), mozilla::RangedPtr<char>(utf8, len));
     utf8[len] = '\0';
 
     return UTF8CharsZ(utf8, len);
 }
 
 template UTF8CharsZ
 JS::CharsToNewUTF8CharsZ(js::ThreadSafeContext *cx, const mozilla::Range<const Latin1Char> chars);