Bug 1034627 part 7 - Cleanup and expose UTF8 conversion functions. r=terrence
☠☠ backed out by 6904f207728c ☠ ☠
author Jan de Mooij <jdemooij@mozilla.com>
Fri, 11 Jul 2014 16:22:37 +0200
changeset 193595 110fbc2ebc1a74d5aa424c911aebd792550745ce
parent 193594 8f50bc05d3373561b0d0e6a27a063361c0b37970
child 193596 bcba40acc0ac6dd9fb287f72a36653bdb166734b
push id unknown
push user unknown
push date unknown
reviewers terrence
bugs 1034627
milestone 33.0a1
Bug 1034627 part 7 - Cleanup and expose UTF8 conversion functions. r=terrence
js/public/CharacterEncoding.h
js/src/vm/CharacterEncoding.cpp
--- a/js/public/CharacterEncoding.h
+++ b/js/public/CharacterEncoding.h
@@ -197,14 +197,28 @@ UTF8CharsToNewTwoByteCharsZ(JSContext *c
 /*
  * The same as UTF8CharsToNewTwoByteCharsZ(), except that any malformed UTF-8 characters
  * will be replaced by \uFFFD. No exception will be thrown for malformed UTF-8
  * input.
  */
 extern TwoByteCharsZ
 LossyUTF8CharsToNewTwoByteCharsZ(JSContext *cx, const UTF8Chars utf8, size_t *outlen);
 
+/*
+ * Returns the length of the char buffer required to encode |s| as UTF8.
+ * Does not include the null-terminator.
+ */
+JS_PUBLIC_API(size_t)
+GetDeflatedUTF8StringLength(JSFlatString *s);
+
+/*
+ * Encode |src| as UTF8. The caller must ensure |dst| has enough space.
+ * Does not write the null terminator.
+ */
+JS_PUBLIC_API(void)
+DeflateStringToUTF8Buffer(JSFlatString *src, mozilla::RangedPtr<char> dst);
+
 } // namespace JS
 
 inline void JS_free(JS::Latin1CharsZ &ptr) { js_free((void*)ptr.get()); }
 inline void JS_free(JS::UTF8CharsZ &ptr) { js_free((void*)ptr.get()); }
 
 #endif /* js_CharacterEncoding_h */
--- a/js/src/vm/CharacterEncoding.cpp
+++ b/js/src/vm/CharacterEncoding.cpp
@@ -60,112 +60,100 @@ GetDeflatedUTF8StringLength(const CharT 
         while (v) {
             v >>= 5;
             nbytes++;
         }
     }
     return nbytes;
 }
 
-static bool
-PutUTF8ReplacementCharacter(char **dst, size_t *dstlenp) {
-    if (*dstlenp < 3)
-        return false;
-    *(*dst)++ = (char) 0xEF;
-    *(*dst)++ = (char) 0xBF;
-    *(*dst)++ = (char) 0xBD;
-    *dstlenp -= 3;
-    return true;
+JS_PUBLIC_API(size_t)
+JS::GetDeflatedUTF8StringLength(JSFlatString *s)
+{
+    JS::AutoCheckCannotGC nogc;
+    return s->hasLatin1Chars()
+           ? ::GetDeflatedUTF8StringLength(s->latin1Chars(nogc), s->length())
+           : ::GetDeflatedUTF8StringLength(s->twoByteChars(nogc), s->length());
 }
 
-/*
- * Write up to |*dstlenp| bytes into |dst|.  Writes the number of bytes used
- * into |*dstlenp| on success.  Returns false on failure.
- */
+static void
+PutUTF8ReplacementCharacter(mozilla::RangedPtr<char> &dst)
+{
+    *dst++ = char(0xEF);
+    *dst++ = char(0xBF);
+    *dst++ = char(0xBD);
+}
+
 template <typename CharT>
-static bool
-DeflateStringToUTF8Buffer(js::ThreadSafeContext *cx, const CharT *src, size_t srclen,
-                          char *dst, size_t *dstlenp)
+static void
+DeflateStringToUTF8Buffer(const CharT *src, size_t srclen, mozilla::RangedPtr<char> dst)
 {
-    size_t dstlen = *dstlenp;
-    size_t origDstlen = dstlen;
-
     while (srclen) {
         uint32_t v;
         jschar c = *src++;
         srclen--;
         if (c >= 0xDC00 && c <= 0xDFFF) {
-            if (!PutUTF8ReplacementCharacter(&dst, &dstlen))
-                goto bufferTooSmall;
+            PutUTF8ReplacementCharacter(dst);
             continue;
         } else if (c < 0xD800 || c > 0xDBFF) {
             v = c;
         } else {
             if (srclen < 1) {
-                if (!PutUTF8ReplacementCharacter(&dst, &dstlen))
-                    goto bufferTooSmall;
+                PutUTF8ReplacementCharacter(dst);
                 continue;
             }
             jschar c2 = *src;
             if ((c2 < 0xDC00) || (c2 > 0xDFFF)) {
-                if (!PutUTF8ReplacementCharacter(&dst, &dstlen))
-                    goto bufferTooSmall;
+                PutUTF8ReplacementCharacter(dst);
                 continue;
             }
             src++;
             srclen--;
             v = ((c - 0xD800) << 10) + (c2 - 0xDC00) + 0x10000;
         }
         size_t utf8Len;
         if (v < 0x0080) {
             /* no encoding necessary - performance hack */
-            if (dstlen == 0)
-                goto bufferTooSmall;
-            *dst++ = (char) v;
+            *dst++ = char(v);
             utf8Len = 1;
         } else {
             uint8_t utf8buf[4];
             utf8Len = js_OneUcs4ToUtf8Char(utf8buf, v);
-            if (utf8Len > dstlen)
-                goto bufferTooSmall;
             for (size_t i = 0; i < utf8Len; i++)
-                *dst++ = (char) utf8buf[i];
+                *dst++ = char(utf8buf[i]);
         }
-        dstlen -= utf8Len;
     }
-    *dstlenp = (origDstlen - dstlen);
-    return true;
+}
 
-bufferTooSmall:
-    *dstlenp = (origDstlen - dstlen);
-    if (cx->isJSContext()) {
-        js::gc::AutoSuppressGC suppress(cx->asJSContext());
-        JS_ReportErrorNumber(cx->asJSContext(), js_GetErrorMessage, nullptr,
-                             JSMSG_BUFFER_TOO_SMALL);
-    }
-    return false;
+JS_PUBLIC_API(void)
+JS::DeflateStringToUTF8Buffer(JSFlatString *src, mozilla::RangedPtr<char> dst)
+{
+    JS::AutoCheckCannotGC nogc;
+    return src->hasLatin1Chars()
+           ? ::DeflateStringToUTF8Buffer(src->latin1Chars(nogc), src->length(), dst)
+           : ::DeflateStringToUTF8Buffer(src->twoByteChars(nogc), src->length(), dst);
 }
 
 template <typename CharT>
 UTF8CharsZ
 JS::CharsToNewUTF8CharsZ(js::ThreadSafeContext *cx, const mozilla::Range<const CharT> chars)
 {
     JS_ASSERT(cx);
 
     /* Get required buffer size. */
     const CharT *str = chars.start().get();
-    size_t len = GetDeflatedUTF8StringLength(str, chars.length());
+    size_t len = ::GetDeflatedUTF8StringLength(str, chars.length());
 
     /* Allocate buffer. */
     char *utf8 = cx->pod_malloc<char>(len + 1);
     if (!utf8)
         return UTF8CharsZ();
 
     /* Encode to UTF8. */
-    JS_ALWAYS_TRUE(DeflateStringToUTF8Buffer(cx, str, chars.length(), utf8, &len));
+    ::DeflateStringToUTF8Buffer(str, chars.length(), mozilla::RangedPtr<char>(utf8, len));
     utf8[len] = '\0';
 
     return UTF8CharsZ(utf8, len);
 }
 
 template UTF8CharsZ
 JS::CharsToNewUTF8CharsZ(js::ThreadSafeContext *cx, const mozilla::Range<const Latin1Char> chars);