Bug 811911 - Allow UTF-8 output from the SpiderMonkey shell; r=Norbert
author Terrence Cole <terrence@mozilla.com>
Fri, 16 Nov 2012 18:14:51 -0800
changeset 113940 f7f8011950c99e8af0848b310f4009b959a7b378
parent 113939 c4e31e87a072661019b41630c30e3642828435d0
child 113941 bfd5f652e5f068c5c04057102c18cde8f2f0026e
push id 18464
push user tcole@mozilla.com
push date Wed, 21 Nov 2012 22:21:22 +0000
treeherder mozilla-inbound@f7f8011950c9 [default view] [failures only]
perfherder [talos] [build metrics] [platform microbench] (compared to previous push)
reviewers Norbert
bugs 811911
milestone 20.0a1
first release with
nightly linux32
nightly linux64
nightly mac
nightly win32
nightly win64
last release without
nightly linux32
nightly linux64
nightly mac
nightly win32
nightly win64
Bug 811911 - Allow UTF-8 output from the SpiderMonkey shell; r=Norbert
js/src/shell/js.cpp
--- a/js/src/shell/js.cpp
+++ b/js/src/shell/js.cpp
@@ -296,16 +296,153 @@ GetLine(FILE *file, const char * prompt)
         current = buffer + len;
     }
     if (len && !ferror(file))
         return buffer;
     free(buffer);
     return NULL;
 }
 
+static size_t /* Returns the number of UTF-8 bytes needed to encode the |nchars| UTF-16 units at |chars| (no terminator counted). */
+GetDeflatedUTF8StringLength(JSContext *cx, const jschar *chars, /* |cx| is not used by this function. */
+                            size_t nchars)
+{
+    size_t nbytes;
+    const jschar *end;
+    unsigned c, c2;
+
+    nbytes = nchars; /* Start with one byte per UTF-16 unit; the loop below only adjusts for non-ASCII units. */
+    for (end = chars + nchars; chars != end; chars++) {
+        c = *chars;
+        if (c < 0x80) /* ASCII: the one byte already counted is enough. */
+            continue;
+        if (0xD800 <= c && c <= 0xDFFF) {
+            /* Surrogate range: one byte has already been counted for this unit. */
+            if (c >= 0xDC00 || (chars + 1) == end) { /* Low surrogate first, or high surrogate at end of input. */
+                nbytes += 2; /* Bad surrogate: 1 + 2 = 3 bytes in total. */
+                continue;
+            }
+            c2 = chars[1];
+            if (c2 < 0xDC00 || c2 > 0xDFFF) { /* High surrogate not followed by a low surrogate. */
+                nbytes += 2; /* Bad surrogate: 3 bytes in total; chars[1] is examined again on the next iteration. */
+                continue;
+            }
+            c = ((c - 0xD800) << 10) + (c2 - 0xDC00) + 0x10000; /* Combine the pair into one code point. */
+            nbytes--; /* Two units were counted as two bytes; drop one before sizing the combined code point. */
+            chars++; /* Skip the low surrogate that was just consumed. */
+        }
+        c >>= 11; /* Code points below 0x800 fit in two bytes... */
+        nbytes++; /* ...so every non-ASCII code point costs at least one extra byte. */
+        while (c) { /* Each further group of 5 significant bits costs one more byte. */
+            c >>= 5;
+            nbytes++;
+        }
+    }
+    return nbytes;
+}
+
+static bool /* Appends the 3-byte UTF-8 encoding of U+FFFD at |*dst|; returns false, writing nothing, if fewer than 3 bytes remain. */
+PutUTF8ReplacementCharacter(char **dst, size_t *dstlenp) { /* |*dstlenp| is the space remaining at |*dst|. */
+    if (*dstlenp < 3)
+        return false;
+    *(*dst)++ = (char) 0xEF; /* EF BF BD is U+FFFD REPLACEMENT CHARACTER in UTF-8. */
+    *(*dst)++ = (char) 0xBF;
+    *(*dst)++ = (char) 0xBD;
+    *dstlenp -= 3; /* |*dst| was advanced by three bytes above; shrink the remaining space to match. */
+    return true;
+}
+
+/*
+ * Encode the |srclen| UTF-16 units at |src| as UTF-8 into |dst|, which has room for |*dstlenp| bytes. Unpaired surrogates are written as U+FFFD.
+ * On return, success or failure, |*dstlenp| holds the number of bytes written. If |dst| is too small, reports JSMSG_BUFFER_TOO_SMALL on |cx| and returns false. No terminator is written.
+ */
+static bool
+DeflateStringToUTF8Buffer(JSContext *cx, const jschar *src, size_t srclen,
+                          char *dst, size_t *dstlenp)
+{
+    size_t dstlen = *dstlenp; /* Bytes still free in |dst|. */
+    size_t origDstlen = dstlen; /* Kept so the number of bytes written can be computed at the end. */
+
+    while (srclen) {
+        uint32_t v; /* Code point to encode on this iteration. */
+        jschar c = *src++;
+        srclen--;
+        if (c >= 0xDC00 && c <= 0xDFFF) { /* Low surrogate with no preceding high surrogate. */
+            if (!PutUTF8ReplacementCharacter(&dst, &dstlen))
+                goto bufferTooSmall;
+            continue;
+        } else if (c < 0xD800 || c > 0xDBFF) { /* Not a surrogate: the unit is the code point. */
+            v = c;
+        } else { /* High surrogate: must be followed by a low surrogate. */
+            if (srclen < 1) { /* High surrogate is the last unit of the input. */
+                if (!PutUTF8ReplacementCharacter(&dst, &dstlen))
+                    goto bufferTooSmall;
+                continue;
+            }
+            jschar c2 = *src; /* Peek only; |src| is advanced below once the pair is known to be valid. */
+            if ((c2 < 0xDC00) || (c2 > 0xDFFF)) { /* Unpaired high surrogate; |c2| is processed on the next iteration. */
+                if (!PutUTF8ReplacementCharacter(&dst, &dstlen))
+                    goto bufferTooSmall;
+                continue;
+            }
+            src++;
+            srclen--;
+            v = ((c - 0xD800) << 10) + (c2 - 0xDC00) + 0x10000; /* Combine the surrogate pair into one code point. */
+        }
+        size_t utf8Len;
+        if (v < 0x0080) {
+            /* ASCII needs no encoding: copy the byte directly (performance shortcut). */
+            if (dstlen == 0)
+                goto bufferTooSmall;
+            *dst++ = (char) v;
+            utf8Len = 1;
+        } else {
+            uint8_t utf8buf[4];
+            utf8Len = js_OneUcs4ToUtf8Char(utf8buf, v); /* Defined elsewhere; presumably returns the byte count stored in utf8buf -- confirm. */
+            if (utf8Len > dstlen)
+                goto bufferTooSmall;
+            for (size_t i = 0; i < utf8Len; i++)
+                *dst++ = (char) utf8buf[i];
+        }
+        dstlen -= utf8Len;
+    }
+    *dstlenp = (origDstlen - dstlen); /* Bytes written. */
+    return true;
+
+bufferTooSmall:
+    *dstlenp = (origDstlen - dstlen); /* Bytes written before space ran out. */
+    JS_ReportErrorNumber(cx, js_GetErrorMessage, NULL, JSMSG_BUFFER_TOO_SMALL);
+    return false;
+}
+
+static char * /* Returns a newly allocated, NUL-terminated UTF-8 copy of |str|, or NULL on failure. */
+JSStringToUTF8(JSContext *cx, JSString *str) /* The result comes from cx->pod_malloc; callers in this file release it with JS_free. */
+{
+    JSLinearString *linear = str->ensureLinear(cx);
+    if (!linear) /* Could not obtain a linear string. */
+        return NULL;
+
+    const jschar *chars = linear->chars();
+    size_t length = linear->length();
+
+    size_t tgtlen = GetDeflatedUTF8StringLength(cx, chars, length); /* UTF-8 byte count, computed up front to size the buffer. */
+    char *utf8chars = cx->pod_malloc<char>(tgtlen + 1); /* +1 leaves room for the terminating NUL written below. */
+    if (!utf8chars)
+        return NULL;
+
+    bool ok = DeflateStringToUTF8Buffer(cx, chars, length, utf8chars, &tgtlen); /* Updates |tgtlen| to the number of bytes written. */
+    if (!ok) {
+        JS_free(cx, utf8chars); /* Do not leak the buffer when encoding fails. */
+        return NULL;
+    }
+
+    utf8chars[tgtlen] = 0; /* The encoder writes no terminator, so add it here. */
+    return utf8chars;
+}
+
 /*
  * State to store as JSContext private.
  *
  * We declare such timestamp as volatile as they are updated in the operation
  * callback without taking any locks. Any possible race can only lead to more
  * frequent callback calls. This is safe as the callback does everything based
  * on timing.
  */
@@ -540,20 +677,22 @@ Process(JSContext *cx, JSObject *obj_, c
         JS_ASSERT_IF(!script, gGotError);
 
         if (script && !compileOnly) {
             ok = JS_ExecuteScript(cx, obj, script, &result);
             if (ok && !JSVAL_IS_VOID(result)) {
                 str = JS_ValueToSource(cx, result);
                 ok = !!str;
                 if (ok) {
-                    JSAutoByteString bytes(cx, str);
-                    ok = !!bytes;
-                    if (ok)
-                        fprintf(gOutFile, "%s\n", bytes.ptr());
+                    char *utf8chars = JSStringToUTF8(cx, str);
+                    ok = !!utf8chars;
+                    if (ok) {
+                        fprintf(gOutFile, "%s\n", utf8chars);
+                        JS_free(cx, utf8chars);
+                    }
                 }
             }
         }
         *buffer = '\0';
         free(uc_buffer);
     } while (!hitEOF && !gQuitting);
 
     free(buffer);
@@ -1159,17 +1298,17 @@ PutStr(JSContext *cx, unsigned argc, jsv
     JSString *str;
     char *bytes;
 
     if (argc != 0) {
         argv = JS_ARGV(cx, vp);
         str = JS_ValueToString(cx, argv[0]);
         if (!str)
             return false;
-        bytes = JS_EncodeString(cx, str);
+        bytes = JSStringToUTF8(cx, str);
         if (!bytes)
             return false;
         fputs(bytes, gOutFile);
         JS_free(cx, bytes);
         fflush(gOutFile);
     }
 
     JS_SET_RVAL(cx, vp, JSVAL_VOID);
@@ -1192,17 +1331,17 @@ PrintInternal(JSContext *cx, unsigned ar
     JSString *str;
     char *bytes;
 
     argv = JS_ARGV(cx, vp);
     for (i = 0; i < argc; i++) {
         str = JS_ValueToString(cx, argv[i]);
         if (!str)
             return false;
-        bytes = JS_EncodeString(cx, str);
+        bytes = JSStringToUTF8(cx, str);
         if (!bytes)
             return false;
         fprintf(file, "%s%s", i ? " " : "", bytes);
 #if JS_TRACE_LOGGING
         TraceLog(TraceLogging::defaultLogger(), bytes);
 #endif
         JS_free(cx, bytes);
     }
@@ -4196,17 +4335,17 @@ Exec(JSContext *cx, unsigned argc, jsval
     nargv[0] = name;
     jsval *argv = JS_ARGV(cx, vp);
     for (i = 0; i < nargc; i++) {
         str = (i == 0) ? fun->atom : JS_ValueToString(cx, argv[i-1]);
         if (!str) {
             ok = false;
             goto done;
         }
-        nargv[i] = JS_EncodeString(cx, str);
+        nargv[i] = JSStringToUTF8(cx, str);
         if (!nargv[i]) {
             ok = false;
             goto done;
         }
     }
     pid = fork();
     switch (pid) {
       case -1: