Bug 489089 - JSON.parse is way slower than it needs to be (r=igor/sayrer).
authorBrendan Eich <brendan@mozilla.org>
Tue, 21 Apr 2009 12:09:27 -0700
changeset 27634 89d209a23a3abae5bf33481517b856300f7d5c61
parent 27578 e0e3d87da9add6b0ef434d489778346a4a9ed36b
child 27635 2ee1b85ffe90af7e0d1f35983e859758ca2cad2e
push id6664
push userrsayre@mozilla.com
push dateWed, 22 Apr 2009 17:56:08 +0000
treeherdermozilla-central@ddd5fcb96d72 [default view] [failures only]
perfherder[talos] [build metrics] [platform microbench] (compared to previous push)
reviewersigor, sayrer
bugs489089
milestone1.9.2a1pre
Bug 489089 - JSON.parse is way slower than it needs to be (r=igor/sayrer).
js/src/json.cpp
js/src/json.h
js/src/jsscan.cpp
js/src/jsscan.h
--- a/js/src/json.cpp
+++ b/js/src/json.cpp
@@ -1,9 +1,12 @@
-/* ***** BEGIN LICENSE BLOCK *****
+/* -*- Mode: C; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 4 -*-
+ * vim: set ts=8 sw=4 et tw=99:
+ *
+ * ***** BEGIN LICENSE BLOCK *****
  * Version: MPL 1.1/GPL 2.0/LGPL 2.1
  *
  * The contents of this file are subject to the Mozilla Public License Version
  * 1.1 (the "License"); you may not use this file except in compliance with
  * the License. You may obtain a copy of the License at
  * http://www.mozilla.org/MPL/
  *
  * Software distributed under the License is distributed on an "AS IS" basis,
@@ -30,17 +33,17 @@
  * use your version of this file under the terms of the MPL, indicate your
  * decision by deleting the provisions above and replace them with the notice
  * and other provisions required by the GPL or the LGPL. If you do not delete
  * the provisions above, a recipient may use your version of this file under
  * the terms of any one of the MPL, the GPL or the LGPL.
  *
  * ***** END LICENSE BLOCK ***** */
 
-
+#include <string.h>     /* memset */
 #include "jsapi.h"
 #include "jsarena.h"
 #include "jsarray.h"
 #include "jsatom.h"
 #include "jsbool.h"
 #include "jscntxt.h"
 #include "jsdtoa.h"
 #include "jsinterp.h"
@@ -512,95 +515,90 @@ Revive(JSContext *cx, jsval reviver, jsv
     if (!OBJ_DEFINE_PROPERTY(cx, obj, ATOM_TO_JSID(cx->runtime->atomState.emptyAtom),
                              *vp, NULL, NULL, JSPROP_ENUMERATE, NULL)) {
         return JS_FALSE;
     }
 
     return Walk(cx, ATOM_TO_JSID(cx->runtime->atomState.emptyAtom), obj, reviver, vp);
 }
 
-
+#define JSON_INITIAL_BUFSIZE    256
 
 JSONParser *
 js_BeginJSONParse(JSContext *cx, jsval *rootVal)
 {
     if (!cx)
         return NULL;
 
     JSObject *arr = js_NewArrayObject(cx, 0, NULL);
     if (!arr)
         return NULL;
 
     JSONParser *jp = (JSONParser*) JS_malloc(cx, sizeof(JSONParser));
     if (!jp)
         return NULL;
-    jp->buffer = NULL;
+    memset(jp, 0, sizeof *jp);
 
     jp->objectStack = arr;
     if (!js_AddRoot(cx, &jp->objectStack, "JSON parse stack"))
         goto bad;
 
-    jp->hexChar = 0;
-    jp->numHex = 0;
     jp->statep = jp->stateStack;
     *jp->statep = JSON_PARSE_STATE_INIT;
     jp->rootVal = rootVal;
 
-    jp->objectKey = (JSStringBuffer*) JS_malloc(cx, sizeof(JSStringBuffer));
-    if (!jp->objectKey)
+    js_InitStringBuffer(&jp->objectKey);
+    js_InitStringBuffer(&jp->buffer);
+
+    if (!jp->buffer.grow(&jp->buffer, JSON_INITIAL_BUFSIZE)) {
+        JS_ReportOutOfMemory(cx);
         goto bad;
-    js_InitStringBuffer(jp->objectKey);
-    
-    jp->buffer = (JSStringBuffer*) JS_malloc(cx, sizeof(JSStringBuffer));
-    if (!jp->buffer)
-        goto bad;
-    js_InitStringBuffer(jp->buffer);
+    }
+    return jp;
 
-    return jp;
 bad:
-    JS_free(cx, jp->buffer);
-    JS_free(cx, jp);
+    js_FinishJSONParse(cx, jp, JSVAL_NULL);
     return NULL;
 }
 
 JSBool
 js_FinishJSONParse(JSContext *cx, JSONParser *jp, jsval reviver)
 {
     if (!jp)
         return JS_TRUE;
 
+    JSBool oom = JS_FALSE;
+
     // Check for unprocessed primitives at the root. This doesn't happen for
     // strings because a closing quote triggers value processing.
     if ((jp->statep - jp->stateStack) == 1) {
         if (*jp->statep == JSON_PARSE_STATE_KEYWORD) {
-            if (HandleData(cx, jp, JSON_DATA_KEYWORD)) {
+            oom = HandleData(cx, jp, JSON_DATA_KEYWORD);
+            if (!oom)
                 PopState(cx, jp);
-            }
         } else if (*jp->statep == JSON_PARSE_STATE_NUMBER) {
-            if (HandleData(cx, jp, JSON_DATA_NUMBER)) {
+            oom = HandleData(cx, jp, JSON_DATA_NUMBER);
+            if (!oom)
                 PopState(cx, jp);
-            }
         }
     }
 
-    if (jp->objectKey)
-        js_FinishStringBuffer(jp->objectKey);
-    JS_free(cx, jp->objectKey);
+    js_FinishStringBuffer(&jp->objectKey);
+    js_FinishStringBuffer(&jp->buffer);
 
-    if (jp->buffer)
-        js_FinishStringBuffer(jp->buffer);
-    JS_free(cx, jp->buffer);
-
-    if (!js_RemoveRoot(cx->runtime, &jp->objectStack))
-        return JS_FALSE;
+    /* This internal API is infallible, in spite of its JSBool return type. */
+    js_RemoveRoot(cx->runtime, &jp->objectStack);
 
     JSBool ok = *jp->statep == JSON_PARSE_STATE_FINISHED;
     jsval *vp = jp->rootVal;
     JS_free(cx, jp);
 
+    if (oom)
+        return JS_FALSE;
+
     if (!ok) {
         JS_ReportErrorNumber(cx, js_GetErrorMessage, NULL, JSMSG_JSON_BAD_PARSE);
         return JS_FALSE;
     }
 
     if (!JSVAL_IS_PRIMITIVE(reviver) && js_IsCallable(JSVAL_TO_OBJECT(reviver), cx))
         ok = Revive(cx, reviver, vp);
 
@@ -654,21 +652,20 @@ PushValue(JSContext *cx, JSONParser *jp,
         if (ok) {
             jsid index;
             if (!js_IndexToId(cx, len, &index))
                 return JS_FALSE;
             ok = OBJ_DEFINE_PROPERTY(cx, parent, index, value,
                                      NULL, NULL, JSPROP_ENUMERATE, NULL);
         }
     } else {
-        ok = JS_DefineUCProperty(cx, parent, jp->objectKey->base,
-                                 STRING_BUFFER_OFFSET(jp->objectKey), value,
+        ok = JS_DefineUCProperty(cx, parent, jp->objectKey.base,
+                                 STRING_BUFFER_OFFSET(&jp->objectKey), value,
                                  NULL, NULL, JSPROP_ENUMERATE);
-        js_FinishStringBuffer(jp->objectKey);
-        js_InitStringBuffer(jp->objectKey);
+        js_RewindStringBuffer(&jp->objectKey);
     }
 
     return ok;
 }
 
 static JSBool
 PushObject(JSContext *cx, JSONParser *jp, JSObject *obj)
 {
@@ -827,50 +824,48 @@ HandleKeyword(JSContext *cx, JSONParser 
     }
 
     return PushPrimitive(cx, jp, keyword);
 }
 
 static JSBool
 HandleData(JSContext *cx, JSONParser *jp, JSONDataType type)
 {
-  JSBool ok = JS_FALSE;
+    JSBool ok;
+
+    switch (type) {
+      case JSON_DATA_STRING:
+        ok = HandleString(cx, jp, jp->buffer.base, STRING_BUFFER_OFFSET(&jp->buffer));
+        break;
 
-  if (!STRING_BUFFER_OK(jp->buffer)) {
-      // what to call this error?
-      JS_ReportErrorNumber(cx, js_GetErrorMessage, NULL, JSMSG_JSON_BAD_PARSE);
-      return JS_FALSE;
-  }
-
-  switch (type) {
-    case JSON_DATA_STRING:
-      ok = HandleString(cx, jp, jp->buffer->base, STRING_BUFFER_OFFSET(jp->buffer));
-      break;
+      case JSON_DATA_KEYSTRING:
+        js_AppendUCString(&jp->objectKey, jp->buffer.base, STRING_BUFFER_OFFSET(&jp->buffer));
+        ok = STRING_BUFFER_OK(&jp->objectKey);
+        if (!ok)
+            JS_ReportOutOfMemory(cx);
+        break;
 
-    case JSON_DATA_KEYSTRING:
-      js_AppendUCString(jp->objectKey, jp->buffer->base, STRING_BUFFER_OFFSET(jp->buffer));
-      ok = STRING_BUFFER_OK(jp->objectKey);
-      break;
-
-    case JSON_DATA_NUMBER:
-      ok = HandleNumber(cx, jp, jp->buffer->base, STRING_BUFFER_OFFSET(jp->buffer));
-      break;
+      case JSON_DATA_NUMBER:
+        ok = HandleNumber(cx, jp, jp->buffer.base, STRING_BUFFER_OFFSET(&jp->buffer));
+        break;
 
-    case JSON_DATA_KEYWORD:
-      ok = HandleKeyword(cx, jp, jp->buffer->base, STRING_BUFFER_OFFSET(jp->buffer));
-      break;
+      default:
+        JS_ASSERT(type == JSON_DATA_KEYWORD);
+        ok = HandleKeyword(cx, jp, jp->buffer.base, STRING_BUFFER_OFFSET(&jp->buffer));
+        break;
+    }
 
-    default:
-      JS_NOT_REACHED("Should have a JSON data type");
-  }
-
-  js_FinishStringBuffer(jp->buffer);
-  js_InitStringBuffer(jp->buffer);
-
-  return ok;
+    if (ok) {
+        ok = STRING_BUFFER_OK(&jp->buffer);
+        if (ok)
+            js_RewindStringBuffer(&jp->buffer);
+        else
+            JS_ReportOutOfMemory(cx);
+    }
+    return ok;
 }
 
 JSBool
 js_ConsumeJSONText(JSContext *cx, JSONParser *jp, const jschar *data, uint32 len)
 {
     uint32 i;
 
     if (*jp->statep == JSON_PARSE_STATE_INIT) {
@@ -905,23 +900,23 @@ js_ConsumeJSONText(JSContext *cx, JSONPa
 
             if (c == '"') {
                 *jp->statep = JSON_PARSE_STATE_STRING;
                 break;
             }
 
             if (IsNumChar(c)) {
                 *jp->statep = JSON_PARSE_STATE_NUMBER;
-                js_AppendChar(jp->buffer, c);
+                js_FastAppendChar(&jp->buffer, c);
                 break;
             }
 
             if (JS7_ISLET(c)) {
                 *jp->statep = JSON_PARSE_STATE_KEYWORD;
-                js_AppendChar(jp->buffer, c);
+                js_FastAppendChar(&jp->buffer, c);
                 break;
             }
 
           // fall through in case the value is an object or array
           case JSON_PARSE_STATE_OBJECT_VALUE:
             if (c == '{') {
                 *jp->statep = JSON_PARSE_STATE_OBJECT;
                 if (!OpenObject(cx, jp) || !PushState(cx, jp, JSON_PARSE_STATE_OBJECT_PAIR))
@@ -997,17 +992,17 @@ js_ConsumeJSONText(JSContext *cx, JSONPa
                 } else {
                     jdt = JSON_DATA_STRING;
                 }
                 if (!HandleData(cx, jp, jdt))
                     return JS_FALSE;
             } else if (c == '\\') {
                 *jp->statep = JSON_PARSE_STATE_STRING_ESCAPE;
             } else {
-                js_AppendChar(jp->buffer, c);
+                js_FastAppendChar(&jp->buffer, c);
             }
             break;
 
           case JSON_PARSE_STATE_STRING_ESCAPE:
             switch (c) {
               case '"':
               case '\\':
               case '/':
@@ -1024,57 +1019,57 @@ js_ConsumeJSONText(JSContext *cx, JSONPa
                     *jp->statep = JSON_PARSE_STATE_STRING_HEX;
                     continue;
                 } else {
                     JS_ReportErrorNumber(cx, js_GetErrorMessage, NULL, JSMSG_JSON_BAD_PARSE);
                     return JS_FALSE;
                 }
             }
 
-            js_AppendChar(jp->buffer, c);
+            js_FastAppendChar(&jp->buffer, c);
             *jp->statep = JSON_PARSE_STATE_STRING;
             break;
 
           case JSON_PARSE_STATE_STRING_HEX:
             if (('0' <= c) && (c <= '9')) {
                 jp->hexChar = (jp->hexChar << 4) | (c - '0');
             } else if (('a' <= c) && (c <= 'f')) {
                 jp->hexChar = (jp->hexChar << 4) | (c - 'a' + 0x0a);
             } else if (('A' <= c) && (c <= 'F')) {
                 jp->hexChar = (jp->hexChar << 4) | (c - 'A' + 0x0a);
             } else {
                 JS_ReportErrorNumber(cx, js_GetErrorMessage, NULL, JSMSG_JSON_BAD_PARSE);
                 return JS_FALSE;
             }
             
             if (++(jp->numHex) == 4) {
-                js_AppendChar(jp->buffer, jp->hexChar);
+                js_FastAppendChar(&jp->buffer, jp->hexChar);
                 jp->hexChar = 0;
                 jp->numHex = 0;
                 *jp->statep = JSON_PARSE_STATE_STRING;
             }
             break;
 
           case JSON_PARSE_STATE_KEYWORD:
             if (JS7_ISLET(c)) {
-                js_AppendChar(jp->buffer, c);
+                js_FastAppendChar(&jp->buffer, c);
             } else {
                 // this character isn't part of the keyword, process it again
                 i--;
                 if (!PopState(cx, jp))
                     return JS_FALSE;
             
                 if (!HandleData(cx, jp, JSON_DATA_KEYWORD))
                     return JS_FALSE;
             }
             break;
 
           case JSON_PARSE_STATE_NUMBER:
             if (IsNumChar(c)) {
-                js_AppendChar(jp->buffer, c);
+                js_FastAppendChar(&jp->buffer, c);
             } else {
                 // this character isn't part of the number, process it again
                 i--;
                 if (!PopState(cx, jp))
                     return JS_FALSE;
                 if (!HandleData(cx, jp, JSON_DATA_NUMBER))
                     return JS_FALSE;
             }
@@ -1085,19 +1080,25 @@ js_ConsumeJSONText(JSContext *cx, JSONPa
                 // extra input
                 JS_ReportErrorNumber(cx, js_GetErrorMessage, NULL, JSMSG_JSON_BAD_PARSE);
                 return JS_FALSE;
             }
             break;
 
           default:
             JS_NOT_REACHED("Invalid JSON parser state");
-      }
+        }
+
+        if (!STRING_BUFFER_OK(&jp->buffer)) {
+            JS_ReportOutOfMemory(cx);
+            return JS_FALSE;
+        }
     }
 
+    *jp->buffer.ptr = 0;
     return JS_TRUE;
 }
 
 #if JS_HAS_TOSOURCE
 static JSBool
 json_toSource(JSContext *cx, uintN argc, jsval *vp)
 {
     *vp = ATOM_KEY(CLASS_ATOM(cx, JSON));
--- a/js/src/json.h
+++ b/js/src/json.h
@@ -35,16 +35,17 @@
  *
  * ***** END LICENSE BLOCK ***** */
 
 #ifndef json_h___
 #define json_h___
 /*
  * JS JSON functions.
  */
+#include "jsscan.h"
 
 #define JSON_MAX_DEPTH  2048
 #define JSON_PARSER_BUFSIZE 1024
 
 JS_BEGIN_EXTERN_C
 
 extern JSClass js_JSONClass;
 
@@ -83,18 +84,18 @@ enum JSONDataType {
 struct JSONParser {
     /* Used while handling \uNNNN in strings */
     jschar hexChar;
     uint8 numHex;
 
     JSONParserState *statep;
     JSONParserState stateStack[JSON_MAX_DEPTH];
     jsval *rootVal;
-    JSStringBuffer *objectKey;
-    JSStringBuffer *buffer;
+    JSStringBuffer objectKey;
+    JSStringBuffer buffer;
     JSObject *objectStack;
 };
 
 extern JSONParser *
 js_BeginJSONParse(JSContext *cx, jsval *rootVal);
 
 extern JSBool
 js_ConsumeJSONText(JSContext *cx, JSONParser *jp, const jschar *data, uint32 len);
--- a/js/src/jsscan.cpp
+++ b/js/src/jsscan.cpp
@@ -49,16 +49,17 @@
 #include <memory.h>
 #endif
 #include <stdarg.h>
 #include <stdlib.h>
 #include <string.h>
 #include "jstypes.h"
 #include "jsstdint.h"
 #include "jsarena.h" /* Added by JSIFY */
+#include "jsbit.h"
 #include "jsutil.h" /* Added by JSIFY */
 #include "jsdtoa.h"
 #include "jsprf.h"
 #include "jsapi.h"
 #include "jsatom.h"
 #include "jscntxt.h"
 #include "jsversion.h"
 #include "jsemit.h"
@@ -678,16 +679,23 @@ static JSBool
 GrowStringBuffer(JSStringBuffer *sb, size_t newlength)
 {
     ptrdiff_t offset;
     jschar *bp;
 
     offset = sb->ptr - sb->base;
     JS_ASSERT(offset >= 0);
     newlength += offset + 1;
+
+    /* Grow by powers of two until 16MB, then grow by that chunk size. */
+    const size_t CHUNK_SIZE = JS_BIT(24);
+    if (newlength < CHUNK_SIZE)
+        newlength = JS_BIT(JS_CeilingLog2(newlength));
+    else
+        newlength = JS_ROUNDUP(newlength, CHUNK_SIZE);
     if ((size_t)offset < newlength && newlength < ~(size_t)0 / sizeof(jschar))
         bp = (jschar *) realloc(sb->base, newlength * sizeof(jschar));
     else
         bp = NULL;
     if (!bp) {
         free(sb->base);
         sb->base = STRING_BUFFER_ERROR_BASE;
         return JS_FALSE;
@@ -716,29 +724,16 @@ js_InitStringBuffer(JSStringBuffer *sb)
 }
 
 void
 js_FinishStringBuffer(JSStringBuffer *sb)
 {
     sb->free(sb);
 }
 
-#define ENSURE_STRING_BUFFER(sb,n) \
-    ((sb)->ptr + (n) <= (sb)->limit || sb->grow(sb, n))
-
-static void
-FastAppendChar(JSStringBuffer *sb, jschar c)
-{
-    if (!STRING_BUFFER_OK(sb))
-        return;
-    if (!ENSURE_STRING_BUFFER(sb, 1))
-        return;
-    *sb->ptr++ = c;
-}
-
 void
 js_AppendChar(JSStringBuffer *sb, jschar c)
 {
     jschar *bp;
 
     if (!STRING_BUFFER_OK(sb))
         return;
     if (!ENSURE_STRING_BUFFER(sb, 1))
@@ -812,26 +807,26 @@ GetXMLEntity(JSContext *cx, JSTokenStrea
     int32 c, d;
     JSBool ispair;
     jschar *bp, digit;
     char *bytes;
     JSErrNum msg;
 
     /* Put the entity, including the '&' already scanned, in ts->tokenbuf. */
     offset = ts->tokenbuf.ptr - ts->tokenbuf.base;
-    FastAppendChar(&ts->tokenbuf, '&');
+    js_FastAppendChar(&ts->tokenbuf, '&');
     if (!STRING_BUFFER_OK(&ts->tokenbuf))
         return JS_FALSE;
     while ((c = GetChar(ts)) != ';') {
         if (c == EOF || c == '\n') {
             js_ReportCompileErrorNumber(cx, ts, NULL, JSREPORT_ERROR,
                                         JSMSG_END_OF_XML_ENTITY);
             return JS_FALSE;
         }
-        FastAppendChar(&ts->tokenbuf, (jschar) c);
+        js_FastAppendChar(&ts->tokenbuf, (jschar) c);
         if (!STRING_BUFFER_OK(&ts->tokenbuf))
             return JS_FALSE;
     }
 
     /* Let length be the number of jschars after the '&', including the ';'. */
     length = (ts->tokenbuf.ptr - ts->tokenbuf.base) - offset;
     bp = ts->tokenbuf.base + offset;
     c = d = 0;
@@ -1023,17 +1018,21 @@ js_GetToken(JSContext *cx, JSTokenStream
 #define TOKENBUF_LENGTH()   (ts->tokenbuf.ptr - ts->tokenbuf.base)
 #define TOKENBUF_OK()       STRING_BUFFER_OK(&ts->tokenbuf)
 #define TOKENBUF_TO_ATOM()  (TOKENBUF_OK()                                    \
                              ? js_AtomizeChars(cx,                            \
                                                TOKENBUF_BASE(),               \
                                                TOKENBUF_LENGTH(),             \
                                                0)                             \
                              : NULL)
-#define ADD_TO_TOKENBUF(c)  FastAppendChar(&ts->tokenbuf, (jschar) (c))
+#define ADD_TO_TOKENBUF(c)  JS_BEGIN_MACRO                                    \
+                                js_FastAppendChar(&ts->tokenbuf, jschar(c));  \
+                                if (!TOKENBUF_OK())                           \
+                                    goto error;                               \
+                            JS_END_MACRO
 
 /* The following 4 macros should only be used when TOKENBUF_OK() is true. */
 #define TOKENBUF_BASE()     (ts->tokenbuf.base)
 #define TOKENBUF_END()      (ts->tokenbuf.ptr)
 #define TOKENBUF_CHAR(i)    (ts->tokenbuf.base[i])
 #define TRIM_TOKENBUF(i)    (ts->tokenbuf.ptr = ts->tokenbuf.base + i)
 #define NUL_TERM_TOKENBUF() (*ts->tokenbuf.ptr = 0)
 
--- a/js/src/jsscan.h
+++ b/js/src/jsscan.h
@@ -171,16 +171,49 @@ struct JSStringBuffer {
 #define STRING_BUFFER_OFFSET(sb)        ((sb)->ptr -(sb)->base)
 
 extern void
 js_InitStringBuffer(JSStringBuffer *sb);
 
 extern void
 js_FinishStringBuffer(JSStringBuffer *sb);
 
+static inline void
+js_RewindStringBuffer(JSStringBuffer *sb)
+{
+    JS_ASSERT(STRING_BUFFER_OK(sb));
+    sb->ptr = sb->base;
+}
+
+#define ENSURE_STRING_BUFFER(sb,n) \
+    ((sb)->ptr + (n) <= (sb)->limit || sb->grow(sb, n))
+
+/*
+ * NB: callers are obligated to test STRING_BUFFER_OK(sb) after this returns,
+ * before calling it again -- but not necessarily before calling other sb ops
+ * declared in this header file.
+ *
+ * Thus multiple calls, to ops other than this one that check STRING_BUFFER_OK
+ * and suppress updating sb if true, can consolidate the final STRING_BUFFER_OK
+ * test that conditions a JS_ReportOutOfMemory (if necessary -- the grow hook
+ * can report OOM early, obviating the need for the callers to report).
+ *
+ * This style of error checking is not obviously better, and it could be worse
+ * in efficiency, than the propagated failure return code style used elsewhere
+ * in the engine. I view it as a failed experiment. /be
+ */
+static inline void
+js_FastAppendChar(JSStringBuffer *sb, jschar c)
+{
+    JS_ASSERT(STRING_BUFFER_OK(sb));
+    if (!ENSURE_STRING_BUFFER(sb, 1))
+        return;
+    *sb->ptr++ = c;
+}
+
 extern void
 js_AppendChar(JSStringBuffer *sb, jschar c);
 
 extern void
 js_RepeatChar(JSStringBuffer *sb, jschar c, uintN count);
 
 extern void
 js_AppendCString(JSStringBuffer *sb, const char *asciiz);