Bug 634444 - Don't show the whole line in error messages, to avoid memory spikes. r=jwalden.
authorNicholas Nethercote <nnethercote@mozilla.com>
Thu, 17 Feb 2011 19:02:48 -0800
changeset 101336 451ed5328312eef52bd4c7b762fbbed6a6dafc7c
parent 101335 5387220c0609850b533bba269ed449d4b8f3eeb6
child 101337 129be81434844046734ee1157c382f768ccc97a6
push id1316
push userakeybl@mozilla.com
push dateMon, 27 Aug 2012 22:37:00 +0000
treeherdermozilla-beta@db4b09302ee2 [default view] [failures only]
perfherder[talos] [build metrics] [platform microbench] (compared to previous push)
reviewersjwalden
bugs634444
milestone16.0a1
first release with
nightly linux32
nightly linux64
nightly mac
nightly win32
nightly win64
last release without
nightly linux32
nightly linux64
nightly mac
nightly win32
nightly win64
Bug 634444 - Don't show the whole line in error messages, to avoid memory spikes. r=jwalden.
js/src/frontend/TokenStream.cpp
js/src/frontend/TokenStream.h
js/src/vm/StringBuffer.h
--- a/js/src/frontend/TokenStream.cpp
+++ b/js/src/frontend/TokenStream.cpp
@@ -29,16 +29,17 @@
 #include "jsnum.h"
 #include "jsopcode.h"
 #include "jsscript.h"
 
 #include "frontend/Parser.h"
 #include "frontend/TokenStream.h"
 #include "frontend/TreeContext.h"
 #include "vm/RegExpObject.h"
+#include "vm/StringBuffer.h"
 
 #include "jsscriptinlines.h"
 
 #if JS_HAS_XML_SUPPORT
 #include "jsxml.h"
 #endif
 
 using namespace js;
@@ -369,45 +370,40 @@ TokenStream::peekChars(int n, jschar *cp
         cp[i] = (jschar)c;
     }
     for (j = i - 1; j >= 0; j--)
         ungetCharIgnoreEOL(cp[j]);
     return i == n;
 }
 
 const jschar *
-TokenStream::TokenBuf::findEOL()
+TokenStream::TokenBuf::findEOLMax(const jschar *p, size_t max)
 {
-    const jschar *tmp = ptr;
-#ifdef DEBUG
-    /*
-     * This is the one exception to the "TokenBuf isn't accessed after
-     * poisoning" rule -- we may end up calling findEOL() in order to set up
-     * an error.
-     */
-    if (!tmp)
-        tmp = ptrWhenPoisoned;
-#endif
+    JS_ASSERT(base <= p && p <= limit);
 
+    size_t n = 0;
     while (true) {
-        if (tmp >= limit)
+        if (p >= limit)
+            break;
+        if (n >= max)
             break;
-        if (TokenBuf::isRawEOLChar(*tmp++))
+        if (TokenBuf::isRawEOLChar(*p++))
             break;
+        n++;
     }
-    return tmp;
+    return p;
 }
 
 bool
 TokenStream::reportCompileErrorNumberVA(ParseNode *pn, unsigned flags, unsigned errorNumber, va_list ap)
 {
     JSErrorReport report;
     char *message;
-    jschar *linechars;
-    char *linebytes;
+    jschar *windowChars;
+    char *windowBytes;
     bool warning;
     JSBool ok;
     const TokenPos *tp;
     unsigned i;
 
     if (JSREPORT_IS_STRICT(flags) && !cx->hasStrictOption())
         return true;
 
@@ -416,18 +412,18 @@ TokenStream::reportCompileErrorNumberVA(
         flags &= ~JSREPORT_WARNING;
         warning = false;
     }
 
     PodZero(&report);
     report.flags = flags;
     report.errorNumber = errorNumber;
     message = NULL;
-    linechars = NULL;
-    linebytes = NULL;
+    windowChars = NULL;
+    windowBytes = NULL;
 
     MUST_FLOW_THROUGH("out");
     ok = js_ExpandErrorArguments(cx, js_GetErrorMessage, NULL,
                                  errorNumber, &message, &report,
                                  !(flags & JSREPORT_UC), ap);
     if (!ok) {
         warning = false;
         goto out;
@@ -445,39 +441,63 @@ TokenStream::reportCompileErrorNumberVA(
      * the line that T starts on, which makes it hard to print some or all of
      * T's (starting) line for context.
      *
      * So we don't even try, leaving report.linebuf and friends zeroed.  This
      * means that any error involving a multi-line token (eg. an unterminated
      * multi-line string literal) won't have a context printed.
      */
     if (report.lineno == lineno) {
-        size_t linelength = userbuf.findEOL() - linebase;
+        const jschar *tokptr = linebase + tp->begin.index;
+
+        // We show only a portion (a "window") of the line around the erroneous
+        // token -- the first char in the token, plus |windowRadius| chars
+        // before it and |windowRadius - 1| chars after it.  This is because
+        // lines can be very long and printing the whole line is (a) not that
+        // helpful, and (b) can waste a lot of memory.  See bug 634444.
+        static const size_t windowRadius = 60;
 
-        linechars = (jschar *)cx->malloc_((linelength + 1) * sizeof(jschar));
-        if (!linechars) {
+        // Truncate at the front if necessary.
+        const jschar *windowBase = (linebase + windowRadius < tokptr)
+                                 ? tokptr - windowRadius
+                                 : linebase;
+        size_t nTrunc = windowBase - linebase;
+        uint32_t windowIndex = tp->begin.index - nTrunc;
+
+        // Find EOL, or truncate at the back if necessary.
+        const jschar *windowLimit = userbuf.findEOLMax(tokptr, windowRadius);
+        size_t windowLength = windowLimit - windowBase;
+        JS_ASSERT(windowLength <= windowRadius * 2);
+
+        // Create the windowed strings.
+        StringBuffer windowBuf(cx);
+        if (!windowBuf.append(windowBase, windowLength) || !windowBuf.append((jschar)0)) {
             warning = false;
             goto out;
         }
-        PodCopy(linechars, linebase, linelength);
-        linechars[linelength] = 0;
-        linebytes = DeflateString(cx, linechars, linelength);
-        if (!linebytes) {
+        windowChars = windowBuf.extractWellSized();
+        if (!windowChars) {
+            warning = false;
+            goto out;
+        }
+        windowBytes = DeflateString(cx, windowChars, windowLength);
+        if (!windowBytes) {
             warning = false;
             goto out;
         }
 
-        /* Unicode and char versions of the offending source line, without final \n */
-        report.linebuf = linebytes;
-        report.uclinebuf = linechars;
+        // Unicode and char versions of the window into the offending source
+        // line, without final \n.
+        report.linebuf = windowBytes;
+        report.uclinebuf = windowChars;
 
-        /* The lineno check above means we should only see single-line tokens here. */
+        // The lineno check above means we should only see single-line tokens here.
         JS_ASSERT(tp->begin.lineno == tp->end.lineno);
-        report.tokenptr = report.linebuf + tp->begin.index;
-        report.uctokenptr = report.uclinebuf + tp->begin.index;
+        report.tokenptr = report.linebuf + windowIndex;
+        report.uctokenptr = report.uclinebuf + windowIndex;
     }
 
     /*
      * If there's a runtime exception type associated with this error
      * number, set that as the pending exception.  For errors occuring at
      * compile time, this is very likely to be a JSEXN_SYNTAXERR.
      *
      * If an exception is thrown but not caught, the JSREPORT_EXCEPTION
@@ -498,20 +518,20 @@ TokenStream::reportCompileErrorNumberVA(
             reportError = hook(cx, message, &report, cx->runtime->debugHooks.debugErrorHookData);
 
         /* Report the error */
         if (reportError && cx->errorReporter)
             cx->errorReporter(cx, message, &report);
     }
 
   out:
-    if (linebytes)
-        cx->free_(linebytes);
-    if (linechars)
-        cx->free_(linechars);
+    if (windowBytes)
+        cx->free_(windowBytes);
+    if (windowChars)
+        cx->free_(windowChars);
     if (message)
         cx->free_(message);
     if (report.ucmessage)
         cx->free_((void *)report.ucmessage);
 
     if (report.messageArgs) {
         if (!(flags & JSREPORT_UC)) {
             i = 0;
--- a/js/src/frontend/TokenStream.h
+++ b/js/src/frontend/TokenStream.h
@@ -651,17 +651,17 @@ class TokenStream
      * gets raw chars, basically.  TokenStreams functions are layered on top
      * and do some extra stuff like converting all EOL sequences to '\n',
      * tracking the line number, and setting the TSF_EOF flag.  (The "raw" in
      * "raw chars" refers to the lack of EOL sequence normalization.)
      */
     class TokenBuf {
       public:
         TokenBuf(const jschar *buf, size_t length)
-          : base(buf), limit(buf + length), ptr(buf), ptrWhenPoisoned(NULL) { }
+          : base(buf), limit(buf + length), ptr(buf) { }
 
         bool hasRawChars() const {
             return ptr < limit;
         }
 
         bool atStart() const {
             return ptr == base;
         }
@@ -703,38 +703,34 @@ class TokenStream
 
         /* Use this with caution! */
         void setAddressOfNextRawChar(const jschar *a) {
             JS_ASSERT(a);
             ptr = a;
         }
 
 #ifdef DEBUG
-        /*
-         * Poison the TokenBuf so it cannot be accessed again.  There's one
-         * exception to this rule -- see findEOL() -- which is why
-         * ptrWhenPoisoned exists.
-         */
+        /* Poison the TokenBuf so it cannot be accessed again. */
         void poison() {
-            ptrWhenPoisoned = ptr;
             ptr = NULL;
         }
 #endif
 
         static bool isRawEOLChar(int32_t c) {
             return (c == '\n' || c == '\r' || c == LINE_SEPARATOR || c == PARA_SEPARATOR);
         }
 
-        const jschar *findEOL();
+        // Finds the next EOL, but stops once 'max' jschars have been scanned
+        // (*including* the starting jschar).
+        const jschar *findEOLMax(const jschar *p, size_t max);
 
       private:
         const jschar *base;             /* base of buffer */
         const jschar *limit;            /* limit for quick bounds check */
         const jschar *ptr;              /* next char to get */
-        const jschar *ptrWhenPoisoned;  /* |ptr| when poison() was called */
     };
 
     TokenKind getTokenInternal();     /* doesn't check for pushback or error flag. */
 
     int32_t getChar();
     int32_t getCharIgnoreEOL();
     void ungetChar(int32_t c);
     void ungetCharIgnoreEOL(int32_t c);
--- a/js/src/vm/StringBuffer.h
+++ b/js/src/vm/StringBuffer.h
@@ -30,17 +30,16 @@ namespace js {
 class StringBuffer
 {
     /* cb's buffer is taken by the new string so use ContextAllocPolicy. */
     typedef Vector<jschar, 32, ContextAllocPolicy> CharBuffer;
 
     CharBuffer cb;
 
     JSContext *context() const { return cb.allocPolicy().context(); }
-    jschar *extractWellSized();
 
     StringBuffer(const StringBuffer &other) MOZ_DELETE;
     void operator=(const StringBuffer &other) MOZ_DELETE;
 
   public:
     explicit StringBuffer(JSContext *cx) : cb(cx) { }
 
     inline bool reserve(size_t len) { return cb.reserve(len); }
@@ -82,16 +81,23 @@ class StringBuffer
     /*
      * Creates a string from the characters in this buffer, then (regardless
      * whether string creation succeeded or failed) empties the buffer.
      */
     JSFixedString *finishString();
 
     /* Identical to finishString() except that an atom is created. */
     JSAtom *finishAtom();
+
+    /*
+     * Creates a raw string from the characters in this buffer.  The string is
+     * exactly the characters in this buffer: it is *not* null-terminated
+     * unless the last appended character was |(jschar)0|.
+     */
+    jschar *extractWellSized();
 };
 
 inline bool
 StringBuffer::append(JSLinearString *str)
 {
     JS::Anchor<JSString *> anch(str);
     return cb.append(str->chars(), str->length());
 }