Bug 777190 - Don't compress files with huge strings; reenable source compression. r=jorendorff
author Benjamin Peterson <benjamin@python.org>
Mon, 30 Jul 2012 19:23:46 -0700
changeset 100923 c3ddad2296ad8d5667d016e3e054bd75687248e0
parent 100922 c607d1fd6d35603e1e67fb12ce492ffe1a6aa5bd
child 100924 a57c83db51a72b1df7362283ff8d8fc0eb5f9eba
push id 1
push user root
push date Mon, 20 Oct 2014 17:29:22 +0000
reviewers jorendorff
bugs 777190
milestone 17.0a1
Bug 777190 - Don't compress files with huge strings; reenable source compression. r=jorendorff
js/src/frontend/BytecodeCompiler.cpp
js/src/frontend/Parser.cpp
js/src/frontend/Parser.h
js/src/jsscript.cpp
js/src/jsscript.h
js/src/jsutil.cpp
js/src/jsutil.h
--- a/js/src/frontend/BytecodeCompiler.cpp
+++ b/js/src/frontend/BytecodeCompiler.cpp
@@ -75,28 +75,29 @@ frontend::CompileScript(JSContext *cx, H
      * and non-zero static level requires callerFrame.
      */
     JS_ASSERT_IF(callerFrame, options.compileAndGo);
     JS_ASSERT_IF(staticLevel != 0, callerFrame);
 
     if (!CheckLength(cx, length))
         return NULL;
     AutoAttachToRuntime attacher(cx->runtime);
-    SourceCompressionToken sct(cx->runtime);
+    SourceCompressionToken sct(cx);
     ScriptSource *ss = NULL;
     if (!cx->hasRunOption(JSOPTION_ONLY_CNG_SOURCE) || options.compileAndGo) {
         ss = ScriptSource::createFromSource(cx, chars, length, false, &sct);
         if (!ss)
             return NULL;
         attacher.ss = ss;
     }
 
     Parser parser(cx, options, chars, length, /* foldConstants = */ true);
     if (!parser.init())
         return NULL;
+    parser.sct = &sct;
 
     SharedContext sc(cx, scopeChain, /* fun = */ NULL, /* funbox = */ NULL, StrictModeFromContext(cx));
 
     TreeContext tc(&parser, &sc, staticLevel, /* bodyid = */ 0);
     if (!tc.init())
         return NULL;
 
     bool savedCallerFun = options.compileAndGo && callerFrame && callerFrame->isFunctionFrame();
@@ -240,26 +241,27 @@ frontend::CompileScript(JSContext *cx, H
 // handler attribute in an HTML <INPUT> tag, or in a Function() constructor.
 bool
 frontend::CompileFunctionBody(JSContext *cx, HandleFunction fun, CompileOptions options,
                               Bindings *bindings, const jschar *chars, size_t length)
 {
     if (!CheckLength(cx, length))
         return false;
     AutoAttachToRuntime attacher(cx->runtime);
-    SourceCompressionToken sct(cx->runtime);
+    SourceCompressionToken sct(cx);
     ScriptSource *ss = ScriptSource::createFromSource(cx, chars, length, true, &sct);
     if (!ss)
         return NULL;
     attacher.ss = ss;
 
     options.setCompileAndGo(false);
     Parser parser(cx, options, chars, length, /* foldConstants = */ true);
     if (!parser.init())
         return false;
+    parser.sct = &sct;
 
     JS_ASSERT(fun);
     SharedContext funsc(cx, /* scopeChain = */ NULL, fun, /* funbox = */ NULL,
                         StrictModeFromContext(cx));
     funsc.bindings.transfer(bindings);
     fun->setArgCount(funsc.bindings.numArgs());
 
     unsigned staticLevel = 0;
--- a/js/src/frontend/Parser.cpp
+++ b/js/src/frontend/Parser.cpp
@@ -109,16 +109,17 @@ Parser::Parser(JSContext *cx, const Comp
   : AutoGCRooter(cx, PARSER),
     context(cx),
     strictModeGetter(thisForCtor()),
     tokenStream(cx, options, chars, length, &strictModeGetter),
     tempPoolMark(NULL),
     allocator(cx),
     traceListHead(NULL),
     tc(NULL),
+    sct(NULL),
     keepAtoms(cx->runtime),
     foldConstants(foldConstants),
     compileAndGo(options.compileAndGo)
 {
     cx->activeCompilations++;
 }
 
 bool
@@ -6509,16 +6510,24 @@ ParseNode *
 Parser::atomNode(ParseNodeKind kind, JSOp op)
 {
     ParseNode *node = NullaryNode::create(kind, this);
     if (!node)
         return NULL;
     node->setOp(op);
     const Token &tok = tokenStream.currentToken();
     node->pn_atom = tok.atom();
+
+    // Large strings are fast to parse but slow to compress. Stop compression on
+    // them, so we don't wait for a long time for compression to finish at the
+    // end of compilation.
+    const size_t HUGE_STRING = 50000;
+    if (sct && kind == PNK_STRING && node->pn_atom->length() >= HUGE_STRING)
+        sct->abort();
+
     return node;
 }
 
 ParseNode *
 Parser::primaryExpr(TokenKind tt, bool afterDoubleDot)
 {
     JS_ASSERT(tokenStream.isCurrentTokenType(tt));
 
--- a/js/src/frontend/Parser.h
+++ b/js/src/frontend/Parser.h
@@ -37,16 +37,18 @@ struct Parser : private AutoGCRooter
     StrictModeGetter    strictModeGetter; /* used by tokenStream to test for strict mode */
     TokenStream         tokenStream;
     void                *tempPoolMark;  /* initial JSContext.tempLifoAlloc mark */
     ParseNodeAllocator  allocator;
     ObjectBox           *traceListHead; /* list of parsed object for GC tracing */
 
     TreeContext         *tc;            /* innermost tree context (stack-allocated) */
 
+    SourceCompressionToken *sct;        /* compression token for aborting */
+
     /* Root atoms and objects allocated for the parsed tree. */
     AutoKeepAtoms       keepAtoms;
 
     /* Perform constant-folding; must be true when interfacing with the emitter. */
     const bool          foldConstants:1;
 
   private:
     /* Script can optimize name references based on scope chain. */
--- a/js/src/jsscript.cpp
+++ b/js/src/jsscript.cpp
@@ -1021,60 +1021,111 @@ SourceCompressorThread::threadLoop()
     while (true) {
         switch (state) {
           case SHUTDOWN:
             PR_Unlock(lock);
             return;
           case IDLE:
             PR_WaitCondVar(wakeup, PR_INTERVAL_NO_TIMEOUT);
             break;
-          case COMPRESSING:
+          case COMPRESSING: {
             JS_ASSERT(tok);
-            JS_ASSERT(!tok->ss->ready());
-            tok->ss->considerCompressing(rt, tok->chars);
+            ScriptSource *ss = tok->ss;
+            JS_ASSERT(!ss->ready());
+            const size_t COMPRESS_THRESHOLD = 512;
+            size_t compressedLength = 0;
+#ifdef USE_ZLIB
+            size_t nbytes = sizeof(jschar) * ss->length();
+            if (nbytes >= COMPRESS_THRESHOLD) {
+                Compressor comp(reinterpret_cast<const unsigned char *>(tok->chars),
+                                nbytes, ss->data.compressed);
+                if (comp.init()) {
+                    while (!stop && comp.compressMore())
+                        ;
+                    compressedLength = comp.finish();
+                    if (stop || compressedLength == nbytes)
+                        compressedLength = 0;
+                }
+            }
+#endif
+            ss->compressedLength_ = compressedLength;
+            if (compressedLength == 0) {
+                PodCopy(ss->data.source, tok->chars, ss->length());
+            } else {
+                // Shrink the buffer to the size of the compressed data. The
+                // memory allocation functions on JSContext and JSRuntime are
+                // not threadsafe, so use js_realloc directly. We'll fix up the
+                // memory accounting of the runtime in waitOnCompression().
+                void *newmem = js_realloc(ss->data.compressed, compressedLength);
+                JS_ASSERT(newmem); // Reducing memory size shouldn't fail.
+                ss->data.compressed = static_cast<unsigned char *>(newmem);
+            }
+
             // We hold the lock, so no one should have changed this.
             JS_ASSERT(state == COMPRESSING);
             state = IDLE;
             PR_NotifyCondVar(done);
             break;
+          }
         }
     }
 }
 
 void
 SourceCompressorThread::compress(SourceCompressionToken *sct)
 {
     if (tok)
         // We have reentered the compiler. (This can happen through the
         // debugger.) Complete the current compression before starting the next
         // one.
         waitOnCompression(tok);
     JS_ASSERT(state == IDLE);
     JS_ASSERT(!tok);
+    stop = false;
     PR_Lock(lock);
     tok = sct;
     state = COMPRESSING;
     PR_NotifyCondVar(wakeup);
     PR_Unlock(lock);
 }
 
 void
 SourceCompressorThread::waitOnCompression(SourceCompressionToken *userTok)
 {
     JS_ASSERT(userTok == tok);
     JS_ASSERT(!tok->ss->onRuntime());
     PR_Lock(lock);
     if (state == COMPRESSING)
         PR_WaitCondVar(done, PR_INTERVAL_NO_TIMEOUT);
     JS_ASSERT(state == IDLE);
-    JS_ASSERT(tok->ss->ready());
-    tok->ss = NULL;
-    tok->chars = NULL;
+    SourceCompressionToken *saveTok = tok;
     tok = NULL;
     PR_Unlock(lock);
+
+    JS_ASSERT(!saveTok->ss->ready());
+#ifdef DEBUG
+    saveTok->ss->ready_ = true;
+#endif
+
+    // Update memory accounting if needed.
+    if (saveTok->ss->compressed()) {
+        ptrdiff_t delta = saveTok->ss->compressedLength_ - sizeof(jschar) * saveTok->ss->length();
+        JS_ASSERT(delta < 0);
+        saveTok->cx->runtime->updateMallocCounter(NULL, delta);
+    }
+
+    saveTok->ss = NULL;
+    saveTok->chars = NULL;
+}
+
+void
+SourceCompressorThread::abort(SourceCompressionToken *userTok)
+{
+    JS_ASSERT(userTok == tok);
+    stop = true;
 }
 #endif /* JS_THREADSAFE */
 
 void
 JSScript::setScriptSource(JSContext *cx, ScriptSource *ss)
 {
 #ifdef JSGC_INCREMENTAL
     // During IGC, we need to barrier writing to scriptSource_.
@@ -1187,22 +1238,22 @@ ScriptSource::substring(JSContext *cx, u
     return js_NewStringCopyN(cx, chars + start, stop - start);
 }
 
 ScriptSource *
 ScriptSource::createFromSource(JSContext *cx, const jschar *src, uint32_t length,
                                bool argumentsNotIncluded, SourceCompressionToken *tok,
                                bool ownSource)
 {
-    ScriptSource *ss = static_cast<ScriptSource *>(cx->malloc_(sizeof(*ss)));
+    ScriptSource *ss = static_cast<ScriptSource *>(cx->runtime->malloc_(sizeof(*ss)));
     if (!ss)
         return NULL;
     if (!ownSource) {
         const size_t nbytes = length * sizeof(jschar);
-        ss->data.compressed = static_cast<unsigned char *>(cx->malloc_(nbytes));
+        ss->data.compressed = static_cast<unsigned char *>(cx->runtime->malloc_(nbytes));
         if (!ss->data.compressed) {
             cx->free_(ss);
             return NULL;
         }
     }
     ss->next = NULL;
     ss->length_ = length;
     ss->compressedLength_ = 0;
@@ -1210,64 +1261,48 @@ ScriptSource::createFromSource(JSContext
     ss->argumentsNotIncluded_ = argumentsNotIncluded;
 #ifdef DEBUG
     ss->ready_ = false;
 #endif
 
     JS_ASSERT_IF(ownSource, !tok);
 
 #ifdef JS_THREADSAFE
-    if (tok && 0) {
+    if (tok && !ownSource) {
         tok->ss = ss;
         tok->chars = src;
         cx->runtime->sourceCompressorThread.compress(tok);
     } else
 #endif
-        ss->considerCompressing(cx->runtime, src, ownSource);
-
+    {
+        if (ownSource)
+            ss->data.source = const_cast<jschar *>(src);
+        else
+            PodCopy(ss->data.source, src, ss->length_);
+#ifdef DEBUG
+        ss->ready_ = true;
+#endif
+    }
 
     return ss;
 }
 
 void
-ScriptSource::considerCompressing(JSRuntime *rt, const jschar *src, bool ownSource)
-{
-    JS_ASSERT(!ready());
-
-#if USE_ZLIB
-    const size_t nbytes = length_ * sizeof(jschar);
-    const size_t COMPRESS_THRESHOLD = 512;
-    size_t compressedLength;
-#endif
-    if (ownSource) {
-        data.source = const_cast<jschar *>(src);
-#if USE_ZLIB
-    } else if (nbytes >= COMPRESS_THRESHOLD && 0 &&
-        TryCompressString(reinterpret_cast<const unsigned char *>(src), nbytes,
-                          data.compressed, &compressedLength))
-    {
-        JS_ASSERT(compressedLength < nbytes);
-        compressedLength_ = compressedLength;
-        void *mem = rt->realloc_(data.compressed, compressedLength_);
-        data.compressed = static_cast<unsigned char *>(mem);
-        JS_ASSERT(data.compressed);
-#endif
-    } else {
-        PodCopy(data.source, src, length_);
-    }
-#ifdef DEBUG    
-    ready_ = true;
-#endif
-}
-
-void
 SourceCompressionToken::ensureReady()
 {
 #ifdef JS_THREADSAFE
-    rt->sourceCompressorThread.waitOnCompression(this);
+    cx->runtime->sourceCompressorThread.waitOnCompression(this);
+#endif
+}
+
+void
+SourceCompressionToken::abort()
+{
+#ifdef JS_THREADSAFE
+    cx->runtime->sourceCompressorThread.abort(this);
 #endif
 }
 
 void
 ScriptSource::attachToRuntime(JSRuntime *rt)
 {
     JS_ASSERT(!onRuntime());
     next = rt->scriptSources;
--- a/js/src/jsscript.h
+++ b/js/src/jsscript.h
@@ -1019,17 +1019,16 @@ struct ScriptSource
     static void sweep(JSRuntime *rt);
 
     // XDR handling
     template <XDRMode mode>
     static bool performXDR(XDRState<mode> *xdr, ScriptSource **ss);
 
   private:
     bool compressed() { return compressedLength_ != 0; }
-    void considerCompressing(JSRuntime *rt, const jschar *src, bool ownSource = false);
 };
 
 #ifdef JS_THREADSAFE
 /*
  * Background thread to compress JS source code. This happens only while parsing
  * and bytecode generation is happening in the main thread. If needed, the
  * compiler waits for compression to complete before returning.
  *
@@ -1056,16 +1055,18 @@ class SourceCompressorThread
     // Protects |state| and |tok| when it's non-NULL.
     PRLock *lock;
     // When it's idling, the compression thread blocks on this. The main thread
     // uses it to notify the compression thread when it has source to be
     // compressed.
     PRCondVar *wakeup;
     // The main thread can block on this to wait for compression to finish.
     PRCondVar *done;
+    // Flag which can be set by the main thread to ask compression to abort.
+    volatile bool stop;
 
     void threadLoop();
     static void compressorThread(void *arg);
 
   public:
     explicit SourceCompressorThread(JSRuntime *rt)
     : state(IDLE),
       rt(rt),
@@ -1073,37 +1074,40 @@ class SourceCompressorThread
       thread(NULL),
       lock(NULL),
       wakeup(NULL),
       done(NULL) {}
     void finish();
     bool init();
     void compress(SourceCompressionToken *tok);
     void waitOnCompression(SourceCompressionToken *userTok);
+    void abort(SourceCompressionToken *userTok);
 };
 #endif
 
 struct SourceCompressionToken
 {
     friend struct ScriptSource;
     friend class SourceCompressorThread;
   private:
-    JSRuntime *rt;
+    JSContext *cx;
     ScriptSource *ss;
     const jschar *chars;
   public:
-    SourceCompressionToken(JSRuntime *rt)
-      : rt(rt), ss(NULL), chars(NULL) {}
+    SourceCompressionToken(JSContext *cx)
+      : cx(cx), ss(NULL), chars(NULL) {}
     ~SourceCompressionToken()
     {
         JS_ASSERT_IF(!ss, !chars);
         if (ss)
             ensureReady();
     }
+
     void ensureReady();
+    void abort();
 };
 
 extern void
 CallDestroyScriptHook(FreeOp *fop, JSScript *script);
 
 extern const char *
 SaveScriptFilename(JSContext *cx, const char *filename);
 
--- a/js/src/jsutil.cpp
+++ b/js/src/jsutil.cpp
@@ -1,9 +1,9 @@
-/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 2 -*-
+/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*-
  *
  * This Source Code Form is subject to the terms of the Mozilla Public
  * License, v. 2.0. If a copy of the MPL was not distributed with this
  * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
 
 /* Various JS utility functions. */
 
 #include "mozilla/Assertions.h"
@@ -37,43 +37,61 @@ zlib_alloc(void *cx, uInt items, uInt si
 }
 
 static void
 zlib_free(void *cx, void *addr)
 {
     Foreground::free_(addr);
 }
 
-
 bool
-js::TryCompressString(const unsigned char *inp, size_t inplen, unsigned char *out, size_t *outlen)
+Compressor::init()
 {
-    JS_ASSERT(inplen);
     if (inplen >= UINT32_MAX)
         return false;
-    z_stream zs;
-    zs.opaque = NULL;
     zs.zalloc = zlib_alloc;
     zs.zfree = zlib_free;
-    zs.next_in = (Bytef *)inp;
-    zs.avail_in = inplen;
-    zs.next_out = out;
-    zs.avail_out = inplen;
-    int ret = deflateInit(&zs, Z_BEST_SPEED);
+    int ret = deflateInit(&zs, Z_DEFAULT_COMPRESSION);
     if (ret != Z_OK) {
         JS_ASSERT(ret == Z_MEM_ERROR);
         return false;
     }
-    ret = deflate(&zs, Z_FINISH);
-    DebugOnly<int> ret2 = deflateEnd(&zs);
-    JS_ASSERT(ret2 == Z_OK);
-    if (ret != Z_STREAM_END)
+    return true;
+}
+
+bool
+Compressor::compressMore()
+{
+    uInt left = inplen - (zs.next_in - inp);
+    bool done = left <= CHUNKSIZE;
+    if (done)
+        zs.avail_in = left;
+    else if (zs.avail_in == 0)
+        zs.avail_in = CHUNKSIZE;
+    int ret = deflate(&zs, done ? Z_FINISH : Z_NO_FLUSH);
+    if (ret == Z_BUF_ERROR) {
+        JS_ASSERT(zs.avail_out == 0);
         return false;
-    *outlen = inplen - zs.avail_out;
-    return true;
+    }
+    JS_ASSERT_IF(!done, ret == Z_OK);
+    JS_ASSERT_IF(done, ret == Z_STREAM_END);
+    return !done;
+}
+
+size_t
+Compressor::finish()
+{
+    size_t outlen = inplen - zs.avail_out;
+    int ret = deflateEnd(&zs);
+    if (ret != Z_OK) {
+        // If we finished early, we can get a Z_DATA_ERROR.
+        JS_ASSERT(ret == Z_DATA_ERROR);
+        JS_ASSERT(uInt(zs.next_in - inp) < inplen || !zs.avail_out);
+    }
+    return outlen;
 }
 
 bool
 js::DecompressString(const unsigned char *inp, size_t inplen, unsigned char *out, size_t outlen)
 {
     JS_ASSERT(inplen <= UINT32_MAX);
     z_stream zs;
     zs.zalloc = zlib_alloc;
@@ -81,18 +99,18 @@ js::DecompressString(const unsigned char
     zs.opaque = NULL;
     zs.next_in = (Bytef *)inp;
     zs.avail_in = inplen;
     zs.next_out = out;
     JS_ASSERT(outlen);
     zs.avail_out = outlen;
     int ret = inflateInit(&zs);
     if (ret != Z_OK) {
-      JS_ASSERT(ret == Z_MEM_ERROR);
-      return false;
+        JS_ASSERT(ret == Z_MEM_ERROR);
+        return false;
     }
     ret = inflate(&zs, Z_FINISH);
     JS_ASSERT(ret == Z_STREAM_END);
     ret = inflateEnd(&zs);
     JS_ASSERT(ret == Z_OK);
     return true;
 }
 #endif
@@ -146,20 +164,20 @@ BinToVal(unsigned logscale, unsigned bin
 static unsigned
 ValToBin(unsigned logscale, uint32_t val)
 {
     unsigned bin;
 
     if (val <= 1)
         return val;
     bin = (logscale == 10)
-          ? (unsigned) ceil(log10((double) val))
-          : (logscale == 2)
-          ? (unsigned) JS_CEILING_LOG2W(val)
-          : val;
+        ? (unsigned) ceil(log10((double) val))
+        : (logscale == 2)
+        ? (unsigned) JS_CEILING_LOG2W(val)
+        : val;
     return JS_MIN(bin, 10);
 }
 
 void
 JS_BasicStatsAccum(JSBasicStats *bs, uint32_t val)
 {
     unsigned oldscale, newscale, bin;
     double mean;
--- a/js/src/jsutil.h
+++ b/js/src/jsutil.h
@@ -10,16 +10,18 @@
 
 #ifndef jsutil_h___
 #define jsutil_h___
 
 #include "mozilla/Attributes.h"
 
 #include "js/Utility.h"
 
+#include "zlib.h"
+
 /* Forward declarations. */
 struct JSContext;
 
 static JS_ALWAYS_INLINE void *
 js_memcpy(void *dst_, const void *src_, size_t len)
 {
     char *dst = (char *) dst_;
     const char *src = (const char *) src_;
@@ -330,24 +332,42 @@ ClearBitArrayElement(size_t *array, size
 
 static inline void
 ClearAllBitArrayElements(size_t *array, size_t length)
 {
     for (unsigned i = 0; i < length; ++i)
         array[i] = 0;
 }
 
-#if USE_ZLIB
-/*
- * Attempt to compress some bytes. Return true if compression produced a
- * string smaller than the input. The caller is responsible for allocating
- * |out| to a string the same length as the input.
- */
-bool TryCompressString(const unsigned char *inp, size_t inplen,
-                       unsigned char *out, size_t *outlen);
+#ifdef USE_ZLIB
+class Compressor
+{
+    // Number of bytes we should hand to zlib each compressMore() call.
+    static const size_t CHUNKSIZE = 2048;
+    z_stream zs;
+    const unsigned char *inp;
+    size_t inplen;
+  public:
+    Compressor(const unsigned char *inp, size_t inplen, unsigned char *out)
+        : inp(inp),
+        inplen(inplen)
+    {
+        JS_ASSERT(inplen > 0);
+        zs.opaque = NULL;
+        zs.next_in = (Bytef *)inp;
+        zs.avail_in = 0;
+        zs.next_out = out;
+        zs.avail_out = inplen;
+    }
+    bool init();
+    // Compress some of the input. Return true if it should be called again.
+    bool compressMore();
+    // Finalize compression. Return the length of the compressed input.
+    size_t finish();
+};
 
 /*
  * Decompress a string. The caller must know the length of the output and
  * allocate |out| to a string of that length.
  */
 bool DecompressString(const unsigned char *inp, size_t inplen,
                       unsigned char *out, size_t outlen);
 #endif