Bug 960367 - OdinMonkey: compress source stored in cache file (r=sstangl)
author Luke Wagner <luke@mozilla.com>
Fri, 17 Jan 2014 17:34:33 -0600
changeset 174318 a0a1851773db6d8c5f91e9d63530e33f51416811
parent 174317 cd4a9095e25cfc45a02b2d5b2e07388107cf7aad
child 174319 8526c7a387617c0d9db00c873e2b6bc3a0fbd5a4
push id unknown
push user unknown
push date unknown
reviewers sstangl
bugs 960367
milestone 29.0a1
Bug 960367 - OdinMonkey: compress source stored in cache file (r=sstangl)
js/src/jit/AsmJSModule.cpp
mfbt/Compression.h
--- a/js/src/jit/AsmJSModule.cpp
+++ b/js/src/jit/AsmJSModule.cpp
@@ -5,16 +5,17 @@
  * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
 
 #include "jit/AsmJSModule.h"
 
 #ifndef XP_WIN
 # include <sys/mman.h>
 #endif
 
+#include "mozilla/Compression.h"
 #include "mozilla/PodOperations.h"
 
 #include "jslibmath.h"
 #include "jsmath.h"
 #include "jsprf.h"
 #ifdef XP_WIN
 # include "jswin.h"
 #endif
@@ -27,16 +28,17 @@
 #include "jsobjinlines.h"
 
 #include "frontend/ParseNode-inl.h"
 
 using namespace js;
 using namespace jit;
 using namespace frontend;
 using mozilla::PodEqual;
+using mozilla::Compression::LZ4;
 
 void
 AsmJSModule::initHeap(Handle<ArrayBufferObject*> heap, JSContext *cx)
 {
     JS_ASSERT(linked_);
     JS_ASSERT(!maybeHeap_);
     maybeHeap_ = heap;
     heapDatum() = heap->dataPointer();
@@ -809,103 +811,148 @@ struct PropertyNameWrapper
     }
     const uint8_t *deserialize(ExclusiveContext *cx, const uint8_t *cursor) {
         return DeserializeName(cx, cursor, &name);
     }
 };
 
 class ModuleChars
 {
-    uint32_t length_;
-    const jschar *begin_;
+  protected:
     uint32_t isFunCtor_;
     js::Vector<PropertyNameWrapper, 0, SystemAllocPolicy> funCtorArgs_;
 
   public:
     static uint32_t beginOffset(AsmJSParser &parser) {
       return parser.pc->maybeFunction->pn_pos.begin;
     }
 
     static uint32_t endOffset(AsmJSParser &parser) {
       return parser.tokenStream.peekTokenPos().end;
     }
+};
 
-    bool initFromParsedModule(AsmJSParser &parser, const AsmJSModule &module) {
+class ModuleCharsForStore : ModuleChars
+{
+    uint32_t uncompressedSize_;
+    uint32_t compressedSize_;
+    js::Vector<char, 0, SystemAllocPolicy> compressedBuffer_;
+
+  public:
+    bool init(AsmJSParser &parser, const AsmJSModule &module) {
+        JS_ASSERT(beginOffset(parser) < endOffset(parser));
+
+        uncompressedSize_ = (endOffset(parser) - beginOffset(parser)) * sizeof(jschar);
+        size_t maxCompressedSize = LZ4::maxCompressedSize(uncompressedSize_);
+        if (maxCompressedSize < uncompressedSize_)
+            return false;
+
+        if (!compressedBuffer_.resize(maxCompressedSize))
+            return false;
+
+        const jschar *chars = parser.tokenStream.rawBase() + beginOffset(parser);
+        const char *source = reinterpret_cast<const char*>(chars);
+        size_t compressedSize = LZ4::compress(source, uncompressedSize_, compressedBuffer_.begin());
+        if (!compressedSize || compressedSize > UINT32_MAX)
+            return false;
+
+        compressedSize_ = compressedSize;
+
         // For a function statement or named function expression:
         //   function f(x,y,z) { abc }
         // the range [beginOffset, endOffset) captures the source:
         //   f(x,y,z) { abc }
         // An unnamed function expression captures the same thing, sans 'f'.
         // Since asm.js modules do not contain any free variables, equality of
         // [beginOffset, endOffset) is sufficient to guarantee identical code
         // generation, modulo MachineId.
         //
         // For functions created with 'new Function', function arguments are
         // not present in the source so we must manually explicitly serialize
         // and match the formals as a Vector of PropertyName.
-        JS_ASSERT(beginOffset(parser) < endOffset(parser));
-        begin_ = parser.tokenStream.rawBase() + beginOffset(parser);
-        length_ = endOffset(parser) - beginOffset(parser);
         isFunCtor_ = parser.pc->isFunctionConstructorBody();
         if (isFunCtor_) {
             unsigned numArgs;
             ParseNode *arg = FunctionArgsList(parser.pc->maybeFunction, &numArgs);
             for (unsigned i = 0; i < numArgs; i++, arg = arg->pn_next) {
                 if (!funCtorArgs_.append(arg->name()))
                     return false;
             }
         }
+
         return true;
     }
 
     size_t serializedSize() const {
         return sizeof(uint32_t) +
-               length_ * sizeof(jschar) +
+               sizeof(uint32_t) +
+               compressedSize_ +
                sizeof(uint32_t) +
                (isFunCtor_ ? SerializedVectorSize(funCtorArgs_) : 0);
     }
 
     uint8_t *serialize(uint8_t *cursor) const {
-        cursor = WriteScalar<uint32_t>(cursor, length_);
-        cursor = WriteBytes(cursor, begin_, length_ * sizeof(jschar));
+        cursor = WriteScalar<uint32_t>(cursor, uncompressedSize_);
+        cursor = WriteScalar<uint32_t>(cursor, compressedSize_);
+        cursor = WriteBytes(cursor, compressedBuffer_.begin(), compressedSize_);
         cursor = WriteScalar<uint32_t>(cursor, isFunCtor_);
         if (isFunCtor_)
             cursor = SerializeVector(cursor, funCtorArgs_);
         return cursor;
     }
+};
 
+class ModuleCharsForLookup : ModuleChars
+{
+    js::Vector<jschar, 0, SystemAllocPolicy> chars_;
+
+  public:
     const uint8_t *deserialize(ExclusiveContext *cx, const uint8_t *cursor) {
-        cursor = ReadScalar<uint32_t>(cursor, &length_);
-        begin_ = reinterpret_cast<const jschar *>(cursor);
-        cursor += length_ * sizeof(jschar);
+        uint32_t uncompressedSize;
+        cursor = ReadScalar<uint32_t>(cursor, &uncompressedSize);
+
+        uint32_t compressedSize;
+        cursor = ReadScalar<uint32_t>(cursor, &compressedSize);
+
+        if (!chars_.resize(uncompressedSize / sizeof(jschar)))
+            return nullptr;
+
+        const char *source = reinterpret_cast<const char*>(cursor);
+        char *dest = reinterpret_cast<char*>(chars_.begin());
+        if (!LZ4::decompress(source, dest, uncompressedSize))
+            return nullptr;
+
+        cursor += compressedSize;
+
         cursor = ReadScalar<uint32_t>(cursor, &isFunCtor_);
         if (isFunCtor_)
             cursor = DeserializeVector(cx, cursor, &funCtorArgs_);
+
         return cursor;
     }
 
-    bool matchUnparsedModule(AsmJSParser &parser) const {
+    bool match(AsmJSParser &parser) const {
         const jschar *parseBegin = parser.tokenStream.rawBase() + beginOffset(parser);
         const jschar *parseLimit = parser.tokenStream.rawLimit();
         JS_ASSERT(parseLimit >= parseBegin);
-        if (uint32_t(parseLimit - parseBegin) < length_)
+        if (uint32_t(parseLimit - parseBegin) < chars_.length())
             return false;
-        if (!PodEqual(begin_, parseBegin, length_))
+        if (!PodEqual(chars_.begin(), parseBegin, chars_.length()))
             return false;
         if (isFunCtor_ != parser.pc->isFunctionConstructorBody())
             return false;
         if (isFunCtor_) {
             // For function statements, the closing } is included as the last
             // character of the matched source. For Function constructor,
             // parsing terminates with EOF which we must explicitly check. This
             // prevents
             //   new Function('"use asm"; function f() {} return f')
             // from incorrectly matching
             //   new Function('"use asm"; function f() {} return ff')
-            if (parseBegin + length_ != parseLimit)
+            if (parseBegin + chars_.length() != parseLimit)
                 return false;
             unsigned numArgs;
             ParseNode *arg = FunctionArgsList(parser.pc->maybeFunction, &numArgs);
             if (funCtorArgs_.length() != numArgs)
                 return false;
             for (unsigned i = 0; i < funCtorArgs_.length(); i++, arg = arg->pn_next) {
                 if (funCtorArgs_[i].name != arg->name())
                     return false;
@@ -937,18 +984,18 @@ js::StoreAsmJSModuleInCache(AsmJSParser 
                             const AsmJSModule &module,
                             const AsmJSStaticLinkData &linkData,
                             ExclusiveContext *cx)
 {
     MachineId machineId;
     if (!machineId.extractCurrentState(cx))
         return false;
 
-    ModuleChars moduleChars;
-    if (!moduleChars.initFromParsedModule(parser, module))
+    ModuleCharsForStore moduleChars;
+    if (!moduleChars.init(parser, module))
         return false;
 
     size_t serializedSize = machineId.serializedSize() +
                             moduleChars.serializedSize() +
                             module.serializedSize() +
                             linkData.serializedSize();
 
     JS::OpenAsmJSCacheEntryForWriteOp open = cx->asmJSCacheOps().openEntryForWrite;
@@ -1016,19 +1063,19 @@ js::LookupAsmJSModuleInCache(ExclusiveCo
 
     MachineId cachedMachineId;
     cursor = cachedMachineId.deserialize(cx, cursor);
     if (!cursor)
         return false;
     if (machineId != cachedMachineId)
         return true;
 
-    ModuleChars moduleChars;
+    ModuleCharsForLookup moduleChars;
     cursor = moduleChars.deserialize(cx, cursor);
-    if (!moduleChars.matchUnparsedModule(parser))
+    if (!moduleChars.match(parser))
         return true;
 
     ScopedJSDeletePtr<AsmJSModule> module(
         cx->new_<AsmJSModule>(parser.ss, parser.offsetOfCurrentAsmJSModule()));
     if (!module)
         return false;
     cursor = module->deserialize(cx, cursor);
     if (!cursor)
--- a/mfbt/Compression.h
+++ b/mfbt/Compression.h
@@ -29,17 +29,17 @@ class LZ4
 {
 
 public:
 
   /**
    * Compresses 'inputSize' bytes from 'source' into 'dest'.
    * Destination buffer must be already allocated,
    * and must be sized to handle worst cases situations (input data not compressible)
-   * Worst case size evaluation is provided by function LZ4_compressBound()
+   * Worst case size evaluation is provided by function maxCompressedSize()
    *
    * @param inputSize is the input size. Max supported value is ~1.9GB
    * @param return the number of bytes written in buffer dest
    */
   static MFBT_API size_t compress(const char* source, size_t inputSize, char* dest);
 
   /**
    * Compress 'inputSize' bytes from 'source' into an output buffer