Bug 847479 - Add smart filters borrowed from xz-utils to improve SeekableZStream compression rate. r=nfroyd
authorMike Hommey <mh+mozilla@glandium.org>
Wed, 06 Mar 2013 07:29:59 +0100
changeset 123911 eeba894662b8ad803b1670bdef3b121a59e0edc7
parent 123910 88226153a994b19a9942b360fd93ea7f3bc83efd
child 123912 777c81f4d4a422b750ecb94cc67158175929734c
push id24401
push useremorley@mozilla.com
push dateWed, 06 Mar 2013 16:09:28 +0000
treeherdermozilla-central@635f2c7660c8 [default view] [failures only]
perfherder[talos] [build metrics] [platform microbench] (compared to previous push)
reviewersnfroyd
bugs847479
milestone22.0a1
first release with
nightly linux32
nightly linux64
nightly mac
nightly win32
nightly win64
last release without
nightly linux32
nightly linux64
nightly mac
nightly win32
nightly win64
Bug 847479 - Add smart filters borrowed from xz-utils to improve SeekableZStream compression rate. r=nfroyd
mozglue/linker/Makefile.in
mozglue/linker/SeekableZStream.cpp
mozglue/linker/SeekableZStream.h
mozglue/linker/szip.cpp
--- a/mozglue/linker/Makefile.in
+++ b/mozglue/linker/Makefile.in
@@ -30,8 +30,16 @@ HOST_LIBS = -lz
 CPPSRCS += \
   ElfLoader.cpp \
   CustomElf.cpp \
   Mappable.cpp \
   SeekableZStream.cpp \
   $(NULL)
 
 include $(topsrcdir)/config/rules.mk
+
+ifeq (arm,$(TARGET_CPU))
+ifdef MOZ_THUMB2
+HOST_CXXFLAGS += -DTARGET_THUMB
+else
+HOST_CXXFLAGS += -DTARGET_ARM
+endif
+endif
--- a/mozglue/linker/SeekableZStream.cpp
+++ b/mozglue/linker/SeekableZStream.cpp
@@ -24,16 +24,17 @@ SeekableZStream::Init(const void *buf, s
   }
 
   buffer = reinterpret_cast<const unsigned char *>(buf);
   totalSize = header->totalSize;
   chunkSize = header->chunkSize;
   lastChunkSize = header->lastChunkSize;
   windowBits = header->windowBits;
   offsetTable.Init(&header[1], header->nChunks);
+  filter = GetFilter(header->filter);
 
   /* Sanity check */
   if ((chunkSize == 0) ||
       (chunkSize % PAGE_SIZE) ||
       (chunkSize > 8 * PAGE_SIZE) ||
       (offsetTable.numElements() < 1) ||
       (lastChunkSize == 0) ||
       (lastChunkSize > chunkSize) ||
@@ -94,10 +95,85 @@ SeekableZStream::DecompressChunk(void *w
       != (length == chunkLen) ? Z_STREAM_END : Z_OK) {
     log("inflate failed: %s", zStream.msg);
     return false;
   }
   if (inflateEnd(&zStream) != Z_OK) {
     log("inflateEnd failed: %s", zStream.msg);
     return false;
   }
+  if (filter)
+    filter(chunk * chunkSize, UNFILTER, (unsigned char *)where, chunkLen);
+
   return true;
 }
+
+/* Branch/Call/Jump conversion filter for Thumb, derived from xz-utils
+ * by Igor Pavlov and Lasse Collin, published in the public domain */
+static void
+BCJ_Thumb_filter(off_t offset, SeekableZStream::FilterDirection dir,
+                 unsigned char *buf, size_t size)
+{
+  size_t i;
+  for (i = 0; i + 4 <= size; i += 2) {
+    if ((buf[i + 1] & 0xf8) == 0xf0 && (buf[i + 3] & 0xf8) == 0xf8) {
+      uint32_t src = (buf[i] << 11)
+                     | ((buf[i + 1] & 0x07) << 19)
+                     | buf[i + 2]
+                     | ((buf[i + 3] & 0x07) << 8);
+      src <<= 1;
+      uint32_t dest;
+      if (dir == SeekableZStream::FILTER)
+        dest = offset + (uint32_t)(i) + 4 + src;
+      else
+        dest = src - (offset + (uint32_t)(i) + 4);
+
+      dest >>= 1;
+      buf[i] = dest >> 11;
+      buf[i + 1] = 0xf0 | ((dest >> 19) & 0x07);
+      buf[i + 2] = dest;
+      buf[i + 3] = 0xf8 | ((dest >> 8) & 0x07);
+      i += 2;
+    }
+  }
+}
+
+/* Branch/Call/Jump conversion filter for ARM, derived from xz-utils
+ * by Igor Pavlov and Lasse Collin, published in the public domain */
+static void
+BCJ_ARM_filter(off_t offset, SeekableZStream::FilterDirection dir,
+               unsigned char *buf, size_t size)
+{
+  size_t i;
+  for (i = 0; i + 4 <= size; i += 4) {
+    if (buf[i + 3] == 0xeb) {
+      uint32_t src = buf[i]
+                     | (buf[i + 1] << 8)
+                     | (buf[i + 2] << 16);
+      src <<= 2;
+      uint32_t dest;
+      if (dir == SeekableZStream::FILTER)
+        dest = offset + (uint32_t)(i) + 8 + src;
+      else
+        dest = src - (offset + (uint32_t)(i) + 8);
+
+      dest >>= 2;
+      buf[i] = dest;
+      buf[i + 1] = dest >> 8;
+      buf[i + 2] = dest >> 16;
+    }
+  }
+}
+
+
+SeekableZStream::ZStreamFilter
+SeekableZStream::GetFilter(SeekableZStream::FilterId id)
+{
+  switch (id) {
+  case BCJ_THUMB:
+    return BCJ_Thumb_filter;
+  case BCJ_ARM:
+    return BCJ_ARM_filter;
+  default:
+    return NULL;
+  }
+  return NULL;
+}
--- a/mozglue/linker/SeekableZStream.h
+++ b/mozglue/linker/SeekableZStream.h
@@ -17,17 +17,18 @@
  * individual compressed chunk, then followed by the compressed chunks.
  */
 
 #pragma pack(1)
 struct SeekableZStreamHeader: public Zip::SignedEntity<SeekableZStreamHeader>
 {
   SeekableZStreamHeader()
   : Zip::SignedEntity<SeekableZStreamHeader>(magic)
-  , totalSize(0), chunkSize(0), nChunks(0), lastChunkSize(0), windowBits(0) { }
+  , totalSize(0), chunkSize(0), nChunks(0), lastChunkSize(0), windowBits(0)
+  , filter(0) { }
 
   /* Reuse Zip::SignedEntity to handle the magic number used in the Seekable
    * ZStream file format. The magic number is "SeZz". */
   static const uint32_t magic = 0x7a5a6553;
 
   /* Total size of the stream, including the 4 magic bytes. */
   le_uint32 totalSize;
 
@@ -38,18 +39,18 @@ struct SeekableZStreamHeader: public Zip
   le_uint32 nChunks;
 
   /* Size of last chunk (> 0, <= Chunk size) */
   le_uint16 lastChunkSize;
 
   /* windowBits value used when deflating */
   signed char windowBits;
 
-  /* Padding */
-  unsigned char unused;
+  /* Filter Id */
+  unsigned char filter;
 
   /* Maximum supported size for chunkSize */
   /* Can't use std::min here because it's not constexpr */
   static const size_t maxChunkSize =
     1 << ((sizeof(chunkSize) < sizeof(lastChunkSize) ?
            sizeof(chunkSize) : sizeof(lastChunkSize)) - 1);
 };
 #pragma pack()
@@ -89,16 +90,38 @@ public:
     return (chunk == offsetTable.numElements() - 1) ? lastChunkSize : chunkSize;
   }
 
   /* Returns the number of chunks */
   const size_t GetChunksNum() const {
     return offsetTable.numElements();
   }
 
+  /**
+   * Filters used to improve compression rate.
+   */
+  enum FilterDirection {
+    FILTER,
+    UNFILTER
+  };
+  typedef void (*ZStreamFilter)(off_t, FilterDirection,
+                                  unsigned char *, size_t);
+
+  enum FilterId {
+    NONE,
+    BCJ_THUMB,
+    BCJ_ARM,
+    FILTER_MAX
+  };
+  static ZStreamFilter GetFilter(FilterId id);
+
+  static ZStreamFilter GetFilter(uint16_t id) {
+    return GetFilter(static_cast<FilterId>(id));
+  }
+
 private:
   /* RAW Seekable SZtream buffer */
   const unsigned char *buffer;
 
   /* Total size of the stream, including the 4 magic bytes. */
   uint32_t totalSize;
 
   /* Chunk size */
@@ -107,11 +130,14 @@ private:
   /* Size of last chunk (> 0, <= Chunk size) */
   uint32_t lastChunkSize;
 
   /* windowBits value used when deflating */
   int windowBits;
 
   /* Offsets table */
   Array<le_uint32> offsetTable;
+
+  /* Filter */
+  ZStreamFilter filter;
 };
 
 #endif /* SeekableZStream_h */
--- a/mozglue/linker/szip.cpp
+++ b/mozglue/linker/szip.cpp
@@ -75,28 +75,41 @@ public:
 
 class SzipDecompress: public SzipAction
 {
 public:
   int run(const char *name, Buffer &origBuf,
           const char *outName, Buffer &outBuf);
 };
 
+
 class SzipCompress: public SzipAction
 {
 public:
   int run(const char *name, Buffer &origBuf,
           const char *outName, Buffer &outBuf);
 
-  SzipCompress(size_t aChunkSize)
+  SzipCompress(size_t aChunkSize, SeekableZStream::FilterId aFilter)
   : chunkSize(aChunkSize ? aChunkSize : 16384)
+  , filter(aFilter == SeekableZStream::FILTER_MAX ? DEFAULT_FILTER : aFilter)
   {}
 
 private:
+
+  const static SeekableZStream::FilterId DEFAULT_FILTER =
+#if defined(TARGET_THUMB)
+    SeekableZStream::BCJ_THUMB;
+#elif defined(TARGET_ARM)
+    SeekableZStream::BCJ_ARM;
+#else
+    SeekableZStream::NONE;
+#endif
+
   size_t chunkSize;
+  SeekableZStream::FilterId filter;
 };
 
 /* Decompress a seekable compressed stream */
 int SzipDecompress::run(const char *name, Buffer &origBuf,
                         const char *outName, Buffer &outBuf)
 {
   size_t origSize = origBuf.GetLength();
   if (origSize < sizeof(SeekableZStreamHeader)) {
@@ -148,24 +161,47 @@ int SzipCompress::run(const char *name, 
 
   SeekableZStreamHeader *header = new (outBuf) SeekableZStreamHeader;
   le_uint32 *entry = reinterpret_cast<le_uint32 *>(&header[1]);
 
   /* Initialize header */
   header->chunkSize = chunkSize;
   header->totalSize = offset;
   header->windowBits = -15; // Raw stream, window size of 32k.
+  header->filter = filter;
 
   /* Initialize zlib structure */
   z_stream zStream;
   memset(&zStream, 0, sizeof(zStream));
   zStream.avail_out = origSize - offset;
   zStream.next_out = static_cast<Bytef*>(outBuf) + offset;
 
-  Bytef *origData = static_cast<Bytef*>(origBuf);
+  /* Filter buffer */
+  SeekableZStream::ZStreamFilter filter =
+    SeekableZStream::GetFilter(header->filter);
+  Buffer filteredData;
+  Bytef *origData;
+  if (filter) {
+    filteredData.Resize(origSize);
+    origData = filteredData;
+    memcpy(origData, origBuf, origSize);
+    size_t size = origSize;
+    Bytef *data = origData;
+    size_t avail = 0;
+    /* Filter needs to be applied in chunks. */
+    while (size) {
+      avail = std::min(size, chunkSize);
+      filter(data - origData, SeekableZStream::FILTER, data, avail);
+      size -= avail;
+      data += avail;
+    }
+  } else {
+    origData = origBuf;
+  }
+
   size_t avail = 0;
   size_t size = origSize;
   while (size) {
     avail = std::min(size, chunkSize);
 
     /* Compress chunk */
     int ret = deflateInit2(&zStream, 9, Z_DEFLATED, header->windowBits,
                            MAX_MEM_LEVEL, Z_DEFAULT_STRATEGY);
@@ -228,16 +264,17 @@ bool GetSize(const char *str, size_t *ou
 }
 
 int main(int argc, char* argv[])
 {
   mozilla::ScopedDeletePtr<SzipAction> action;
   char **firstArg;
   bool compress = true;
   size_t chunkSize = 0;
+  SeekableZStream::FilterId filter = SeekableZStream::FILTER_MAX;
 
   for (firstArg = &argv[1]; argc > 3; argc--, firstArg++) {
     if (!firstArg[0] || firstArg[0][0] != '-')
       break;
     if (strcmp(firstArg[0], "-d") == 0) {
       compress = false;
     } else if (strcmp(firstArg[0], "-c") == 0) {
       firstArg++;
@@ -245,27 +282,40 @@ int main(int argc, char* argv[])
       if (!firstArg[0])
         break;
       if (!GetSize(firstArg[0], &chunkSize) || !chunkSize ||
           (chunkSize % 4096) ||
           (chunkSize > SeekableZStreamHeader::maxChunkSize)) {
         log("Invalid chunk size");
         return 1;
       }
+    } else if (strcmp(firstArg[0], "-f") == 0) {
+      firstArg++;
+      argc--;
+      if (!firstArg[0])
+        break;
+      if (strcmp(firstArg[0], "arm") == 0)
+        filter = SeekableZStream::BCJ_ARM;
+      else if (strcmp(firstArg[0], "thumb") == 0)
+        filter = SeekableZStream::BCJ_THUMB;
+      else {
+        log("Invalid filter");
+        return 1;
+      }
     }
   }
 
   if (argc != 3 || !firstArg[0] || !firstArg[1] ||
       (strcmp(firstArg[0], firstArg[1]) == 0)) {
-    log("usage: %s [-d] [-c CHUNKSIZE] in_file out_file", argv[0]);
+    log("usage: %s [-d] [-c CHUNKSIZE] [-f FILTER] in_file out_file", argv[0]);
     return 1;
   }
 
   if (compress) {
-    action = new SzipCompress(chunkSize);
+    action = new SzipCompress(chunkSize, filter);
   } else {
     if (chunkSize) {
       log("-c is incompatible with -d");
       return 1;
     }
     action = new SzipDecompress();
   }