Bug 686805 part 3 - Tool to generate seekable compressed streams. r=tglek
author: Mike Hommey <mh+mozilla@glandium.org>
Wed, 22 Feb 2012 08:12:15 +0100
changeset 87348 fed61303b55b60385307edda90ff9aaa58c7020b
parent 87347 59237f456cdb05384424942052fbb7f96a24f592
child 87349 5af187d93f2c035d24d32dd8e17d1790112b4d7e
push id: 783
push user: lsblakk@mozilla.com
push date: Tue, 24 Apr 2012 17:33:42 +0000
treeherder: mozilla-esr52@b6627f28b7ec [default view] [failures only]
perfherder: [talos] [build metrics] [platform microbench] (compared to previous push)
reviewers: tglek
bugs: 686805
milestone: 13.0a1
Bug 686805 part 3 - Tool to generate seekable compressed streams. r=tglek
mozglue/linker/Makefile.in
mozglue/linker/SeekableZStream.h
mozglue/linker/Utils.h
mozglue/linker/Zip.h
mozglue/linker/szip.cpp
--- a/mozglue/linker/Makefile.in
+++ b/mozglue/linker/Makefile.in
@@ -14,16 +14,27 @@ LIBRARY_NAME	= linker
 FORCE_STATIC_LIB= 1
 STL_FLAGS =
 
 CPPSRCS = \
   Zip.cpp \
   $(NULL)
 
 ifndef MOZ_OLD_LINKER
+HOST_PROGRAM = szip
+
+HOST_CPPSRCS = \
+  szip.cpp \
+  Assertions.cpp \
+  $(NULL)
+
+VPATH += $(topsrcdir)/mfbt
+
+HOST_LIBS = -lz
+
 CPPSRCS += \
   ElfLoader.cpp \
   CustomElf.cpp \
   Mappable.cpp \
   $(NULL)
 endif
 
 include $(topsrcdir)/config/rules.mk
new file mode 100644
--- /dev/null
+++ b/mozglue/linker/SeekableZStream.h
@@ -0,0 +1,48 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this file,
+ * You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#ifndef SeekableZStream_h
+#define SeekableZStream_h
+
+#include "Zip.h"
+
+/**
+ * Seekable compressed streams are created by splitting the original
+ * decompressed data into small chunks and compressing these chunks
+ * individually.
+ *
+ * The seekable compressed file format consists of a header defined below,
+ * followed by a table of 32-bit words containing the offsets for each
+ * individual compressed chunk, then followed by the compressed chunks.
+ */
+
+#pragma pack(1)
+struct SeekableZStreamHeader: public Zip::SignedEntity<SeekableZStreamHeader>
+{
+  SeekableZStreamHeader()
+  : Zip::SignedEntity<SeekableZStreamHeader>(magic)
+  , totalSize(0), chunkSize(0), nChunks(0), lastChunkSize(0) { }
+
+  /* Reuse Zip::SignedEntity to handle the magic number used in the Seekable
+   * ZStream file format. The magic number is "SeZz". */
+  static const uint32_t magic = 0x7a5a6553;
+
+  /* Total size of the stream: header, offset table and compressed chunks. */
+  le_uint32 totalSize;
+
+  /* Size of each chunk of decompressed data, except possibly the last one */
+  le_uint32 chunkSize;
+
+  /* Number of chunks, which is also the number of offset table entries */
+  le_uint32 nChunks;
+
+  /* Decompressed size of the last chunk (> 0, <= chunkSize) */
+  le_uint32 lastChunkSize;
+};
+#pragma pack()
+
+MOZ_STATIC_ASSERT(sizeof(SeekableZStreamHeader) == 5 * 4,
+                  "SeekableZStreamHeader should be 5 32-bits words");
+
+#endif /* SeekableZStream_h */
--- a/mozglue/linker/Utils.h
+++ b/mozglue/linker/Utils.h
@@ -11,40 +11,66 @@
 #include <unistd.h>
 #include "mozilla/Assertions.h"
 
 /**
  * On architectures that are little endian and that support unaligned reads,
  * we can use direct type, but on others, we want to have a special class
  * to handle conversion and alignment issues.
  */
-#if defined(__i386__) || defined(__x86_64__)
+#if !defined(DEBUG) && (defined(__i386__) || defined(__x86_64__))
 typedef uint16_t le_uint16;
 typedef uint32_t le_uint32;
 #else
 
 /**
  * Template that allows to find an unsigned int type from a (computed) bit size
  */
 template <int s> struct UInt { };
 template <> struct UInt<16> { typedef uint16_t Type; };
 template <> struct UInt<32> { typedef uint32_t Type; };
 
 /**
- * Template to read 2 n-bit sized words as a 2*n-bit sized word, doing
+ * Template to access 2 n-bit sized words as a 2*n-bit sized word, doing
  * conversion from little endian and avoiding alignment issues.
  */
 template <typename T>
 class le_to_cpu
 {
 public:
-  operator typename UInt<16 * sizeof(T)>::Type() const
+  typedef typename UInt<16 * sizeof(T)>::Type Type;
+  /* Reassemble the full value: a is the low half, b is the high half. */
+  operator Type() const
   {
     return (b << (sizeof(T) * 8)) | a;
   }
+  /* Store v as two halves: low-order bits in a, high-order bits in b. */
+  const le_to_cpu& operator =(const Type &v)
+  {
+    a = v & ((1 << (sizeof(T) * 8)) - 1);
+    b = v >> (sizeof(T) * 8);
+    return *this;
+  }
+  /* The default constructor does not initialize a and b. */
+  le_to_cpu() { }
+  le_to_cpu(const Type &v)
+  {
+    operator =(v);
+  }
+  /* Add v to the stored value, going through the native Type. */
+  const le_to_cpu& operator +=(const Type &v)
+  {
+    return operator =(operator Type() + v);
+  }
+  /* Increment by 1. Despite the postfix form, returns the updated object. */
+  const le_to_cpu& operator ++(int)
+  {
+    return operator =(operator Type() + 1);
+  }
+
 private:
   T a, b;
 };
 
 /**
  * Type definitions
  */
 typedef le_to_cpu<unsigned char> le_uint16;
--- a/mozglue/linker/Zip.h
+++ b/mozglue/linker/Zip.h
@@ -137,16 +137,17 @@ private:
 
   private:
     const char *buf;
     size_t length;
   };
 
 /* All the following types need to be packed */
 #pragma pack(1)
+public:
   /**
    * A Zip archive is an aggregate of entities which all start with a
    * signature giving their type. This template is to be used as a base
    * class for these entities.
    */
   template <typename T>
   class SignedEntity
   {
@@ -157,20 +158,23 @@ private:
      */
     static const T *validate(const void *buf)
     {
       const T *ret = static_cast<const T *>(buf);
       if (ret->signature == T::magic)
         return ret;
       return NULL;
     }
+
+    SignedEntity(uint32_t magic): signature(magic) { }
   private:
     le_uint32 signature;
   };
 
+private:
   /**
    * Header used to describe a Local File entry. The header is followed by
    * the file name and an extra field, then by the data stream.
    */
   struct LocalFile: public SignedEntity<LocalFile>
   {
     /* Signature for a Local File header */
     static const uint32_t magic = 0x04034b50;
new file mode 100644
--- /dev/null
+++ b/mozglue/linker/szip.cpp
@@ -0,0 +1,172 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this file,
+ * You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#include <algorithm>
+#include <sys/stat.h>
+#include <cstring>
+#include <zlib.h>
+#include <fcntl.h>
+#include <errno.h>
+#include "mozilla/Assertions.h"
+#include "SeekableZStream.h"
+#include "Utils.h"
+#include "Logging.h"
+
+static const size_t CHUNK = 16384;
+
+/**
+ * Generate a seekable compressed stream.
+ *
+ * Usage: szip file_to_compress out_file
+ *
+ * The input file is split in chunks of CHUNK bytes, each chunk being
+ * deflated on its own. The output file starts with a SeekableZStreamHeader,
+ * followed by a table with the offset of each compressed chunk, then by the
+ * compressed chunks themselves.
+ *
+ * Returns 0 on success. On failure, a message is logged and 1 is returned.
+ */
+int main(int argc, char* argv[])
+{
+  if (argc != 3 || !argv[1] || !argv[2] || (strcmp(argv[1], argv[2]) == 0)) {
+    log("usage: %s file_to_compress out_file", argv[0]);
+    return 1;
+  }
+
+  AutoCloseFD origFd = open(argv[1], O_RDONLY);
+  if (origFd == -1) {
+    log("Couldn't open %s: %s", argv[1], strerror(errno));
+    return 1;
+  }
+
+  struct stat st;
+  int ret = fstat(origFd, &st);
+  if (ret == -1) {
+    log("Couldn't stat %s: %s", argv[1], strerror(errno));
+    return 1;
+  }
+
+  size_t origSize = st.st_size;
+  log("Size = %lu", origSize);
+  if (origSize == 0) {
+    log("Won't compress %s: it's empty", argv[1]);
+    return 1;
+  }
+
+  /* Mmap the original file */
+  MappedPtr origBuf;
+  origBuf.Assign(mmap(NULL, origSize, PROT_READ, MAP_PRIVATE, origFd, 0), origSize);
+  if (origBuf == MAP_FAILED) {
+    log("Couldn't mmap %s: %s", argv[1], strerror(errno));
+    return 1;
+  }
+
+  /* Create the compressed file */
+  AutoCloseFD outFd = open(argv[2], O_RDWR | O_CREAT | O_TRUNC, 0666);
+  if (outFd == -1) {
+    log("Couldn't open %s: %s", argv[2], strerror(errno));
+    return 1;
+  }
+
+  /* Expected total number of chunks */
+  size_t nChunks = ((origSize + CHUNK - 1) / CHUNK);
+
+  /* The first chunk is going to be stored after the header and the offset
+   * table */
+  size_t offset = sizeof(SeekableZStreamHeader) + nChunks * sizeof(uint32_t);
+
+  /* Give enough room for the header and the offset table, and map them.
+   * posix_fallocate returns an error number and doesn't set errno. The
+   * check is done at runtime because the header is written through the
+   * mapping below, which requires the room to actually be there. */
+  ret = posix_fallocate(outFd, 0, offset);
+  if (ret != 0) {
+    log("Couldn't extend %s: %s", argv[2], strerror(ret));
+    return 1;
+  }
+  MappedPtr headerMap;
+  headerMap.Assign(mmap(NULL, offset, PROT_READ | PROT_WRITE, MAP_SHARED,
+                        outFd, 0), offset);
+  if (headerMap == MAP_FAILED) {
+    log("Couldn't mmap %s: %s", argv[2], strerror(errno));
+    return 1;
+  }
+
+  SeekableZStreamHeader *header = new (headerMap) SeekableZStreamHeader;
+  le_uint32 *entry = reinterpret_cast<le_uint32 *>(&header[1]);
+
+  /* Initialize header */
+  header->chunkSize = CHUNK;
+  header->totalSize = offset;
+
+  /* Seek at the end of the output file, where we're going to append
+   * compressed streams */
+  if (lseek(outFd, offset, SEEK_SET) == static_cast<off_t>(-1)) {
+    log("Couldn't seek %s: %s", argv[2], strerror(errno));
+    return 1;
+  }
+
+  /* Initialize zlib structure */
+  z_stream zStream;
+  memset(&zStream, 0, sizeof(zStream));
+
+  /* Compression buffer */
+  AutoDeleteArray<Bytef> outBuf = new Bytef[CHUNK * 2];
+
+  Bytef *origData = static_cast<Bytef*>(origBuf);
+  size_t avail = 0;
+  while (origSize) {
+    avail = std::min(origSize, CHUNK);
+
+    /* Compress chunk. Failures are checked at runtime rather than with
+     * MOZ_ASSERT, which only does something in debug builds. */
+    ret = deflateInit(&zStream, 9);
+    if (ret != Z_OK) {
+      log("Couldn't initialize compression: zlib error %d", ret);
+      return 1;
+    }
+    zStream.avail_in = avail;
+    zStream.next_in = origData;
+    zStream.avail_out = CHUNK * 2;
+    zStream.next_out = outBuf;
+    ret = deflate(&zStream, Z_FINISH);
+    if (ret != Z_STREAM_END) {
+      log("Couldn't compress %s: zlib error %d", argv[1], ret);
+      deflateEnd(&zStream);
+      return 1;
+    }
+    ret = deflateEnd(&zStream);
+    MOZ_ASSERT(ret == Z_OK);
+    MOZ_ASSERT(zStream.avail_out > 0);
+
+    /* Write chunk. A failed or short write would leave the offset table
+     * pointing at truncated data, so treat both as fatal. */
+    size_t len = 2 * CHUNK - zStream.avail_out;
+    ssize_t written = write(outFd, outBuf, len);
+    if (written == -1) {
+      log("Couldn't write to %s: %s", argv[2], strerror(errno));
+      return 1;
+    }
+    if (static_cast<size_t>(written) != len) {
+      log("Couldn't write to %s: short write", argv[2]);
+      return 1;
+    }
+
+    /* Adjust headers */
+    header->totalSize += len;
+    *entry++ = offset;
+    header->nChunks++;
+
+    /* Prepare for next iteration */
+    origSize -= avail;
+    origData += avail;
+    offset += len;
+  }
+  header->lastChunkSize = avail;
+
+  MOZ_ASSERT(header->nChunks == nChunks);
+  log("Compressed size is %lu", offset);
+
+  return 0;
+}