mozglue/linker/szip.cpp
author Jim Blandy <jimb@mozilla.com>
Tue, 24 Jun 2014 22:12:07 -0700
changeset 199513 69d61e42d5dfbf4588b72449249ff3e7f2125304
parent 197506 79eab42e54839fef24c3e1604466e64a1c082277
child 201849 ba853c3d60404339b34dfb3d4671fd61c2be5dbe
permissions -rw-r--r--
Bug 914753: Make Emacs file variable header lines correct, or at least consistent. DONTBUILD r=ehsan The -*- file variable lines -*- establish per-file settings that Emacs will pick up. This patch makes the following changes to those lines (and touches nothing else): - Never set the buffer's mode. Years ago, Emacs did not have a good JavaScript mode, so it made sense to use Java or C++ mode in .js files. However, Emacs has had js-mode for years now; it's perfectly serviceable, and is available and enabled by default in all major Emacs packagings. Selecting a mode in the -*- file variable line -*- is almost always the wrong thing to do anyway. It overrides Emacs's default choice, which is (now) reasonable; and even worse, it overrides settings the user might have made in their '.emacs' file for that file extension. It's only useful when there's something specific about that particular file that makes a particular mode appropriate. - Correctly propagate settings that establish the correct indentation level for this file: c-basic-offset and js2-basic-offset should be js-indent-level. Whatever value they're given should be preserved; different parts of our tree use different indentation styles. - We don't use tabs in Mozilla JS code. Always set indent-tabs-mode: nil. Remove tab-width: settings, at least in files that don't contain tab characters. - Remove js2-mode settings that belong in the user's .emacs file, like js2-skip-preprocessor-directives.

/* This Source Code Form is subject to the terms of the Mozilla Public
 * License, v. 2.0. If a copy of the MPL was not distributed with this file,
 * You can obtain one at http://mozilla.org/MPL/2.0/. */

#include <algorithm>
#include <map>
#include <sys/stat.h>
#include <string>
#include <sstream>
#include <cstring>
#include <cstdlib>
#include <zlib.h>
#include <fcntl.h>
#include <errno.h>
#include "mozilla/Assertions.h"
#include "mozilla/Scoped.h"
#include "SeekableZStream.h"
#include "Utils.h"
#include "Logging.h"

Logging Logging::Singleton;

const char *filterName[] = {
  "none",
  "thumb",
  "arm",
  "x86",
  "auto"
};

/* Maximum supported size for chunkSize */
static const size_t maxChunkSize =
  1 << (8 * std::min(sizeof(((SeekableZStreamHeader *)nullptr)->chunkSize),
                     sizeof(((SeekableZStreamHeader *)nullptr)->lastChunkSize)) - 1);

class Buffer: public MappedPtr
{
public:
  virtual ~Buffer() { }

  virtual bool Resize(size_t size)
  {
    MemoryRange buf = mmap(nullptr, size, PROT_READ | PROT_WRITE,
                           MAP_PRIVATE | MAP_ANON, -1, 0);
    if (buf == MAP_FAILED)
      return false;
    if (*this != MAP_FAILED)
      memcpy(buf, *this, std::min(size, GetLength()));
    Assign(buf);
    return true;
  }

  bool Fill(Buffer &other)
  {
    size_t size = other.GetLength();
    if (!size || !Resize(size))
      return false;
    memcpy(static_cast<void *>(*this), static_cast<void *>(other), size);
    return true;
  }
};

class FileBuffer: public Buffer
{
public:
  bool Init(const char *name, bool writable_ = false)
  {
    fd = open(name, writable_ ? O_RDWR | O_CREAT | O_TRUNC : O_RDONLY, 0666);
    if (fd == -1)
      return false;
    writable = writable_;
    return true;
  }

  virtual bool Resize(size_t size)
  {
    if (writable) {
      if (ftruncate(fd, size) == -1)
        return false;
    }
    Assign(MemoryRange::mmap(nullptr, size,
                             PROT_READ | (writable ? PROT_WRITE : 0),
                             writable ? MAP_SHARED : MAP_PRIVATE, fd, 0));
    return this != MAP_FAILED;
  }

  int getFd()
  {
    return fd;
  }

private:
  AutoCloseFD fd;
  bool writable;
};

class FilteredBuffer: public Buffer
{
public:
  void Filter(Buffer &other, SeekableZStream::FilterId filter, size_t chunkSize)
  {
    SeekableZStream::ZStreamFilter filterCB =
      SeekableZStream::GetFilter(filter);
    MOZ_ASSERT(filterCB);
    Fill(other);
    size_t size = other.GetLength();
    Bytef *data = reinterpret_cast<Bytef *>(static_cast<void *>(*this));
    size_t avail = 0;
    /* Filter needs to be applied in chunks. */
    while (size) {
      avail = std::min(size, chunkSize);
      filterCB(data - static_cast<unsigned char *>(static_cast<void *>(*this)),
               SeekableZStream::FILTER, data, avail);
      size -= avail;
      data += avail;
    }
  }
};

template <typename T>
class Dictionary: public Buffer
{
  typedef T piece;
  typedef std::pair<piece, int> stat_pair;

  static bool stat_cmp(stat_pair a, stat_pair b)
  {
    return a.second < b.second;
  }

public:
  Dictionary(Buffer &inBuf, size_t size)
  {
    if (!size || !Resize(size))
      return;
    DEBUG_LOG("Creating dictionary");
    piece *origBufPieces = reinterpret_cast<piece *>(
                           static_cast<void *>(inBuf));
    std::map<piece, int> stats;
    for (unsigned int i = 0; i < inBuf.GetLength() / sizeof(piece); i++) {
      stats[origBufPieces[i]]++;
    }
    std::vector<stat_pair> statsVec(stats.begin(), stats.end());
    std::sort(statsVec.begin(), statsVec.end(), stat_cmp);

    piece *dictPieces = reinterpret_cast<piece *>(
                        static_cast<void *>(*this));
    typename std::vector<stat_pair>::reverse_iterator it = statsVec.rbegin();
    for (int i = size / sizeof(piece); i > 0 && it < statsVec.rend();
         i--, ++it) {
      dictPieces[i - 1] = it->first;
    }
  }
};

class SzipAction
{
public:
  virtual int run(const char *name, Buffer &origBuf,
                  const char *outName, Buffer &outBuf) = 0;

  virtual ~SzipAction() {}
};

class SzipDecompress: public SzipAction
{
public:
  int run(const char *name, Buffer &origBuf,
          const char *outName, Buffer &outBuf);
};


class SzipCompress: public SzipAction
{
public:
  int run(const char *name, Buffer &origBuf,
          const char *outName, Buffer &outBuf);

  SzipCompress(size_t aChunkSize, SeekableZStream::FilterId aFilter,
               size_t aDictSize)
  : chunkSize(aChunkSize ? aChunkSize : 16384)
  , filter(aFilter)
  , dictSize(aDictSize)
  {}

  const static signed char winSizeLog = 15;
  const static size_t winSize = 1 << winSizeLog;

  const static SeekableZStream::FilterId DEFAULT_FILTER =
#if defined(TARGET_THUMB)
    SeekableZStream::BCJ_THUMB;
#elif defined(TARGET_ARM)
    SeekableZStream::BCJ_ARM;
#elif defined(TARGET_X86)
    SeekableZStream::BCJ_X86;
#else
    SeekableZStream::NONE;
#endif

private:

  int do_compress(Buffer &origBuf, Buffer &outBuf, const unsigned char *aDict,
                  size_t aDictSize, SeekableZStream::FilterId aFilter);

  size_t chunkSize;
  SeekableZStream::FilterId filter;
  size_t dictSize;
};

/* Decompress a seekable compressed stream */
int SzipDecompress::run(const char *name, Buffer &origBuf,
                        const char *outName, Buffer &outBuf)
{
  size_t origSize = origBuf.GetLength();
  if (origSize < sizeof(SeekableZStreamHeader)) {
    ERROR("%s is not compressed", name);
    return 0;
  }

  SeekableZStream zstream;
  if (!zstream.Init(origBuf, origSize))
    return 0;

  size_t size = zstream.GetUncompressedSize();

  /* Give enough room for the uncompressed data */
  if (!outBuf.Resize(size)) {
    ERROR("Error resizing %s: %s", outName, strerror(errno));
    return 1;
  }

  if (!zstream.Decompress(outBuf, 0, size))
    return 1;

  return 0;
}

/* Generate a seekable compressed stream. */
int SzipCompress::run(const char *name, Buffer &origBuf,
                      const char *outName, Buffer &outBuf)
{
  size_t origSize = origBuf.GetLength();
  if (origSize == 0) {
    ERROR("Won't compress %s: it's empty", name);
    return 1;
  }
  if (SeekableZStreamHeader::validate(origBuf)) {
    WARN("Skipping %s: it's already a szip", name);
    return 0;
  }
  bool compressed = false;
  LOG("Size = %" PRIuSize, origSize);

  /* Allocate a buffer the size of the uncompressed data: we don't want
   * a compressed file larger than that anyways. */
  if (!outBuf.Resize(origSize)) {
    ERROR("Couldn't allocate output buffer: %s", strerror(errno));
    return 1;
  }

  /* Find the most appropriate filter */
  SeekableZStream::FilterId firstFilter, lastFilter;
  bool scanFilters;
  if (filter == SeekableZStream::FILTER_MAX) {
    firstFilter = SeekableZStream::NONE;
    lastFilter = SeekableZStream::FILTER_MAX;
    scanFilters = true;
  } else {
    firstFilter = lastFilter = filter;
    ++lastFilter;
    scanFilters = false;
  }

  mozilla::ScopedDeletePtr<Buffer> filteredBuf;
  Buffer *origData;
  for (SeekableZStream::FilterId f = firstFilter; f < lastFilter; ++f) {
    FilteredBuffer *filteredTmp = nullptr;
    Buffer tmpBuf;
    if (f != SeekableZStream::NONE) {
      DEBUG_LOG("Applying filter \"%s\"", filterName[f]);
      filteredTmp = new FilteredBuffer();
      filteredTmp->Filter(origBuf, f, chunkSize);
      origData = filteredTmp;
    } else {
      origData = &origBuf;
    }
    if (dictSize  && !scanFilters) {
      filteredBuf = filteredTmp;
      break;
    }
    DEBUG_LOG("Compressing with no dictionary");
    if (do_compress(*origData, tmpBuf, nullptr, 0, f) == 0) {
      if (tmpBuf.GetLength() < outBuf.GetLength()) {
        outBuf.Fill(tmpBuf);
        compressed = true;
        filter = f;
        filteredBuf = filteredTmp;
        continue;
      }
    }
    delete filteredTmp;
  }

  origData = filteredBuf ? filteredBuf : &origBuf;

  if (dictSize) {
    Dictionary<uint64_t> dict(*origData, dictSize ? SzipCompress::winSize : 0);

    /* Find the most appropriate dictionary size */
    size_t firstDictSize, lastDictSize;
    if (dictSize == (size_t) -1) {
      /* If we scanned for filters, we effectively already tried dictSize=0 */
      firstDictSize = scanFilters ? 4096 : 0;
      lastDictSize = SzipCompress::winSize;
    } else {
      firstDictSize = lastDictSize = dictSize;
    }

    Buffer tmpBuf;
    for (size_t d = firstDictSize; d <= lastDictSize; d += 4096) {
      DEBUG_LOG("Compressing with dictionary of size %" PRIuSize, d);
      if (do_compress(*origData, tmpBuf, static_cast<unsigned char *>(dict)
                      + SzipCompress::winSize - d, d, filter))
        continue;
      if (!compressed || tmpBuf.GetLength() < outBuf.GetLength()) {
        outBuf.Fill(tmpBuf);
        compressed = true;
        dictSize = d;
      }
    }
  }

  if (!compressed) {
    outBuf.Fill(origBuf);
    LOG("Not compressed");
    return 0;
  }

  if (dictSize == (size_t) -1)
    dictSize = 0;

  DEBUG_LOG("Used filter \"%s\" and dictionary size of %" PRIuSize,
            filterName[filter], dictSize);
  LOG("Compressed size is %" PRIuSize, outBuf.GetLength());

  /* Sanity check */
  Buffer tmpBuf;
  SzipDecompress decompress;
  if (decompress.run("buffer", outBuf, "buffer", tmpBuf))
    return 1;

  size_t size = tmpBuf.GetLength();
  if (size != origSize) {
    ERROR("Compression error: %" PRIuSize " != %" PRIuSize, size, origSize);
    return 1;
  }
  if (memcmp(static_cast<void *>(origBuf), static_cast<void *>(tmpBuf), size)) {
    ERROR("Compression error: content mismatch");
    return 1;
  }
  return 0;
}

int SzipCompress::do_compress(Buffer &origBuf, Buffer &outBuf,
                              const unsigned char *aDict, size_t aDictSize,
                              SeekableZStream::FilterId aFilter)
{
  size_t origSize = origBuf.GetLength();
  MOZ_ASSERT(origSize != 0);

  /* Expected total number of chunks */
  size_t nChunks = ((origSize + chunkSize - 1) / chunkSize);

  /* The first chunk is going to be stored after the header, the dictionary
   * and the offset table */
  size_t offset = sizeof(SeekableZStreamHeader) + aDictSize
                  + nChunks * sizeof(uint32_t);

  if (offset >= origSize)
    return 1;

    /* Allocate a buffer the size of the uncompressed data: we don't want
   * a compressed file larger than that anyways. */
  if (!outBuf.Resize(origSize)) {
    ERROR("Couldn't allocate output buffer: %s", strerror(errno));
    return 1;
  }

  SeekableZStreamHeader *header = new (outBuf) SeekableZStreamHeader;
  unsigned char *dictionary = static_cast<unsigned char *>(
                              outBuf + sizeof(SeekableZStreamHeader));
  le_uint32 *entry =
    reinterpret_cast<le_uint32 *>(dictionary + aDictSize);

  /* Initialize header */
  header->chunkSize = chunkSize;
  header->dictSize = aDictSize;
  header->totalSize = offset;
  header->windowBits = -SzipCompress::winSizeLog; // Raw stream,
                                                  // window size of 32k.
  header->filter = aFilter;
  if (aDictSize)
    memcpy(dictionary, aDict, aDictSize);

  /* Initialize zlib structure */
  z_stream zStream;
  memset(&zStream, 0, sizeof(zStream));
  zStream.avail_out = origSize - offset;
  zStream.next_out = static_cast<Bytef*>(outBuf) + offset;

  size_t avail = 0;
  size_t size = origSize;
  unsigned char *data = reinterpret_cast<unsigned char *>(
                        static_cast<void *>(origBuf));
  while (size) {
    avail = std::min(size, chunkSize);

    /* Compress chunk */
    int ret = deflateInit2(&zStream, 9, Z_DEFLATED, header->windowBits,
                           MAX_MEM_LEVEL, Z_DEFAULT_STRATEGY);
    if (aDictSize)
      deflateSetDictionary(&zStream, dictionary, aDictSize);
    MOZ_ASSERT(ret == Z_OK);
    zStream.avail_in = avail;
    zStream.next_in = data;
    ret = deflate(&zStream, Z_FINISH);
    MOZ_ASSERT(ret == Z_STREAM_END);
    ret = deflateEnd(&zStream);
    MOZ_ASSERT(ret == Z_OK);
    if (zStream.avail_out <= 0)
      return 1;

    size_t len = origSize - offset - zStream.avail_out;

    /* Adjust headers */
    header->totalSize += len;
    *entry++ = offset;
    header->nChunks++;

    /* Prepare for next iteration */
    size -= avail;
    data += avail;
    offset += len;
  }
  header->lastChunkSize = avail;
  MOZ_ASSERT(header->totalSize == offset);
  MOZ_ASSERT(header->nChunks == nChunks);

  if (!outBuf.Resize(offset)) {
    ERROR("Error truncating output: %s", strerror(errno));
    return 1;
  }

  return 0;

}

bool GetSize(const char *str, size_t *out)
{
  char *end;
  MOZ_ASSERT(out);
  errno = 0;
  *out = strtol(str, &end, 10);
  return (!errno && !*end);
}

int main(int argc, char* argv[])
{
  mozilla::ScopedDeletePtr<SzipAction> action;
  char **firstArg;
  bool compress = true;
  size_t chunkSize = 0;
  SeekableZStream::FilterId filter = SzipCompress::DEFAULT_FILTER;
  size_t dictSize = (size_t) 0;

  Logging::Init();

  for (firstArg = &argv[1]; argc > 2; argc--, firstArg++) {
    if (!firstArg[0] || firstArg[0][0] != '-')
      break;
    if (strcmp(firstArg[0], "-d") == 0) {
      compress = false;
    } else if (strcmp(firstArg[0], "-c") == 0) {
      firstArg++;
      argc--;
      if (!firstArg[0])
        break;
      if (!GetSize(firstArg[0], &chunkSize) || !chunkSize ||
          (chunkSize % 4096) || (chunkSize > maxChunkSize)) {
        ERROR("Invalid chunk size");
        return 1;
      }
    } else if (strcmp(firstArg[0], "-f") == 0) {
      firstArg++;
      argc--;
      if (!firstArg[0])
        break;
      bool matched = false;
      for (unsigned int i = 0; i < sizeof(filterName) / sizeof(char *); ++i) {
        if (strcmp(firstArg[0], filterName[i]) == 0) {
          filter = static_cast<SeekableZStream::FilterId>(i);
          matched = true;
          break;
        }
      }
      if (!matched) {
        ERROR("Invalid filter");
        return 1;
      }
    } else if (strcmp(firstArg[0], "-D") == 0) {
      firstArg++;
      argc--;
      if (!firstArg[0])
        break;
      if (strcmp(firstArg[0], "auto") == 0) {
        dictSize = -1;
      } else if (!GetSize(firstArg[0], &dictSize) || (dictSize >= 1 << 16)) {
        ERROR("Invalid dictionary size");
        return 1;
      }
    }
  }

  if (argc != 2 || !firstArg[0]) {
    LOG("usage: %s [-d] [-c CHUNKSIZE] [-f FILTER] [-D DICTSIZE] file",
        argv[0]);
    return 1;
  }

  if (compress) {
    action = new SzipCompress(chunkSize, filter, dictSize);
  } else {
    if (chunkSize) {
      ERROR("-c is incompatible with -d");
      return 1;
    }
    if (dictSize) {
      ERROR("-D is incompatible with -d");
      return 1;
    }
    action = new SzipDecompress();
  }

  std::stringstream tmpOutStream;
  tmpOutStream << firstArg[0] << ".sz." << getpid();
  std::string tmpOut(tmpOutStream.str());
  int ret;
  struct stat st;
  {
    FileBuffer origBuf;
    if (!origBuf.Init(firstArg[0])) {
      ERROR("Couldn't open %s: %s", firstArg[0], strerror(errno));
      return 1;
    }

    ret = fstat(origBuf.getFd(), &st);
    if (ret == -1) {
      ERROR("Couldn't stat %s: %s", firstArg[0], strerror(errno));
      return 1;
    }

    size_t origSize = st.st_size;

    /* Mmap the original file */
    if (!origBuf.Resize(origSize)) {
      ERROR("Couldn't mmap %s: %s", firstArg[0], strerror(errno));
      return 1;
    }

    /* Create the compressed file */
    FileBuffer outBuf;
    if (!outBuf.Init(tmpOut.c_str(), true)) {
      ERROR("Couldn't open %s: %s", tmpOut.c_str(), strerror(errno));
      return 1;
    }

    ret = action->run(firstArg[0], origBuf, tmpOut.c_str(), outBuf);
    if ((ret == 0) && (fstat(outBuf.getFd(), &st) == -1)) {
      st.st_size = 0;
    }
  }

  if ((ret == 0) && st.st_size) {
    rename(tmpOut.c_str(), firstArg[0]);
  } else {
    unlink(tmpOut.c_str());
  }
  return ret;
}