Bug 683127 part 1 - Simple Zip reader for the new ELF Linker. r=tglek,r=mwu
author Mike Hommey <mh+mozilla@glandium.org>
Wed, 11 Jan 2012 11:10:55 +0100
changeset 84256 12f6fad6692480cf9273698fb4029672834d250c
parent 84255 2a6cac678c94d9fe78c11b837b270e3304fc2df7
child 84257 85c7cdc1a91601eaf43562b736bd65b768d64d14
push id 21832
push user bmo@edmorley.co.uk
push date Wed, 11 Jan 2012 17:04:15 +0000
treeherder mozilla-central@40c9f9ff9fd5 [default view] [failures only]
perfherder [talos] [build metrics] [platform microbench] (compared to previous push)
reviewers tglek, mwu
bugs 683127
milestone 12.0a1
first release with
nightly linux32
nightly linux64
nightly mac
nightly win32
nightly win64
last release without
nightly linux32
nightly linux64
nightly mac
nightly win32
nightly win64
Bug 683127 part 1 - Simple Zip reader for the new ELF Linker. r=tglek,r=mwu
mozglue/linker/Logging.h
mozglue/linker/Makefile.in
mozglue/linker/Utils.h
mozglue/linker/Zip.cpp
mozglue/linker/Zip.h
new file mode 100644
--- /dev/null
+++ b/mozglue/linker/Logging.h
@@ -0,0 +1,22 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this file,
+ * You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#ifndef Logging_h
+#define Logging_h
+
+#ifdef ANDROID /* log() goes to the Android log, ERROR priority, tag "GeckoLinker" */
+#include <android/log.h>
+#define log(...) __android_log_print(ANDROID_LOG_ERROR, "GeckoLinker", __VA_ARGS__)
+#else /* log() prints to stderr; format must be a string literal ("\n" is pasted on) */
+#include <cstdio>
+#define log(format, ...) fprintf(stderr, format "\n", ##__VA_ARGS__)
+#endif
+/* debug() is log() when MOZ_DEBUG_LINKER is defined, and expands to nothing otherwise */
+#ifdef MOZ_DEBUG_LINKER
+#define debug log
+#else
+#define debug(...)
+#endif
+
+#endif /* Logging_h */
new file mode 100644
--- /dev/null
+++ b/mozglue/linker/Makefile.in
@@ -0,0 +1,21 @@
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this file,
+# You can obtain one at http://mozilla.org/MPL/2.0/.
+
+DEPTH		= ../..
+topsrcdir	= @top_srcdir@
+srcdir		= @srcdir@
+VPATH		= @srcdir@
+
+include $(DEPTH)/config/autoconf.mk
+
+MODULE		= mozglue
+LIBRARY_NAME	= linker
+FORCE_STATIC_LIB= 1
+STL_FLAGS =
+
+CPPSRCS = \
+  Zip.cpp \
+  $(NULL)
+
+include $(topsrcdir)/config/rules.mk
new file mode 100644
--- /dev/null
+++ b/mozglue/linker/Utils.h
@@ -0,0 +1,93 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this file,
+ * You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#ifndef Utils_h
+#define Utils_h
+
+#include <stdint.h>
+
+/**
+ * On architectures that are little endian and that support unaligned reads,
+ * we can use direct type, but on others, we want to have a special class
+ * to handle conversion and alignment issues.
+ */
+#if defined(__i386__) || defined(__x86_64__)
+typedef uint16_t le_uint16;
+typedef uint32_t le_uint32;
+#else
+
+/**
+ * Template that allows to find an unsigned int type from a (computed) bit size
+ */
+template <int s> struct UInt { };
+template <> struct UInt<16> { typedef uint16_t Type; };
+template <> struct UInt<32> { typedef uint32_t Type; };
+
+/**
+ * Reads two consecutive n-bit words (T) as one 2*n-bit little endian value.
+ * The high half is widened before shifting so it never shifts into a sign bit.
+ */
+template <typename T>
+class le_to_cpu
+{
+public:
+  operator typename UInt<16 * sizeof(T)>::Type() const
+  {
+    return (static_cast<typename UInt<16 * sizeof(T)>::Type>(b) << (sizeof(T) * 8)) | a;
+  }
+private:
+  T a, b; /* a: low-order half (first in memory), b: high-order half */
+};
+
+/**
+ * Type definitions
+ */
+typedef le_to_cpu<unsigned char> le_uint16;
+typedef le_to_cpu<le_uint16> le_uint32;
+#endif
+
+/**
+ * AutoCloseFD owns a POSIX file descriptor and close()s it when destroyed.
+ * -1 means "no descriptor held". Instances must not be copied: the copy and
+ * the original would both close the same descriptor.
+ */
+class AutoCloseFD
+{
+public:
+  AutoCloseFD(): fd(-1) { }
+  AutoCloseFD(int fd): fd(fd) { }
+  ~AutoCloseFD()
+  {
+    if (fd != -1)
+      close(fd);
+  }
+
+  /* Gives access to the raw descriptor, which stays owned by this object */
+  operator int() const { return fd; }
+
+  /* Releases ownership: returns the descriptor without closing it */
+  int forget()
+  {
+    int _fd = fd;
+    fd = -1;
+    return _fd;
+  }
+
+  bool operator ==(int other) const { return fd == other; }
+
+  /* Takes ownership of other. The previous descriptor is closed, unless it
+   * is other itself: closing it would leave us holding a dead descriptor. */
+  int operator =(int other)
+  {
+    if (fd != -1 && fd != other)
+      close(fd);
+    fd = other;
+    return fd;
+  }
+
+private:
+  int fd;
+};
+
+#endif /* Utils_h */
new file mode 100644
--- /dev/null
+++ b/mozglue/linker/Zip.cpp
@@ -0,0 +1,180 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this file,
+ * You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#include <sys/mman.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#include <errno.h>
+#include <unistd.h>
+#include <cstdlib>
+#include <algorithm>
+#include "Logging.h"
+#include "Zip.h"
+
+/* Opens and maps the archive. Every failure is logged and leaves the object
+ * with mapped == MAP_FAILED and all cursors NULL, so that GetStream() fails
+ * cleanly instead of reading an uninitialized nextFile. */
+Zip::Zip(const char *filename, ZipCollection *collection)
+: name(strdup(filename)), mapped(MAP_FAILED), size(0)
+, nextFile(NULL), nextDir(NULL), entries(NULL), parent(collection)
+{
+  /* Open and map the file in memory */
+  AutoCloseFD fd(open(name, O_RDONLY));
+  if (fd == -1) {
+    log("Error opening %s: %s", filename, strerror(errno));
+    return;
+  }
+  struct stat st;
+  if (fstat(fd, &st) == -1) {
+    log("Error stating %s: %s", filename, strerror(errno));
+    return;
+  }
+  size = st.st_size;
+  if (size <= sizeof(CentralDirectoryEnd)) {
+    log("Error reading %s: too short", filename);
+    return;
+  }
+  mapped = mmap(NULL, size, PROT_READ, MAP_SHARED, fd, 0);
+  if (mapped == MAP_FAILED) {
+    log("Error mmapping %s: %s", filename, strerror(errno));
+    return;
+  }
+  debug("Mapped %s @%p", filename, mapped);
+
+  /* Store the first Local File entry */
+  nextFile = LocalFile::validate(mapped);
+}
+
+Zip::~Zip()
+{
+  if (parent) /* a collection was given at construction: make it forget us */
+    parent->Forget(this);
+  if (mapped != MAP_FAILED) { /* mapped stays MAP_FAILED when the constructor failed */
+    munmap(mapped, size);
+    debug("Unmapped %s @%p", name, mapped);
+  }
+  free(name); /* allocated with strdup() in the constructor */
+}
+
+bool
+Zip::GetStream(const char *path, Zip::Stream *out) const
+{
+  debug("%s - GetFile %s", name, path);
+  /* Returns true and fills *out when the stream for path is found, else false.
+   * Fast path: if the Local File header on store matches, we can return the
+   * corresponding stream right away. However, that header may not contain
+   * enough information, in which case the 3rd bit of the generalFlag is set.
+   * Unfortunately, this bit is also set in some archives even when we do have
+   * the data (most notably the android packages as built by the Mozilla
+   * build system). So instead of testing the generalFlag bit, only use the
+   * fast path when we haven't read the central directory entries yet, and
+   * when the compressed size in the header is filled in, i.e. non-zero
+   * (leaving it at zero is the normal thing to do when the bit is set). */
+  if (nextFile && nextFile->GetName().Equals(path) &&
+      !entries && (nextFile->compressedSize != 0)) {
+    debug("%s - %s was next file: fast path", name, path);
+    /* Fill Stream info from Local File header content */
+    const char *data = reinterpret_cast<const char *>(nextFile->GetData());
+    out->compressedBuf = data;
+    out->compressedSize = nextFile->compressedSize;
+    out->uncompressedSize = nextFile->uncompressedSize;
+    out->type = static_cast<Stream::Type>(uint16_t(nextFile->compression));
+
+    /* Find the next Local File header. It is usually simply following the
+     * compressed stream, but in cases where the 3rd bit of the generalFlag
+     * is set, there is a Data Descriptor header before. */
+    data += nextFile->compressedSize;
+    if ((nextFile->generalFlag & 0x8) && DataDescriptor::validate(data)) {
+      data += sizeof(DataDescriptor);
+    }
+    nextFile = LocalFile::validate(data); /* NULL when no Local File header follows */
+    return true;
+  }
+
+  /* If the directory entry we have in store doesn't match, scan the Central
+   * Directory for the entry corresponding to the given path */
+  if (!nextDir || !nextDir->GetName().Equals(path)) {
+    const DirectoryEntry *entry = GetFirstEntry();
+    debug("%s - Scan directory entries in search for %s", name, path);
+    while (entry && !entry->GetName().Equals(path)) {
+      entry = entry->GetNext();
+    }
+    nextDir = entry;
+  }
+  if (!nextDir) {
+    debug("%s - Couldn't find %s", name, path);
+    return false;
+  }
+
+  /* Find the Local File header corresponding to the Directory entry that
+   * was found. */
+  nextFile = LocalFile::validate(static_cast<const char *>(mapped)
+                             + nextDir->offset);
+  if (!nextFile) {
+    log("%s - Couldn't find the Local File header for %s", name, path);
+    return false;
+  }
+
+  /* Fill Stream info from Directory entry content */
+  const char *data = reinterpret_cast<const char *>(nextFile->GetData());
+  out->compressedBuf = data;
+  out->compressedSize = nextDir->compressedSize;
+  out->uncompressedSize = nextDir->uncompressedSize;
+  out->type = static_cast<Stream::Type>(uint16_t(nextDir->compression));
+
+  /* Store the next directory entry */
+  nextDir = nextDir->GetNext();
+  nextFile = NULL; /* entries is set by now, so the fast path is never taken again */
+  return true;
+}
+
+const Zip::DirectoryEntry *
+Zip::GetFirstEntry() const
+{
+  if (entries || mapped == MAP_FAILED)
+    return entries; // cached result, or NULL when nothing is mapped
+  /* Start from the last offset at which a whole CentralDirectoryEnd fits */
+  const CentralDirectoryEnd *end = NULL;
+  const char *_end = static_cast<const char *>(mapped) + size
+                     - sizeof(CentralDirectoryEnd);
+
+  /* Scan backwards, one byte at a time, for the Central Directory End */
+  for (; _end > mapped && !end; _end--)
+    end = CentralDirectoryEnd::validate(_end);
+  if (!end) {
+    log("%s - Couldn't find end of central directory record", name);
+    return NULL;
+  }
+  /* end->offset is relative to the start of the mapping; the result is cached */
+  entries = DirectoryEntry::validate(static_cast<const char *>(mapped)
+                                 + end->offset);
+  if (!entries) {
+    log("%s - Couldn't find central directory record", name);
+  }
+  return entries;
+}
+
+mozilla::TemporaryRef<Zip>
+ZipCollection::GetZip(const char *path)
+{
+  /* Search the list of Zips we already have for a match */
+  for (std::vector<Zip *>::iterator it = zips.begin(); it < zips.end(); ++it) {
+    if (strcmp((*it)->GetName(), path) == 0) /* plain string comparison of the names */
+      return *it;
+  }
+  Zip *zip = new Zip(path, this); /* passing this makes ~Zip() call Forget() */
+  zips.push_back(zip); /* raw pointer, removed again by Forget() */
+  return zip;
+}
+
+void
+ZipCollection::Forget(Zip *zip)
+{
+  debug("ZipCollection::Forget(\"%s\")", zip->GetName());
+  std::vector<Zip *>::iterator it = std::find(zips.begin(), zips.end(), zip);
+  if (it != zips.end()) /* std::find returns end() on a miss: never dereference it */
+    zips.erase(it);
+  else
+    debug("ZipCollection::Forget: didn't find \"%s\" in bookkeeping", zip->GetName());
+}
new file mode 100644
--- /dev/null
+++ b/mozglue/linker/Zip.h
@@ -0,0 +1,311 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this file,
+ * You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#ifndef Zip_h
+#define Zip_h
+
+#include <cstring>
+#include <stdint.h>
+#include <vector>
+#include "Utils.h"
+#include "mozilla/RefPtr.h"
+
+/**
+ * Forward declaration
+ */
+class ZipCollection;
+
+/**
+ * Class to handle access to Zip archive streams. The Zip archive is mapped
+ * in memory, and streams are direct references to that mapped memory.
+ * Zip files are assumed to be correctly formed. No boundary checks are
+ * performed, which means hand-crafted malicious Zip archives can make the
+ * code fail in bad ways. However, since the only intended use is to load
+ * libraries from Zip archives, there is no interest in making this code
+ * safe, since the libraries could contain malicious code anyways.
+ */
+class Zip: public mozilla::RefCounted<Zip>
+{
+public:
+  /**
+   * Create a Zip instance for the given file name. In case of error, the
+   * Zip instance is still created but methods will error out.
+   */
+  Zip(const char *filename, ZipCollection *collection = NULL);
+
+  /**
+   * Destructor
+   */
+  ~Zip();
+
+  /**
+   * Describes one item of the archive: where its (possibly compressed) data
+   * lies in the mapped file, its sizes and its type. Instances are filled in
+   * by Zip::GetStream(); the buffer points into the mapping owned by the Zip.
+   */
+  class Stream
+  {
+  public:
+    /* Stream types. Values are those of the headers' compression field. */
+    enum Type {
+      STORE = 0,
+      DEFLATE = 8
+    };
+
+    /* Creates an empty stream: NULL buffer, zero sizes, STORE type */
+    Stream(): compressedBuf(NULL), compressedSize(0), uncompressedSize(0)
+            , type(STORE) { }
+
+    /* Accessors are const so that they work on const Stream instances */
+    const void *GetBuffer() const { return compressedBuf; }
+    size_t GetSize() const { return compressedSize; }
+    size_t GetUncompressedSize() const { return uncompressedSize; }
+    Type GetType() const { return type; }
+
+  protected:
+    friend class Zip;
+    /* Written by Zip::GetStream() */
+    const void *compressedBuf;
+    size_t compressedSize;
+    size_t uncompressedSize;
+    Type type;
+  };
+
+  /**
+   * Returns a stream from the Zip archive.
+   */
+  bool GetStream(const char *path, Stream *out) const;
+
+  /**
+   * Returns the file name of the archive
+   */
+  const char *GetName() const
+  {
+    return name;
+  }
+
+private:
+  /* File name of the archive */
+  char *name;
+  /* Address where the Zip archive is mapped */
+  void *mapped;
+  /* Size of the archive */
+  size_t size;
+
+  /**
+   * Strings (file names, comments, etc.) in the Zip headers are NOT zero
+   * terminated. This class is a helper around them.
+   */
+  class StringBuf
+  {
+  public:
+    /**
+     * Wraps length bytes starting at buf. The bytes are not copied.
+     */
+    StringBuf(const char *buf, size_t length): buf(buf), length(length) { }
+
+    /**
+     * Returns whether the string has exactly the same content as the given
+     * zero terminated string (matching only a prefix of str is not enough).
+     */
+    bool Equals(const char *str) const
+    {
+      return strlen(str) == length && memcmp(str, buf, length) == 0;
+    }
+
+  private:
+    const char *buf;
+    size_t length;
+  };
+
+/* All the following types need to be packed */
+#pragma pack(1)
+  /**
+   * A Zip archive is an aggregate of entities which all start with a
+   * signature giving their type. This template is to be used as a base
+   * class for these entities; T must provide a static "magic" constant.
+   */
+  template <typename T>
+  class SignedEntity
+  {
+  public:
+    /**
+     * Returns buf as a const T * when it starts with T::magic, NULL if not.
+     * buf is read without any NULL or bounds check.
+     */
+    static const T *validate(const void *buf)
+    {
+      const T *ret = static_cast<const T *>(buf);
+      if (ret->signature == T::magic)
+        return ret;
+      return NULL;
+    }
+  private:
+    le_uint32 signature; /* first field of every entity, compared to T::magic */
+  };
+
+  /**
+   * Header used to describe a Local File entry. The header is followed by
+   * the file name and an extra field, then by the data stream.
+   */
+  struct LocalFile: public SignedEntity<LocalFile>
+  {
+    /* Signature for a Local File header */
+    static const uint32_t magic = 0x04034b50;
+
+    /**
+     * Returns the file name: filenameSize bytes located right after the header
+     */
+    StringBuf GetName() const
+    {
+      return StringBuf(reinterpret_cast<const char *>(this) + sizeof(*this),
+                       filenameSize);
+    }
+
+    /**
+     * Returns a pointer to the data, past the file name and the extra field
+     */
+    const void *GetData() const
+    {
+      return reinterpret_cast<const char *>(this) + sizeof(*this)
+             + filenameSize + extraFieldSize;
+    }
+    /* Header fields following the signature; sizeof(*this) relies on packing */
+    le_uint16 minVersion;
+    le_uint16 generalFlag;
+    le_uint16 compression;
+    le_uint16 lastModifiedTime;
+    le_uint16 lastModifiedDate;
+    le_uint32 CRC32;
+    le_uint32 compressedSize;
+    le_uint32 uncompressedSize;
+    le_uint16 filenameSize;
+    le_uint16 extraFieldSize;
+  };
+
+  /**
+   * In some cases, when a zip archive is created, compressed size and CRC
+   * are not known when writing the Local File header. In these cases, the
+   * 3rd bit of the general flag in the Local File header is set, and there
+   * is an additional header following the compressed data.
+   */
+  struct DataDescriptor: public SignedEntity<DataDescriptor>
+  {
+    /* Signature for a Data Descriptor header */
+    static const uint32_t magic = 0x08074b50;
+    /* Zip::GetStream() only skips over this header; the fields are not read */
+    le_uint32 CRC32;
+    le_uint32 compressedSize;
+    le_uint32 uncompressedSize;
+  };
+
+  /**
+   * Header used to describe a Central Directory Entry. The header is
+   * followed by the file name, an extra field, and a comment.
+   */
+  struct DirectoryEntry: public SignedEntity<DirectoryEntry>
+  {
+    /* Signature for a Central Directory Entry header */
+    static const uint32_t magic = 0x02014b50;
+
+    /**
+     * Returns the file name: filenameSize bytes located right after the header
+     */
+    StringBuf GetName() const
+    {
+      return StringBuf(reinterpret_cast<const char *>(this) + sizeof(*this),
+                       filenameSize);
+    }
+
+    /**
+     * Returns the Central Directory Entry following this one, or NULL.
+     */
+    const DirectoryEntry *GetNext() const
+    {
+      return validate(reinterpret_cast<const char *>(this) + sizeof(*this)
+                      + filenameSize + extraFieldSize + fileCommentSize);
+    }
+
+    le_uint16 creatorVersion;
+    le_uint16 minVersion;
+    le_uint16 generalFlag;
+    le_uint16 compression;
+    le_uint16 lastModifiedTime;
+    le_uint16 lastModifiedDate;
+    le_uint32 CRC32;
+    le_uint32 compressedSize;
+    le_uint32 uncompressedSize;
+    le_uint16 filenameSize;
+    le_uint16 extraFieldSize;
+    le_uint16 fileCommentSize;
+    le_uint16 diskNum;
+    le_uint16 internalAttributes;
+    le_uint32 externalAttributes;
+    le_uint32 offset; /* of the matching LocalFile header, from the start of the archive */
+  };
+
+  /**
+   * End of Central Directory Record; GetFirstEntry() scans the archive for it.
+   */
+  struct CentralDirectoryEnd: public SignedEntity<CentralDirectoryEnd>
+  {
+    /* Signature for the End of Central Directory Record */
+    static const uint32_t magic = 0x06054b50;
+
+    le_uint16 diskNum;
+    le_uint16 startDisk;
+    le_uint16 recordsOnDisk;
+    le_uint16 records;
+    le_uint32 size;
+    le_uint32 offset; /* of the first DirectoryEntry, from the start of the archive */
+    le_uint16 commentSize;
+  };
+#pragma pack()
+
+  /**
+   * Returns the first Directory entry
+   */
+  const DirectoryEntry *GetFirstEntry() const;
+
+  /* Pointer to the Local File Entry following the last one GetStream() used.
+   * This is used by GetStream to avoid scanning the Directory Entries when the
+   * requested entry is that one. */
+  mutable const LocalFile *nextFile;
+
+  /* Likewise for the next Directory entry */
+  mutable const DirectoryEntry *nextDir;
+
+  /* Pointer to the Directory entries */
+  mutable const DirectoryEntry *entries;
+
+  /* ZipCollection containing this Zip */
+  mutable ZipCollection *parent;
+};
+
+/**
+ * Class for bookkeeping Zip instances, so that GetZip() can hand out existing ones
+ */
+class ZipCollection
+{
+public:
+  /**
+   * Get a Zip instance for the given path. If there is an existing one
+   * already, return that one, otherwise create a new one.
+   */
+  mozilla::TemporaryRef<Zip> GetZip(const char *path);
+
+protected:
+  /**
+   * Forget about the given Zip instance. This method is meant to be called
+   * by the Zip destructor.
+   */
+  friend Zip::~Zip();
+  void Forget(Zip *zip);
+
+private:
+  /* Zip instances bookkept in this collection, as raw (non-owning) pointers */
+  std::vector<Zip *> zips;
+};
+
+#endif /* Zip_h */