Bug 683127 part 1 - Simple Zip reader for the new ELF Linker. r=tglek,r=mwu
author: Mike Hommey <mh+mozilla@glandium.org>
Wed, 11 Jan 2012 11:10:55 +0100
changeset: 84226 12f6fad6692480cf9273698fb4029672834d250c
parent: 84225 2a6cac678c94d9fe78c11b837b270e3304fc2df7
child: 84227 85c7cdc1a91601eaf43562b736bd65b768d64d14
push id: 674
push user: ffxbld
push date: Tue, 13 Mar 2012 21:17:50 +0000
treeherder: mozilla-esr52@bbc7014db2de [default view] [failures only]
perfherder: [talos] [build metrics] [platform microbench] (compared to previous push)
reviewers: tglek, mwu
bugs: 683127
milestone: 12.0a1
Bug 683127 part 1 - Simple Zip reader for the new ELF Linker. r=tglek,r=mwu
mozglue/linker/Logging.h
mozglue/linker/Makefile.in
mozglue/linker/Utils.h
mozglue/linker/Zip.cpp
mozglue/linker/Zip.h
new file mode 100644
--- /dev/null
+++ b/mozglue/linker/Logging.h
@@ -0,0 +1,22 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this file,
+ * You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#ifndef Logging_h
+#define Logging_h
+
+#ifdef ANDROID /* log(): printf-style error logging, sent to the Android log under the "GeckoLinker" tag */
+#include <android/log.h>
+#define log(...) __android_log_print(ANDROID_LOG_ERROR, "GeckoLinker", __VA_ARGS__)
+#else /* elsewhere: printed on stderr with a newline appended; format must be a string literal */
+#include <cstdio>
+#define log(format, ...) fprintf(stderr, format "\n", ##__VA_ARGS__)
+#endif
+
+#ifdef MOZ_DEBUG_LINKER /* debug(): same as log() when MOZ_DEBUG_LINKER is defined */
+#define debug log
+#else /* otherwise debug() expands to nothing, and its arguments are not evaluated */
+#define debug(...)
+#endif
+
+#endif /* Logging_h */
new file mode 100644
--- /dev/null
+++ b/mozglue/linker/Makefile.in
@@ -0,0 +1,21 @@
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this file,
+# You can obtain one at http://mozilla.org/MPL/2.0/.
+
+DEPTH		= ../..
+topsrcdir	= @top_srcdir@
+srcdir		= @srcdir@
+VPATH		= @srcdir@
+
+include $(DEPTH)/config/autoconf.mk
+
+MODULE		= mozglue
+LIBRARY_NAME	= linker
+FORCE_STATIC_LIB= 1
+STL_FLAGS =
+
+CPPSRCS = \
+  Zip.cpp \
+  $(NULL)
+
+include $(topsrcdir)/config/rules.mk
new file mode 100644
--- /dev/null
+++ b/mozglue/linker/Utils.h
@@ -0,0 +1,93 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this file,
+ * You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#ifndef Utils_h
+#define Utils_h
+
+#include <stdint.h>
+
+/**
+ * On architectures that are little endian and that support unaligned reads,
+ * we can use direct type, but on others, we want to have a special class
+ * to handle conversion and alignment issues.
+ */
+#if defined(__i386__) || defined(__x86_64__)
+typedef uint16_t le_uint16;
+typedef uint32_t le_uint32;
+#else
+
+/**
+ * Template that allows to find an unsigned int type from a (computed) bit size
+ */
+template <int s> struct UInt { }; /* other sizes: no Type member, so using Type fails to compile */
+template <> struct UInt<16> { typedef uint16_t Type; }; /* 16 bits */
+template <> struct UInt<32> { typedef uint32_t Type; }; /* 32 bits */
+
+/**
+ * Template to read 2 n-bit sized words as a 2*n-bit sized word, doing
+ * conversion from little endian and avoiding alignment issues.
+ */
+template <typename T>
+class le_to_cpu
+{
+public:
+  typedef typename UInt<16 * sizeof(T)>::Type Type; /* unsigned, twice as wide as T */
+  /* Widen both halves before combining them: shifting the promoted (signed
+   * int) high half could otherwise overflow into the sign bit. */
+  operator Type() const { return (static_cast<Type>(b) << (sizeof(T) * 8)) | static_cast<Type>(a); }
+private:
+  T a, b; /* a is the low half (stored first), b the high half */
+};
+
+/**
+ * Type definitions
+ */
+typedef le_to_cpu<unsigned char> le_uint16;
+typedef le_to_cpu<le_uint16> le_uint32;
+#endif
+
+/**
+ * AutoCloseFD is a RAII wrapper for POSIX file descriptors
+ */
+class AutoCloseFD
+{
+public:
+  AutoCloseFD(): fd(-1) { } /* -1 means no descriptor is held */
+  AutoCloseFD(int fd): fd(fd) { } /* takes ownership of fd */
+  /* Closes the held descriptor, if any. Instances are copyable, and every
+   * copy would close the same descriptor: do not copy them. */
+  ~AutoCloseFD()
+  {
+    if (fd != -1)
+      close(fd);
+  }
+
+  /* Raw descriptor; ownership stays with this object */
+  operator int() const { return fd; }
+
+  /* Gives up ownership: returns the descriptor, which is left open */
+  int forget()
+  {
+    int _fd = fd;
+    fd = -1;
+    return _fd;
+  }
+
+  bool operator ==(int other) const { return fd == other; }
+
+  /* Closes the held descriptor and takes ownership of the given one, unless
+   * that is the one already held (closing it would leave a stale value). */
+  int operator =(int other)
+  {
+    if (fd != -1 && fd != other)
+      close(fd);
+    fd = other;
+    return fd;
+  }
+
+private:
+  int fd;
+};
+
+#endif /* Utils_h */
new file mode 100644
--- /dev/null
+++ b/mozglue/linker/Zip.cpp
@@ -0,0 +1,180 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this file,
+ * You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#include <sys/mman.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#include <errno.h>
+#include <unistd.h>
+#include <cstdlib>
+#include <algorithm>
+#include "Logging.h"
+#include "Zip.h"
+
+Zip::Zip(const char *filename, ZipCollection *collection)
+: name(strdup(filename))
+, mapped(MAP_FAILED), size(0)
+, nextFile(NULL), nextDir(NULL)
+, entries(NULL)
+, parent(collection)
+{
+  /* Open and map the file in memory. All members are initialized above so
+   * that the early returns below leave a Zip whose methods fail cleanly. */
+  AutoCloseFD fd(open(name, O_RDONLY));
+  if (fd == -1) {
+    log("Error opening %s: %s", filename, strerror(errno));
+    return;
+  }
+  struct stat st;
+  if (fstat(fd, &st) == -1) {
+    log("Error stating %s: %s", filename, strerror(errno));
+    return;
+  }
+  size = st.st_size;
+  if (size <= sizeof(CentralDirectoryEnd)) {
+    log("Error reading %s: too short", filename);
+    return;
+  }
+  mapped = mmap(NULL, size, PROT_READ, MAP_SHARED, fd, 0);
+  if (mapped == MAP_FAILED) {
+    log("Error mmapping %s: %s", filename, strerror(errno));
+    return;
+  }
+  debug("Mapped %s @%p", filename, mapped);
+  /* Store the first Local File entry (NULL if its signature is missing) */
+  nextFile = LocalFile::validate(mapped);
+}
+
+Zip::~Zip()
+{
+  /* Unregister from the owning collection, if any, before tearing down */
+  if (parent != NULL) { parent->Forget(this); }
+  if (!(mapped == MAP_FAILED)) {
+    munmap(mapped, size);
+    debug("Unmapped %s @%p", name, mapped);
+  }
+  free(name); /* allocated by strdup() in the constructor */
+}
+
+bool /* true when path was found and *out was filled in, false otherwise */
+Zip::GetStream(const char *path, Zip::Stream *out) const /* const, yet moves the mutable nextFile/nextDir cursors */
+{
+  debug("%s - GetFile %s", name, path);
+  /* Fast path: if the Local File header on store matches, we can return the
+   * corresponding stream right away.
+   * However, the Local File header may not contain enough information, in
+   * which case the 3rd bit on the generalFlag is set. Unfortunately, this
+   * bit is also set in some archives even when we do have the data (most
+   * notably the android packages as built by the Mozilla build system).
+   * So instead of testing the generalFlag bit, only use the fast path when
+   * we haven't read the central directory entries yet, and when the
+   * compressed size as defined in the header is filled, i.e. non-zero (it
+   * is normally left empty when the bit is set). */
+  if (nextFile && nextFile->GetName().Equals(path) &&
+      !entries && (nextFile->compressedSize != 0)) {
+    debug("%s - %s was next file: fast path", name, path);
+    /* Fill Stream info from Local File header content */
+    const char *data = reinterpret_cast<const char *>(nextFile->GetData());
+    out->compressedBuf = data;
+    out->compressedSize = nextFile->compressedSize;
+    out->uncompressedSize = nextFile->uncompressedSize;
+    out->type = static_cast<Stream::Type>(uint16_t(nextFile->compression));
+
+    /* Find the next Local File header. It is usually simply following the
+     * compressed stream, but in cases where the 3rd bit of the generalFlag
+     * is set, there is a Data Descriptor header before. */
+    data += nextFile->compressedSize;
+    if ((nextFile->generalFlag & 0x8) && DataDescriptor::validate(data)) {
+      data += sizeof(DataDescriptor);
+    }
+    nextFile = LocalFile::validate(data); /* NULL if no Local File header follows */
+    return true;
+  }
+
+  /* If the directory entry we have in store doesn't match, scan the Central
+   * Directory for the entry corresponding to the given path */
+  if (!nextDir || !nextDir->GetName().Equals(path)) {
+    const DirectoryEntry *entry = GetFirstEntry();
+    debug("%s - Scan directory entries in search for %s", name, path);
+    while (entry && !entry->GetName().Equals(path)) {
+      entry = entry->GetNext();
+    }
+    nextDir = entry; /* NULL when no entry matched */
+  }
+  if (!nextDir) {
+    debug("%s - Couldn't find %s", name, path);
+    return false;
+  }
+
+  /* Find the Local File header corresponding to the Directory entry that
+   * was found. */
+  nextFile = LocalFile::validate(static_cast<const char *>(mapped)
+                             + nextDir->offset);
+  if (!nextFile) {
+    log("%s - Couldn't find the Local File header for %s", name, path);
+    return false;
+  }
+
+  /* Fill Stream info from Directory entry content */
+  const char *data = reinterpret_cast<const char *>(nextFile->GetData());
+  out->compressedBuf = data;
+  out->compressedSize = nextDir->compressedSize;
+  out->uncompressedSize = nextDir->uncompressedSize;
+  out->type = static_cast<Stream::Type>(uint16_t(nextDir->compression));
+
+  /* Store the next directory entry */
+  nextDir = nextDir->GetNext();
+  nextFile = NULL; /* the fast path above requires a non-NULL nextFile */
+  return true;
+}
+
+const Zip::DirectoryEntry *
+Zip::GetFirstEntry() const
+{
+  /* Already located, or nothing is mapped (entries is then still NULL) */
+  if (entries || mapped == MAP_FAILED)
+    return entries;
+
+  /* Walk backwards from the last spot where an End of Central Directory
+   * record fits until its signature shows up (offset 0 is never tested) */
+  const char *base = static_cast<const char *>(mapped);
+  const char *cursor = base + size - sizeof(CentralDirectoryEnd);
+  const CentralDirectoryEnd *record = NULL;
+  while (cursor > base && !record) {
+    record = CentralDirectoryEnd::validate(cursor);
+    cursor--;
+  }
+  if (!record) {
+    log("%s - Couldn't find end of central directory record", name);
+    return NULL;
+  }
+  entries = DirectoryEntry::validate(base + record->offset);
+  if (!entries)
+    log("%s - Couldn't find central directory record", name);
+  return entries;
+}
+
+mozilla::TemporaryRef<Zip>
+ZipCollection::GetZip(const char *path)
+{
+  /* Reuse the bookkept Zip with that exact name, if there is one */
+  typedef std::vector<Zip *>::iterator ZipIter;
+  for (ZipIter cur = zips.begin(); cur != zips.end(); ++cur)
+    if (strcmp((*cur)->GetName(), path) == 0)
+      return *cur;
+  Zip *created = new Zip(path, this); /* bookkept even if opening failed */
+  zips.push_back(created);
+  return created;
+}
+
+void /* Drops zip from the bookkeeping list; the Zip itself is not touched */
+ZipCollection::Forget(Zip *zip)
+{
+  debug("ZipCollection::Forget(\"%s\")", zip->GetName());
+  std::vector<Zip *>::iterator it = std::find(zips.begin(), zips.end(), zip);
+  if (it != zips.end()) /* end() means not found and must not be dereferenced */
+    zips.erase(it);
+  else
+    debug("ZipCollection::Forget: didn't find \"%s\" in bookkeeping", zip->GetName());
+}
new file mode 100644
--- /dev/null
+++ b/mozglue/linker/Zip.h
@@ -0,0 +1,311 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this file,
+ * You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#ifndef Zip_h
+#define Zip_h
+
+#include <cstring>
+#include <stdint.h>
+#include <vector>
+#include "Utils.h"
+#include "mozilla/RefPtr.h"
+
+/**
+ * Forward declaration
+ */
+class ZipCollection;
+
+/**
+ * Class to handle access to Zip archive streams. The Zip archive is mapped
+ * in memory, and streams are direct references to that mapped memory.
+ * Zip files are assumed to be correctly formed. No boundary checks are
+ * performed, which means hand-crafted malicious Zip archives can make the
+ * code fail in bad ways. However, since the only intended use is to load
+ * libraries from Zip archives, there is no interest in making this code
+ * safe, since the libraries could contain malicious code anyways.
+ */
+class Zip: public mozilla::RefCounted<Zip>
+{
+public:
+  /**
+   * Create a Zip instance for the given file name. In case of error, the
+   * Zip instance is still created but methods will error out.
+   */
+  Zip(const char *filename, ZipCollection *collection = NULL);
+
+  /**
+   * Destructor
+   */
+  ~Zip();
+
+  /**
+   * Class used to access Zip archive item streams
+   */
+  class Stream
+  {
+  public:
+    /**
+     * Stream types. Values are those of the Zip "compression" header field.
+     */
+    enum Type {
+      STORE = 0,
+      DEFLATE = 8
+    };
+
+    /**
+     * Creates an empty stream: NULL buffer, zero sizes, STORE type.
+     */
+    Stream(): compressedBuf(NULL), compressedSize(0), uncompressedSize(0)
+            , type(STORE) { }
+
+    const void *GetBuffer() const { return compressedBuf; } /* compressed data */
+    size_t GetSize() const { return compressedSize; } /* size of that data */
+    size_t GetUncompressedSize() const { return uncompressedSize; }
+    Type GetType() const { return type; }
+
+  protected:
+    friend class Zip;
+    const void *compressedBuf;
+    size_t compressedSize;
+    size_t uncompressedSize;
+    Type type;
+  };
+
+  /**
+   * Returns a stream from the Zip archive.
+   */
+  bool GetStream(const char *path, Stream *out) const;
+
+  /**
+   * Returns the file name of the archive
+   */
+  const char *GetName() const
+  {
+    return name; /* private copy of the constructor's filename, owned by this Zip */
+  }
+
+private:
+  /* File name of the archive */
+  char *name;
+  /* Address where the Zip archive is mapped */
+  void *mapped;
+  /* Size of the archive */
+  size_t size;
+
+  /**
+   * Strings (file names, comments, etc.) in the Zip headers are NOT zero
+   * terminated. This class is a helper around them.
+   */
+  class StringBuf
+  {
+  public:
+    /**
+     * Wraps the length bytes starting at buf. The bytes are not copied.
+     */
+    StringBuf(const char *buf, size_t length): buf(buf), length(length) { }
+
+    /**
+     * Returns whether the string has exactly the same content as the given
+     * zero terminated string (a mere common prefix is not a match).
+     */
+    bool Equals(const char *str) const
+    {
+      return strlen(str) == length && memcmp(str, buf, length) == 0;
+    }
+
+  private:
+    const char *buf;
+    size_t length;
+  };
+
+/* All the following types need to be packed */
+#pragma pack(1)
+  /**
+   * A Zip archive is an aggregate of entities which all start with a
+   * signature giving their type. This template is to be used as a base
+   * class for these entities.
+   */
+  template <typename T>
+  class SignedEntity
+  {
+  public:
+    /**
+     * Views buf as a T when it starts with the T::magic signature, and
+     * returns NULL otherwise. buf itself is not checked against NULL.
+     */
+    static const T *validate(const void *buf)
+    {
+      const T *candidate = static_cast<const T *>(buf);
+      const bool signatureMatches = (candidate->signature == T::magic);
+      return signatureMatches ? candidate : NULL;
+    }
+
+  private:
+    le_uint32 signature; /* compared against T::magic by validate() */
+  };
+
+  /**
+   * Header used to describe a Local File entry. The header is followed by
+   * the file name and an extra field, then by the data stream.
+   */
+  struct LocalFile: public SignedEntity<LocalFile>
+  {
+    /* Local File header signature: bytes 'P' 'K' 0x03 0x04 in file order */
+    static const uint32_t magic = 0x04034b50;
+
+    /**
+     * Returns the file name, which is stored right after this header
+     */
+    StringBuf GetName() const
+    {
+      const char *afterHeader = reinterpret_cast<const char *>(this + 1);
+      return StringBuf(afterHeader, filenameSize);
+    }
+
+    /**
+     * Returns a pointer to the data, found past the name and extra field
+     */
+    const void *GetData() const
+    {
+      const char *afterHeader = reinterpret_cast<const char *>(this + 1);
+      return afterHeader + filenameSize + extraFieldSize;
+    }
+
+    le_uint16 minVersion;
+    le_uint16 generalFlag;
+    le_uint16 compression;
+    le_uint16 lastModifiedTime;
+    le_uint16 lastModifiedDate;
+    le_uint32 CRC32;
+    le_uint32 compressedSize;
+    le_uint32 uncompressedSize;
+    le_uint16 filenameSize;
+    le_uint16 extraFieldSize;
+  };
+
+  /**
+   * In some cases, when a zip archive is created, compressed size and CRC
+   * are not known when writing the Local File header. In these cases, the
+   * 3rd bit of the general flag in the Local File header is set, and there
+   * is an additional header following the compressed data.
+   */
+  struct DataDescriptor: public SignedEntity<DataDescriptor>
+  {
+    /* Data Descriptor signature: bytes 'P' 'K' 0x07 0x08 in file order */
+    static const uint32_t magic = 0x08074b50;
+
+    le_uint32 CRC32;
+    le_uint32 compressedSize;
+    le_uint32 uncompressedSize;
+  };
+
+  /**
+   * Header used to describe a Central Directory Entry. The header is
+   * followed by the file name, an extra field, and a comment.
+   */
+  struct DirectoryEntry: public SignedEntity<DirectoryEntry>
+  {
+    /* Central Directory Entry signature: bytes 'P' 'K' 0x01 0x02 in file order */
+    static const uint32_t magic = 0x02014b50;
+
+    /**
+     * Returns the file name, which is stored right after this header
+     */
+    StringBuf GetName() const
+    {
+      const char *afterHeader = reinterpret_cast<const char *>(this + 1);
+      return StringBuf(afterHeader, filenameSize);
+    }
+
+    /**
+     * Returns the following Central Directory Entry, or NULL if what follows is not one
+     */
+    const DirectoryEntry *GetNext() const
+    {
+      const char *afterHeader = reinterpret_cast<const char *>(this + 1);
+      return validate(afterHeader + filenameSize + extraFieldSize + fileCommentSize);
+    }
+
+    le_uint16 creatorVersion;
+    le_uint16 minVersion;
+    le_uint16 generalFlag;
+    le_uint16 compression;
+    le_uint16 lastModifiedTime;
+    le_uint16 lastModifiedDate;
+    le_uint32 CRC32;
+    le_uint32 compressedSize;
+    le_uint32 uncompressedSize;
+    le_uint16 filenameSize;
+    le_uint16 extraFieldSize;
+    le_uint16 fileCommentSize;
+    le_uint16 diskNum;
+    le_uint16 internalAttributes;
+    le_uint32 externalAttributes;
+    le_uint32 offset;
+  };
+
+  /**
+   * Header used to describe the End of Central Directory Record.
+   */
+  struct CentralDirectoryEnd: public SignedEntity<CentralDirectoryEnd>
+  {
+    /* End of Central Directory signature: bytes 'P' 'K' 0x05 0x06 in file order */
+    static const uint32_t magic = 0x06054b50;
+
+    le_uint16 diskNum;
+    le_uint16 startDisk;
+    le_uint16 recordsOnDisk;
+    le_uint16 records;
+    le_uint32 size;
+    le_uint32 offset; /* GetFirstEntry() looks for the first Directory entry at mapped + offset */
+    le_uint16 commentSize;
+  };
+#pragma pack()
+
+  /**
+   * Returns the first Directory entry
+   */
+  const DirectoryEntry *GetFirstEntry() const;
+
+  /* Pointer to the Local File Entry following the last one GetStream() used.
+   * This is used by GetStream to avoid scanning the Directory Entries when the
+   * requested entry is that one. */
+  mutable const LocalFile *nextFile;
+
+  /* Likewise for the next Directory entry */
+  mutable const DirectoryEntry *nextDir;
+
+  /* Pointer to the Directory entries */
+  mutable const DirectoryEntry *entries;
+
+  /* ZipCollection containing this Zip */
+  mutable ZipCollection *parent;
+};
+
+/**
+ * Class for bookkeeping Zip instances
+ */
+class ZipCollection
+{
+public:
+  /**
+   * Get a Zip instance for the given path: the bookkept one whose name is
+   * the same string (strcmp) if there is one, otherwise a newly created one.
+   */
+  mozilla::TemporaryRef<Zip> GetZip(const char *path);
+
+protected:
+  /**
+   * Forget about the given Zip instance. This method is meant to be called
+   * by the Zip destructor, which is why that destructor is a friend.
+   */
+  friend Zip::~Zip();
+  void Forget(Zip *zip);
+
+private:
+  /* Zip instances bookkept in this collection, as raw pointers */
+  std::vector<Zip *> zips;
+};
+
+#endif /* Zip_h */