Bug 686805 part 3 - Tool to generate seekable compressed streams. r=tglek
--- a/mozglue/linker/Makefile.in
+++ b/mozglue/linker/Makefile.in
@@ -14,16 +14,27 @@ LIBRARY_NAME = linker
FORCE_STATIC_LIB= 1
STL_FLAGS =
CPPSRCS = \
Zip.cpp \
$(NULL)
ifndef MOZ_OLD_LINKER
+HOST_PROGRAM = szip
+# Sources of szip, the tool generating seekable compressed streams.
+HOST_CPPSRCS = \
+ szip.cpp \
+ Assertions.cpp \
+ $(NULL)
+# NOTE(review): presumably Assertions.cpp is picked up from mfbt via this VPATH - confirm.
+VPATH += $(topsrcdir)/mfbt
+# szip.cpp uses zlib's deflate API.
+HOST_LIBS = -lz
+
CPPSRCS += \
ElfLoader.cpp \
CustomElf.cpp \
Mappable.cpp \
$(NULL)
endif
include $(topsrcdir)/config/rules.mk
new file mode 100644
--- /dev/null
+++ b/mozglue/linker/SeekableZStream.h
@@ -0,0 +1,48 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this file,
+ * You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#ifndef SeekableZStream_h
+#define SeekableZStream_h
+
+#include "Zip.h"
+
+/**
+ * Seekable compressed streams are created by splitting the original
+ * decompressed data in small chunks and compressing these chunks
+ * individually.
+ *
+ * The seekable compressed file format consists of a header defined below,
+ * followed by a table of 32-bit words containing the offsets of each
+ * individual compressed chunk, then followed by the compressed chunks.
+ */
+
+#pragma pack(1)
+struct SeekableZStreamHeader: public Zip::SignedEntity<SeekableZStreamHeader>
+{
+ SeekableZStreamHeader()
+ : Zip::SignedEntity<SeekableZStreamHeader>(magic)
+ , totalSize(0), chunkSize(0), nChunks(0), lastChunkSize(0) { }
+
+ /* Reuse Zip::SignedEntity to handle the magic number used in the Seekable
+ * ZStream file format. The magic number is "SeZz" (bytes 53 65 5a 7a). */
+ static const uint32_t magic = 0x7a5a6553;
+
+ /* Total size of the stream: header (4 magic bytes included), table, chunks. */
+ le_uint32 totalSize;
+
+ /* Size of each chunk before compression, except possibly the last one */
+ le_uint32 chunkSize;
+
+ /* Number of chunks, i.e. number of entries in the offset table */
+ le_uint32 nChunks;
+
+ /* Size of the last chunk before compression (> 0, <= chunkSize) */
+ le_uint32 lastChunkSize;
+};
+#pragma pack()
+
+MOZ_STATIC_ASSERT(sizeof(SeekableZStreamHeader) == 5 * 4,
+ "SeekableZStreamHeader should be 5 32-bits words");
+
+#endif /* SeekableZStream_h */
--- a/mozglue/linker/Utils.h
+++ b/mozglue/linker/Utils.h
@@ -11,40 +11,66 @@
#include <unistd.h>
#include "mozilla/Assertions.h"
/**
* On architectures that are little endian and that support unaligned reads,
* we can use direct type, but on others, we want to have a special class
* to handle conversion and alignment issues.
*/
-#if defined(__i386__) || defined(__x86_64__)
+#if !defined(DEBUG) && (defined(__i386__) || defined(__x86_64__))
typedef uint16_t le_uint16;
typedef uint32_t le_uint32;
#else
/**
* Template that allows to find an unsigned int type from a (computed) bit size
*/
template <int s> struct UInt { };
template <> struct UInt<16> { typedef uint16_t Type; };
template <> struct UInt<32> { typedef uint32_t Type; };
/**
- * Template to read 2 n-bit sized words as a 2*n-bit sized word, doing
+ * Template to access 2 n-bit sized words as a 2*n-bit sized word, doing
* conversion from little endian and avoiding alignment issues.
*/
template <typename T>
class le_to_cpu
{
public:
- operator typename UInt<16 * sizeof(T)>::Type() const
+ typedef typename UInt<16 * sizeof(T)>::Type Type;
+ /* Read: recombine the halves, a being the low one and b the high one. */
+ operator Type() const
{
return (b << (sizeof(T) * 8)) | a;
}
+ /* Write: split v into its low half (a) and its high half (b). */
+ const le_to_cpu& operator =(const Type &v)
+ {
+ a = v & ((1 << (sizeof(T) * 8)) - 1);
+ b = v >> (sizeof(T) * 8);
+ return *this;
+ }
+ /* The default constructor does not set a value; the other one stores v. */
+ le_to_cpu() { }
+ le_to_cpu(const Type &v)
+ {
+ operator =(v);
+ }
+ /* Adds v to the stored value, through operator Type() and operator =. */
+ const le_to_cpu& operator +=(const Type &v)
+ {
+ return operator =(operator Type() + v);
+ }
+ /* NOTE(review): postfix signature, but returns the incremented object. */
+ const le_to_cpu& operator ++(int)
+ {
+ return operator =(operator Type() + 1);
+ }
+
private:
T a, b;
};
/**
* Type definitions
*/
typedef le_to_cpu<unsigned char> le_uint16;
--- a/mozglue/linker/Zip.h
+++ b/mozglue/linker/Zip.h
@@ -137,16 +137,17 @@ private:
private:
const char *buf;
size_t length;
};
/* All the following types need to be packed */
#pragma pack(1)
+public:
/**
* A Zip archive is an aggregate of entities which all start with a
* signature giving their type. This template is to be used as a base
* class for these entities.
*/
template <typename T>
class SignedEntity
{
@@ -157,20 +158,23 @@ private:
*/
static const T *validate(const void *buf)
{
const T *ret = static_cast<const T *>(buf);
if (ret->signature == T::magic)
return ret;
return NULL;
}
+
+ SignedEntity(uint32_t magic): signature(magic) { }
private:
le_uint32 signature;
};
+private:
/**
* Header used to describe a Local File entry. The header is followed by
* the file name and an extra field, then by the data stream.
*/
struct LocalFile: public SignedEntity<LocalFile>
{
/* Signature for a Local File header */
static const uint32_t magic = 0x04034b50;
new file mode 100644
--- /dev/null
+++ b/mozglue/linker/szip.cpp
@@ -0,0 +1,173 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this file,
+ * You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#include <algorithm>
+#include <sys/stat.h>
+#include <cstring>
+#include <zlib.h>
+#include <fcntl.h>
+#include <errno.h>
+#include "mozilla/Assertions.h"
+#include "SeekableZStream.h"
+#include "Utils.h"
+#include "Logging.h"
+
+static const size_t CHUNK = 16384;
+
+/**
+ * Generate a seekable compressed stream.
+ *
+ * Usage: szip file_to_compress out_file
+ *
+ * The input file is split in CHUNK-sized pieces which are deflated
+ * independently and appended to out_file, after a SeekableZStreamHeader and
+ * a table holding the offset of each compressed chunk in out_file.
+ *
+ * Returns 0 on success, 1 on usage error or when a system or zlib call fails
+ * (the reason is logged). These failures are checked at runtime rather than
+ * through MOZ_ASSERT alone. out_file may be left partially written on failure.
+ */
+int main(int argc, char* argv[])
+{
+  if (argc != 3 || !argv[1] || !argv[2] || (strcmp(argv[1], argv[2]) == 0)) {
+    log("usage: %s file_to_compress out_file", argv[0]);
+    return 1;
+  }
+
+  AutoCloseFD origFd = open(argv[1], O_RDONLY);
+  if (origFd == -1) {
+    log("Couldn't open %s: %s", argv[1], strerror(errno));
+    return 1;
+  }
+
+  struct stat st;
+  int ret = fstat(origFd, &st);
+  if (ret == -1) {
+    log("Couldn't stat %s: %s", argv[1], strerror(errno));
+    return 1;
+  }
+
+  size_t origSize = st.st_size;
+  log("Size = %lu", static_cast<unsigned long>(origSize));
+  if (origSize == 0) {
+    log("Won't compress %s: it's empty", argv[1]);
+    return 1;
+  }
+
+  /* Mmap the original file */
+  MappedPtr origBuf;
+  origBuf.Assign(mmap(NULL, origSize, PROT_READ, MAP_PRIVATE, origFd, 0), origSize);
+  if (origBuf == MAP_FAILED) {
+    log("Couldn't mmap %s: %s", argv[1], strerror(errno));
+    return 1;
+  }
+
+  /* Create the compressed file */
+  AutoCloseFD outFd = open(argv[2], O_RDWR | O_CREAT | O_TRUNC, 0666);
+  if (outFd == -1) {
+    log("Couldn't open %s: %s", argv[2], strerror(errno));
+    return 1;
+  }
+
+  /* Expected total number of chunks */
+  size_t nChunks = ((origSize + CHUNK - 1) / CHUNK);
+
+  /* The first chunk is going to be stored after the header and the offset
+   * table */
+  size_t offset = sizeof(SeekableZStreamHeader) + nChunks * sizeof(uint32_t);
+
+  /* Give enough room for the header and the offset table, and map them.
+   * posix_fallocate returns an error number instead of setting errno. */
+  ret = posix_fallocate(outFd, 0, offset);
+  if (ret != 0) {
+    log("Couldn't allocate space in %s: %s", argv[2], strerror(ret));
+    return 1;
+  }
+  MappedPtr headerMap;
+  headerMap.Assign(mmap(NULL, offset, PROT_READ | PROT_WRITE, MAP_SHARED,
+                        outFd, 0), offset);
+  if (headerMap == MAP_FAILED) {
+    log("Couldn't mmap %s: %s", argv[2], strerror(errno));
+    return 1;
+  }
+
+  SeekableZStreamHeader *header = new (headerMap) SeekableZStreamHeader;
+  le_uint32 *entry = reinterpret_cast<le_uint32 *>(&header[1]);
+
+  /* Initialize header */
+  header->chunkSize = CHUNK;
+  header->totalSize = offset;
+
+  /* Seek at the end of the output file, where we're going to append
+   * compressed streams */
+  if (lseek(outFd, offset, SEEK_SET) == -1) {
+    log("Couldn't seek %s: %s", argv[2], strerror(errno));
+    return 1;
+  }
+
+  /* Initialize zlib structure */
+  z_stream zStream;
+  memset(&zStream, 0, sizeof(zStream));
+
+  /* Compression buffer */
+  AutoDeleteArray<Bytef> outBuf = new Bytef[CHUNK * 2];
+
+  Bytef *origData = static_cast<Bytef*>(origBuf);
+  size_t avail = 0;
+  while (origSize) {
+    avail = std::min(origSize, CHUNK);
+
+    /* Compress chunk; each chunk is a self-contained zlib stream. */
+    ret = deflateInit(&zStream, 9);
+    if (ret != Z_OK) {
+      log("deflateInit failed: %d", ret);
+      return 1;
+    }
+    zStream.avail_in = avail;
+    zStream.next_in = origData;
+    zStream.avail_out = CHUNK * 2;
+    zStream.next_out = outBuf;
+    ret = deflate(&zStream, Z_FINISH);
+    if (ret != Z_STREAM_END) {
+      log("deflate failed: %d", ret);
+      deflateEnd(&zStream);
+      return 1;
+    }
+    ret = deflateEnd(&zStream);
+    if (ret != Z_OK) {
+      log("deflateEnd failed: %d", ret);
+      return 1;
+    }
+    MOZ_ASSERT(zStream.avail_out > 0);
+
+    /* Write chunk. A failed or short write would leave offsets pointing at
+     * missing data, so give up instead of carrying on. */
+    size_t len = 2 * CHUNK - zStream.avail_out;
+    ssize_t written = write(outFd, outBuf, len);
+    if (written == -1) {
+      log("Couldn't write to %s: %s", argv[2], strerror(errno));
+      return 1;
+    }
+    if (static_cast<size_t>(written) != len) {
+      log("Short write to %s", argv[2]);
+      return 1;
+    }
+
+    /* Adjust headers */
+    header->totalSize += len;
+    *entry++ = offset;
+    header->nChunks++;
+
+    /* Prepare for next iteration */
+    origSize -= avail;
+    origData += avail;
+    offset += len;
+  }
+  header->lastChunkSize = avail;
+
+  MOZ_ASSERT(header->nChunks == nChunks);
+  log("Compressed size is %lu", static_cast<unsigned long>(offset));
+
+  return 0;
+}