Bug 632404 - Preload dependent libraries at startup. r=tglek,r=bsmedberg
author: Mike Hommey <mh+mozilla@glandium.org>
date: Mon, 16 May 2011 10:25:05 +0200
changeset: 71227 cc18551d5cc3
parent: 71226 c4bbac1f178b
child: 71228 b58ba54bdcbd
push id: 20512
push user: mlamouri@mozilla.com
push date: 2011-06-17 13:30 +0000
treeherder: mozilla-central@9ac190a247ad [default view] [failures only]
perfherder: [talos] [build metrics] [platform microbench] (compared to previous push)
reviewers: tglek, bsmedberg
bugs: 632404
milestone: 7.0a1
first release with
nightly linux32
nightly linux64
nightly mac
nightly win32
nightly win64
last release without
nightly linux32
nightly linux64
nightly mac
nightly win32
nightly win64
Bug 632404 - Preload dependent libraries at startup. r=tglek,r=bsmedberg
browser/app/nsBrowserApp.cpp
xpcom/glue/standalone/nsGlueLinking.h
xpcom/glue/standalone/nsGlueLinkingDlopen.cpp
xpcom/glue/standalone/nsGlueLinkingOS2.cpp
xpcom/glue/standalone/nsGlueLinkingOSX.cpp
xpcom/glue/standalone/nsGlueLinkingWin.cpp
xpcom/glue/standalone/nsXPCOMGlue.cpp
xpcom/glue/standalone/nsXPCOMGlue.h
--- a/browser/app/nsBrowserApp.cpp
+++ b/browser/app/nsBrowserApp.cpp
@@ -204,16 +204,31 @@ int main(int argc, char* argv[])
   }
 
   char *lastSlash = strrchr(exePath, XPCOM_FILE_PATH_SEPARATOR[0]);
   if (!lastSlash || (lastSlash - exePath > MAXPATHLEN - sizeof(XPCOM_DLL) - 1))
     return 255;
 
   strcpy(++lastSlash, XPCOM_DLL);
 
+#ifdef XP_WIN
+  // GetProcessIoCounters().ReadOperationCount seems to have little to
+  // do with actual read operations. It reports 0 or 1 at this stage
+  // in the program. Luckily 1 coincides with when prefetch is
+  // enabled. If Windows prefetch didn't happen we can do our own
+  // faster dll preloading.
+  IO_COUNTERS ioCounters;
+  if (GetProcessIoCounters(GetCurrentProcess(), &ioCounters)
+      && !ioCounters.ReadOperationCount)
+#endif
+  {
+      XPCOMGlueEnablePreload();
+  }
+
+
   rv = XPCOMGlueStartup(exePath);
   if (NS_FAILED(rv)) {
     Output("Couldn't load XPCOM.\n");
     return 255;
   }
 
   rv = XPCOMGlueLoadXULFunctions(kXULFuncs);
   if (NS_FAILED(rv)) {
--- a/xpcom/glue/standalone/nsGlueLinking.h
+++ b/xpcom/glue/standalone/nsGlueLinking.h
@@ -43,14 +43,14 @@
 #define XPCOM_DEPENDENT_LIBS_LIST "dependentlibs.list"
 
 NS_HIDDEN_(nsresult)
 XPCOMGlueLoad(const char *xpcomFile, GetFrozenFunctionsFunc *func NS_OUTPARAM);
 
 NS_HIDDEN_(void)
 XPCOMGlueUnload();
 
-typedef void (*DependentLibsCallback)(const char *aDependentLib);
+typedef void (*DependentLibsCallback)(const char *aDependentLib, PRBool do_preload);
 
 NS_HIDDEN_(void)
 XPCOMGlueLoadDependentLibs(const char *xpcomDir, DependentLibsCallback cb);
 
 #endif // nsGlueLinking_h__
--- a/xpcom/glue/standalone/nsGlueLinkingDlopen.cpp
+++ b/xpcom/glue/standalone/nsGlueLinkingDlopen.cpp
@@ -16,16 +16,17 @@
  *
  * The Initial Developer of the Original Code is
  * Benjamin Smedberg <benjamin@smedbergs.us>
  *
  * Portions created by the Initial Developer are Copyright (C) 2005
  * the Initial Developer. All Rights Reserved.
  *
  * Contributor(s):
+ *   Mike Hommey <mh@glandium.org>
  *
  * Alternatively, the contents of this file may be used under the terms of
  * either the GNU General Public License Version 2 or later (the "GPL"), or
  * the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
  * in which case the provisions of the GPL or the LGPL are applicable instead
  * of those above. If you wish to allow use of your version of this file only
  * under the terms of either the GPL or the LGPL, and not to allow others to
  * use your version of this file under the terms of the MPL, indicate your
@@ -35,16 +36,24 @@
  * the terms of any one of the MPL, the GPL or the LGPL.
  *
  * ***** END LICENSE BLOCK ***** */
 
 #include "nsGlueLinking.h"
 #include "nsXPCOMGlue.h"
 #include "nscore.h"
 
+#ifdef LINUX
+#define _GNU_SOURCE 
+#include <fcntl.h>
+#include <unistd.h>
+#include <elf.h>
+#include <limits.h>
+#endif
+
 #include <errno.h>
 #include <dlfcn.h>
 #include <stdlib.h>
 #include <string.h>
 #include <stdio.h>
 
 #if defined(SUNOS4) || defined(NEXTSTEP) || \
     (defined(OPENBSD) || defined(NETBSD)) && !defined(__ELF__)
@@ -133,19 +142,79 @@ AppendDependentLib(void *libHandle)
         return;
 
     d->next = sTop;
     d->libHandle = libHandle;
 
     sTop = d;
 }
 
+#ifdef LINUX
+static const unsigned int bufsize = 4096;
+
+#ifdef HAVE_64BIT_OS
+typedef Elf64_Ehdr Elf_Ehdr;
+typedef Elf64_Phdr Elf_Phdr;
+static const unsigned char ELFCLASS = ELFCLASS64;
+typedef Elf64_Off Elf_Off;
+#else
+typedef Elf32_Ehdr Elf_Ehdr;
+typedef Elf32_Phdr Elf_Phdr;
+static const unsigned char ELFCLASS = ELFCLASS32;
+typedef Elf32_Off Elf_Off;
+#endif
+
 static void
-ReadDependentCB(const char *aDependentLib)
+preload(const char *file)
 {
+    // Best-effort page cache warm-up: any failure just skips readahead.
+    union {
+        char buf[bufsize];
+        Elf_Ehdr ehdr;
+    } elf;
+    int fd = open(file, O_RDONLY);
+    if (fd < 0)
+        return;
+    // Read ELF header (ehdr) and program header table (phdr). We check
+    // the ELF magic, that the ELF class matches our own, the size of
+    // program header entries, and that the program header table fits
+    // in the bytes actually read, since read() may return a short count.
+    ssize_t len = read(fd, elf.buf, bufsize);
+    if ((len < (ssize_t)sizeof(Elf_Ehdr)) ||
+        (memcmp(elf.buf, ELFMAG, SELFMAG)) ||
+        (elf.ehdr.e_ident[EI_CLASS] != ELFCLASS) ||
+        (elf.ehdr.e_phentsize != sizeof(Elf_Phdr)) ||
+        (elf.ehdr.e_phoff > (size_t)len) ||
+        (elf.ehdr.e_phnum > ((size_t)len - elf.ehdr.e_phoff) / sizeof(Elf_Phdr))) {
+        close(fd);
+        return;
+    }
+    // PT_LOAD segments describe how the dynamic loader is going to map
+    // the file in memory. Find the biggest file offset they cover.
+    Elf_Phdr *phdr = (Elf_Phdr *)&elf.buf[elf.ehdr.e_phoff];
+    Elf_Off end = 0;
+    for (int phnum = elf.ehdr.e_phnum; phnum; phdr++, phnum--)
+        if ((phdr->p_type == PT_LOAD) &&
+            (end < phdr->p_offset + phdr->p_filesz))
+            end = phdr->p_offset + phdr->p_filesz;
+    // Let the kernel read ahead what the dynamic loader is going to
+    // map in memory soon after.
+    if (end > 0)
+        readahead(fd, 0, end);
+    close(fd);
+}
+#endif
+
+static void
+ReadDependentCB(const char *aDependentLib, PRBool do_preload)
+{
+#ifdef LINUX
+    if (do_preload) // preload() is only defined under LINUX here
+        preload(aDependentLib);
+#endif
     void *libHandle = dlopen(aDependentLib, RTLD_GLOBAL | RTLD_LAZY);
     if (!libHandle)
         return;
 
     AppendDependentLib(libHandle);
 }
 
 nsresult
--- a/xpcom/glue/standalone/nsGlueLinkingOS2.cpp
+++ b/xpcom/glue/standalone/nsGlueLinkingOS2.cpp
@@ -63,17 +63,17 @@ AppendDependentLib(HMODULE libHandle)
 
     d->next = sTop;
     d->libHandle = libHandle;
 
     sTop = d;
 }
 
 static void
-ReadDependentCB(const char *aDependentLib)
+ReadDependentCB(const char *aDependentLib, PRBool do_preload)
 {
     CHAR pszError[_MAX_PATH];
     ULONG ulrc = NO_ERROR;
     HMODULE h;
 
     ulrc = DosLoadModule(pszError, _MAX_PATH, aDependentLib, &h);
 
     if (ulrc != NO_ERROR)
--- a/xpcom/glue/standalone/nsGlueLinkingOSX.cpp
+++ b/xpcom/glue/standalone/nsGlueLinkingOSX.cpp
@@ -16,16 +16,17 @@
  *
  * The Initial Developer of the Original Code is
  * Benjamin Smedberg <benjamin@smedbergs.us>
  *
  * Portions created by the Initial Developer are Copyright (C) 2005
  * the Initial Developer. All Rights Reserved.
  *
  * Contributor(s):
+ *   Mike Hommey <mh@glandium.org>
  *
  * Alternatively, the contents of this file may be used under the terms of
  * either the GNU General Public License Version 2 or later (the "GPL"), or
  * the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
  * in which case the provisions of the GPL or the LGPL are applicable instead
  * of those above. If you wish to allow use of your version of this file only
  * under the terms of either the GPL or the LGPL, and not to allow others to
  * use your version of this file under the terms of the MPL, indicate your
@@ -40,21 +41,141 @@
 #include "nsXPCOMGlue.h"
 
 #include <mach-o/dyld.h>
 #include <sys/param.h>
 #include <stdlib.h>
 #include <string.h>
 #include <stdio.h>
 
+#include <fcntl.h>
+#include <unistd.h>
+#include <mach/machine.h>
+#include <mach-o/fat.h>
+#include <mach-o/loader.h>
+#include <sys/mman.h>
+#include <sys/stat.h>
+#include <limits.h>
+
+#if defined(__i386__)
+static const uint32_t CPU_TYPE = CPU_TYPE_X86;
+#elif defined(__x86_64__)
+static const uint32_t CPU_TYPE = CPU_TYPE_X86_64;
+#elif defined(__ppc__)
+static const uint32_t CPU_TYPE = CPU_TYPE_POWERPC;
+#elif defined(__ppc64__)
+static const uint32_t CPU_TYPE = CPU_TYPE_POWERPC64;
+#else
+#error Unsupported CPU type
+#endif
+
+#ifdef HAVE_64BIT_OS
+#undef LC_SEGMENT
+#define LC_SEGMENT LC_SEGMENT_64
+#undef MH_MAGIC
+#define MH_MAGIC MH_MAGIC_64
+#define cpu_mach_header mach_header_64
+#define segment_command segment_command_64
+#else
+#define cpu_mach_header mach_header
+#endif
+
+// Maps a whole file read-only; operator char* yields NULL on any failure.
+class ScopedMMap
+{
+public:
+    ScopedMMap(const char *file): buf(NULL), size(0) {
+        struct stat st;
+        fd = open(file, O_RDONLY);
+        if (fd < 0 || fstat(fd, &st) < 0)
+            return;
+        size = st.st_size;
+        void *map = mmap(NULL, size, PROT_READ, MAP_PRIVATE, fd, 0);
+        buf = (map == MAP_FAILED) ? NULL : (char *)map; // not NULL on error
+    }
+    ~ScopedMMap() {
+        if (buf)
+            munmap(buf, size);
+        if (fd >= 0)
+            close(fd);
+    }
+    operator char *() { return buf; }
+    int getFd() { return fd; }
+private:
+    int fd;
+    char *buf;
+    size_t size;
+};
+
+// Best-effort: ask the kernel to read ahead the part of |file| that the
+// dynamic loader will map. Returns early if it is not Mach-O for our CPU.
+static void
+preload(const char *file)
+{
+    ScopedMMap buf(file);
+    char *base = buf;
+    if (!base)
+        return;
+    // An OSX binary might either be a fat (universal) binary or a
+    // Mach-O binary. A fat binary actually embeds several Mach-O
+    // binaries. If we have a fat binary, find the offset where the
+    // Mach-O binary for our CPU type can be found.
+    struct fat_header *fh = (struct fat_header *)base;
+    if (OSSwapBigToHostInt32(fh->magic) == FAT_MAGIC) {
+        uint32_t nfat_arch = OSSwapBigToHostInt32(fh->nfat_arch);
+        struct fat_arch *arch = (struct fat_arch *)&buf[sizeof(struct fat_header)];
+        for (; nfat_arch; arch++, nfat_arch--) {
+            if (OSSwapBigToHostInt32(arch->cputype) == CPU_TYPE) {
+                base += OSSwapBigToHostInt32(arch->offset);
+                break;
+            }
+        }
+        if (base == buf)
+            return;
+    }
+
+    // Check Mach-O magic in the Mach header
+    struct cpu_mach_header *mh = (struct cpu_mach_header *)base;
+    if (mh->magic != MH_MAGIC)
+        return;
+
+    // The Mach header is followed by a sequence of load commands.
+    // Each command has a header containing the command type and the
+    // command size. LC_SEGMENT commands describe how the dynamic
+    // loader is going to map the file in memory. We use that
+    // information to find the biggest offset from the library that
+    // will be mapped in memory.
+    char *cmd = &base[sizeof(struct cpu_mach_header)];
+    off_t end = 0;
+    for (uint32_t ncmds = mh->ncmds; ncmds; ncmds--) {
+        struct segment_command *sh = (struct segment_command *)cmd;
+        // Advance past every command, not only segments; skipping the
+        // advance would re-examine the same command until ncmds ran out.
+        cmd += sh->cmdsize;
+        if (sh->cmd == LC_SEGMENT && end < sh->fileoff + sh->filesize)
+            end = sh->fileoff + sh->filesize;
+    }
+    // Let the kernel read ahead what the dynamic loader is going to
+    // map in memory soon after. The F_RDADVISE fcntl is equivalent
+    // to Linux' readahead() system call.
+    if (end > 0) {
+        struct radvisory ra;
+        ra.ra_offset = (base - buf);
+        ra.ra_count = end;
+        fcntl(buf.getFd(), F_RDADVISE, &ra);
+    }
+}
+
 static const mach_header* sXULLibImage;
 
 static void
-ReadDependentCB(const char *aDependentLib)
+ReadDependentCB(const char *aDependentLib, PRBool do_preload)
 {
+    if (do_preload)
+        preload(aDependentLib); // request readahead before NSAddImage loads it
     (void) NSAddImage(aDependentLib,
                       NSADDIMAGE_OPTION_RETURN_ON_ERROR |
                       NSADDIMAGE_OPTION_MATCH_FILENAME_BY_INSTALLNAME);
 }
 
 static void*
 LookupSymbol(const mach_header* aLib, const char* aSymbolName)
 {
--- a/xpcom/glue/standalone/nsGlueLinkingWin.cpp
+++ b/xpcom/glue/standalone/nsGlueLinkingWin.cpp
@@ -65,21 +65,44 @@ AppendDependentLib(HINSTANCE libHandle)
 
     d->next = sTop;
     d->libHandle = libHandle;
 
     sTop = d;
 }
 
 static void
-ReadDependentCB(const char *aDependentLib)
+preload(LPCWSTR dll)
+{
+    HANDLE file = CreateFileW(dll, GENERIC_READ, FILE_SHARE_READ, NULL,
+                              OPEN_EXISTING, FILE_FLAG_SEQUENTIAL_SCAN, NULL);
+    if (file == INVALID_HANDLE_VALUE)
+        return;
+    // Do dummy reads to trigger kernel-side readahead via
+    // FILE_FLAG_SEQUENTIAL_SCAN. Stop on an underfilled buffer: during testing
+    // buffers were read fully, so a short one implies readahead isn't working.
+    char chunk[64 * 1024];
+    DWORD bytesRead;
+    for (;;) {
+        if (!ReadFile(file, chunk, sizeof(chunk), &bytesRead, NULL) ||
+            bytesRead != sizeof(chunk))
+            break;
+    }
+    CloseHandle(file);
+}
+
+static void
+ReadDependentCB(const char *aDependentLib, PRBool do_preload)
 {
     wchar_t wideDependentLib[MAX_PATH];
     MultiByteToWideChar(CP_UTF8, 0, aDependentLib, -1, wideDependentLib, MAX_PATH);
 
+    if (do_preload)
+        preload(wideDependentLib); // dummy-read the DLL before LoadLibraryExW
+
     HINSTANCE h =
         LoadLibraryExW(wideDependentLib, NULL, MOZ_LOADLIBRARY_FLAGS);
 
     if (!h)
         return;
 
     AppendDependentLib(h);
 }
--- a/xpcom/glue/standalone/nsXPCOMGlue.cpp
+++ b/xpcom/glue/standalone/nsXPCOMGlue.cpp
@@ -50,16 +50,23 @@
 #ifdef XP_WIN
 #include <windows.h>
 #include <mbstring.h>
 #include <malloc.h>
 #define snprintf _snprintf
 #endif
 
 static XPCOMFunctions xpcomFunctions;
+static PRBool do_preload = PR_FALSE;
+
+extern "C"
+void XPCOMGlueEnablePreload()
+{
+    do_preload = PR_TRUE; // later handed to the DependentLibsCallback
+}
 
 extern "C"
 nsresult XPCOMGlueStartup(const char* xpcomFile)
 {
     xpcomFunctions.version = XPCOM_GLUE_VERSION;
     xpcomFunctions.size    = sizeof(XPCOMFunctions);
 
     GetFrozenFunctionsFunc func = nsnull;
@@ -123,17 +130,17 @@ XPCOMGlueLoadDependentLibs(const char *x
         // cut the trailing newline, if present
         if (buffer[l - 1] == '\n')
             buffer[l - 1] = '\0';
 
         char buffer2[MAXPATHLEN];
         snprintf(buffer2, sizeof(buffer2),
                  "%s" XPCOM_FILE_PATH_SEPARATOR "%s",
                  xpcomDir, buffer);
-        cb(buffer2);
+        cb(buffer2, do_preload);
     }
 
     fclose(flist);
 }
 
 extern "C"
 nsresult XPCOMGlueShutdown()
 {
--- a/xpcom/glue/standalone/nsXPCOMGlue.h
+++ b/xpcom/glue/standalone/nsXPCOMGlue.h
@@ -43,16 +43,22 @@
 
 #ifdef XPCOM_GLUE
 
 /**
  * The following functions are only available in the standalone glue.
  */
 
 /**
+ * Enable preloading of dynamically loaded libraries.
+ */
+extern "C" NS_HIDDEN_(void)
+XPCOMGlueEnablePreload();
+
+/**
  * Initialize the XPCOM glue by dynamically linking against the XPCOM
  * shared library indicated by xpcomFile.
  */
 extern "C" NS_HIDDEN_(nsresult)
 XPCOMGlueStartup(const char* xpcomFile);
 
 typedef void (*NSFuncPtr)();