Bug 632404 - Preload dependent libraries at startup. r=tglek,r=bsmedberg
author: Mike Hommey <mh+mozilla@glandium.org>
date: Mon, 16 May 2011 10:25:05 +0200
changeset: 71227 cc18551d5cc3
parent: 71226 c4bbac1f178b
child: 71228 b58ba54bdcbd
push id: 20512
push user: mlamouri@mozilla.com
push date: 2011-06-17 13:30 +0000
treeherder: mozilla-central@9ac190a247ad [default view] [failures only]
perfherder: [talos] [build metrics] [platform microbench] (compared to previous push)
reviewers: tglek, bsmedberg
bugs: 632404
milestone: 7.0a1
Bug 632404 - Preload dependent libraries at startup. r=tglek,r=bsmedberg
browser/app/nsBrowserApp.cpp
xpcom/glue/standalone/nsGlueLinking.h
xpcom/glue/standalone/nsGlueLinkingDlopen.cpp
xpcom/glue/standalone/nsGlueLinkingOS2.cpp
xpcom/glue/standalone/nsGlueLinkingOSX.cpp
xpcom/glue/standalone/nsGlueLinkingWin.cpp
xpcom/glue/standalone/nsXPCOMGlue.cpp
xpcom/glue/standalone/nsXPCOMGlue.h
--- a/browser/app/nsBrowserApp.cpp
+++ b/browser/app/nsBrowserApp.cpp
@@ -204,16 +204,31 @@ int main(int argc, char* argv[])
   }
 
   char *lastSlash = strrchr(exePath, XPCOM_FILE_PATH_SEPARATOR[0]);
   if (!lastSlash || (lastSlash - exePath > MAXPATHLEN - sizeof(XPCOM_DLL) - 1))
     return 255;
 
   strcpy(++lastSlash, XPCOM_DLL);
 
+#ifdef XP_WIN
+  // GetProcessIoCounters().ReadOperationCount seems to have little to
+  // do with actual read operations. It reports 0 or 1 at this stage
+  // in the program. Luckily 1 coincides with when prefetch is
+  // enabled. If Windows prefetch didn't happen we can do our own
+  // faster dll preloading.
+  IO_COUNTERS ioCounters;
+  if (GetProcessIoCounters(GetCurrentProcess(), &ioCounters)
+      && !ioCounters.ReadOperationCount)
+#endif
+  {
+      XPCOMGlueEnablePreload();
+  }
+
+
   rv = XPCOMGlueStartup(exePath);
   if (NS_FAILED(rv)) {
     Output("Couldn't load XPCOM.\n");
     return 255;
   }
 
   rv = XPCOMGlueLoadXULFunctions(kXULFuncs);
   if (NS_FAILED(rv)) {
--- a/xpcom/glue/standalone/nsGlueLinking.h
+++ b/xpcom/glue/standalone/nsGlueLinking.h
@@ -43,14 +43,14 @@
 #define XPCOM_DEPENDENT_LIBS_LIST "dependentlibs.list"
 
 NS_HIDDEN_(nsresult)
 XPCOMGlueLoad(const char *xpcomFile, GetFrozenFunctionsFunc *func NS_OUTPARAM);
 
 NS_HIDDEN_(void)
 XPCOMGlueUnload();
 
-typedef void (*DependentLibsCallback)(const char *aDependentLib);
+typedef void (*DependentLibsCallback)(const char *aDependentLib, PRBool do_preload);
 
 NS_HIDDEN_(void)
 XPCOMGlueLoadDependentLibs(const char *xpcomDir, DependentLibsCallback cb);
 
 #endif // nsGlueLinking_h__
--- a/xpcom/glue/standalone/nsGlueLinkingDlopen.cpp
+++ b/xpcom/glue/standalone/nsGlueLinkingDlopen.cpp
@@ -16,16 +16,17 @@
  *
  * The Initial Developer of the Original Code is
  * Benjamin Smedberg <benjamin@smedbergs.us>
  *
  * Portions created by the Initial Developer are Copyright (C) 2005
  * the Initial Developer. All Rights Reserved.
  *
  * Contributor(s):
+ *   Mike Hommey <mh@glandium.org>
  *
  * Alternatively, the contents of this file may be used under the terms of
  * either the GNU General Public License Version 2 or later (the "GPL"), or
  * the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
  * in which case the provisions of the GPL or the LGPL are applicable instead
  * of those above. If you wish to allow use of your version of this file only
  * under the terms of either the GPL or the LGPL, and not to allow others to
  * use your version of this file under the terms of the MPL, indicate your
@@ -35,16 +36,24 @@
  * the terms of any one of the MPL, the GPL or the LGPL.
  *
  * ***** END LICENSE BLOCK ***** */
 
 #include "nsGlueLinking.h"
 #include "nsXPCOMGlue.h"
 #include "nscore.h"
 
+#ifdef LINUX
+#define _GNU_SOURCE 
+#include <fcntl.h>
+#include <unistd.h>
+#include <elf.h>
+#include <limits.h>
+#endif
+
 #include <errno.h>
 #include <dlfcn.h>
 #include <stdlib.h>
 #include <string.h>
 #include <stdio.h>
 
 #if defined(SUNOS4) || defined(NEXTSTEP) || \
     (defined(OPENBSD) || defined(NETBSD)) && !defined(__ELF__)
@@ -133,19 +142,79 @@ AppendDependentLib(void *libHandle)
         return;
 
     d->next = sTop;
     d->libHandle = libHandle;
 
     sTop = d;
 }
 
+#ifdef LINUX
+static const unsigned int bufsize = 4096;
+
+#ifdef HAVE_64BIT_OS
+typedef Elf64_Ehdr Elf_Ehdr;
+typedef Elf64_Phdr Elf_Phdr;
+static const unsigned char ELFCLASS = ELFCLASS64;
+typedef Elf64_Off Elf_Off;
+#else
+typedef Elf32_Ehdr Elf_Ehdr;
+typedef Elf32_Phdr Elf_Phdr;
+static const unsigned char ELFCLASS = ELFCLASS32;
+typedef Elf32_Off Elf_Off;
+#endif
+
 static void
-ReadDependentCB(const char *aDependentLib)
+preload(const char *file)
 {
+    union {
+        char buf[bufsize];
+        Elf_Ehdr ehdr;
+    } elf;
+    int fd = open(file, O_RDONLY);
+    if (fd < 0)
+        return;
+    // Read ELF header (ehdr) and program header table (phdr).
+    // We check that the ELF magic is found, that the ELF class matches
+    // our own, and that the program header table as defined in the ELF
+    // headers fits in the buffer we read.
+    if ((read(fd, elf.buf, bufsize) <= 0) ||
+        (memcmp(elf.buf, ELFMAG, 4)) ||
+        (elf.ehdr.e_ident[EI_CLASS] != ELFCLASS) ||
+        (elf.ehdr.e_phoff + elf.ehdr.e_phentsize * elf.ehdr.e_phnum >= bufsize)) {
+        close(fd);
+        return;
+    }
+    // The program header table contains segment definitions. One such
+    // segment type is PT_LOAD, which describes how the dynamic loader
+    // is going to map the file in memory. We use that information to
+    // find the biggest offset from the library that will be mapped in
+    // memory.
+    Elf_Phdr *phdr = (Elf_Phdr *)&elf.buf[elf.ehdr.e_phoff];
+    Elf_Off end = 0;
+    for (int phnum = elf.ehdr.e_phnum; phnum; phdr++, phnum--)
+        if ((phdr->p_type == PT_LOAD) &&
+            (end < phdr->p_offset + phdr->p_filesz))
+            end = phdr->p_offset + phdr->p_filesz;
+    // Let the kernel read ahead what the dynamic loader is going to
+    // map in memory soon after.
+    if (end > 0) {
+        readahead(fd, 0, end);
+    }
+    close(fd);
+}
+#endif
+
+static void
+ReadDependentCB(const char *aDependentLib, PRBool do_preload)
+{
+#ifdef LINUX
+    if (do_preload)
+        preload(aDependentLib);
+#endif
     void *libHandle = dlopen(aDependentLib, RTLD_GLOBAL | RTLD_LAZY);
     if (!libHandle)
         return;
 
     AppendDependentLib(libHandle);
 }
 
 nsresult
--- a/xpcom/glue/standalone/nsGlueLinkingOS2.cpp
+++ b/xpcom/glue/standalone/nsGlueLinkingOS2.cpp
@@ -63,17 +63,17 @@ AppendDependentLib(HMODULE libHandle)
 
     d->next = sTop;
     d->libHandle = libHandle;
 
     sTop = d;
 }
 
 static void
-ReadDependentCB(const char *aDependentLib)
+ReadDependentCB(const char *aDependentLib, PRBool do_preload)
 {
     CHAR pszError[_MAX_PATH];
     ULONG ulrc = NO_ERROR;
     HMODULE h;
 
     ulrc = DosLoadModule(pszError, _MAX_PATH, aDependentLib, &h);
 
     if (ulrc != NO_ERROR)
--- a/xpcom/glue/standalone/nsGlueLinkingOSX.cpp
+++ b/xpcom/glue/standalone/nsGlueLinkingOSX.cpp
@@ -16,16 +16,17 @@
  *
  * The Initial Developer of the Original Code is
  * Benjamin Smedberg <benjamin@smedbergs.us>
  *
  * Portions created by the Initial Developer are Copyright (C) 2005
  * the Initial Developer. All Rights Reserved.
  *
  * Contributor(s):
+ *   Mike Hommey <mh@glandium.org>
  *
  * Alternatively, the contents of this file may be used under the terms of
  * either the GNU General Public License Version 2 or later (the "GPL"), or
  * the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
  * in which case the provisions of the GPL or the LGPL are applicable instead
  * of those above. If you wish to allow use of your version of this file only
  * under the terms of either the GPL or the LGPL, and not to allow others to
  * use your version of this file under the terms of the MPL, indicate your
@@ -40,21 +41,141 @@
 #include "nsXPCOMGlue.h"
 
 #include <mach-o/dyld.h>
 #include <sys/param.h>
 #include <stdlib.h>
 #include <string.h>
 #include <stdio.h>
 
+#include <fcntl.h>
+#include <unistd.h>
+#include <mach/machine.h>
+#include <mach-o/fat.h>
+#include <mach-o/loader.h>
+#include <sys/mman.h>
+#include <sys/stat.h>
+#include <limits.h>
+
+#if defined(__i386__)
+static const uint32_t CPU_TYPE = CPU_TYPE_X86;
+#elif defined(__x86_64__)
+static const uint32_t CPU_TYPE = CPU_TYPE_X86_64;
+#elif defined(__ppc__)
+static const uint32_t CPU_TYPE = CPU_TYPE_POWERPC;
+#elif defined(__ppc64__)
+static const uint32_t CPU_TYPE = CPU_TYPE_POWERPC64;
+#else
+#error Unsupported CPU type
+#endif
+
+#ifdef HAVE_64BIT_OS
+#undef LC_SEGMENT
+#define LC_SEGMENT LC_SEGMENT_64
+#undef MH_MAGIC
+#define MH_MAGIC MH_MAGIC_64
+#define cpu_mach_header mach_header_64
+#define segment_command segment_command_64
+#else
+#define cpu_mach_header mach_header
+#endif
+
+// RAII wrapper: opens `file` and maps it read-only; yields NULL on failure.
+class ScopedMMap
+{
+public:
+    ScopedMMap(const char *file): fd(open(file, O_RDONLY)), buf(NULL), size(0) {
+        struct stat st;
+        if (fd < 0 || fstat(fd, &st) < 0)
+            return;
+        size = st.st_size;
+        // mmap reports failure as MAP_FAILED, never NULL: normalize to NULL.
+        void *map = mmap(NULL, size, PROT_READ, MAP_PRIVATE, fd, 0);
+        buf = (map == MAP_FAILED) ? NULL : (char *)map;
+    }
+    ~ScopedMMap() {
+        if (buf)
+            munmap(buf, size);
+        if (fd >= 0)
+            close(fd);
+    }
+    operator char *() { return buf; }
+    int getFd() { return fd; }
+private:
+    int fd;
+    char *buf;
+    size_t size;
+};
+
+// Ask the kernel to read ahead the part of `file` the dynamic loader will map.
+static void
+preload(const char *file)
+{
+    ScopedMMap buf(file);
+    char *base = buf;
+    if (!base)
+        return;
+
+    // An OSX binary might either be a fat (universal) binary or a
+    // Mach-O binary. A fat binary actually embeds several Mach-O
+    // binaries. If we have a fat binary, find the offset where the
+    // Mach-O binary for our CPU type can be found.
+    struct fat_header *fh = (struct fat_header *)base;
+    if (OSSwapBigToHostInt32(fh->magic) == FAT_MAGIC) {
+        uint32_t nfat_arch = OSSwapBigToHostInt32(fh->nfat_arch);
+        struct fat_arch *arch = (struct fat_arch *)&buf[sizeof(struct fat_header)];
+        for (; nfat_arch; arch++, nfat_arch--) {
+            if (OSSwapBigToHostInt32(arch->cputype) == CPU_TYPE) {
+                base += OSSwapBigToHostInt32(arch->offset);
+                break;
+            }
+        }
+        if (base == buf)
+            return;
+    }
+
+    // Check Mach-O magic in the Mach header
+    struct cpu_mach_header *mh = (struct cpu_mach_header *)base;
+    if (mh->magic != MH_MAGIC)
+        return;
+
+    // The Mach header is followed by a sequence of load commands.
+    // Each command has a header containing the command type and the
+    // command size. LC_SEGMENT commands describe how the dynamic
+    // loader is going to map the file in memory. We use that
+    // information to find the biggest offset from the library that
+    // will be mapped in memory.
+    char *cmd = &base[sizeof(struct cpu_mach_header)];
+    off_t end = 0;
+    for (uint32_t ncmds = mh->ncmds; ncmds; ncmds--) {
+        struct segment_command *sh = (struct segment_command *)cmd;
+        cmd += sh->cmdsize; // advance first, so skipped commands are stepped over too
+        if (sh->cmd != LC_SEGMENT)
+            continue;
+        if (end < sh->fileoff + sh->filesize)
+            end = sh->fileoff + sh->filesize;
+    }
+    // Let the kernel read ahead what the dynamic loader is going to
+    // map in memory soon after. The F_RDADVISE fcntl is equivalent
+    // to Linux' readahead() system call.
+    if (end > 0) {
+        struct radvisory ra;
+        ra.ra_offset = (base - buf);
+        ra.ra_count = end;
+        fcntl(buf.getFd(), F_RDADVISE, &ra);
+    }
+}
+
 static const mach_header* sXULLibImage;
 
 static void
-ReadDependentCB(const char *aDependentLib)
+ReadDependentCB(const char *aDependentLib, PRBool do_preload)
 {
+    if (do_preload)
+        preload(aDependentLib);
     (void) NSAddImage(aDependentLib,
                       NSADDIMAGE_OPTION_RETURN_ON_ERROR |
                       NSADDIMAGE_OPTION_MATCH_FILENAME_BY_INSTALLNAME);
 }
 
 static void*
 LookupSymbol(const mach_header* aLib, const char* aSymbolName)
 {
--- a/xpcom/glue/standalone/nsGlueLinkingWin.cpp
+++ b/xpcom/glue/standalone/nsGlueLinkingWin.cpp
@@ -65,21 +65,44 @@ AppendDependentLib(HINSTANCE libHandle)
 
     d->next = sTop;
     d->libHandle = libHandle;
 
     sTop = d;
 }
 
 static void
-ReadDependentCB(const char *aDependentLib)
+preload(LPCWSTR dll)
+{
+    // Sequentially read |dll| so the kernel reads it ahead of the actual load.
+    HANDLE file = CreateFileW(dll, GENERIC_READ, FILE_SHARE_READ, NULL,
+                              OPEN_EXISTING, FILE_FLAG_SEQUENTIAL_SCAN, NULL);
+    if (file == INVALID_HANDLE_VALUE)
+        return;
+
+    // The reads are dummies whose only purpose is to trigger kernel-side
+    // readahead via FILE_FLAG_SEQUENTIAL_SCAN. Abort on the first underfilled
+    // buffer: during testing buffers were read fully, so one that is not
+    // keeping up would imply that readahead isn't working right.
+    char chunk[64 * 1024];
+    DWORD got;
+    while (ReadFile(file, chunk, sizeof(chunk), &got, NULL) && got == sizeof(chunk))
+        /* Nothing */;
+    CloseHandle(file);
+}
+
+static void
+ReadDependentCB(const char *aDependentLib, PRBool do_preload)
 {
     wchar_t wideDependentLib[MAX_PATH];
     MultiByteToWideChar(CP_UTF8, 0, aDependentLib, -1, wideDependentLib, MAX_PATH);
 
+    if (do_preload)
+        preload(wideDependentLib);
+
     HINSTANCE h =
         LoadLibraryExW(wideDependentLib, NULL, MOZ_LOADLIBRARY_FLAGS);
 
     if (!h)
         return;
 
     AppendDependentLib(h);
 }
--- a/xpcom/glue/standalone/nsXPCOMGlue.cpp
+++ b/xpcom/glue/standalone/nsXPCOMGlue.cpp
@@ -50,16 +50,23 @@
 #ifdef XP_WIN
 #include <windows.h>
 #include <mbstring.h>
 #include <malloc.h>
 #define snprintf _snprintf
 #endif
 
 static XPCOMFunctions xpcomFunctions;
+static PRBool do_preload = PR_FALSE;
+
+extern "C"
+void XPCOMGlueEnablePreload()
+{
+    do_preload = PR_TRUE; // later passed to the callback for each dependent library
+}
 
 extern "C"
 nsresult XPCOMGlueStartup(const char* xpcomFile)
 {
     xpcomFunctions.version = XPCOM_GLUE_VERSION;
     xpcomFunctions.size    = sizeof(XPCOMFunctions);
 
     GetFrozenFunctionsFunc func = nsnull;
@@ -123,17 +130,17 @@ XPCOMGlueLoadDependentLibs(const char *x
         // cut the trailing newline, if present
         if (buffer[l - 1] == '\n')
             buffer[l - 1] = '\0';
 
         char buffer2[MAXPATHLEN];
         snprintf(buffer2, sizeof(buffer2),
                  "%s" XPCOM_FILE_PATH_SEPARATOR "%s",
                  xpcomDir, buffer);
-        cb(buffer2);
+        cb(buffer2, do_preload);
     }
 
     fclose(flist);
 }
 
 extern "C"
 nsresult XPCOMGlueShutdown()
 {
--- a/xpcom/glue/standalone/nsXPCOMGlue.h
+++ b/xpcom/glue/standalone/nsXPCOMGlue.h
@@ -43,16 +43,22 @@
 
 #ifdef XPCOM_GLUE
 
 /**
  * The following functions are only available in the standalone glue.
  */
 
 /**
+ * Enable preloading of dynamically loaded libraries.
+ */
+extern "C" NS_HIDDEN_(void)
+XPCOMGlueEnablePreload();
+
+/**
  * Initialize the XPCOM glue by dynamically linking against the XPCOM
  * shared library indicated by xpcomFile.
  */
 extern "C" NS_HIDDEN_(nsresult)
 XPCOMGlueStartup(const char* xpcomFile);
 
 typedef void (*NSFuncPtr)();