Bug 683127 part 10 - Allow debug symbols to be found under gdb without extracted libraries. r=tglek,r=mwu
author: Mike Hommey <mh+mozilla@glandium.org>
Fri, 20 Jan 2012 09:49:03 +0100
changeset 84942 41c7ad654949db5393d22a95d8dd4d233d47f244
parent 84941 229140e62d7b49f34cb140a639d21f92aaa39d97
child 84943 27e66973c882c3d33e051b00bfec2e9eeb75d923
push id: unknown
push user: unknown
push date: unknown
reviewers: tglek, mwu
bugs: 683127
milestone: 12.0a1
first release with
nightly linux32
nightly linux64
nightly mac
nightly win32
nightly win64
last release without
nightly linux32
nightly linux64
nightly mac
nightly win32
nightly win64
Bug 683127 part 10 - Allow debug symbols to be found under gdb without extracted libraries. r=tglek,r=mwu
mozglue/linker/CustomElf.cpp
mozglue/linker/CustomElf.h
mozglue/linker/ElfLoader.cpp
mozglue/linker/ElfLoader.h
mozglue/linker/Utils.h
--- a/mozglue/linker/CustomElf.cpp
+++ b/mozglue/linker/CustomElf.cpp
@@ -171,17 +171,17 @@ CustomElf::Load(Mappable *mappable, cons
   }
   if (!dyn) {
     log("%s: No PT_DYNAMIC segment found", elf->GetPath());
     return NULL;
   }
 
   /* Reserve enough memory to map the complete virtual address space for this
    * library. */
-  elf->base.Init(mmap(NULL, max_vaddr, PROT_NONE, MAP_PRIVATE | MAP_ANONYMOUS,
+  elf->base.Assign(mmap(NULL, max_vaddr, PROT_NONE, MAP_PRIVATE | MAP_ANONYMOUS,
                       -1, 0), max_vaddr);
   if (elf->base == MAP_FAILED) {
     log("%s: Failed to mmap", elf->GetPath());
     return NULL;
   }
 
   /* Load and initialize library */
   for (std::vector<const Phdr *>::iterator it = pt_loads.begin();
@@ -190,16 +190,21 @@ CustomElf::Load(Mappable *mappable, cons
       return NULL;
 
   /* We're not going to mmap anymore */
   mappable->finalize();
 
   report_mapping(const_cast<char *>(elf->GetName()), elf->base,
                  (max_vaddr + PAGE_SIZE - 1) & PAGE_MASK, 0);
 
+  elf->l_addr = elf->base;
+  elf->l_name = elf->GetPath();
+  elf->l_ld = elf->GetPtr<Dyn>(dyn->p_vaddr);
+  ElfLoader::Singleton.Register(elf);
+
   if (!elf->InitDyn(dyn))
     return NULL;
 
   debug("CustomElf::Load(\"%s\", %x) = %p", path, flags,
         static_cast<void *>(elf));
   return elf;
 }
 
@@ -208,16 +213,17 @@ CustomElf::~CustomElf()
   debug("CustomElf::~CustomElf(%p [\"%s\"])",
         reinterpret_cast<void *>(this), GetPath());
   CallFini();
   /* Normally, __cxa_finalize is called by the .fini function. However,
    * Android NDK before r6b doesn't do that. Our wrapped cxa_finalize only
    * calls destructors once, so call it in all cases. */
   ElfLoader::__wrap_cxa_finalize(this);
   delete mappable;
+  ElfLoader::Singleton.Forget(this);
 }
 
 namespace {
 
 /**
  * Hash function for symbol lookup, as defined in ELF standard for System V
  */
 unsigned long
--- a/mozglue/linker/CustomElf.h
+++ b/mozglue/linker/CustomElf.h
@@ -203,18 +203,19 @@ struct Rela: public Elf_(Rela)
 } /* namespace Elf */
 
 class Mappable;
 
 /**
  * Library Handle class for ELF libraries we don't let the system linker
  * handle.
  */
-class CustomElf: public LibHandle
+class CustomElf: public LibHandle, private ElfLoader::link_map
 {
+  friend class ElfLoader;
 public:
   /**
    * Returns a new CustomElf using the given file descriptor to map ELF
    * content. The file descriptor ownership is stolen, and it will be closed
    * in CustomElf's destructor if an instance is created, or by the Load
    * method otherwise. The path corresponds to the file descriptor, and flags
    * are the same kind of flags that would be given to dlopen(), though
    * currently, none are supported and the behaviour is more or less that of
--- a/mozglue/linker/ElfLoader.cpp
+++ b/mozglue/linker/ElfLoader.cpp
@@ -10,16 +10,24 @@
 #include <fcntl.h>
 #include "ElfLoader.h"
 #include "CustomElf.h"
 #include "Mappable.h"
 #include "Logging.h"
 
 using namespace mozilla;
 
+#ifndef PAGE_SIZE
+#define PAGE_SIZE 4096
+#endif
+
+#ifndef PAGE_MASK
+#define PAGE_MASK (~ (PAGE_SIZE - 1))
+#endif
+
 /**
  * dlfcn.h replacements functions
  */
 
 void *
 __wrap_dlopen(const char *path, int flags)
 {
   RefPtr<LibHandle> handle = ElfLoader::Singleton.Load(path, flags);
@@ -87,17 +95,16 @@ LeafName(const char *path)
 
 } /* Anonymous namespace */
 
 /**
  * LibHandle
  */
 LibHandle::~LibHandle()
 {
-  ElfLoader::Singleton.Forget(this);
   free(path);
 }
 
 const char *
 LibHandle::GetName() const
 {
   return path ? LeafName(path) : NULL;
 }
@@ -113,28 +120,32 @@ SystemElf::Load(const char *path, int fl
   if (path && path[0] == '/' && (access(path, F_OK) == -1)){
     debug("dlopen(\"%s\", %x) = %p", path, flags, (void *)NULL);
     return NULL;
   }
 
   void *handle = dlopen(path, flags);
   debug("dlopen(\"%s\", %x) = %p", path, flags, handle);
   ElfLoader::Singleton.lastError = dlerror();
-  if (handle)
-    return new SystemElf(path, handle);
+  if (handle) {
+    SystemElf *elf = new SystemElf(path, handle);
+    ElfLoader::Singleton.Register(elf);
+    return elf;
+  }
   return NULL;
 }
 
 SystemElf::~SystemElf()
 {
   if (!dlhandle)
     return;
   debug("dlclose(%p [\"%s\"])", dlhandle, GetPath());
   dlclose(dlhandle);
   ElfLoader::Singleton.lastError = dlerror();
+  ElfLoader::Singleton.Forget(this);
 }
 
 void *
 SystemElf::GetSymbolPtr(const char *symbol) const
 {
   void *sym = dlsym(dlhandle, symbol);
   debug("dlsym(%p [\"%s\"], \"%s\") = %p", dlhandle, GetPath(), symbol, sym);
   ElfLoader::Singleton.lastError = dlerror();
@@ -151,17 +162,16 @@ ElfLoader ElfLoader::Singleton;
 TemporaryRef<LibHandle>
 ElfLoader::Load(const char *path, int flags, LibHandle *parent)
 {
   RefPtr<LibHandle> handle;
 
   /* Handle dlopen(NULL) directly. */
   if (!path) {
     handle = SystemElf::Load(NULL, flags);
-    handles.push_back(handle);
     return handle;
   }
 
   /* TODO: Handle relative paths correctly */
   const char *name = LeafName(path);
 
   /* Search the list of handles we already have for a match. When the given
    * path is not absolute, compare file names, otherwise compare full paths. */
@@ -234,41 +244,47 @@ ElfLoader::Load(const char *path, int fl
   if (!handle && abs_path)
     handle = SystemElf::Load(name, flags);
 
   delete [] abs_path;
   debug("ElfLoader::Load(\"%s\", 0x%x, %p [\"%s\"]) = %p", requested_path, flags,
         reinterpret_cast<void *>(parent), parent ? parent->GetPath() : "",
         static_cast<void *>(handle));
 
-  /* Bookkeeping */
-  if (handle)
-    handles.push_back(handle);
-
   return handle;
 }
 
 mozilla::TemporaryRef<LibHandle>
 ElfLoader::GetHandleByPtr(void *addr)
 {
   /* Scan the list of handles we already have for a match */
   for (LibHandleList::iterator it = handles.begin(); it < handles.end(); ++it) {
     if ((*it)->Contains(addr))
       return *it;
   }
   return NULL;
 }
 
 void
+ElfLoader::Register(LibHandle *handle)
+{
+  handles.push_back(handle);
+  if (dbg && !handle->IsSystemElf())
+    dbg->Add(static_cast<CustomElf *>(handle));
+}
+
+void
 ElfLoader::Forget(LibHandle *handle)
 {
   LibHandleList::iterator it = std::find(handles.begin(), handles.end(), handle);
   if (it != handles.end()) {
     debug("ElfLoader::Forget(%p [\"%s\"])", reinterpret_cast<void *>(handle),
                                             handle->GetPath());
+    if (dbg && !handle->IsSystemElf())
+      dbg->Remove(static_cast<CustomElf *>(handle));
     handles.erase(it);
   } else {
     debug("ElfLoader::Forget(%p [\"%s\"]): Handle not found",
           reinterpret_cast<void *>(handle), handle->GetPath());
   }
 }
 
 ElfLoader::~ElfLoader()
@@ -354,8 +370,199 @@ ElfLoader::DestructorCaller::Call()
 {
   if (destructor) {
     debug("ElfLoader::DestructorCaller::Call(%p, %p, %p)",
           FunctionPtr(destructor), object, dso_handle);
     destructor(object);
     destructor = NULL;
   }
 }
+
+void
+ElfLoader::InitDebugger()
+{
+  /* Find ELF auxiliary vectors.
+   *
+   * The kernel stores the following data on the stack when starting a
+   * program:
+   *   argc
+   *   argv[0] (pointer into argv strings defined below)
+   *   argv[1] (likewise)
+   *   ...
+   *   argv[argc - 1] (likewise)
+   *   NULL
+   *   envp[0] (pointer into environment strings defined below)
+   *   envp[1] (likewise)
+   *   ...
+   *   envp[n] (likewise)
+   *   NULL
+   *   auxv[0] (first ELF auxiliary vector)
+   *   auxv[1] (second ELF auxiliary vector)
+   *   ...
+   *   auxv[p] (last ELF auxiliary vector)
+   *   (AT_NULL, NULL)
+   *   padding
+   *   argv strings, separated with '\0'
+   *   environment strings, separated with '\0'
+   *   NULL
+   *
+   * What we are after are the auxv values defined by the following struct.
+   */
+  struct AuxVector {
+    Elf::Addr type;
+    Elf::Addr value;
+  };
+
+  /* Pointer to the environment variables list */
+  extern char **environ;
+
+  /* The environment may have changed since the program started, in which
+   * case the environ variables list isn't the list the kernel put on stack
+   * anymore. But in this new list, variables that didn't change still point
+   * to the strings the kernel put on stack. It is quite unlikely that two
+   * modified environment variables point to two consecutive strings in memory,
+   * so we assume that if two consecutive environment variables point to two
+   * consecutive strings, we found strings the kernel put on stack. */
+  char **env;
+  for (env = environ; *env; env++)
+    if (*env + strlen(*env) + 1 == env[1])
+      break;
+  if (!*env)
+    return;
+
+  /* Next, we scan the stack backwards to find a pointer to one of those
+   * strings we found above, which will give us the location of the original
+   * envp list. As we are looking for pointers, we need to look at 32-bit or
+   * 64-bit aligned values, depending on the architecture. */
+  char **scan = reinterpret_cast<char **>(
+                reinterpret_cast<uintptr_t>(*env) & ~(sizeof(void *) - 1));
+  while (*env != *scan)
+    scan--;
+
+  /* Finally, scan forward to find the last environment variable pointer and
+   * thus the first auxiliary vector. */
+  while (*scan++);
+  AuxVector *auxv = reinterpret_cast<AuxVector *>(scan);
+
+  /* The two values of interest in the auxiliary vectors are AT_PHDR and
+   * AT_PHNUM, which give us the location and size of the ELF program
+   * headers. */
+  Array<Elf::Phdr> phdrs;
+  char *base = NULL;
+  while (auxv->type) {
+    if (auxv->type == AT_PHDR) {
+      phdrs.Init(reinterpret_cast<Elf::Phdr*>(auxv->value));
+      /* Assume the base address is the first byte of the same page */
+      base = reinterpret_cast<char *>(auxv->value & PAGE_MASK);
+    }
+    if (auxv->type == AT_PHNUM)
+      phdrs.Init(auxv->value);
+    auxv++;
+  }
+
+  if (!phdrs) {
+    debug("Couldn't find program headers");
+    return;
+  }
+
+  /* In some cases, the address for the program headers we get from the
+   * auxiliary vectors is not mapped, because of the PT_LOAD segments
+   * definitions in the program executable. Trying to map anonymous memory
+   * with a hint giving the base address will return a different address
+   * if something is mapped there, and the base address otherwise. */
+  MappedPtr mem(mmap(base, PAGE_SIZE, PROT_NONE,
+                     MAP_PRIVATE | MAP_ANONYMOUS, -1, 0), PAGE_SIZE);
+  if (mem == base) {
+    /* If program headers aren't mapped, try to map them */
+    int fd = open("/proc/self/exe", O_RDONLY);
+    if (fd == -1) {
+      debug("Failed to open /proc/self/exe");
+      return;
+    }
+    mem.Assign(mmap(base, PAGE_SIZE, PROT_READ, MAP_PRIVATE, fd, 0), PAGE_SIZE);
+    /* If we don't manage to map at the right address, just give up. */
+    if (mem != base) {
+      debug("Couldn't read program headers");
+      return;
+    }
+  }
+  /* Sanity check: the first bytes at the base address should be an ELF
+   * header. */
+  if (!Elf::Ehdr::validate(base)) {
+     debug("Couldn't find program base");
+     return;
+  }
+
+  /* Search for the program PT_DYNAMIC segment */
+  Array<Elf::Dyn> dyns;
+  for (Array<Elf::Phdr>::iterator phdr = phdrs.begin(); phdr < phdrs.end();
+       ++phdr) {
+    /* While the program headers are expected within the first mapped page of
+     * the program executable, the executable PT_LOADs may actually make them
+     * loaded at an address that is not the wanted base address of the
+     * library. We thus need to adjust the base address, compensating for the
+     * virtual address of the PT_LOAD segment corresponding to offset 0. */
+    if (phdr->p_type == PT_LOAD && phdr->p_offset == 0)
+      base -= phdr->p_vaddr;
+    if (phdr->p_type == PT_DYNAMIC)
+      dyns.Init(base + phdr->p_vaddr, phdr->p_filesz);
+  }
+  if (!dyns) {
+    debug("Failed to find PT_DYNAMIC section in program");
+    return;
+  }
+
+  /* Search for the DT_DEBUG information */
+  for (Array<Elf::Dyn>::iterator dyn = dyns.begin(); dyn < dyns.end(); ++dyn) {
+    if (dyn->d_tag == DT_DEBUG) {
+      dbg = reinterpret_cast<r_debug *>(dyn->d_un.d_ptr);
+      break;
+    }
+  }
+  debug("DT_DEBUG points at %p", dbg);
+}
+
+/**
+ * The system linker maintains a doubly linked list of libraries it loads
+ * for use by the debugger. Unfortunately, it also uses the list pointers
+ * in a lot of operations and adding our data in the list is likely to
+ * trigger crashes when the linker tries to use data we don't provide or
+ * that fall off the amount of data we allocated. Fortunately, the linker only
+ * traverses the list forward and accesses the head of the list from a
+ * private pointer instead of using the value in the r_debug structure.
+ * This means we can safely add members at the beginning of the list.
+ * Unfortunately, gdb checks the coherency of l_prev values, so we have
+ * to adjust the l_prev value for the first element the system linker
+ * knows about. Fortunately, it doesn't use l_prev, and the first element
+ * is not ever going to be released before our elements, since it is the
+ * program executable, so the system linker should not be changing
+ * r_debug::r_map.
+ */
+void
+ElfLoader::r_debug::Add(ElfLoader::link_map *map)
+{
+  if (!r_brk)
+    return;
+  r_state = RT_ADD;
+  r_brk();
+  map->l_prev = NULL;
+  map->l_next = r_map;
+  r_map->l_prev = map;
+  r_map = map;
+  r_state = RT_CONSISTENT;
+  r_brk();
+}
+
+void
+ElfLoader::r_debug::Remove(ElfLoader::link_map *map)
+{
+  if (!r_brk)
+    return;
+  r_state = RT_DELETE;
+  r_brk();
+  if (r_map == map)
+    r_map = map->l_next;
+  else
+    map->l_prev->l_next = map->l_next;
+  map->l_next->l_prev = map->l_prev;
+  r_state = RT_CONSISTENT;
+  r_brk();
+}
--- a/mozglue/linker/ElfLoader.h
+++ b/mozglue/linker/ElfLoader.h
@@ -200,31 +200,36 @@ public:
    * its virtual address space, i.e. the library handle for which
    * LibHandle::Contains returns true. Its purpose is to allow to
    * implement dladdr().
    */
   mozilla::TemporaryRef<LibHandle> GetHandleByPtr(void *addr);
 
 protected:
   /**
+   * Registers the given handle. This method is meant to be called by
+   * LibHandle subclass creators.
+   */
+  void Register(LibHandle *handle);
+
+  /**
    * Forget about the given handle. This method is meant to be called by
-   * the LibHandle destructor.
+   * LibHandle subclass destructors.
    */
-  friend LibHandle::~LibHandle();
   void Forget(LibHandle *handle);
 
   /* Last error. Used for dlerror() */
   friend class SystemElf;
   friend const char *__wrap_dlerror(void);
   friend void *__wrap_dlsym(void *handle, const char *symbol);
   friend int __wrap_dlclose(void *handle);
   const char *lastError;
 
 private:
-  ElfLoader() { }
+  ElfLoader() { InitDebugger(); }
   ~ElfLoader();
 
   /* Bookkeeping */
   typedef std::vector<LibHandle *> LibHandleList;
   LibHandleList handles;
 
 protected:
   friend class CustomElf;
@@ -287,11 +292,63 @@ protected:
   };
 
 private:
   /* Keep track of all registered destructors */
   std::vector<DestructorCaller> destructors;
 
   /* Keep track of Zips used for library loading */
   ZipCollection zips;
+
+public:
+  /* Loaded object descriptor for the debugger interface below */
+  struct link_map {
+    /* Base address of the loaded object. */
+    const void *l_addr;
+    /* File name */
+    const char *l_name;
+    /* Address of the PT_DYNAMIC segment. */
+    const void *l_ld;
+    /* Doubly linked list of loaded objects. */
+    link_map *l_next, *l_prev;
+  };
+
+private:
+  /* Data structure used by the linker to give details about shared objects it
+   * loaded to debuggers. This is normally defined in link.h, but Android
+   * headers lack this file. This also gives the opportunity to make it C++. */
+  class r_debug {
+  public:
+    /* Make the debugger aware of a new loaded object */
+    void Add(link_map *map);
+
+    /* Make the debugger aware of the unloading of an object */
+    void Remove(link_map *map);
+
+  private:
+    /* Version number of the protocol. */
+    int r_version;
+
+    /* Head of the linked list of loaded objects. */
+    struct link_map *r_map;
+
+    /* Function to be called when updates to the linked list of loaded objects
+     * are going to occur. The function is to be called before and after
+     * changes. */
+    void (*r_brk)(void);
+
+    /* Indicates to the debugger what state the linked list of loaded objects
+     * is in when the function above is called. */
+    enum {
+      RT_CONSISTENT, /* Changes are complete */
+      RT_ADD,        /* Beginning to add a new object */
+      RT_DELETE      /* Beginning to remove an object */
+    } r_state;
+  };
+  r_debug *dbg;
+
+  /**
+   * Initializes the pointer to the debugger data structure.
+   */
+  void InitDebugger();
 };
 
 #endif /* ElfLoader_h */
--- a/mozglue/linker/Utils.h
+++ b/mozglue/linker/Utils.h
@@ -178,17 +178,19 @@ AUTOCLEAN_TEMPLATE(AutoDeleteArray, Auto
  */
 template <typename T>
 class GenericMappedPtr
 {
 public:
   GenericMappedPtr(void *buf, size_t length): buf(buf), length(length) { }
   GenericMappedPtr(): buf(MAP_FAILED), length(0) { }
 
-  void Init(void *b, size_t len) {
+  void Assign(void *b, size_t len) {
+    if (buf != MAP_FAILED)
+      static_cast<T *>(this)->munmap(buf, length);
     buf = b;
     length = len;
   }
 
   ~GenericMappedPtr()
   {
     if (buf != MAP_FAILED)
       static_cast<T *>(this)->munmap(buf, length);