Bug 683127 part 10 - Allow debug symbols to be found under gdb without extracted libraries. r=tglek,r=mwu
author Mike Hommey <mh+mozilla@glandium.org>
Fri, 20 Jan 2012 09:49:03 +0100
changeset 84980 41c7ad654949db5393d22a95d8dd4d233d47f244
parent 84979 229140e62d7b49f34cb140a639d21f92aaa39d97
child 84981 27e66973c882c3d33e051b00bfec2e9eeb75d923
push id 459
push user rcampbell@mozilla.com
push date Sat, 21 Jan 2012 15:34:19 +0000
treeherder fx-team@d43360499b86 [default view] [failures only]
perfherder [talos] [build metrics] [platform microbench] (compared to previous push)
reviewers tglek, mwu
bugs 683127
milestone 12.0a1
Bug 683127 part 10 - Allow debug symbols to be found under gdb without extracted libraries. r=tglek,r=mwu
mozglue/linker/CustomElf.cpp
mozglue/linker/CustomElf.h
mozglue/linker/ElfLoader.cpp
mozglue/linker/ElfLoader.h
mozglue/linker/Utils.h
--- a/mozglue/linker/CustomElf.cpp
+++ b/mozglue/linker/CustomElf.cpp
@@ -171,17 +171,17 @@ CustomElf::Load(Mappable *mappable, cons
   }
   if (!dyn) {
     log("%s: No PT_DYNAMIC segment found", elf->GetPath());
     return NULL;
   }
 
   /* Reserve enough memory to map the complete virtual address space for this
    * library. */
-  elf->base.Init(mmap(NULL, max_vaddr, PROT_NONE, MAP_PRIVATE | MAP_ANONYMOUS,
+  elf->base.Assign(mmap(NULL, max_vaddr, PROT_NONE, MAP_PRIVATE | MAP_ANONYMOUS,
                       -1, 0), max_vaddr);
   if (elf->base == MAP_FAILED) {
     log("%s: Failed to mmap", elf->GetPath());
     return NULL;
   }
 
   /* Load and initialize library */
   for (std::vector<const Phdr *>::iterator it = pt_loads.begin();
@@ -190,16 +190,21 @@ CustomElf::Load(Mappable *mappable, cons
       return NULL;
 
   /* We're not going to mmap anymore */
   mappable->finalize();
 
   report_mapping(const_cast<char *>(elf->GetName()), elf->base,
                  (max_vaddr + PAGE_SIZE - 1) & PAGE_MASK, 0);
 
+  elf->l_addr = elf->base;
+  elf->l_name = elf->GetPath();
+  elf->l_ld = elf->GetPtr<Dyn>(dyn->p_vaddr);
+  ElfLoader::Singleton.Register(elf);
+
   if (!elf->InitDyn(dyn))
     return NULL;
 
   debug("CustomElf::Load(\"%s\", %x) = %p", path, flags,
         static_cast<void *>(elf));
   return elf;
 }
 
@@ -208,16 +213,17 @@ CustomElf::~CustomElf()
   debug("CustomElf::~CustomElf(%p [\"%s\"])",
         reinterpret_cast<void *>(this), GetPath());
   CallFini();
   /* Normally, __cxa_finalize is called by the .fini function. However,
    * Android NDK before r6b doesn't do that. Our wrapped cxa_finalize only
    * calls destructors once, so call it in all cases. */
   ElfLoader::__wrap_cxa_finalize(this);
   delete mappable;
+  ElfLoader::Singleton.Forget(this);
 }
 
 namespace {
 
 /**
  * Hash function for symbol lookup, as defined in ELF standard for System V
  */
 unsigned long
--- a/mozglue/linker/CustomElf.h
+++ b/mozglue/linker/CustomElf.h
@@ -203,18 +203,19 @@ struct Rela: public Elf_(Rela)
 } /* namespace Elf */
 
 class Mappable;
 
 /**
  * Library Handle class for ELF libraries we don't let the system linker
  * handle.
  */
-class CustomElf: public LibHandle
+class CustomElf: public LibHandle, private ElfLoader::link_map
 {
+  friend class ElfLoader;
 public:
   /**
    * Returns a new CustomElf using the given file descriptor to map ELF
    * content. The file descriptor ownership is stolen, and it will be closed
    * in CustomElf's destructor if an instance is created, or by the Load
    * method otherwise. The path corresponds to the file descriptor, and flags
    * are the same kind of flags that would be given to dlopen(), though
    * currently, none are supported and the behaviour is more or less that of
--- a/mozglue/linker/ElfLoader.cpp
+++ b/mozglue/linker/ElfLoader.cpp
@@ -10,16 +10,24 @@
 #include <fcntl.h>
 #include "ElfLoader.h"
 #include "CustomElf.h"
 #include "Mappable.h"
 #include "Logging.h"
 
 using namespace mozilla;
 
+#ifndef PAGE_SIZE
+#define PAGE_SIZE 4096
+#endif
+
+#ifndef PAGE_MASK
+#define PAGE_MASK (~ (PAGE_SIZE - 1))
+#endif
+
 /**
  * dlfcn.h replacements functions
  */
 
 void *
 __wrap_dlopen(const char *path, int flags)
 {
   RefPtr<LibHandle> handle = ElfLoader::Singleton.Load(path, flags);
@@ -87,17 +95,16 @@ LeafName(const char *path)
 
 } /* Anonymous namespace */
 
 /**
  * LibHandle
  */
 LibHandle::~LibHandle()
 {
-  ElfLoader::Singleton.Forget(this);
   free(path);
 }
 
 const char *
 LibHandle::GetName() const
 {
   return path ? LeafName(path) : NULL;
 }
@@ -113,28 +120,32 @@ SystemElf::Load(const char *path, int fl
   if (path && path[0] == '/' && (access(path, F_OK) == -1)){
     debug("dlopen(\"%s\", %x) = %p", path, flags, (void *)NULL);
     return NULL;
   }
 
   void *handle = dlopen(path, flags);
   debug("dlopen(\"%s\", %x) = %p", path, flags, handle);
   ElfLoader::Singleton.lastError = dlerror();
-  if (handle)
-    return new SystemElf(path, handle);
+  if (handle) {
+    SystemElf *elf = new SystemElf(path, handle);
+    ElfLoader::Singleton.Register(elf);
+    return elf;
+  }
   return NULL;
 }
 
 SystemElf::~SystemElf()
 {
   if (!dlhandle)
     return;
   debug("dlclose(%p [\"%s\"])", dlhandle, GetPath());
   dlclose(dlhandle);
   ElfLoader::Singleton.lastError = dlerror();
+  ElfLoader::Singleton.Forget(this);
 }
 
 void *
 SystemElf::GetSymbolPtr(const char *symbol) const
 {
   void *sym = dlsym(dlhandle, symbol);
   debug("dlsym(%p [\"%s\"], \"%s\") = %p", dlhandle, GetPath(), symbol, sym);
   ElfLoader::Singleton.lastError = dlerror();
@@ -151,17 +162,16 @@ ElfLoader ElfLoader::Singleton;
 TemporaryRef<LibHandle>
 ElfLoader::Load(const char *path, int flags, LibHandle *parent)
 {
   RefPtr<LibHandle> handle;
 
   /* Handle dlopen(NULL) directly. */
   if (!path) {
     handle = SystemElf::Load(NULL, flags);
-    handles.push_back(handle);
     return handle;
   }
 
   /* TODO: Handle relative paths correctly */
   const char *name = LeafName(path);
 
   /* Search the list of handles we already have for a match. When the given
    * path is not absolute, compare file names, otherwise compare full paths. */
@@ -234,41 +244,47 @@ ElfLoader::Load(const char *path, int fl
   if (!handle && abs_path)
     handle = SystemElf::Load(name, flags);
 
   delete [] abs_path;
   debug("ElfLoader::Load(\"%s\", 0x%x, %p [\"%s\"]) = %p", requested_path, flags,
         reinterpret_cast<void *>(parent), parent ? parent->GetPath() : "",
         static_cast<void *>(handle));
 
-  /* Bookkeeping */
-  if (handle)
-    handles.push_back(handle);
-
   return handle;
 }
 
 mozilla::TemporaryRef<LibHandle>
 ElfLoader::GetHandleByPtr(void *addr)
 {
   /* Scan the list of handles we already have for a match */
   for (LibHandleList::iterator it = handles.begin(); it < handles.end(); ++it) {
     if ((*it)->Contains(addr))
       return *it;
   }
   return NULL;
 }
 
 void
+ElfLoader::Register(LibHandle *handle)
+{
+  handles.push_back(handle);
+  if (dbg && !handle->IsSystemElf())
+    dbg->Add(static_cast<CustomElf *>(handle));
+}
+
+void
 ElfLoader::Forget(LibHandle *handle)
 {
   LibHandleList::iterator it = std::find(handles.begin(), handles.end(), handle);
   if (it != handles.end()) {
     debug("ElfLoader::Forget(%p [\"%s\"])", reinterpret_cast<void *>(handle),
                                             handle->GetPath());
+    if (dbg && !handle->IsSystemElf())
+      dbg->Remove(static_cast<CustomElf *>(handle));
     handles.erase(it);
   } else {
     debug("ElfLoader::Forget(%p [\"%s\"]): Handle not found",
           reinterpret_cast<void *>(handle), handle->GetPath());
   }
 }
 
 ElfLoader::~ElfLoader()
@@ -354,8 +370,199 @@ ElfLoader::DestructorCaller::Call()
 {
   if (destructor) {
     debug("ElfLoader::DestructorCaller::Call(%p, %p, %p)",
           FunctionPtr(destructor), object, dso_handle);
     destructor(object);
     destructor = NULL;
   }
 }
+
+void
+ElfLoader::InitDebugger()
+{
+  /* Find ELF auxiliary vectors.
+   *
+   * The kernel stores the following data on the stack when starting a
+   * program:
+   *   argc
+   *   argv[0] (pointer into argv strings defined below)
+   *   argv[1] (likewise)
+   *   ...
+   *   argv[argc - 1] (likewise)
+   *   NULL
+   *   envp[0] (pointer into environment strings defined below)
+   *   envp[1] (likewise)
+   *   ...
+   *   envp[n] (likewise)
+   *   NULL
+   *   auxv[0] (first ELF auxiliary vector)
+   *   auxv[1] (second ELF auxiliary vector)
+   *   ...
+   *   auxv[p] (last ELF auxiliary vector)
+   *   (AT_NULL, NULL)
+   *   padding
+   *   argv strings, separated with '\0'
+   *   environment strings, separated with '\0'
+   *   NULL
+   *
+   * What we are after are the auxv values defined by the following struct.
+   */
+  struct AuxVector {
+    Elf::Addr type;
+    Elf::Addr value;
+  };
+
+  /* Pointer to the environment variables list */
+  extern char **environ;
+
+  /* The environment may have changed since the program started, in which
+   * case the environ variables list isn't the list the kernel put on stack
+   * anymore. But in this new list, variables that didn't change still point
+   * to the strings the kernel put on stack. It is quite unlikely that two
+   * modified environment variables point to two consecutive strings in memory,
+   * so we assume that if two consecutive environment variables point to two
+   * consecutive strings, we found strings the kernel put on stack. */
+  char **env;
+  for (env = environ; *env; env++)
+    if (*env + strlen(*env) + 1 == env[1])
+      break;
+  if (!*env)
+    return;
+
+  /* Next, we scan the stack backwards to find a pointer to one of those
+   * strings we found above, which will give us the location of the original
+   * envp list. As we are looking for pointers, we need to look at 32-bit or
+   * 64-bit aligned values, depending on the architecture. */
+  char **scan = reinterpret_cast<char **>(
+                reinterpret_cast<uintptr_t>(*env) & ~(sizeof(void *) - 1));
+  while (*env != *scan)
+    scan--;
+
+  /* Finally, scan forward to find the last environment variable pointer and
+   * thus the first auxiliary vector. */
+  while (*scan++);
+  AuxVector *auxv = reinterpret_cast<AuxVector *>(scan);
+
+  /* The two values of interest in the auxiliary vectors are AT_PHDR and
+   * AT_PHNUM, which give us the location and size of the ELF program
+   * headers. */
+  Array<Elf::Phdr> phdrs;
+  char *base = NULL;
+  while (auxv->type) {
+    if (auxv->type == AT_PHDR) {
+      phdrs.Init(reinterpret_cast<Elf::Phdr*>(auxv->value));
+      /* Assume the base address is the first byte of the same page */
+      base = reinterpret_cast<char *>(auxv->value & PAGE_MASK);
+    }
+    if (auxv->type == AT_PHNUM)
+      phdrs.Init(auxv->value);
+    auxv++;
+  }
+
+  if (!phdrs) {
+    debug("Couldn't find program headers");
+    return;
+  }
+
+  /* In some cases, the address for the program headers we get from the
+   * auxiliary vectors is not mapped, because of the PT_LOAD segment
+   * definitions in the program executable. Trying to map anonymous memory
+   * with a hint giving the base address will return a different address
+   * if something is mapped there, and the base address otherwise. */
+  MappedPtr mem(mmap(base, PAGE_SIZE, PROT_NONE,
+                     MAP_PRIVATE | MAP_ANONYMOUS, -1, 0), PAGE_SIZE);
+  if (mem == base) {
+    /* If program headers aren't mapped, try to map them */
+    int fd = open("/proc/self/exe", O_RDONLY);
+    if (fd == -1) {
+      debug("Failed to open /proc/self/exe");
+      return;
+    }
+    mem.Assign(mmap(base, PAGE_SIZE, PROT_READ, MAP_PRIVATE, fd, 0), PAGE_SIZE);
+    /* If we don't manage to map at the right address, just give up. */
+    if (mem != base) {
+      debug("Couldn't read program headers");
+      return;
+    }
+  }
+  /* Sanity check: the first bytes at the base address should be an ELF
+   * header. */
+  if (!Elf::Ehdr::validate(base)) {
+     debug("Couldn't find program base");
+     return;
+  }
+
+  /* Search for the program PT_DYNAMIC segment */
+  Array<Elf::Dyn> dyns;
+  for (Array<Elf::Phdr>::iterator phdr = phdrs.begin(); phdr < phdrs.end();
+       ++phdr) {
+    /* While the program headers are expected within the first mapped page of
+     * the program executable, the executable PT_LOADs may actually make them
+     * loaded at an address that is not the wanted base address of the
+     * library. We thus need to adjust the base address, compensating for the
+     * virtual address of the PT_LOAD segment corresponding to offset 0. */
+    if (phdr->p_type == PT_LOAD && phdr->p_offset == 0)
+      base -= phdr->p_vaddr;
+    if (phdr->p_type == PT_DYNAMIC)
+      dyns.Init(base + phdr->p_vaddr, phdr->p_filesz);
+  }
+  if (!dyns) {
+    debug("Failed to find PT_DYNAMIC section in program");
+    return;
+  }
+
+  /* Search for the DT_DEBUG information */
+  for (Array<Elf::Dyn>::iterator dyn = dyns.begin(); dyn < dyns.end(); ++dyn) {
+    if (dyn->d_tag == DT_DEBUG) {
+      dbg = reinterpret_cast<r_debug *>(dyn->d_un.d_ptr);
+      break;
+    }
+  }
+  debug("DT_DEBUG points at %p", dbg);
+}
+
+/**
+ * The system linker maintains a doubly linked list of libraries it loads
+ * for use by the debugger. Unfortunately, it also uses the list pointers
+ * in a lot of operations and adding our data in the list is likely to
+ * trigger crashes when the linker tries to use data we don't provide or
+ * that falls outside the data we allocated. Fortunately, the linker only
+ * traverses the list forward and accesses the head of the list from a
+ * private pointer instead of using the value in the r_debug structure.
+ * This means we can safely add members at the beginning of the list.
+ * Unfortunately, gdb checks the coherency of l_prev values, so we have
+ * to adjust the l_prev value for the first element the system linker
+ * knows about. Fortunately, it doesn't use l_prev, and the first element
+ * is not ever going to be released before our elements, since it is the
+ * program executable, so the system linker should not be changing
+ * r_debug::r_map.
+ */
+void
+ElfLoader::r_debug::Add(ElfLoader::link_map *map)
+{
+  if (!r_brk)
+    return;
+  r_state = RT_ADD;
+  r_brk();
+  map->l_prev = NULL;
+  map->l_next = r_map;
+  r_map->l_prev = map;
+  r_map = map;
+  r_state = RT_CONSISTENT;
+  r_brk();
+}
+
+void
+ElfLoader::r_debug::Remove(ElfLoader::link_map *map)
+{
+  if (!r_brk)
+    return;
+  r_state = RT_DELETE;
+  r_brk();
+  if (r_map == map)
+    r_map = map->l_next;
+  else
+    map->l_prev->l_next = map->l_next;
+  map->l_next->l_prev = map->l_prev;
+  r_state = RT_CONSISTENT;
+  r_brk();
+}
--- a/mozglue/linker/ElfLoader.h
+++ b/mozglue/linker/ElfLoader.h
@@ -200,31 +200,36 @@ public:
    * its virtual address space, i.e. the library handle for which
    * LibHandle::Contains returns true. Its purpose is to allow to
    * implement dladdr().
    */
   mozilla::TemporaryRef<LibHandle> GetHandleByPtr(void *addr);
 
 protected:
   /**
+   * Registers the given handle. This method is meant to be called by
+   * LibHandle subclass creators.
+   */
+  void Register(LibHandle *handle);
+
+  /**
    * Forget about the given handle. This method is meant to be called by
-   * the LibHandle destructor.
+   * LibHandle subclass destructors.
    */
-  friend LibHandle::~LibHandle();
   void Forget(LibHandle *handle);
 
   /* Last error. Used for dlerror() */
   friend class SystemElf;
   friend const char *__wrap_dlerror(void);
   friend void *__wrap_dlsym(void *handle, const char *symbol);
   friend int __wrap_dlclose(void *handle);
   const char *lastError;
 
 private:
-  ElfLoader() { }
+  ElfLoader() { InitDebugger(); }
   ~ElfLoader();
 
   /* Bookkeeping */
   typedef std::vector<LibHandle *> LibHandleList;
   LibHandleList handles;
 
 protected:
   friend class CustomElf;
@@ -287,11 +292,63 @@ protected:
   };
 
 private:
   /* Keep track of all registered destructors */
   std::vector<DestructorCaller> destructors;
 
   /* Keep track of Zips used for library loading */
   ZipCollection zips;
+
+public:
+  /* Loaded object descriptor for the debugger interface below */
+  struct link_map {
+    /* Base address of the loaded object. */
+    const void *l_addr;
+    /* File name */
+    const char *l_name;
+    /* Address of the PT_DYNAMIC segment. */
+    const void *l_ld;
+    /* Double linked list of loaded objects. */
+    link_map *l_next, *l_prev;
+  };
+
+private:
+  /* Data structure used by the linker to give details about shared objects it
+   * loaded to debuggers. This is normally defined in link.h, but Android
+   * headers lack this file. This also gives the opportunity to make it C++. */
+  class r_debug {
+  public:
+    /* Make the debugger aware of a new loaded object */
+    void Add(link_map *map);
+
+    /* Make the debugger aware of the unloading of an object */
+    void Remove(link_map *map);
+
+  private:
+    /* Version number of the protocol. */
+    int r_version;
+
+    /* Head of the linked list of loaded objects. */
+    struct link_map *r_map;
+
+    /* Function to be called when updates to the linked list of loaded objects
+     * are going to occur. The function is to be called before and after
+     * changes. */
+    void (*r_brk)(void);
+
+    /* Indicates to the debugger what state the linked list of loaded objects
+     * is in when the function above is called. */
+    enum {
+      RT_CONSISTENT, /* Changes are complete */
+      RT_ADD,        /* Beginning to add a new object */
+      RT_DELETE      /* Beginning to remove an object */
+    } r_state;
+  };
+  r_debug *dbg;
+
+  /**
+   * Initializes the pointer to the debugger data structure.
+   */
+  void InitDebugger();
 };
 
 #endif /* ElfLoader_h */
--- a/mozglue/linker/Utils.h
+++ b/mozglue/linker/Utils.h
@@ -178,17 +178,19 @@ AUTOCLEAN_TEMPLATE(AutoDeleteArray, Auto
  */
 template <typename T>
 class GenericMappedPtr
 {
 public:
   GenericMappedPtr(void *buf, size_t length): buf(buf), length(length) { }
   GenericMappedPtr(): buf(MAP_FAILED), length(0) { }
 
-  void Init(void *b, size_t len) {
+  void Assign(void *b, size_t len) {
+    if (buf != MAP_FAILED)
+      static_cast<T *>(this)->munmap(buf, length);
     buf = b;
     length = len;
   }
 
   ~GenericMappedPtr()
   {
     if (buf != MAP_FAILED)
       static_cast<T *>(this)->munmap(buf, length);