elflog.cpp
author Taras Glek <tglek@mozilla.com>
Mon, 05 Apr 2010 16:46:47 -0700
changeset 7 6453ad2a7906
parent 6 d42d3c4bac01
child 8 6eda66f8be5f
permissions -rw-r--r--
Don't read in symbols when operating on sections alone
/*
  http://em386.blogspot.com

  You may not use this code in a product,
  but feel free to study it and rewrite it
  in your own way

  This code is an example of how to use the
  libelf library for reading ELF objects.

  gcc -o libelf-howto libelf-howto.c -lelf
*/

#include <stdio.h>
#include <string.h>
#include <stdlib.h>
#include <unistd.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <fcntl.h>
#include <libelf.h>
#include <gelf.h>
#include <map>
#include <vector>
#include <algorithm>
#include <fstream>
#include <string>

using namespace std;
#define ERR -1

/*
 * Translate the binding bits of a symbol's st_info byte into a short label.
 *
 * st_info: raw st_info byte of a symbol table entry.
 * Returns a pointer to a static string; "UNKNOWN" when the binding value
 * matches none of the STB_* constants listed below.
 */
static char const *st_info_bind_str(unsigned char st_info) {
  static const struct {
    int binding;
    char const *label;
  } known_bindings[] = {
    { STB_LOCAL,  "LOCAL"  },
    { STB_GLOBAL, "GLOBAL" },
    { STB_WEAK,   "WEAK"   },
    { STB_NUM,    "NUM"    },
    { STB_LOOS,   "LOOS"   },
    { STB_HIOS,   "HIOS"   },
    { STB_LOPROC, "LOPROC" },
    { STB_HIPROC, "HIPROC" },
  };
  const int wanted = ELF32_ST_BIND(st_info);

  for (size_t k = 0; k < sizeof known_bindings / sizeof known_bindings[0]; k++) {
    if (known_bindings[k].binding == wanted)
      return known_bindings[k].label;
  }
  return "UNKNOWN";
}

/*
 * Translate the type bits of a symbol's st_info byte into a short label.
 *
 * st_info: raw st_info byte of a symbol table entry.
 * Returns a pointer to a static string; "UNKNOWN" when the type value
 * matches none of the STT_* constants listed below.
 */
static char const *st_info_type_str(unsigned char st_info) {
  static const struct {
    int type;
    char const *label;
  } known_types[] = {
    { STT_NOTYPE,  "NOTYPE"  },
    { STT_OBJECT,  "OBJECT"  },
    { STT_FUNC,    "FUNC"    },
    { STT_SECTION, "SECTION" },
    { STT_FILE,    "FILE"    },
    { STT_COMMON,  "COMMON"  },
    { STT_TLS,     "TLS"     },
    { STT_NUM,     "NUM"     },
    { STT_LOOS,    "LOOS"    },
    { STT_HIOS,    "HIOS"    },
    { STT_LOPROC,  "LOPROC"  },
    { STT_HIPROC,  "HIPROC"  },
  };
  const int wanted = ELF32_ST_TYPE(st_info);

  for (size_t k = 0; k < sizeof known_types / sizeof known_types[0]; k++) {
    if (known_types[k].type == wanted)
      return known_types[k].label;
  }
  return "UNKNOWN";
}
/*
 * Translate a section header's sh_type value into a printable label.
 *
 * sh_type: the sh_type field of a section header.
 * Returns a pointer to a static string. Some of the shorter labels carry a
 * trailing tab so that they line up with the longer ones when printed in a
 * column; values matching none of the cases yield "(none) ".
 */
static char const *sh_type_str(Elf64_Word sh_type) {
  switch(sh_type)
    {
    case SHT_NULL: return( "SHT_NULL\t");
    case SHT_PROGBITS: return( "SHT_PROGBITS");
    case SHT_SYMTAB: return( "SHT_SYMTAB");
    case SHT_STRTAB: return( "SHT_STRTAB");
    case SHT_RELA: return( "SHT_RELA\t");
    case SHT_HASH: return( "SHT_HASH\t");
    case SHT_DYNAMIC: return( "SHT_DYNAMIC");
    case SHT_NOTE: return( "SHT_NOTE\t");
    case SHT_NOBITS: return( "SHT_NOBITS");
    case SHT_REL: return( "SHT_REL\t");
    case SHT_SHLIB: return( "SHT_SHLIB");
    case SHT_DYNSYM: return( "SHT_DYNSYM");
    case SHT_INIT_ARRAY: return( "SHT_INIT_ARRAY");
    case SHT_FINI_ARRAY: return( "SHT_FINI_ARRAY");
    case SHT_PREINIT_ARRAY: return( "SHT_PREINIT_ARRAY");
    case SHT_GROUP: return( "SHT_GROUP");
    case SHT_SYMTAB_SHNDX: return( "SHT_SYMTAB_SHNDX");
    case SHT_NUM: return( "SHT_NUM\t");
    case SHT_LOOS: return( "SHT_LOOS\t");
    case SHT_GNU_verdef: return( "SHT_GNU_verdef");
    // Label now matches the constant, like the verdef/versym siblings
    // (it used to read "SHT_VERNEED").
    case SHT_GNU_verneed: return( "SHT_GNU_verneed");
    case SHT_GNU_versym: return( "SHT_GNU_versym");
    default: return( "(none) ");
    }
}

/* One (offset, size) pair per read recorded in the io log. */
typedef std::vector<std::pair<int, int> > iovector;

/*
 * Append one (offset, size) entry to `log` for every usable "INSERT " line
 * of the text file `filename`.
 *
 * Only lines that start with "INSERT " are considered. On such a line the
 * offset is the decimal number found two characters after the second comma,
 * and the size is the decimal number found two characters after the end of
 * the offset (i.e. the fields are expected to be separated by ", ").
 * Lines that do not have that shape are skipped instead of being parsed
 * through a null or out-of-range pointer.
 *
 * Both numbers are parsed as long and narrowed to int when stored, because
 * that is the element type of iovector.
 *
 * If the file cannot be opened a message is printed on stderr and the
 * process exits with status 1.
 */
static void parselog(iovector &log, char const *filename) {
  // Input-only stream: the log is never written, so it must not require
  // write permission to open.
  std::ifstream f(filename);
  if (!f.is_open()) {
    fprintf(stderr, "Could not open log file. Does %s exist?\n", filename);
    exit(1);
  }

  std::string line;
  while (std::getline(f, line)) {
    if (strncmp(line.c_str(), "INSERT ", 7) != 0)
      continue;

    // Locate the second comma on the line.
    const char *cursor = strchr(line.c_str(), ',');
    if (cursor)
      cursor = strchr(cursor + 1, ',');
    // cursor[1] must exist before stepping two characters forward.
    if (!cursor || cursor[1] == '\0')
      continue;
    cursor += 2;

    char *end = NULL;
    const long offset = strtol(cursor, &end, 10);
    // Need at least one digit, plus two separator characters before the size.
    if (end == cursor || end[0] == '\0' || end[1] == '\0')
      continue;

    const char *size_text = end + 2;
    const long size = strtol(size_text, &end, 10);
    if (end == size_text)
      continue;

    log.push_back(std::pair<int, int>((int)offset, (int)size));
  }
}
/*
 * File-scope state shared by main(), read_sections() and read_symbols().
 *
 * elf_header: main() points this at the start of the in-memory copy of the
 *   input file; read_sections() reads e_shstrndx from it and main() reads
 *   e_phnum from it.
 *   NOTE(review): the struct type is selected by the host architecture
 *   (__x86_64__), not by the class of the file being inspected —
 *   presumably a 32-bit object examined on x86_64 (or the reverse) would be
 *   read with the wrong layout; confirm before relying on it.
 */
#ifdef __x86_64__
GElf_Ehdr 
#else 
Elf32_Ehdr
#endif
* elf_header;		/* ELF header */


/* Set by read_symbols() to the data descriptor of the symbol table section
   it is currently walking. */
Elf_Data *edata;                /* Data Descriptor */
/* Scratch section header, overwritten by every gelf_getshdr() call made in
   read_sections() and read_symbols(). */
GElf_Shdr shdr;                 /* Section Header */

/*
 * One symbol table entry reduced to the fields this tool reports.
 * Symbols order by `offset` only, so a Symbol with just `offset` set can be
 * used as a search key for lower_bound() on a sorted vector.
 */
struct Symbol {
  size_t offset;          // position of the symbol (read_symbols stores a file offset here)
  size_t length;          // size of the symbol in bytes
  char const *name;       // not owned; points at storage managed elsewhere
  unsigned char st_info;  // raw st_info byte (binding and type)
  Symbol(char const *name, size_t offset, size_t length, unsigned char st_info):
    offset(offset), length(length), name(name), st_info (st_info)
  {
  }

  // Zero every field so a default-constructed key never carries
  // indeterminate values around.
  Symbol():
    offset(0), length(0), name(NULL), st_info(0)
  {
  }

  // Ordering by offset only; name, length and st_info do not participate.
  bool operator<(const Symbol &rhs) const {
    return offset < rhs.offset;
  }
};

/*
 * Collect the sized, defined symbols of every SHT_SYMTAB section of `elf`
 * into `symbols`, sorted by file offset with one symbol kept per offset.
 *
 * symbols: output; entries are appended, then the whole vector is sorted
 *          and de-duplicated.
 * phdrs:   program headers used to translate a symbol's st_value into a
 *          file offset (st_value - p_vaddr + p_offset of the segment whose
 *          memory range contains st_value).
 * elf:     libelf handle to iterate. Symbol names point into storage owned
 *          by this handle, so it must stay open while `symbols` is in use.
 *
 * Symbols whose address lies in none of `phdrs` are reported on stderr and
 * dropped. When several symbols share an offset, the one whose name compares
 * greatest with strcmp() survives; the number dropped is reported on stderr.
 *
 * Side effects: overwrites the file-scope `shdr` and `edata`.
 */
static void read_symbols(vector<Symbol> &symbols,   vector<GElf_Phdr> &phdrs, Elf *elf) {
  Elf_Scn *scn = NULL;       /* Section Descriptor */
  while ((scn = elf_nextscn(elf, scn)) != NULL) {
    if (gelf_getshdr(scn, &shdr) == NULL)
      continue;
    if (shdr.sh_type != SHT_SYMTAB)
      continue;

    // NULL asks for the section's first data descriptor. Passing the
    // descriptor left over from a previous section is not valid.
    edata = elf_getdata(scn, NULL);
    if (edata == NULL || shdr.sh_entsize == 0)
      continue;

    // number of entries = section size / entry size
    const size_t symbol_count = shdr.sh_size / shdr.sh_entsize;
    for (size_t i = 0; i < symbol_count; i++) {
      GElf_Sym sym;			/* Symbol */
      if (gelf_getsym(edata, (int)i, &sym) == NULL)
        continue;
      // Skip things that occupy no space in this object: zero-sized
      // entries and references to symbols defined elsewhere.
      if (!sym.st_size || sym.st_shndx == SHN_UNDEF)
        continue;

      const char *name = elf_strptr(elf, shdr.sh_link, sym.st_name);
      if (name == NULL)
        name = "";	// keeps strcmp()/printf below well-defined

      // Find the segment containing the symbol. The lower bound is
      // inclusive so a symbol placed exactly at a segment's start matches.
      const GElf_Phdr *phdr = NULL;
      for (size_t j = 0; j < phdrs.size(); j++) {
        const GElf_Phdr &candidate = phdrs[j];
        if (sym.st_value >= candidate.p_vaddr &&
            sym.st_value < candidate.p_vaddr + candidate.p_memsz) {
          phdr = &candidate;
          break;
        }
      }
      if (!phdr) {
        fprintf(stderr, "Unknown segment for %s: %llx %llu %s %s\n", name,
                (unsigned long long)sym.st_value, (unsigned long long)sym.st_size,
                st_info_bind_str(sym.st_info), st_info_type_str(sym.st_info));
        continue;
      }

      /* convert mem offset to file offset */
      const size_t offset = sym.st_value - phdr->p_vaddr + phdr->p_offset;
      symbols.push_back(Symbol(name, offset, sym.st_size, sym.st_info));
    }
  }

  sort(symbols.begin(), symbols.end());

  // Compact in place: symbols[0..kept) is the de-duplicated prefix. This
  // avoids holding iterators across vector::erase() and runs in one pass.
  size_t kept = 0;
  int erased = 0;
  for (size_t i = 0; i < symbols.size(); i++) {
    if (kept > 0 && symbols[kept - 1].offset == symbols[i].offset) {
      // Same offset as the current survivor: keep whichever name is
      // greater (the newcomer wins ties).
      // There is a high probability that .somesection.survivor is what the section is called
      if (strcmp(symbols[kept - 1].name, symbols[i].name) <= 0)
        symbols[kept - 1] = symbols[i];
      erased++;
      continue;
    }
    symbols[kept++] = symbols[i];
  }
  symbols.erase(symbols.begin() + kept, symbols.end());

  fprintf(stderr, "Skipped %d duplicate symbols(ie virtual constructors and other junk)\n", erased);
}

/*
 * One section reduced to the fields this tool reports.
 * Sections order by `offset` only, so a Section with just `offset` set can
 * be used as a search key for lower_bound() on a sorted vector.
 */
struct Section {
  long offset;        // position of the section (read_sections stores sh_offset here)
  long length;        // size of the section (read_sections stores sh_size here)
  char const *name;   // not owned; points at storage managed elsewhere
  Section(long offset, long length, char const *name) :
    offset(offset), length(length), name(name)
  {
  }

  // Zero every field so a default-constructed key never carries
  // indeterminate values around.
  Section() :
    offset(0), length(0), name(NULL)
  {
  }

  // Ordering by offset only; length and name do not participate.
  bool operator<(const Section &rhs) const {
    return offset < rhs.offset;
  }
};

/*
 * Append one Section (sh_offset, sh_size, name) to `sections` for every
 * section of `elf`, then sort the vector by offset.
 *
 * sections: output vector.
 * elf:      libelf handle to iterate. Section names point into storage
 *           owned by this handle, so it must stay open while `sections` is
 *           in use.
 *
 * Names are looked up in the string table selected by the file-scope
 * elf_header->e_shstrndx, so elf_header must already point at the file's
 * header. A section whose header cannot be read is skipped; a section whose
 * name cannot be resolved is recorded with the empty string.
 *
 * Side effects: overwrites the file-scope `shdr`.
 */
static void read_sections(vector<Section> &sections, Elf *elf) {
  /* Iterate through section headers */
  Elf_Scn *scn = NULL;       /* Section Descriptor */
  while ((scn = elf_nextscn(elf, scn)) != 0) {
    // Without this check a failed lookup would push a second copy of the
    // previous section's header.
    if (gelf_getshdr(scn, &shdr) == NULL)
      continue;

    // the shdr name is in a string table, libelf uses elf_strptr() to find it
    // using the e_shstrndx value from the elf_header
    const char *name = elf_strptr(elf, elf_header->e_shstrndx, shdr.sh_name);
    if (name == NULL)
      name = "";	// callers print the name with %s

    sections.push_back(Section(shdr.sh_offset, shdr.sh_size, name));
  }
  sort(sections.begin(), sections.end());
}
/*
 * Print the usage text on stderr and terminate the process with status 1.
 *
 * argv: the program's argument vector; argv[0] is printed as the program
 *       name.
 * i:    index into argv of an argument that was not recognized, or 0 to
 *       print the usage text alone. When non-zero, a diagnostic naming
 *       argv[i] precedes the usage text.
 *
 * Never returns.
 */
void usage (char *argv[], int i) {
  if (i)
    // Name the offending argument itself, not whatever happens to be first.
    fprintf(stderr,"%s: %s argument not recognized\n", argv[0], argv[i]);

  fprintf(stderr, "Usage %s <flags> <library> <iolog>\n"
          "--contents list the symbol/section table\n"
          "--sections List sections instead of symbols\n",
          argv[0]);
  exit(1);
}

int main(int argc, char *argv[])
{
  int fd; 		// File Descriptor
  char *base_ptr;		// ptr to our object in memory
  int current_arg = 1;
  struct stat elf_stats;	// fstat struct
  bool dump_items = false;
  bool dump_sections = false;
  for(int i = current_arg; i < argc; i++) {
    string arg = argv[i];
    if (arg.size() < 2 || (arg[0] != '-' && arg[1] != '-'))
      continue;
    if (arg == "--contents") {
      dump_items = true;
      current_arg++;
    }
    else if (arg == "--sections") {
      dump_sections = true;
      current_arg++;
    }
    else 
      usage(argv, i);
  }

  if (argc - current_arg <= 0)
    usage(argv,0);

  char *file = argv[current_arg++]; // filename

  iovector iolog;
  if (current_arg < argc)
    parselog(iolog, argv[current_arg++]);

  if((fd = open(file, O_RDONLY)) == ERR)
    {
      printf("couldnt open %s\n", file);
      perror("error:");
      return ERR;
    }

  if((fstat(fd, &elf_stats)))
    {
      printf("could not fstat %s\n", file);
      close(fd);
      return ERR;
    }
  // do mmap instead
  if((base_ptr = (char *) malloc(elf_stats.st_size)) == NULL)
    {
      printf("could not malloc\n");
      close(fd);
      return ERR;
    }

  if((read(fd, base_ptr, elf_stats.st_size)) < elf_stats.st_size)
    {
      printf("could not read %s\n", file);
      free(base_ptr);
      close(fd);
      return ERR;
    }

  /* Check libelf version first */
  if(elf_version(EV_CURRENT) == EV_NONE)
    {
      printf("WARNING Elf Library is out of date!\n");
    }

  elf_header = (typeof elf_header) base_ptr;	// point elf_header at our object in memory

  Elf *elf = elf_begin(fd, ELF_C_READ, NULL);	// Initialize 'elf' pointer to our file descriptor
  vector<GElf_Phdr> phdrs;
  int i =0;
  for (i = 0; i< elf_header->e_phnum;i++) {
    GElf_Phdr phdr;
    gelf_getphdr(elf, i, &phdr);
    if (phdr.p_type != PT_LOAD && phdr.p_type != PT_TLS)
      continue;
    phdrs.push_back(phdr);
    //    printf("phdr%x p_offset:%Lx p_vaddr:%Lx p_memsz:%x\n", phdr.p_type, phdr.p_offset, phdr.p_vaddr, phdr.p_memsz);
  }

  vector<Section> sections;
  vector<Symbol> symbols;
  elf = elf_begin(fd, ELF_C_READ, NULL);	// Initialize 'elf' pointer to our file descriptor
  read_sections(sections, elf);
  if (!dump_sections) {
    elf = elf_begin(fd, ELF_C_READ, NULL);	// Initialize 'elf' pointer to our file descriptor
    read_symbols(symbols, phdrs, elf);
  }
  if (dump_items) {
    if (dump_sections) {
      for (vector<Section>::iterator it = sections.begin();it != sections.end();++it) {
        printf("%x\t%x\t%s\n", it->offset, it->length, it->name);
      }

    } else {
      long prev_offset;
      for (vector<Symbol>::iterator it = symbols.begin();it != symbols.end();++it) {
        Section fake_section;
        vector<Section>::iterator match_section;
        fake_section.offset = it->offset;
        match_section = lower_bound(sections.begin(), sections.end(), fake_section);
        const char *s = "";
        if (match_section != sections.end()) {
          if (match_section->offset > it->offset)
            match_section--;
          if (match_section->offset <= it->offset && match_section->length >= it->length)
            s = match_section->name;
        }

        printf("%x\t%x\t%s.%s %s %s\n", it->offset, it->length, s, it->name,
               st_info_bind_str(it->st_info),  st_info_type_str(it->st_info));
        prev_offset = it->offset;
      }
    }
  }
  
  for(iovector::iterator it = iolog.begin();it != iolog.end();++it) {
    printf("%x %x offset(#%d)=====================\n", it->first, it->second,1+ it - iolog.begin());
    if (!dump_sections) {
      Symbol fake;
      fake.offset = it->first;
      vector<Symbol>::iterator match;
      for (match = lower_bound(symbols.begin(), symbols.end(), fake);
           match->offset < it->first + it->second; ++match) {
        printf("%x\t%x\t%s\t%s\t%s\n", match->offset, match->length, match->name,
               st_info_bind_str(match->st_info),  st_info_type_str(match->st_info));
      }
    } else {
      Section fake_section;
      vector<Section>::iterator match_section;
      fake_section.offset = it->first;
      match_section = lower_bound(sections.begin(), sections.end(), fake_section);
      // backup, to see if the read covers the end of the previous section
      if (match_section != sections.begin()) {
        match_section--;
        if (!(match_section->offset <= fake_section.offset 
              && match_section->offset + match_section->length >= fake_section.offset)) {
          match_section++;
        }
      }
      for (; match_section->offset < it->first + it->second; ++match_section) {
        printf("%x\t%x\t%s\n", match_section->offset, match_section->length, match_section->name);
      }
    }

  }
  return 0;
}
//offset #32 needs investigating as to why the hell it's being called