Bug 1505719 - Try harder when computing breakpad IDs for Android system libraries. r=ted
author Markus Stange <mstange@themasta.com>
Mon, 03 Dec 2018 19:21:44 +0000
changeset 449275 1f38344436a918241109ca2a5c4e939d1c33cde3
parent 449274 2cc09eeb4898c1245d9ae01c95b8ce5d622c8122
child 449276 55d27d4a5f7981c40c6b6fd26dcfcce29d56b510
push id 35156
push user rmaries@mozilla.com
push date Tue, 04 Dec 2018 21:51:17 +0000
treeherder mozilla-central@55b77d1533ab [default view] [failures only]
perfherder [talos] [build metrics] [platform microbench] (compared to previous push)
reviewers ted
bugs 1505719
milestone 65.0a1
first release with
nightly linux32
nightly linux64
nightly mac
nightly win32
nightly win64
last release without
nightly linux32
nightly linux64
nightly mac
nightly win32
nightly win64
Bug 1505719 - Try harder when computing breakpad IDs for Android system libraries. r=ted
Differential Revision: https://phabricator.services.mozilla.com/D12836
Cargo.lock
tools/profiler/rust-helper/Cargo.toml
tools/profiler/rust-helper/src/elf.rs
tools/profiler/rust-helper/src/lib.rs
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -1913,16 +1913,17 @@ dependencies = [
 name = "procedural-masquerade"
 version = "0.1.1"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 
 [[package]]
 name = "profiler_helper"
 version = "0.1.0"
 dependencies = [
+ "goblin 0.0.17 (registry+https://github.com/rust-lang/crates.io-index)",
  "memmap 0.6.2 (registry+https://github.com/rust-lang/crates.io-index)",
  "object 0.10.0 (registry+https://github.com/rust-lang/crates.io-index)",
  "thin-vec 0.1.0 (registry+https://github.com/rust-lang/crates.io-index)",
 ]
 
 [[package]]
 name = "pulse"
 version = "0.2.0"
--- a/tools/profiler/rust-helper/Cargo.toml
+++ b/tools/profiler/rust-helper/Cargo.toml
@@ -7,14 +7,22 @@ authors = ["Markus Stange <mstange@thema
 memmap = "0.6.2"
 
 [dependencies.object]
 version = "0.10.0"
 optional = true
 default-features = false
 features = ["std"]
 
+[dependencies.goblin]
+optional = true
+# The version and features of goblin need to match what's in object's Cargo.toml,
+# because we really want object's goblin and not another instance of goblin.
+version = "0.0.17"
+features = ["endian_fd", "elf32", "elf64", "mach32", "mach64", "pe32", "pe64", "archive"]
+default-features = false
+
 [dependencies.thin-vec]
 version = "0.1.0"
 features = ["gecko-ffi"]
 
 [features]
-parse_elf = ["object"]
+parse_elf = ["object", "goblin"]
--- a/tools/profiler/rust-helper/src/elf.rs
+++ b/tools/profiler/rust-helper/src/elf.rs
@@ -1,24 +1,93 @@
 use compact_symbol_table::CompactSymbolTable;
 use object::{ElfFile, Object, SymbolKind, Uuid};
 use std::collections::HashMap;
+use std::cmp;
+use goblin::elf;
+
+const UUID_SIZE: usize = 16;
+const PAGE_SIZE: usize = 4096;
 
 fn get_symbol_map<'a, 'b, T>(object_file: &'b T) -> HashMap<u32, &'a str>
 where
   T: Object<'a, 'b>,
 {
   object_file
     .dynamic_symbols()
     .chain(object_file.symbols())
     .filter(|symbol| symbol.kind() == SymbolKind::Text)
     .filter_map(|symbol| symbol.name().map(|name| (symbol.address() as u32, name)))
     .collect()
 }
 
 pub fn get_compact_symbol_table(buffer: &[u8], breakpad_id: &str) -> Option<CompactSymbolTable> {
   let elf_file = ElfFile::parse(buffer).ok()?;
-  let elf_id = Uuid::from_bytes(elf_file.build_id()?).ok()?;
+  let elf_id = get_elf_id(&elf_file, buffer)?;
   if format!("{:X}0", elf_id.simple()) != breakpad_id {
     return None;
   }
   return Some(CompactSymbolTable::from_map(get_symbol_map(&elf_file)));
 }
+
+fn create_elf_id(identifier: &[u8], little_endian: bool) -> Option<Uuid> {
+  // Copy into a UUID_SIZE buffer: short identifiers are zero-padded, long ones truncated.
+  let mut data = [0 as u8; UUID_SIZE];
+  let len = cmp::min(identifier.len(), UUID_SIZE);
+  data[0..len].copy_from_slice(&identifier[0..len]);
+
+  if little_endian {
+    // The ELF file targets a little endian architecture. Convert to
+    // network byte order (big endian) to match the Breakpad processor's
+    // expectations. For big endian object files, this is not needed.
+    data[0..4].reverse(); // uuid field 1
+    data[4..6].reverse(); // uuid field 2
+    data[6..8].reverse(); // uuid field 3
+  }
+
+  Uuid::from_bytes(&data).ok()
+}
+
+/// Tries to obtain the object identifier of an ELF object.
+///
+/// As opposed to Mach-O, ELF does not specify a unique ID for object files in
+/// its header. Compilers and linkers usually add either `SHT_NOTE` sections or
+/// `PT_NOTE` program header elements for this purpose. If one of these notes
+/// is present, `ElfFile::build_id()` will find it.
+///
+/// If neither of the above is present, this function will hash the first page
+/// of the `.text` section (program code). This matches what the Breakpad
+/// processor does.
+///
+/// If all of the above fail, this function will return `None`.
+pub fn get_elf_id(elf_file: &ElfFile, data: &[u8]) -> Option<Uuid> {
+  if let Some(identifier) = elf_file.build_id() {
+    return create_elf_id(identifier, elf_file.elf().little_endian);
+  }
+
+  // We were not able to locate the build ID, so fall back to hashing the
+  // first page of the ".text" (program code) section. This algorithm XORs
+  // 16-byte chunks directly into a UUID buffer.
+  if let Some(section_data) = find_text_section(elf_file.elf(), data) {
+    let mut hash = [0; UUID_SIZE];
+    for i in 0..cmp::min(section_data.len(), PAGE_SIZE) {
+      hash[i % UUID_SIZE] ^= section_data[i];
+    }
+
+    return create_elf_id(&hash, elf_file.elf().little_endian);
+  }
+
+  None
+}
+
+/// Returns the bytes of the first `SHT_PROGBITS` section named `.text`, if any.
+///
+/// The offset and size are read from the (possibly malformed) file, so the slice
+/// is taken with checked `get`; an out-of-range section is skipped, not a panic.
+fn find_text_section<'elf, 'data>(elf: &'elf elf::Elf, data: &'data [u8]) -> Option<&'data [u8]> {
+  elf.section_headers.iter().find_map(|header| {
+    match (header.sh_type, elf.shdr_strtab.get(header.sh_name)) {
+      (elf::section_header::SHT_PROGBITS, Some(Ok(".text"))) =>
+        data.get(header.sh_offset as usize..)?.get(..header.sh_size as usize),
+      _ => None
+    }
+  })
+}
--- a/tools/profiler/rust-helper/src/lib.rs
+++ b/tools/profiler/rust-helper/src/lib.rs
@@ -1,13 +1,15 @@
 extern crate memmap;
 extern crate thin_vec;
 
 #[cfg(feature = "parse_elf")]
 extern crate object;
+#[cfg(feature = "parse_elf")]
+extern crate goblin;
 
 mod compact_symbol_table;
 
 #[cfg(feature = "parse_elf")]
 mod elf;
 
 #[cfg(feature = "parse_elf")]
 use memmap::MmapOptions;