Bug 1450185 - Implement DWARF stack walker for aarch64. r=jseward
author Makoto Kato <m_kato@ga2.so-net.ne.jp>
Thu, 12 Apr 2018 18:22:03 +0900
changeset 416035 1893ddb56c3b2c4264cae95ae53139cd769330bf
parent 416034 56a2a8cc35fd988715fdb07a8efd6806560284a5
child 416036 4b51c5cf8035e7e18506b2e3a1a8aee66eeed5e8
push id 33915
push user ncsoregi@mozilla.com
push date Fri, 27 Apr 2018 21:53:44 +0000
treeherder mozilla-central@8b2c1fc3d6c3 [default view] [failures only]
perfherder [talos] [build metrics] [platform microbench] (compared to previous push)
reviewers jseward
bugs 1450185
milestone 61.0a1
first release with
nightly linux32
nightly linux64
nightly mac
nightly win32
nightly win64
last release without
nightly linux32
nightly linux64
nightly mac
nightly win32
nightly win64
Bug 1450185 - Implement DWARF stack walker for aarch64. r=jseward Since aarch64's DWARF register numbering has no pc register, I use x30 (the link register) as the return address for every frame except the first. Tested with gtest on Linux/aarch64, and the profiler works on Android/aarch64. EM_AARCH64 might not be defined on our builders since their headers are old, so a fallback define is needed. MozReview-Commit-ID: 8VDb5i0vwBT
tools/profiler/core/PlatformMacros.h
tools/profiler/core/platform-linux-android.cpp
tools/profiler/core/platform.cpp
tools/profiler/lul/LulDwarf.cpp
tools/profiler/lul/LulDwarfExt.h
tools/profiler/lul/LulDwarfSummariser.cpp
tools/profiler/lul/LulElf.cpp
tools/profiler/lul/LulMain.cpp
tools/profiler/lul/LulMain.h
tools/profiler/lul/LulMainInt.h
tools/profiler/moz.build
tools/profiler/tests/gtest/LulTestDwarf.cpp
tools/profiler/tests/gtest/moz.build
--- a/tools/profiler/core/PlatformMacros.h
+++ b/tools/profiler/core/PlatformMacros.h
@@ -10,29 +10,29 @@
 // else to this file, so it can remain freestanding. The primary factorisation
 // is on (ARCH,OS) pairs ("PLATforms") but ARCH_ and OS_ macros are defined
 // too, since they are sometimes convenient.
 //
 // Note: "GP" is short for "Gecko Profiler".
 
 #undef GP_PLAT_x86_android
 #undef GP_PLAT_arm_android
-#undef GP_PLAT_aarch64_android
+#undef GP_PLAT_arm64_android
 #undef GP_PLAT_x86_linux
 #undef GP_PLAT_amd64_linux
 #undef GP_PLAT_arm_linux
 #undef GP_PLAT_mips64_linux
 #undef GP_PLAT_amd64_darwin
 #undef GP_PLAT_x86_windows
 #undef GP_PLAT_amd64_windows
 
 #undef GP_ARCH_x86
 #undef GP_ARCH_amd64
 #undef GP_ARCH_arm
-#undef GP_ARCH_aarch64
+#undef GP_ARCH_arm64
 #undef GP_ARCH_mips64
 
 #undef GP_OS_android
 #undef GP_OS_linux
 #undef GP_OS_darwin
 #undef GP_OS_windows
 
 // We test __ANDROID__ before __linux__ because __linux__ is defined on both
@@ -44,18 +44,18 @@
 # define GP_OS_android 1
 
 #elif defined(__ANDROID__) && defined(__arm__)
 # define GP_PLAT_arm_android 1
 # define GP_ARCH_arm 1
 # define GP_OS_android 1
 
 #elif defined(__ANDROID__) && defined(__aarch64__)
-# define GP_PLAT_aarch64_android 1
-# define GP_ARCH_aarch64 1
+# define GP_PLAT_arm64_android 1
+# define GP_ARCH_arm64 1
 # define GP_OS_android 1
 
 #elif defined(__linux__) && defined(__i386__)
 # define GP_PLAT_x86_linux 1
 # define GP_ARCH_x86 1
 # define GP_OS_linux 1
 
 #elif defined(__linux__) && defined(__x86_64__)
@@ -63,16 +63,21 @@
 # define GP_ARCH_amd64 1
 # define GP_OS_linux 1
 
 #elif defined(__linux__) && defined(__arm__)
 # define GP_PLAT_arm_linux 1
 # define GP_ARCH_arm 1
 # define GP_OS_linux 1
 
+#elif defined(__linux__) && defined(__aarch64__)
+# define GP_PLAT_arm64_linux 1
+# define GP_ARCH_arm64 1
+# define GP_OS_linux 1
+
 #elif defined(__linux__) && defined(__mips64)
 # define GP_PLAT_mips64_linux 1
 # define GP_ARCH_mips64 1
 # define GP_OS_linux 1
 
 #elif defined(__APPLE__) && defined(__x86_64__)
 # define GP_PLAT_amd64_darwin 1
 # define GP_ARCH_amd64 1
--- a/tools/profiler/core/platform-linux-android.cpp
+++ b/tools/profiler/core/platform-linux-android.cpp
@@ -98,17 +98,17 @@ PopulateRegsFromContext(Registers& aRegs
   aRegs.mSP = reinterpret_cast<Address>(mcontext.gregs[REG_RSP]);
   aRegs.mFP = reinterpret_cast<Address>(mcontext.gregs[REG_RBP]);
   aRegs.mLR = 0;
 #elif defined(GP_ARCH_arm)
   aRegs.mPC = reinterpret_cast<Address>(mcontext.arm_pc);
   aRegs.mSP = reinterpret_cast<Address>(mcontext.arm_sp);
   aRegs.mFP = reinterpret_cast<Address>(mcontext.arm_fp);
   aRegs.mLR = reinterpret_cast<Address>(mcontext.arm_lr);
-#elif defined(GP_ARCH_aarch64)
+#elif defined(GP_ARCH_arm64)
   aRegs.mPC = reinterpret_cast<Address>(mcontext.pc);
   aRegs.mSP = reinterpret_cast<Address>(mcontext.sp);
   aRegs.mFP = reinterpret_cast<Address>(mcontext.regs[29]);
   aRegs.mLR = reinterpret_cast<Address>(mcontext.regs[30]);
 #elif defined(GP_ARCH_mips64)
   aRegs.mPC = reinterpret_cast<Address>(mcontext.pc);
   aRegs.mSP = reinterpret_cast<Address>(mcontext.gregs[29]);
   aRegs.mFP = reinterpret_cast<Address>(mcontext.gregs[30]);
--- a/tools/profiler/core/platform.cpp
+++ b/tools/profiler/core/platform.cpp
@@ -107,17 +107,18 @@
 #if defined(GP_PLAT_arm_linux) || defined(GP_PLAT_arm_android)
 # define HAVE_NATIVE_UNWIND
 # define USE_EHABI_STACKWALK
 # include "EHABIStackWalk.h"
 #endif
 
 // Linux builds use LUL, which uses DWARF info to unwind stacks.
 #if defined(GP_PLAT_amd64_linux) || defined(GP_PLAT_x86_linux) || \
-    defined(GP_PLAT_mips64_linux)
+    defined(GP_PLAT_mips64_linux) || defined(GP_PLAT_arm64_linux) || \
+    defined(GP_PLAT_arm64_android)
 # define HAVE_NATIVE_UNWIND
 # define USE_LUL_STACKWALK
 # include "lul/LulMain.h"
 # include "lul/platform-linux-lul.h"
 
 // On linux we use LUL for periodic samples and synchronous samples, but we use
 // FramePointerStackWalk for backtrace samples when MOZ_PROFILING is enabled.
 // (See the comment at the top of the file for a definition of
@@ -1207,16 +1208,21 @@ DoLULBacktrace(PSLockRef aLock, const Re
   startRegs.xbp = lul::TaggedUWord(mc->gregs[REG_RBP]);
 #elif defined(GP_PLAT_arm_linux) || defined(GP_PLAT_arm_android)
   startRegs.r15 = lul::TaggedUWord(mc->arm_pc);
   startRegs.r14 = lul::TaggedUWord(mc->arm_lr);
   startRegs.r13 = lul::TaggedUWord(mc->arm_sp);
   startRegs.r12 = lul::TaggedUWord(mc->arm_ip);
   startRegs.r11 = lul::TaggedUWord(mc->arm_fp);
   startRegs.r7  = lul::TaggedUWord(mc->arm_r7);
+#elif defined(GP_PLAT_arm64_linux) || defined(GP_PLAT_arm64_android)
+  startRegs.pc  = lul::TaggedUWord(mc->pc);
+  startRegs.x29 = lul::TaggedUWord(mc->regs[29]);
+  startRegs.x30 = lul::TaggedUWord(mc->regs[30]);
+  startRegs.sp  = lul::TaggedUWord(mc->sp);
 #elif defined(GP_PLAT_x86_linux) || defined(GP_PLAT_x86_android)
   startRegs.xip = lul::TaggedUWord(mc->gregs[REG_EIP]);
   startRegs.xsp = lul::TaggedUWord(mc->gregs[REG_ESP]);
   startRegs.xbp = lul::TaggedUWord(mc->gregs[REG_EBP]);
 #elif defined(GP_PLAT_mips64_linux)
   startRegs.pc = lul::TaggedUWord(mc->pc);
   startRegs.sp = lul::TaggedUWord(mc->gregs[29]);
   startRegs.fp = lul::TaggedUWord(mc->gregs[30]);
@@ -1259,16 +1265,19 @@ DoLULBacktrace(PSLockRef aLock, const Re
 
   {
 #if defined(GP_PLAT_amd64_linux)
     uintptr_t rEDZONE_SIZE = 128;
     uintptr_t start = startRegs.xsp.Value() - rEDZONE_SIZE;
 #elif defined(GP_PLAT_arm_linux) || defined(GP_PLAT_arm_android)
     uintptr_t rEDZONE_SIZE = 0;
     uintptr_t start = startRegs.r13.Value() - rEDZONE_SIZE;
+#elif defined(GP_PLAT_arm64_linux) || defined(GP_PLAT_arm64_android)
+    uintptr_t rEDZONE_SIZE = 0;
+    uintptr_t start = startRegs.sp.Value() - rEDZONE_SIZE;
 #elif defined(GP_PLAT_x86_linux) || defined(GP_PLAT_x86_android)
     uintptr_t rEDZONE_SIZE = 0;
     uintptr_t start = startRegs.xsp.Value() - rEDZONE_SIZE;
 #elif defined(GP_PLAT_mips64_linux)
     uintptr_t rEDZONE_SIZE = 0;
     uintptr_t start = startRegs.sp.Value() - rEDZONE_SIZE;
 #else
 #   error "Unknown plat"
--- a/tools/profiler/lul/LulDwarf.cpp
+++ b/tools/profiler/lul/LulDwarf.cpp
@@ -1897,16 +1897,35 @@ unsigned int DwarfCFIToModule::RegisterN
    8 "s8",  "s9",  "s10", "s11", "s12", "s13", "s14", "s15",
    8 "s16", "s17", "s18", "s19", "s20", "s21", "s22", "s23",
    8 "s24", "s25", "s26", "s27", "s28", "s29", "s30", "s31",
    8 "f0",  "f1",  "f2",  "f3",  "f4",  "f5",  "f6",  "f7"
   */
   return 13 * 8;
 }
 
+// Number of DWARF registers on AArch64; numbering per ARM IHI 0057A, section 3.1
+unsigned int DwarfCFIToModule::RegisterNames::ARM64() {
+  /*
+   8 "x0",  "x1",  "x2",  "x3",  "x4",  "x5",  "x6",  "x7",
+   8 "x8",  "x9",  "x10", "x11", "x12", "x13", "x14", "x15",
+   8 "x16", "x17", "x18", "x19", "x20", "x21", "x22", "x23",
+   8 "x24", "x25", "x26", "x27", "x28", "x29", "x30", "sp",
+   8 "",    "",    "",    "",    "",    "",    "",    "",
+   8 "",    "",    "",    "",    "",    "",    "",    "",
+   8 "",    "",    "",    "",    "",    "",    "",    "",
+   8 "",    "",    "",    "",    "",    "",    "",    "",
+   8 "v0",  "v1",  "v2",  "v3",  "v4",  "v5",  "v6",  "v7",
+   8 "v8",  "v9",  "v10", "v11", "v12", "v13", "v14", "v15",
+   8 "v16", "v17", "v18", "v19", "v20", "v21", "v22", "v23",
+   8 "v24", "v25", "v26", "v27", "v28", "v29", "v30", "v31",
+  */
+  return 12 * 8;
+}
+
 unsigned int DwarfCFIToModule::RegisterNames::MIPS() {
   /*
    8 "$zero", "$at",  "$v0",  "$v1",  "$a0",   "$a1",  "$a2",  "$a3",
    8 "$t0",   "$t1",  "$t2",  "$t3",  "$t4",   "$t5",  "$t6",  "$t7",
    8 "$s0",   "$s1",  "$s2",  "$s3",  "$s4",   "$s5",  "$s6",  "$s7",
    8 "$t8",   "$t9",  "$k0",  "$k1",  "$gp",   "$sp",  "$fp",  "$ra",
    9 "$lo",   "$hi",  "$pc",  "$f0",  "$f1",   "$f2",  "$f3",  "$f4",  "$f5",
    8 "$f6",   "$f7",  "$f8",  "$f9",  "$f10",  "$f11", "$f12", "$f13",
--- a/tools/profiler/lul/LulDwarfExt.h
+++ b/tools/profiler/lul/LulDwarfExt.h
@@ -1212,16 +1212,19 @@ class DwarfCFIToModule: public CallFrame
     static unsigned int I386();
 
     // AMD x86_64, AMD64, Intel EM64T, or Intel 64
     static unsigned int X86_64();
 
     // ARM.
     static unsigned int ARM();
 
+    // AARCH64.
+    static unsigned int ARM64();
+
     // MIPS.
     static unsigned int MIPS();
   };
 
   // Create a handler for the dwarf2reader::CallFrameInfo parser that
   // records the stack unwinding information it receives in SUMM.
   //
   // Use REGISTER_NAMES[I] as the name of register number I; *this
--- a/tools/profiler/lul/LulDwarfSummariser.cpp
+++ b/tools/profiler/lul/LulDwarfSummariser.cpp
@@ -232,16 +232,90 @@ Summariser::Rule(uintptr_t aAddress, int
   mCurrRules.mR13expr = LExpr(NODEREF, DW_REG_CFA, 0);
 
   // If there's no information about R15 (the return address), say
   // it's a copy of R14 (the link register).
   if (mCurrRules.mR15expr.mHow == UNKNOWN) {
     mCurrRules.mR15expr = LExpr(NODEREF, DW_REG_ARM_R14, 0);
   }
 
+#elif defined(GP_ARCH_arm64)
+
+  // ----------------- arm64 ----------------- //
+
+  switch (aNewReg) {
+    case DW_REG_CFA:
+      if (how != NODEREF) {
+        reason1 = "rule for DW_REG_CFA: invalid |how|";
+        goto cant_summarise;
+      }
+      switch (oldReg) {
+        case DW_REG_AARCH64_X29:
+        case DW_REG_AARCH64_SP:
+          break;
+        default:
+          reason1 = "rule for DW_REG_CFA: invalid |oldReg|";
+          goto cant_summarise;
+      }
+      mCurrRules.mCfaExpr = LExpr(how, oldReg, offset);
+      break;
+
+    case DW_REG_AARCH64_X29:
+    case DW_REG_AARCH64_X30:
+    case DW_REG_AARCH64_SP: {
+      switch (how) {
+        case NODEREF:
+        case DEREF:
+          // Check the old register is one we're tracking.
+          if (!registerIsTracked((DW_REG_NUMBER)oldReg) &&
+              oldReg != DW_REG_CFA) {
+            reason1 = "rule for X29/X30/SP: uses untracked reg";
+            goto cant_summarise;
+          }
+          break;
+        case PFXEXPR: {
+          // Check that the prefix expression only mentions tracked registers.
+          const vector<PfxInstr>* pfxInstrs = mSecMap->GetPfxInstrs();
+          reason2 = checkPfxExpr(pfxInstrs, offset);
+          if (reason2) {
+            reason1 = "rule for X29/X30/SP: ";
+            goto cant_summarise;
+          }
+          break;
+        }
+        default:
+          goto cant_summarise;
+      }
+      LExpr expr = LExpr(how, oldReg, offset);
+      switch (aNewReg) {
+        case DW_REG_AARCH64_X29: mCurrRules.mX29expr = expr; break;
+        case DW_REG_AARCH64_X30: mCurrRules.mX30expr = expr; break;
+        case DW_REG_AARCH64_SP:  mCurrRules.mSPexpr  = expr; break;
+        default: MOZ_ASSERT(0);
+      }
+      break;
+    }
+    default:
+     // Leave |reason1| and |reason2| unset here, for the reasons explained
+     // in the analogous point in the x64/x86 case just below.
+     goto cant_summarise;
+  }
+
+  if (mCurrRules.mX29expr.mHow == UNKNOWN) {
+    mCurrRules.mX29expr = LExpr(NODEREF, DW_REG_AARCH64_X29, 0);
+  }
+  if (mCurrRules.mX30expr.mHow == UNKNOWN) {
+    mCurrRules.mX30expr = LExpr(NODEREF, DW_REG_AARCH64_X30, 0);
+  }
+  // On aarch64, it seems the old SP value before the call is always the
+  // same as the CFA.  Therefore, in the absence of any other way to
+  // recover the SP, specify that the CFA should be copied.
+  if (mCurrRules.mSPexpr.mHow == UNKNOWN) {
+    mCurrRules.mSPexpr = LExpr(NODEREF, DW_REG_CFA, 0);
+  }
 #elif defined(GP_ARCH_amd64) || defined(GP_ARCH_x86)
 
   // ---------------- x64/x86 ---------------- //
 
   // Now, can we add the rule to our summary?  This depends on whether
   // the registers and the overall expression are representable.  This
   // is the heart of the summarisation process.
   switch (aNewReg) {
--- a/tools/profiler/lul/LulElf.cpp
+++ b/tools/profiler/lul/LulElf.cpp
@@ -68,16 +68,20 @@
 #include "LulMainInt.h"
 
 
 #if defined(GP_PLAT_arm_android) && !defined(SHT_ARM_EXIDX)
 // bionic and older glibsc don't define it
 # define SHT_ARM_EXIDX (SHT_LOPROC + 1)
 #endif
 
+// Older Linux headers don't define EM_AARCH64, so supply a fallback here.
+#ifndef EM_AARCH64
+#define EM_AARCH64 183
+#endif
 
 // This namespace contains helper functions.
 namespace {
 
 using lul::DwarfCFIToModule;
 using lul::FindElfSectionByName;
 using lul::GetOffset;
 using lul::IsValidElf;
@@ -162,16 +166,19 @@ bool DwarfCFIRegisterNames(const typenam
       *num_dw_regnames = DwarfCFIToModule::RegisterNames::ARM();
       return true;
     case EM_X86_64:
       *num_dw_regnames = DwarfCFIToModule::RegisterNames::X86_64();
       return true;
     case EM_MIPS:
       *num_dw_regnames = DwarfCFIToModule::RegisterNames::MIPS();
       return true;
+    case EM_AARCH64:
+      *num_dw_regnames = DwarfCFIToModule::RegisterNames::ARM64();
+      return true;
     default:
       MOZ_ASSERT(0);
       return false;
   }
 }
 
 template<typename ElfClass>
 bool LoadDwarfCFI(const string& dwarf_filename,
@@ -449,16 +456,17 @@ bool LoadSymbols(const string& obj_file,
 // ELF_HEADER.
 template<typename ElfClass>
 const char* ElfArchitecture(const typename ElfClass::Ehdr* elf_header) {
   typedef typename ElfClass::Half Half;
   Half arch = elf_header->e_machine;
   switch (arch) {
     case EM_386:        return "x86";
     case EM_ARM:        return "arm";
+    case EM_AARCH64:    return "arm64";
     case EM_MIPS:       return "mips";
     case EM_PPC64:      return "ppc64";
     case EM_PPC:        return "ppc";
     case EM_S390:       return "s390";
     case EM_SPARC:      return "sparc";
     case EM_SPARCV9:    return "sparcv9";
     case EM_X86_64:     return "x86_64";
     default: return NULL;
--- a/tools/profiler/lul/LulMain.cpp
+++ b/tools/profiler/lul/LulMain.cpp
@@ -71,16 +71,20 @@ NameOf_DW_REG(int16_t aReg)
     case DW_REG_INTEL_XIP: return "xip";
 #elif defined(GP_ARCH_arm)
     case DW_REG_ARM_R7:    return "r7";
     case DW_REG_ARM_R11:   return "r11";
     case DW_REG_ARM_R12:   return "r12";
     case DW_REG_ARM_R13:   return "r13";
     case DW_REG_ARM_R14:   return "r14";
     case DW_REG_ARM_R15:   return "r15";
+#elif defined(GP_ARCH_arm64)
+    case DW_REG_AARCH64_X29: return "x29";
+    case DW_REG_AARCH64_X30: return "x30";
+    case DW_REG_AARCH64_SP:  return "sp";
 #elif defined(GP_ARCH_mips64)
     case DW_REG_MIPS_SP:   return "sp";
     case DW_REG_MIPS_FP:   return "fp";
     case DW_REG_MIPS_PC:   return "pc";
 #else
 # error "Unsupported arch"
 #endif
     default: return "???";
@@ -134,16 +138,20 @@ RuleSet::Print(void(*aLog)(const char*))
   res += mXbpExpr.ShowRule(" BP");
 #elif defined(GP_ARCH_arm)
   res += mR15expr.ShowRule(" R15");
   res += mR7expr .ShowRule(" R7" );
   res += mR11expr.ShowRule(" R11");
   res += mR12expr.ShowRule(" R12");
   res += mR13expr.ShowRule(" R13");
   res += mR14expr.ShowRule(" R14");
+#elif defined(GP_ARCH_arm64)
+  res += mX29expr.ShowRule(" X29");
+  res += mX30expr.ShowRule(" X30");
+  res += mSPexpr .ShowRule(" SP");
 #elif defined(GP_ARCH_mips64)
   res += mPCexpr.ShowRule(" PC");
   res += mSPexpr.ShowRule(" SP");
   res += mFPexpr.ShowRule(" FP");
 #else
 # error "Unsupported arch"
 #endif
   aLog(res.c_str());
@@ -159,16 +167,20 @@ RuleSet::ExprForRegno(DW_REG_NUMBER aReg
     case DW_REG_INTEL_XBP: return &mXbpExpr;
 #   elif defined(GP_ARCH_arm)
     case DW_REG_ARM_R15:   return &mR15expr;
     case DW_REG_ARM_R14:   return &mR14expr;
     case DW_REG_ARM_R13:   return &mR13expr;
     case DW_REG_ARM_R12:   return &mR12expr;
     case DW_REG_ARM_R11:   return &mR11expr;
     case DW_REG_ARM_R7:    return &mR7expr;
+#   elif defined(GP_ARCH_arm64)
+    case DW_REG_AARCH64_X29: return &mX29expr;
+    case DW_REG_AARCH64_X30: return &mX30expr;
+    case DW_REG_AARCH64_SP:  return &mSPexpr;
 #elif defined(GP_ARCH_mips64)
     case DW_REG_MIPS_SP:    return &mSPexpr;
     case DW_REG_MIPS_FP:    return &mFPexpr;
     case DW_REG_MIPS_PC:    return &mPCexpr;
 #   else
 #     error "Unknown arch"
 #   endif
     default: return nullptr;
@@ -912,16 +924,20 @@ TaggedUWord EvaluateReg(int16_t aReg, co
     case DW_REG_INTEL_XIP: return aOldRegs->xip;
 #elif defined(GP_ARCH_arm)
     case DW_REG_ARM_R7:    return aOldRegs->r7;
     case DW_REG_ARM_R11:   return aOldRegs->r11;
     case DW_REG_ARM_R12:   return aOldRegs->r12;
     case DW_REG_ARM_R13:   return aOldRegs->r13;
     case DW_REG_ARM_R14:   return aOldRegs->r14;
     case DW_REG_ARM_R15:   return aOldRegs->r15;
+#elif defined(GP_ARCH_arm64)
+    case DW_REG_AARCH64_X29: return aOldRegs->x29;
+    case DW_REG_AARCH64_X30: return aOldRegs->x30;
+    case DW_REG_AARCH64_SP:  return aOldRegs->sp;
 #elif defined(GP_ARCH_mips64)
     case DW_REG_MIPS_SP:   return aOldRegs->sp;
     case DW_REG_MIPS_FP:   return aOldRegs->fp;
     case DW_REG_MIPS_PC:   return aOldRegs->pc;
 #else
 # error "Unsupported arch"
 #endif
     default: MOZ_ASSERT(0); return TaggedUWord();
@@ -1108,16 +1124,21 @@ void UseRuleSet(/*MOD*/UnwindRegs* aRegs
   aRegs->xip = TaggedUWord();
 #elif defined(GP_ARCH_arm)
   aRegs->r7  = TaggedUWord();
   aRegs->r11 = TaggedUWord();
   aRegs->r12 = TaggedUWord();
   aRegs->r13 = TaggedUWord();
   aRegs->r14 = TaggedUWord();
   aRegs->r15 = TaggedUWord();
+#elif defined(GP_ARCH_arm64)
+  aRegs->x29 = TaggedUWord();
+  aRegs->x30 = TaggedUWord();
+  aRegs->sp  = TaggedUWord();
+  aRegs->pc  = TaggedUWord();
 #elif defined(GP_ARCH_mips64)
   aRegs->sp  = TaggedUWord();
   aRegs->fp  = TaggedUWord();
   aRegs->pc  = TaggedUWord();
 #else
 #  error "Unsupported arch"
 #endif
 
@@ -1149,16 +1170,23 @@ void UseRuleSet(/*MOD*/UnwindRegs* aRegs
   aRegs->r12
     = aRS->mR12expr.EvaluateExpr(&old_regs, cfa, aStackImg, aPfxInstrs);
   aRegs->r13
     = aRS->mR13expr.EvaluateExpr(&old_regs, cfa, aStackImg, aPfxInstrs);
   aRegs->r14
     = aRS->mR14expr.EvaluateExpr(&old_regs, cfa, aStackImg, aPfxInstrs);
   aRegs->r15
     = aRS->mR15expr.EvaluateExpr(&old_regs, cfa, aStackImg, aPfxInstrs);
+#elif defined(GP_ARCH_arm64)
+  aRegs->x29
+    = aRS->mX29expr.EvaluateExpr(&old_regs, cfa, aStackImg, aPfxInstrs);
+  aRegs->x30
+    = aRS->mX30expr.EvaluateExpr(&old_regs, cfa, aStackImg, aPfxInstrs);
+  aRegs->sp
+    = aRS->mSPexpr.EvaluateExpr(&old_regs, cfa, aStackImg, aPfxInstrs);
 #elif defined(GP_ARCH_mips64)
   aRegs->sp
     = aRS->mSPexpr.EvaluateExpr(&old_regs, cfa, aStackImg, aPfxInstrs);
   aRegs->fp
     = aRS->mFPexpr.EvaluateExpr(&old_regs, cfa, aStackImg, aPfxInstrs);
   aRegs->pc
     = aRS->mPCexpr.EvaluateExpr(&old_regs, cfa, aStackImg, aPfxInstrs);
 #else
@@ -1208,16 +1236,26 @@ LUL::Unwind(/*OUT*/uintptr_t* aFramePCs,
                      (int)regs.r15.Valid(), (unsigned long long int)regs.r15.Value(),
                      (int)regs.r7.Valid(),  (unsigned long long int)regs.r7.Value(),
                      (int)regs.r11.Valid(), (unsigned long long int)regs.r11.Value(),
                      (int)regs.r12.Valid(), (unsigned long long int)regs.r12.Value(),
                      (int)regs.r13.Valid(), (unsigned long long int)regs.r13.Value(),
                      (int)regs.r14.Valid(), (unsigned long long int)regs.r14.Value());
       buf[sizeof(buf)-1] = 0;
       mLog(buf);
+#elif defined(GP_ARCH_arm64)
+      SprintfLiteral(buf,
+                     "LoopTop: pc %d/%llx  x29 %d/%llx  x30 %d/%llx"
+                     "  sp %d/%llx\n",
+                     (int)regs.pc.Valid(), (unsigned long long int)regs.pc.Value(),
+                     (int)regs.x29.Valid(), (unsigned long long int)regs.x29.Value(),
+                     (int)regs.x30.Valid(), (unsigned long long int)regs.x30.Value(),
+                     (int)regs.sp.Valid(), (unsigned long long int)regs.sp.Value());
+      buf[sizeof(buf)-1] = 0;
+      mLog(buf);
 #elif defined(GP_ARCH_mips64)
       SprintfLiteral(buf,
                      "LoopTop: pc %d/%llx  sp %d/%llx  fp %d/%llx\n",
                      (int)regs.pc.Valid(), (unsigned long long int)regs.pc.Value(),
                      (int)regs.sp.Valid(), (unsigned long long int)regs.sp.Value(),
                      (int)regs.fp.Valid(), (unsigned long long int)regs.fp.Value());
       buf[sizeof(buf)-1] = 0;
       mLog(buf);
@@ -1227,16 +1265,19 @@ LUL::Unwind(/*OUT*/uintptr_t* aFramePCs,
     }
 
 #if defined(GP_ARCH_amd64) || defined(GP_ARCH_x86)
     TaggedUWord ia = regs.xip;
     TaggedUWord sp = regs.xsp;
 #elif defined(GP_ARCH_arm)
     TaggedUWord ia = (*aFramesUsed == 0 ? regs.r15 : regs.r14);
     TaggedUWord sp = regs.r13;
+#elif defined(GP_ARCH_arm64)
+    TaggedUWord ia = (*aFramesUsed == 0 ? regs.pc : regs.x30);
+    TaggedUWord sp = regs.sp;
 #elif defined(GP_ARCH_mips64)
     TaggedUWord ia = regs.pc;
     TaggedUWord sp = regs.sp;
 #else
 # error "Unsupported arch"
 #endif
 
     if (*aFramesUsed >= aFramesAvail) {
@@ -1525,16 +1566,36 @@ bool GetAndCheckStackTrace(LUL* aLUL, co
   startRegs.r15 = TaggedUWord(block[0]);
   startRegs.r14 = TaggedUWord(block[1]);
   startRegs.r13 = TaggedUWord(block[2]);
   startRegs.r12 = TaggedUWord(block[3]);
   startRegs.r11 = TaggedUWord(block[4]);
   startRegs.r7  = TaggedUWord(block[5]);
   const uintptr_t REDZONE_SIZE = 0;
   uintptr_t start = block[1] - REDZONE_SIZE;
+#elif defined(GP_ARCH_arm64)
+  volatile uintptr_t block[4];
+  MOZ_ASSERT(sizeof(block) == 32);
+  __asm__ __volatile__(
+    "adr x0, . \n\t"
+    "str x0, [%0, #0] \n\t"
+    "str x29, [%0, #8] \n\t"
+    "str x30, [%0, #16] \n\t"
+    "mov x0, sp \n\t"
+    "str x0, [%0, #24] \n\t"
+    :
+    : "r"(&block[0])
+    : "memory", "x0"
+  );
+  startRegs.pc = TaggedUWord(block[0]);
+  startRegs.x29 = TaggedUWord(block[1]);
+  startRegs.x30 = TaggedUWord(block[2]);
+  startRegs.sp = TaggedUWord(block[3]);
+  const uintptr_t REDZONE_SIZE = 0;
+  uintptr_t start = block[1] - REDZONE_SIZE;
 #elif defined(GP_ARCH_mips64)
   volatile uintptr_t block[3];
   MOZ_ASSERT(sizeof(block) == 24);
   __asm__ __volatile__(
     "sd $29, 8(%0)     \n"
     "sd $30, 16(%0)    \n"
     :
     :"r"(block)
--- a/tools/profiler/lul/LulMain.h
+++ b/tools/profiler/lul/LulMain.h
@@ -143,16 +143,21 @@ private:
 struct UnwindRegs {
 #if defined(GP_ARCH_arm)
   TaggedUWord r7;
   TaggedUWord r11;
   TaggedUWord r12;
   TaggedUWord r13;
   TaggedUWord r14;
   TaggedUWord r15;
+#elif defined(GP_ARCH_arm64)
+  TaggedUWord x29;
+  TaggedUWord x30;
+  TaggedUWord sp;
+  TaggedUWord pc;
 #elif defined(GP_ARCH_amd64) || defined(GP_ARCH_x86)
   TaggedUWord xbp;
   TaggedUWord xsp;
   TaggedUWord xip;
 #elif defined(GP_ARCH_mips64)
   TaggedUWord sp;
   TaggedUWord fp;
   TaggedUWord pc;
--- a/tools/profiler/lul/LulMainInt.h
+++ b/tools/profiler/lul/LulMainInt.h
@@ -38,16 +38,21 @@ enum DW_REG_NUMBER {
 #if defined(GP_ARCH_arm)
   // ARM registers
   DW_REG_ARM_R7  = 7,
   DW_REG_ARM_R11 = 11,
   DW_REG_ARM_R12 = 12,
   DW_REG_ARM_R13 = 13,
   DW_REG_ARM_R14 = 14,
   DW_REG_ARM_R15 = 15,
+#elif defined(GP_ARCH_arm64)
+  // aarch64 registers
+  DW_REG_AARCH64_X29 = 29,
+  DW_REG_AARCH64_X30 = 30,
+  DW_REG_AARCH64_SP  = 31,
 #elif defined(GP_ARCH_amd64)
   // Because the X86 (32 bit) and AMD64 (64 bit) summarisers are
   // combined, a merged set of register constants is needed.
   DW_REG_INTEL_XBP = 6,
   DW_REG_INTEL_XSP = 7,
   DW_REG_INTEL_XIP = 16,
 #elif defined(GP_ARCH_x86)
   DW_REG_INTEL_XBP = 5,
@@ -274,16 +279,20 @@ public:
   LExpr  mXbpExpr;
 #elif defined(GP_ARCH_arm)
   LExpr  mR15expr; // return address
   LExpr  mR14expr;
   LExpr  mR13expr;
   LExpr  mR12expr;
   LExpr  mR11expr;
   LExpr  mR7expr;
+#elif defined(GP_ARCH_arm64)
+  LExpr  mX29expr; // frame pointer register
+  LExpr  mX30expr; // link register
+  LExpr  mSPexpr;
 #elif defined(GP_ARCH_mips64)
   LExpr  mPCexpr;
   LExpr  mFPexpr;
   LExpr  mSPexpr;
 #else
 #   error "Unknown arch"
 #endif
 };
@@ -294,16 +303,19 @@ static inline bool registerIsTracked(DW_
   switch (reg) {
 #   if defined(GP_ARCH_amd64) || defined(GP_ARCH_x86)
     case DW_REG_INTEL_XBP: case DW_REG_INTEL_XSP: case DW_REG_INTEL_XIP:
       return true;
 #   elif defined(GP_ARCH_arm)
     case DW_REG_ARM_R7:  case DW_REG_ARM_R11: case DW_REG_ARM_R12:
     case DW_REG_ARM_R13: case DW_REG_ARM_R14: case DW_REG_ARM_R15:
       return true;
+#   elif defined(GP_ARCH_arm64)
+    case DW_REG_AARCH64_X29:  case DW_REG_AARCH64_X30: case DW_REG_AARCH64_SP:
+      return true;
 #elif defined(GP_ARCH_mips64)
     case DW_REG_MIPS_FP:  case DW_REG_MIPS_SP: case DW_REG_MIPS_PC:
       return true;
 #   else
 #     error "Unknown arch"
 #   endif
     default:
       return false;
--- a/tools/profiler/moz.build
+++ b/tools/profiler/moz.build
@@ -43,17 +43,17 @@ if CONFIG['MOZ_GECKO_PROFILER']:
             'gecko/nsProfiler.cpp',
         ]
     else:
         UNIFIED_SOURCES += [
             'gecko/nsProfiler.cpp',
         ]
 
     if CONFIG['OS_TARGET'] in ('Android', 'Linux'):
-        if CONFIG['CPU_ARCH'] in ('arm', 'x86', 'x86_64', 'mips64'):
+        if CONFIG['CPU_ARCH'] in ('arm', 'aarch64', 'x86', 'x86_64', 'mips64'):
             UNIFIED_SOURCES += [
                 'lul/AutoObjectMapper.cpp',
                 'lul/LulCommon.cpp',
                 'lul/LulDwarf.cpp',
                 'lul/LulDwarfSummariser.cpp',
                 'lul/LulElf.cpp',
                 'lul/LulMain.cpp',
                 'lul/platform-linux-lul.cpp',
--- a/tools/profiler/tests/gtest/LulTestDwarf.cpp
+++ b/tools/profiler/tests/gtest/LulTestDwarf.cpp
@@ -2401,16 +2401,19 @@ TEST_F(LulDwarfExpr, ExpressionOverrun) 
   EXPECT_TRUE(ix == -1);
 }
 
 // We'll need to mention specific Dwarf registers in the EvaluatePfxExpr tests,
 // and those names are arch-specific, so a bit of macro magic is helpful.
 #if defined(GP_ARCH_arm)
 # define TESTED_REG_STRUCT_NAME  r11
 # define TESTED_REG_DWARF_NAME   DW_REG_ARM_R11
+#elif defined(GP_ARCH_arm64)
+# define TESTED_REG_STRUCT_NAME  x29
+# define TESTED_REG_DWARF_NAME   DW_REG_AARCH64_X29
 #elif defined(GP_ARCH_amd64) || defined(GP_ARCH_x86)
 # define TESTED_REG_STRUCT_NAME  xbp
 # define TESTED_REG_DWARF_NAME   DW_REG_INTEL_XBP
 #else
 # error "Unknown plat"
 #endif
 
 struct EvaluatePfxExprFixture {
--- a/tools/profiler/tests/gtest/moz.build
+++ b/tools/profiler/tests/gtest/moz.build
@@ -1,16 +1,16 @@
 # -*- Mode: python; indent-tabs-mode: nil; tab-width: 40 -*-
 # vim: set filetype=python:
 # This Source Code Form is subject to the terms of the Mozilla Public
 # License, v. 2.0. If a copy of the MPL was not distributed with this
 # file, you can obtain one at http://mozilla.org/MPL/2.0/.
 
 if (CONFIG['OS_TARGET'] in ('Android', 'Linux') and
-    CONFIG['CPU_ARCH'] in ('arm', 'x86', 'x86_64')):
+    CONFIG['CPU_ARCH'] in ('arm', 'aarch64', 'x86', 'x86_64')):
     UNIFIED_SOURCES += [
         'LulTest.cpp',
         'LulTestDwarf.cpp',
         'LulTestInfrastructure.cpp',
     ]
 
 LOCAL_INCLUDES += [
     '/tools/profiler/core',