mozglue/baseprofiler/lul/LulMain.cpp
author Masayuki Nakano <masayuki@d-toybox.com>
Sun, 16 Jan 2022 06:21:17 +0000
changeset 604634 9ef0614a59629916c1e182eb8eda055b0b0e8b32
parent 588520 e3d5648f410e154df54a19f6731da2e16659e52c
permissions -rw-r--r--
Bug 1749299 - Make `HTMLEditor::HandleInsertLinefeed()` stop handling it if insertion point cannot have text nodes r=m_kato Ideally, it should not be called when the editor cannot insert new text node. However, the callers are complicated. Therefore, let's check in it for avoiding making the callers more complicated. Fortunately, this is not realistic path for normal web apps. Therefore, the compatibility of the behavior is not matter. That's the reason why this patch does not have a test comparing the result. Differential Revision: https://phabricator.services.mozilla.com/D135826

/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
/* vim: set ts=8 sts=2 et sw=2 tw=80: */
/* This Source Code Form is subject to the terms of the Mozilla Public
 * License, v. 2.0. If a copy of the MPL was not distributed with this
 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */

#include "LulMain.h"

#include <string.h>
#include <stdlib.h>
#include <stdio.h>
#include <unistd.h>  // write(), only for testing LUL

#include <algorithm>  // std::sort
#include <string>
#include <utility>

#include "mozilla/Assertions.h"
#include "mozilla/ArrayUtils.h"
#include "mozilla/CheckedInt.h"
#include "mozilla/DebugOnly.h"
#include "mozilla/MemoryChecking.h"
#include "mozilla/Sprintf.h"
#include "mozilla/UniquePtr.h"
#include "mozilla/Unused.h"

#include "BaseProfiler.h"
#include "LulCommonExt.h"
#include "LulElfExt.h"
#include "LulMainInt.h"

using mozilla::baseprofiler::profiler_current_process_id;
using mozilla::baseprofiler::profiler_current_thread_id;

// Set this to 1 for verbose logging
#define DEBUG_MAIN 0

namespace lul {

using mozilla::CheckedInt;
using mozilla::DebugOnly;
using mozilla::MallocSizeOf;
using mozilla::Unused;
using std::pair;
using std::string;
using std::vector;

// WARNING WARNING WARNING WARNING WARNING WARNING WARNING WARNING
//
// Some functions in this file are marked RUNS IN NO-MALLOC CONTEXT.
// Any such function -- and, hence, the transitive closure of those
// reachable from it -- must not do any dynamic memory allocation.
// Doing so risks deadlock.  There is exactly one root function for
// the transitive closure: Lul::Unwind.
//
// WARNING WARNING WARNING WARNING WARNING WARNING WARNING WARNING

////////////////////////////////////////////////////////////////
// RuleSet                                                    //
////////////////////////////////////////////////////////////////

static const char* NameOf_DW_REG(int16_t aReg) {
  switch (aReg) {
    case DW_REG_CFA:
      return "cfa";
#if defined(GP_ARCH_amd64) || defined(GP_ARCH_x86)
    case DW_REG_INTEL_XBP:
      return "xbp";
    case DW_REG_INTEL_XSP:
      return "xsp";
    case DW_REG_INTEL_XIP:
      return "xip";
#elif defined(GP_ARCH_arm)
    case DW_REG_ARM_R7:
      return "r7";
    case DW_REG_ARM_R11:
      return "r11";
    case DW_REG_ARM_R12:
      return "r12";
    case DW_REG_ARM_R13:
      return "r13";
    case DW_REG_ARM_R14:
      return "r14";
    case DW_REG_ARM_R15:
      return "r15";
#elif defined(GP_ARCH_arm64)
    case DW_REG_AARCH64_X29:
      return "x29";
    case DW_REG_AARCH64_X30:
      return "x30";
    case DW_REG_AARCH64_SP:
      return "sp";
#elif defined(GP_ARCH_mips64)
    case DW_REG_MIPS_SP:
      return "sp";
    case DW_REG_MIPS_FP:
      return "fp";
    case DW_REG_MIPS_PC:
      return "pc";
#else
#  error "Unsupported arch"
#endif
    default:
      return "???";
  }
}

string LExpr::ShowRule(const char* aNewReg) const {
  char buf[64];
  string res = string(aNewReg) + "=";
  switch (mHow) {
    case UNKNOWN:
      res += "Unknown";
      break;
    case NODEREF:
      SprintfLiteral(buf, "%s+%d", NameOf_DW_REG(mReg), (int)mOffset);
      res += buf;
      break;
    case DEREF:
      SprintfLiteral(buf, "*(%s+%d)", NameOf_DW_REG(mReg), (int)mOffset);
      res += buf;
      break;
    case PFXEXPR:
      SprintfLiteral(buf, "PfxExpr-at-%d", (int)mOffset);
      res += buf;
      break;
    default:
      res += "???";
      break;
  }
  return res;
}

void RuleSet::Print(void (*aLog)(const char*)) const {
  char buf[96];
  SprintfLiteral(buf, "[%llx .. %llx]: let ", (unsigned long long int)mAddr,
                 (unsigned long long int)(mAddr + mLen - 1));
  string res = string(buf);
  res += mCfaExpr.ShowRule("cfa");
  res += " in";
  // For each reg we care about, print the recovery expression.
#if defined(GP_ARCH_amd64) || defined(GP_ARCH_x86)
  res += mXipExpr.ShowRule(" RA");
  res += mXspExpr.ShowRule(" SP");
  res += mXbpExpr.ShowRule(" BP");
#elif defined(GP_ARCH_arm)
  res += mR15expr.ShowRule(" R15");
  res += mR7expr.ShowRule(" R7");
  res += mR11expr.ShowRule(" R11");
  res += mR12expr.ShowRule(" R12");
  res += mR13expr.ShowRule(" R13");
  res += mR14expr.ShowRule(" R14");
#elif defined(GP_ARCH_arm64)
  res += mX29expr.ShowRule(" X29");
  res += mX30expr.ShowRule(" X30");
  res += mSPexpr.ShowRule(" SP");
#elif defined(GP_ARCH_mips64)
  res += mPCexpr.ShowRule(" PC");
  res += mSPexpr.ShowRule(" SP");
  res += mFPexpr.ShowRule(" FP");
#else
#  error "Unsupported arch"
#endif
  aLog(res.c_str());
}

LExpr* RuleSet::ExprForRegno(DW_REG_NUMBER aRegno) {
  switch (aRegno) {
    case DW_REG_CFA:
      return &mCfaExpr;
#if defined(GP_ARCH_amd64) || defined(GP_ARCH_x86)
    case DW_REG_INTEL_XIP:
      return &mXipExpr;
    case DW_REG_INTEL_XSP:
      return &mXspExpr;
    case DW_REG_INTEL_XBP:
      return &mXbpExpr;
#elif defined(GP_ARCH_arm)
    case DW_REG_ARM_R15:
      return &mR15expr;
    case DW_REG_ARM_R14:
      return &mR14expr;
    case DW_REG_ARM_R13:
      return &mR13expr;
    case DW_REG_ARM_R12:
      return &mR12expr;
    case DW_REG_ARM_R11:
      return &mR11expr;
    case DW_REG_ARM_R7:
      return &mR7expr;
#elif defined(GP_ARCH_arm64)
    case DW_REG_AARCH64_X29:
      return &mX29expr;
    case DW_REG_AARCH64_X30:
      return &mX30expr;
    case DW_REG_AARCH64_SP:
      return &mSPexpr;
#elif defined(GP_ARCH_mips64)
    case DW_REG_MIPS_SP:
      return &mSPexpr;
    case DW_REG_MIPS_FP:
      return &mFPexpr;
    case DW_REG_MIPS_PC:
      return &mPCexpr;
#else
#  error "Unknown arch"
#endif
    default:
      return nullptr;
  }
}

RuleSet::RuleSet() {
  mAddr = 0;
  mLen = 0;
  // The only other fields are of type LExpr and those are initialised
  // by LExpr::LExpr().
}

////////////////////////////////////////////////////////////////
// SecMap                                                     //
////////////////////////////////////////////////////////////////

// See header file LulMainInt.h for comments about invariants.

SecMap::SecMap(void (*aLog)(const char*))
    : mSummaryMinAddr(1), mSummaryMaxAddr(0), mUsable(true), mLog(aLog) {}

SecMap::~SecMap() { mRuleSets.clear(); }

// RUNS IN NO-MALLOC CONTEXT
RuleSet* SecMap::FindRuleSet(uintptr_t ia) {
  // Binary search mRuleSets to find one that brackets |ia|.
  // lo and hi need to be signed, else the loop termination tests
  // don't work properly.  Note that this works correctly even when
  // mRuleSets.size() == 0.

  // Can't do this until the array has been sorted and preened.
  MOZ_ASSERT(mUsable);

  long int lo = 0;
  long int hi = (long int)mRuleSets.size() - 1;
  while (true) {
    // current unsearched space is from lo to hi, inclusive.
    if (lo > hi) {
      // not found
      return nullptr;
    }
    long int mid = lo + ((hi - lo) / 2);
    RuleSet* mid_ruleSet = &mRuleSets[mid];
    uintptr_t mid_minAddr = mid_ruleSet->mAddr;
    uintptr_t mid_maxAddr = mid_minAddr + mid_ruleSet->mLen - 1;
    if (ia < mid_minAddr) {
      hi = mid - 1;
      continue;
    }
    if (ia > mid_maxAddr) {
      lo = mid + 1;
      continue;
    }
    MOZ_ASSERT(mid_minAddr <= ia && ia <= mid_maxAddr);
    return mid_ruleSet;
  }
  // NOTREACHED
}

// Add a RuleSet to the collection.  The rule is copied in.  Calling
// this makes the map non-searchable.
void SecMap::AddRuleSet(const RuleSet* rs) {
  mUsable = false;
  mRuleSets.push_back(*rs);
}

// Add a PfxInstr to the vector of such instrs, and return the index
// in the vector.  Calling this makes the map non-searchable.
uint32_t SecMap::AddPfxInstr(PfxInstr pfxi) {
  mUsable = false;
  mPfxInstrs.push_back(pfxi);
  return mPfxInstrs.size() - 1;
}

static bool CmpRuleSetsByAddrLE(const RuleSet& rs1, const RuleSet& rs2) {
  return rs1.mAddr < rs2.mAddr;
}

// Prepare the map for searching.  Completely remove any which don't
// fall inside the specified range [start, +len).
void SecMap::PrepareRuleSets(uintptr_t aStart, size_t aLen) {
  if (mRuleSets.empty()) {
    return;
  }

  MOZ_ASSERT(aLen > 0);
  if (aLen == 0) {
    // This should never happen.
    mRuleSets.clear();
    return;
  }

  // Sort by start addresses.
  std::sort(mRuleSets.begin(), mRuleSets.end(), CmpRuleSetsByAddrLE);

  // Detect any entry not completely contained within [start, +len).
  // Set its length to zero, so that the next pass will remove it.
  for (size_t i = 0; i < mRuleSets.size(); ++i) {
    RuleSet* rs = &mRuleSets[i];
    if (rs->mLen > 0 &&
        (rs->mAddr < aStart || rs->mAddr + rs->mLen > aStart + aLen)) {
      rs->mLen = 0;
    }
  }

  // Iteratively truncate any overlaps and remove any zero length
  // entries that might result, or that may have been present
  // initially.  Unless the input is seriously screwy, this is
  // expected to iterate only once.
  while (true) {
    size_t i;
    size_t n = mRuleSets.size();
    size_t nZeroLen = 0;

    if (n == 0) {
      break;
    }

    for (i = 1; i < n; ++i) {
      RuleSet* prev = &mRuleSets[i - 1];
      RuleSet* here = &mRuleSets[i];
      MOZ_ASSERT(prev->mAddr <= here->mAddr);
      if (prev->mAddr + prev->mLen > here->mAddr) {
        prev->mLen = here->mAddr - prev->mAddr;
      }
      if (prev->mLen == 0) nZeroLen++;
    }

    if (mRuleSets[n - 1].mLen == 0) {
      nZeroLen++;
    }

    // At this point, the entries are in-order and non-overlapping.
    // If none of them are zero-length, we are done.
    if (nZeroLen == 0) {
      break;
    }

    // Slide back the entries to remove the zero length ones.
    size_t j = 0;  // The write-point.
    for (i = 0; i < n; ++i) {
      if (mRuleSets[i].mLen == 0) {
        continue;
      }
      if (j != i) mRuleSets[j] = mRuleSets[i];
      ++j;
    }
    MOZ_ASSERT(i == n);
    MOZ_ASSERT(nZeroLen <= n);
    MOZ_ASSERT(j == n - nZeroLen);
    while (nZeroLen > 0) {
      mRuleSets.pop_back();
      nZeroLen--;
    }

    MOZ_ASSERT(mRuleSets.size() == j);
  }

  size_t n = mRuleSets.size();

#ifdef DEBUG
  // Do a final check on the rules: their address ranges must be
  // ascending, non overlapping, non zero sized.
  if (n > 0) {
    MOZ_ASSERT(mRuleSets[0].mLen > 0);
    for (size_t i = 1; i < n; ++i) {
      RuleSet* prev = &mRuleSets[i - 1];
      RuleSet* here = &mRuleSets[i];
      MOZ_ASSERT(prev->mAddr < here->mAddr);
      MOZ_ASSERT(here->mLen > 0);
      MOZ_ASSERT(prev->mAddr + prev->mLen <= here->mAddr);
    }
  }
#endif

  // Set the summary min and max address values.
  if (n == 0) {
    // Use the values defined in comments in the class declaration.
    mSummaryMinAddr = 1;
    mSummaryMaxAddr = 0;
  } else {
    mSummaryMinAddr = mRuleSets[0].mAddr;
    mSummaryMaxAddr = mRuleSets[n - 1].mAddr + mRuleSets[n - 1].mLen - 1;
  }
  char buf[150];
  SprintfLiteral(buf, "PrepareRuleSets: %d entries, smin/smax 0x%llx, 0x%llx\n",
                 (int)n, (unsigned long long int)mSummaryMinAddr,
                 (unsigned long long int)mSummaryMaxAddr);
  buf[sizeof(buf) - 1] = 0;
  mLog(buf);

  // Is now usable for binary search.
  mUsable = true;

#if 0
  mLog("\nRulesets after preening\n");
  for (size_t i = 0; i < mRuleSets.size(); ++i) {
    mRuleSets[i].Print(mLog);
    mLog("\n");
  }
  mLog("\n");
#endif
}

bool SecMap::IsEmpty() { return mRuleSets.empty(); }

size_t SecMap::SizeOfIncludingThis(MallocSizeOf aMallocSizeOf) const {
  size_t n = aMallocSizeOf(this);

  // It's conceivable that these calls would be unsafe with some
  // implementations of std::vector, but it seems to be working for now...
  n += aMallocSizeOf(mRuleSets.data());
  n += aMallocSizeOf(mPfxInstrs.data());

  return n;
}

////////////////////////////////////////////////////////////////
// SegArray                                                   //
////////////////////////////////////////////////////////////////

// A SegArray holds a set of address ranges that together exactly
// cover an address range, with no overlaps or holes.  Each range has
// an associated value, which in this case has been specialised to be
// a simple boolean.  The representation is kept to minimal canonical
// form in which adjacent ranges with the same associated value are
// merged together.  Each range is represented by a |struct Seg|.
//
// SegArrays are used to keep track of which parts of the address
// space are known to contain instructions.
class SegArray {
 public:
  void add(uintptr_t lo, uintptr_t hi, bool val) {
    if (lo > hi) {
      return;
    }
    split_at(lo);
    if (hi < UINTPTR_MAX) {
      split_at(hi + 1);
    }
    std::vector<Seg>::size_type iLo, iHi, i;
    iLo = find(lo);
    iHi = find(hi);
    for (i = iLo; i <= iHi; ++i) {
      mSegs[i].val = val;
    }
    preen();
  }

  // RUNS IN NO-MALLOC CONTEXT
  bool getBoundingCodeSegment(/*OUT*/ uintptr_t* rx_min,
                              /*OUT*/ uintptr_t* rx_max, uintptr_t addr) {
    std::vector<Seg>::size_type i = find(addr);
    if (!mSegs[i].val) {
      return false;
    }
    *rx_min = mSegs[i].lo;
    *rx_max = mSegs[i].hi;
    return true;
  }

  SegArray() {
    Seg s(0, UINTPTR_MAX, false);
    mSegs.push_back(s);
  }

 private:
  struct Seg {
    Seg(uintptr_t lo, uintptr_t hi, bool val) : lo(lo), hi(hi), val(val) {}
    uintptr_t lo;
    uintptr_t hi;
    bool val;
  };

  void preen() {
    for (std::vector<Seg>::iterator iter = mSegs.begin();
         iter < mSegs.end() - 1; ++iter) {
      if (iter[0].val != iter[1].val) {
        continue;
      }
      iter[0].hi = iter[1].hi;
      mSegs.erase(iter + 1);
      // Back up one, so as not to miss an opportunity to merge
      // with the entry after this one.
      --iter;
    }
  }

  // RUNS IN NO-MALLOC CONTEXT
  std::vector<Seg>::size_type find(uintptr_t a) {
    long int lo = 0;
    long int hi = (long int)mSegs.size();
    while (true) {
      // The unsearched space is lo .. hi inclusive.
      if (lo > hi) {
        // Not found.  This can't happen.
        return (std::vector<Seg>::size_type)(-1);
      }
      long int mid = lo + ((hi - lo) / 2);
      uintptr_t mid_lo = mSegs[mid].lo;
      uintptr_t mid_hi = mSegs[mid].hi;
      if (a < mid_lo) {
        hi = mid - 1;
        continue;
      }
      if (a > mid_hi) {
        lo = mid + 1;
        continue;
      }
      return (std::vector<Seg>::size_type)mid;
    }
  }

  void split_at(uintptr_t a) {
    std::vector<Seg>::size_type i = find(a);
    if (mSegs[i].lo == a) {
      return;
    }
    mSegs.insert(mSegs.begin() + i + 1, mSegs[i]);
    mSegs[i].hi = a - 1;
    mSegs[i + 1].lo = a;
  }

  void show() {
    printf("<< %d entries:\n", (int)mSegs.size());
    for (std::vector<Seg>::iterator iter = mSegs.begin(); iter < mSegs.end();
         ++iter) {
      printf("  %016llx  %016llx  %s\n", (unsigned long long int)(*iter).lo,
             (unsigned long long int)(*iter).hi,
             (*iter).val ? "true" : "false");
    }
    printf(">>\n");
  }

  std::vector<Seg> mSegs;
};

////////////////////////////////////////////////////////////////
// PriMap                                                     //
////////////////////////////////////////////////////////////////

class PriMap {
 public:
  explicit PriMap(void (*aLog)(const char*)) : mLog(aLog) {}

  // RUNS IN NO-MALLOC CONTEXT
  pair<const RuleSet*, const vector<PfxInstr>*> Lookup(uintptr_t ia) {
    SecMap* sm = FindSecMap(ia);
    return pair<const RuleSet*, const vector<PfxInstr>*>(
        sm ? sm->FindRuleSet(ia) : nullptr, sm ? sm->GetPfxInstrs() : nullptr);
  }

  // Add a secondary map.  No overlaps allowed w.r.t. existing
  // secondary maps.
  void AddSecMap(mozilla::UniquePtr<SecMap>&& aSecMap) {
    // We can't add an empty SecMap to the PriMap.  But that's OK
    // since we'd never be able to find anything in it anyway.
    if (aSecMap->IsEmpty()) {
      return;
    }

    // Iterate through the SecMaps and find the right place for this
    // one.  At the same time, ensure that the in-order
    // non-overlapping invariant is preserved (and, generally, holds).
    // FIXME: this gives a cost that is O(N^2) in the total number of
    // shared objects in the system.  ToDo: better.
    MOZ_ASSERT(aSecMap->mSummaryMinAddr <= aSecMap->mSummaryMaxAddr);

    size_t num_secMaps = mSecMaps.size();
    uintptr_t i;
    for (i = 0; i < num_secMaps; ++i) {
      mozilla::UniquePtr<SecMap>& sm_i = mSecMaps[i];
      MOZ_ASSERT(sm_i->mSummaryMinAddr <= sm_i->mSummaryMaxAddr);
      if (aSecMap->mSummaryMinAddr < sm_i->mSummaryMaxAddr) {
        // |aSecMap| needs to be inserted immediately before mSecMaps[i].
        break;
      }
    }
    MOZ_ASSERT(i <= num_secMaps);
    if (i == num_secMaps) {
      // It goes at the end.
      mSecMaps.push_back(std::move(aSecMap));
    } else {
      std::vector<mozilla::UniquePtr<SecMap>>::iterator iter =
          mSecMaps.begin() + i;
      mSecMaps.insert(iter, std::move(aSecMap));
    }
    char buf[100];
    SprintfLiteral(buf, "AddSecMap: now have %d SecMaps\n",
                   (int)mSecMaps.size());
    buf[sizeof(buf) - 1] = 0;
    mLog(buf);
  }

  // Remove and delete any SecMaps in the mapping, that intersect
  // with the specified address range.
  void RemoveSecMapsInRange(uintptr_t avma_min, uintptr_t avma_max) {
    MOZ_ASSERT(avma_min <= avma_max);
    size_t num_secMaps = mSecMaps.size();
    if (num_secMaps > 0) {
      intptr_t i;
      // Iterate from end to start over the vector, so as to ensure
      // that the special case where |avma_min| and |avma_max| denote
      // the entire address space, can be completed in time proportional
      // to the number of elements in the map.
      for (i = (intptr_t)num_secMaps - 1; i >= 0; i--) {
        mozilla::UniquePtr<SecMap>& sm_i = mSecMaps[i];
        if (sm_i->mSummaryMaxAddr < avma_min ||
            avma_max < sm_i->mSummaryMinAddr) {
          // There's no overlap.  Move on.
          continue;
        }
        // We need to remove mSecMaps[i] and slide all those above it
        // downwards to cover the hole.
        mSecMaps.erase(mSecMaps.begin() + i);
      }
    }
  }

  // Return the number of currently contained SecMaps.
  size_t CountSecMaps() { return mSecMaps.size(); }

  size_t SizeOfIncludingThis(MallocSizeOf aMallocSizeOf) const {
    size_t n = aMallocSizeOf(this);

    // It's conceivable that this call would be unsafe with some
    // implementations of std::vector, but it seems to be working for now...
    n += aMallocSizeOf(mSecMaps.data());

    for (size_t i = 0; i < mSecMaps.size(); i++) {
      n += mSecMaps[i]->SizeOfIncludingThis(aMallocSizeOf);
    }

    return n;
  }

 private:
  // RUNS IN NO-MALLOC CONTEXT
  SecMap* FindSecMap(uintptr_t ia) {
    // Binary search mSecMaps to find one that brackets |ia|.
    // lo and hi need to be signed, else the loop termination tests
    // don't work properly.
    long int lo = 0;
    long int hi = (long int)mSecMaps.size() - 1;
    while (true) {
      // current unsearched space is from lo to hi, inclusive.
      if (lo > hi) {
        // not found
        return nullptr;
      }
      long int mid = lo + ((hi - lo) / 2);
      mozilla::UniquePtr<SecMap>& mid_secMap = mSecMaps[mid];
      uintptr_t mid_minAddr = mid_secMap->mSummaryMinAddr;
      uintptr_t mid_maxAddr = mid_secMap->mSummaryMaxAddr;
      if (ia < mid_minAddr) {
        hi = mid - 1;
        continue;
      }
      if (ia > mid_maxAddr) {
        lo = mid + 1;
        continue;
      }
      MOZ_ASSERT(mid_minAddr <= ia && ia <= mid_maxAddr);
      return mid_secMap.get();
    }
    // NOTREACHED
  }

 private:
  // sorted array of per-object ranges, non overlapping, non empty
  std::vector<mozilla::UniquePtr<SecMap>> mSecMaps;

  // a logging sink, for debugging.
  void (*mLog)(const char*);
};

////////////////////////////////////////////////////////////////
// LUL                                                        //
////////////////////////////////////////////////////////////////

#define LUL_LOG(_str)                                                          \
  do {                                                                         \
    char buf[200];                                                             \
    SprintfLiteral(buf, "LUL: pid %" PRIu64 " tid %" PRIu64 " lul-obj %p: %s", \
                   uint64_t(profiler_current_process_id().ToNumber()),         \
                   uint64_t(profiler_current_thread_id().ToNumber()), this,    \
                   (_str));                                                    \
    buf[sizeof(buf) - 1] = 0;                                                  \
    mLog(buf);                                                                 \
  } while (0)

LUL::LUL(void (*aLog)(const char*))
    : mLog(aLog),
      mAdminMode(true),
      mAdminThreadId(profiler_current_thread_id()),
      mPriMap(new PriMap(aLog)),
      mSegArray(new SegArray()),
      mUSU(new UniqueStringUniverse()) {
  LUL_LOG("LUL::LUL: Created object");
}

LUL::~LUL() {
  LUL_LOG("LUL::~LUL: Destroyed object");
  delete mPriMap;
  delete mSegArray;
  mLog = nullptr;
  delete mUSU;
}

void LUL::MaybeShowStats() {
  // This is racey in the sense that it can't guarantee that
  //   n_new == n_new_Context + n_new_CFI + n_new_Scanned
  // if it should happen that mStats is updated by some other thread
  // in between computation of n_new and n_new_{Context,CFI,FP}.
  // But it's just stats printing, so we don't really care.
  uint32_t n_new = mStats - mStatsPrevious;
  if (n_new >= 5000) {
    uint32_t n_new_Context = mStats.mContext - mStatsPrevious.mContext;
    uint32_t n_new_CFI = mStats.mCFI - mStatsPrevious.mCFI;
    uint32_t n_new_FP = mStats.mFP - mStatsPrevious.mFP;
    mStatsPrevious = mStats;
    char buf[200];
    SprintfLiteral(buf,
                   "LUL frame stats: TOTAL %5u"
                   "    CTX %4u    CFI %4u    FP %4u",
                   n_new, n_new_Context, n_new_CFI, n_new_FP);
    buf[sizeof(buf) - 1] = 0;
    mLog(buf);
  }
}

size_t LUL::SizeOfIncludingThis(MallocSizeOf aMallocSizeOf) const {
  size_t n = aMallocSizeOf(this);
  n += mPriMap->SizeOfIncludingThis(aMallocSizeOf);

  // Measurement of the following members may be added later if DMD finds it
  // is worthwhile:
  // - mSegArray
  // - mUSU

  return n;
}

void LUL::EnableUnwinding() {
  LUL_LOG("LUL::EnableUnwinding");
  // Don't assert for Admin mode here.  That is, tolerate a call here
  // if we are already in Unwinding mode.
  MOZ_RELEASE_ASSERT(profiler_current_thread_id() == mAdminThreadId);

  mAdminMode = false;
}

void LUL::NotifyAfterMap(uintptr_t aRXavma, size_t aSize, const char* aFileName,
                         const void* aMappedImage) {
  MOZ_RELEASE_ASSERT(mAdminMode);
  MOZ_RELEASE_ASSERT(profiler_current_thread_id() == mAdminThreadId);

  mLog(":\n");
  char buf[200];
  SprintfLiteral(buf, "NotifyMap %llx %llu %s\n",
                 (unsigned long long int)aRXavma, (unsigned long long int)aSize,
                 aFileName);
  buf[sizeof(buf) - 1] = 0;
  mLog(buf);

  // Ignore obviously-stupid notifications.
  if (aSize > 0) {
    // Here's a new mapping, for this object.
    mozilla::UniquePtr<SecMap> smap = mozilla::MakeUnique<SecMap>(mLog);

    // Read CFI or EXIDX unwind data into |smap|.
    if (!aMappedImage) {
      (void)lul::ReadSymbolData(string(aFileName), std::vector<string>(),
                                smap.get(), (void*)aRXavma, aSize, mUSU, mLog);
    } else {
      (void)lul::ReadSymbolDataInternal(
          (const uint8_t*)aMappedImage, string(aFileName),
          std::vector<string>(), smap.get(), (void*)aRXavma, aSize, mUSU, mLog);
    }

    mLog("NotifyMap .. preparing entries\n");

    smap->PrepareRuleSets(aRXavma, aSize);

    SprintfLiteral(buf, "NotifyMap got %lld entries\n",
                   (long long int)smap->Size());
    buf[sizeof(buf) - 1] = 0;
    mLog(buf);

    // Add it to the primary map (the top level set of mapped objects).
    mPriMap->AddSecMap(std::move(smap));

    // Tell the segment array about the mapping, so that the stack
    // scan and __kernel_syscall mechanisms know where valid code is.
    mSegArray->add(aRXavma, aRXavma + aSize - 1, true);
  }
}

void LUL::NotifyExecutableArea(uintptr_t aRXavma, size_t aSize) {
  MOZ_RELEASE_ASSERT(mAdminMode);
  MOZ_RELEASE_ASSERT(profiler_current_thread_id() == mAdminThreadId);

  mLog(":\n");
  char buf[200];
  SprintfLiteral(buf, "NotifyExecutableArea %llx %llu\n",
                 (unsigned long long int)aRXavma,
                 (unsigned long long int)aSize);
  buf[sizeof(buf) - 1] = 0;
  mLog(buf);

  // Ignore obviously-stupid notifications.
  if (aSize > 0) {
    // Tell the segment array about the mapping, so that the stack
    // scan and __kernel_syscall mechanisms know where valid code is.
    mSegArray->add(aRXavma, aRXavma + aSize - 1, true);
  }
}

void LUL::NotifyBeforeUnmap(uintptr_t aRXavmaMin, uintptr_t aRXavmaMax) {
  MOZ_RELEASE_ASSERT(mAdminMode);
  MOZ_RELEASE_ASSERT(profiler_current_thread_id() == mAdminThreadId);

  mLog(":\n");
  char buf[100];
  SprintfLiteral(buf, "NotifyUnmap %016llx-%016llx\n",
                 (unsigned long long int)aRXavmaMin,
                 (unsigned long long int)aRXavmaMax);
  buf[sizeof(buf) - 1] = 0;
  mLog(buf);

  MOZ_ASSERT(aRXavmaMin <= aRXavmaMax);

  // Remove from the primary map, any secondary maps that intersect
  // with the address range.  Also delete the secondary maps.
  mPriMap->RemoveSecMapsInRange(aRXavmaMin, aRXavmaMax);

  // Tell the segment array that the address range no longer
  // contains valid code.
  mSegArray->add(aRXavmaMin, aRXavmaMax, false);

  SprintfLiteral(buf, "NotifyUnmap: now have %d SecMaps\n",
                 (int)mPriMap->CountSecMaps());
  buf[sizeof(buf) - 1] = 0;
  mLog(buf);
}

size_t LUL::CountMappings() {
  MOZ_RELEASE_ASSERT(mAdminMode);
  MOZ_RELEASE_ASSERT(profiler_current_thread_id() == mAdminThreadId);

  return mPriMap->CountSecMaps();
}

// RUNS IN NO-MALLOC CONTEXT
static TaggedUWord DerefTUW(TaggedUWord aAddr, const StackImage* aStackImg) {
  if (!aAddr.Valid()) {
    return TaggedUWord();
  }

  // Lower limit check.  |aAddr.Value()| is the lowest requested address
  // and |aStackImg->mStartAvma| is the lowest address we actually have,
  // so the comparison is straightforward.
  if (aAddr.Value() < aStackImg->mStartAvma) {
    return TaggedUWord();
  }

  // Upper limit check.  We must compute the highest requested address
  // and the highest address we actually have, but being careful to
  // avoid overflow.  In particular if |aAddr| is 0xFFF...FFF or the
  // 3/7 values below that, then we will get overflow.  See bug #1245477.
  typedef CheckedInt<uintptr_t> CheckedUWord;
  CheckedUWord highest_requested_plus_one =
      CheckedUWord(aAddr.Value()) + CheckedUWord(sizeof(uintptr_t));
  CheckedUWord highest_available_plus_one =
      CheckedUWord(aStackImg->mStartAvma) + CheckedUWord(aStackImg->mLen);
  if (!highest_requested_plus_one.isValid()     // overflow?
      || !highest_available_plus_one.isValid()  // overflow?
      || (highest_requested_plus_one.value() >
          highest_available_plus_one.value())) {  // in range?
    return TaggedUWord();
  }

  return TaggedUWord(
      *(uintptr_t*)(&aStackImg
                         ->mContents[aAddr.Value() - aStackImg->mStartAvma]));
}

// RUNS IN NO-MALLOC CONTEXT
static TaggedUWord EvaluateReg(int16_t aReg, const UnwindRegs* aOldRegs,
                               TaggedUWord aCFA) {
  switch (aReg) {
    case DW_REG_CFA:
      return aCFA;
#if defined(GP_ARCH_amd64) || defined(GP_ARCH_x86)
    case DW_REG_INTEL_XBP:
      return aOldRegs->xbp;
    case DW_REG_INTEL_XSP:
      return aOldRegs->xsp;
    case DW_REG_INTEL_XIP:
      return aOldRegs->xip;
#elif defined(GP_ARCH_arm)
    case DW_REG_ARM_R7:
      return aOldRegs->r7;
    case DW_REG_ARM_R11:
      return aOldRegs->r11;
    case DW_REG_ARM_R12:
      return aOldRegs->r12;
    case DW_REG_ARM_R13:
      return aOldRegs->r13;
    case DW_REG_ARM_R14:
      return aOldRegs->r14;
    case DW_REG_ARM_R15:
      return aOldRegs->r15;
#elif defined(GP_ARCH_arm64)
    case DW_REG_AARCH64_X29:
      return aOldRegs->x29;
    case DW_REG_AARCH64_X30:
      return aOldRegs->x30;
    case DW_REG_AARCH64_SP:
      return aOldRegs->sp;
#elif defined(GP_ARCH_mips64)
    case DW_REG_MIPS_SP:
      return aOldRegs->sp;
    case DW_REG_MIPS_FP:
      return aOldRegs->fp;
    case DW_REG_MIPS_PC:
      return aOldRegs->pc;
#else
#  error "Unsupported arch"
#endif
    default:
      MOZ_ASSERT(0);
      return TaggedUWord();
  }
}

// RUNS IN NO-MALLOC CONTEXT
// See prototype for comment.
TaggedUWord EvaluatePfxExpr(int32_t start, const UnwindRegs* aOldRegs,
                            TaggedUWord aCFA, const StackImage* aStackImg,
                            const vector<PfxInstr>& aPfxInstrs) {
  // A small evaluation stack, and a stack pointer, which points to
  // the highest numbered in-use element.
  const int N_STACK = 10;
  TaggedUWord stack[N_STACK];
  int stackPointer = -1;
  for (int i = 0; i < N_STACK; i++) stack[i] = TaggedUWord();

#define PUSH(_tuw)                                             \
  do {                                                         \
    if (stackPointer >= N_STACK - 1) goto fail; /* overflow */ \
    stack[++stackPointer] = (_tuw);                            \
  } while (0)

#define POP(_lval)                                   \
  do {                                               \
    if (stackPointer < 0) goto fail; /* underflow */ \
    _lval = stack[stackPointer--];                   \
  } while (0)

  // Cursor in the instruction sequence.
  size_t curr = start + 1;

  // Check the start point is sane.
  size_t nInstrs = aPfxInstrs.size();
  if (start < 0 || (size_t)start >= nInstrs) goto fail;

  {
    // The instruction sequence must start with PX_Start.  If not,
    // something is seriously wrong.
    PfxInstr first = aPfxInstrs[start];
    if (first.mOpcode != PX_Start) goto fail;

    // Push the CFA on the stack to start with (or not), as required by
    // the original DW_OP_*expression* CFI.
    if (first.mOperand != 0) PUSH(aCFA);
  }

  while (true) {
    if (curr >= nInstrs) goto fail;  // ran off the end of the sequence

    PfxInstr pfxi = aPfxInstrs[curr++];
    if (pfxi.mOpcode == PX_End) break;  // we're done

    switch (pfxi.mOpcode) {
      case PX_Start:
        // This should appear only at the start of the sequence.
        goto fail;
      case PX_End:
        // We just took care of that, so we shouldn't see it again.
        MOZ_ASSERT(0);
        goto fail;
      case PX_SImm32:
        PUSH(TaggedUWord((intptr_t)pfxi.mOperand));
        break;
      case PX_DwReg: {
        DW_REG_NUMBER reg = (DW_REG_NUMBER)pfxi.mOperand;
        MOZ_ASSERT(reg != DW_REG_CFA);
        PUSH(EvaluateReg(reg, aOldRegs, aCFA));
        break;
      }
      case PX_Deref: {
        TaggedUWord addr;
        POP(addr);
        PUSH(DerefTUW(addr, aStackImg));
        break;
      }
      case PX_Add: {
        TaggedUWord x, y;
        POP(x);
        POP(y);
        PUSH(y + x);
        break;
      }
      case PX_Sub: {
        TaggedUWord x, y;
        POP(x);
        POP(y);
        PUSH(y - x);
        break;
      }
      case PX_And: {
        TaggedUWord x, y;
        POP(x);
        POP(y);
        PUSH(y & x);
        break;
      }
      case PX_Or: {
        TaggedUWord x, y;
        POP(x);
        POP(y);
        PUSH(y | x);
        break;
      }
      case PX_CmpGES: {
        TaggedUWord x, y;
        POP(x);
        POP(y);
        PUSH(y.CmpGEs(x));
        break;
      }
      case PX_Shl: {
        TaggedUWord x, y;
        POP(x);
        POP(y);
        PUSH(y << x);
        break;
      }
      default:
        MOZ_ASSERT(0);
        goto fail;
    }
  }  // while (true)

  // Evaluation finished.  The top value on the stack is the result.
  if (stackPointer >= 0) {
    return stack[stackPointer];
  }
  // Else fall through

fail:
  return TaggedUWord();

#undef PUSH
#undef POP
}

// RUNS IN NO-MALLOC CONTEXT
TaggedUWord LExpr::EvaluateExpr(const UnwindRegs* aOldRegs, TaggedUWord aCFA,
                                const StackImage* aStackImg,
                                const vector<PfxInstr>* aPfxInstrs) const {
  switch (mHow) {
    case UNKNOWN:
      return TaggedUWord();
    case NODEREF: {
      TaggedUWord tuw = EvaluateReg(mReg, aOldRegs, aCFA);
      tuw = tuw + TaggedUWord((intptr_t)mOffset);
      return tuw;
    }
    case DEREF: {
      TaggedUWord tuw = EvaluateReg(mReg, aOldRegs, aCFA);
      tuw = tuw + TaggedUWord((intptr_t)mOffset);
      return DerefTUW(tuw, aStackImg);
    }
    case PFXEXPR: {
      MOZ_ASSERT(aPfxInstrs);
      if (!aPfxInstrs) {
        return TaggedUWord();
      }
      return EvaluatePfxExpr(mOffset, aOldRegs, aCFA, aStackImg, *aPfxInstrs);
    }
    default:
      MOZ_ASSERT(0);
      return TaggedUWord();
  }
}

// RUNS IN NO-MALLOC CONTEXT
static void UseRuleSet(/*MOD*/ UnwindRegs* aRegs, const StackImage* aStackImg,
                       const RuleSet* aRS, const vector<PfxInstr>* aPfxInstrs) {
  // Take a copy of regs, since we'll need to refer to the old values
  // whilst computing the new ones.
  UnwindRegs old_regs = *aRegs;

  // Mark all the current register values as invalid, so that the
  // caller can see, on our return, which ones have been computed
  // anew.  If we don't even manage to compute a new PC value, then
  // the caller will have to abandon the unwind.
  // FIXME: Create and use instead: aRegs->SetAllInvalid();
#if defined(GP_ARCH_amd64) || defined(GP_ARCH_x86)
  aRegs->xbp = TaggedUWord();
  aRegs->xsp = TaggedUWord();
  aRegs->xip = TaggedUWord();
#elif defined(GP_ARCH_arm)
  aRegs->r7 = TaggedUWord();
  aRegs->r11 = TaggedUWord();
  aRegs->r12 = TaggedUWord();
  aRegs->r13 = TaggedUWord();
  aRegs->r14 = TaggedUWord();
  aRegs->r15 = TaggedUWord();
#elif defined(GP_ARCH_arm64)
  aRegs->x29 = TaggedUWord();
  aRegs->x30 = TaggedUWord();
  aRegs->sp = TaggedUWord();
  aRegs->pc = TaggedUWord();
#elif defined(GP_ARCH_mips64)
  aRegs->sp = TaggedUWord();
  aRegs->fp = TaggedUWord();
  aRegs->pc = TaggedUWord();
#else
#  error "Unsupported arch"
#endif

  // This is generally useful.
  const TaggedUWord inval = TaggedUWord();

  // First, compute the CFA.
  TaggedUWord cfa = aRS->mCfaExpr.EvaluateExpr(&old_regs, inval /*old cfa*/,
                                               aStackImg, aPfxInstrs);

  // If we didn't manage to compute the CFA, well .. that's ungood,
  // but keep going anyway.  It'll be OK provided none of the register
  // value rules mention the CFA.  In any case, compute the new values
  // for each register that we're tracking.

#if defined(GP_ARCH_amd64) || defined(GP_ARCH_x86)
  aRegs->xbp =
      aRS->mXbpExpr.EvaluateExpr(&old_regs, cfa, aStackImg, aPfxInstrs);
  aRegs->xsp =
      aRS->mXspExpr.EvaluateExpr(&old_regs, cfa, aStackImg, aPfxInstrs);
  aRegs->xip =
      aRS->mXipExpr.EvaluateExpr(&old_regs, cfa, aStackImg, aPfxInstrs);
#elif defined(GP_ARCH_arm)
  aRegs->r7 = aRS->mR7expr.EvaluateExpr(&old_regs, cfa, aStackImg, aPfxInstrs);
  aRegs->r11 =
      aRS->mR11expr.EvaluateExpr(&old_regs, cfa, aStackImg, aPfxInstrs);
  aRegs->r12 =
      aRS->mR12expr.EvaluateExpr(&old_regs, cfa, aStackImg, aPfxInstrs);
  aRegs->r13 =
      aRS->mR13expr.EvaluateExpr(&old_regs, cfa, aStackImg, aPfxInstrs);
  aRegs->r14 =
      aRS->mR14expr.EvaluateExpr(&old_regs, cfa, aStackImg, aPfxInstrs);
  aRegs->r15 =
      aRS->mR15expr.EvaluateExpr(&old_regs, cfa, aStackImg, aPfxInstrs);
#elif defined(GP_ARCH_arm64)
  aRegs->x29 =
      aRS->mX29expr.EvaluateExpr(&old_regs, cfa, aStackImg, aPfxInstrs);
  aRegs->x30 =
      aRS->mX30expr.EvaluateExpr(&old_regs, cfa, aStackImg, aPfxInstrs);
  aRegs->sp = aRS->mSPexpr.EvaluateExpr(&old_regs, cfa, aStackImg, aPfxInstrs);
#elif defined(GP_ARCH_mips64)
  aRegs->sp = aRS->mSPexpr.EvaluateExpr(&old_regs, cfa, aStackImg, aPfxInstrs);
  aRegs->fp = aRS->mFPexpr.EvaluateExpr(&old_regs, cfa, aStackImg, aPfxInstrs);
  aRegs->pc = aRS->mPCexpr.EvaluateExpr(&old_regs, cfa, aStackImg, aPfxInstrs);
#else
#  error "Unsupported arch"
#endif

  // We're done.  Any regs for which we didn't manage to compute a
  // new value will now be marked as invalid.
}

// RUNS IN NO-MALLOC CONTEXT
void LUL::Unwind(/*OUT*/ uintptr_t* aFramePCs,
                 /*OUT*/ uintptr_t* aFrameSPs,
                 /*OUT*/ size_t* aFramesUsed,
                 /*OUT*/ size_t* aFramePointerFramesAcquired,
                 size_t aFramesAvail, UnwindRegs* aStartRegs,
                 StackImage* aStackImg) {
  MOZ_RELEASE_ASSERT(!mAdminMode);

  /////////////////////////////////////////////////////////
  // BEGIN UNWIND

  *aFramesUsed = 0;

  UnwindRegs regs = *aStartRegs;
  TaggedUWord last_valid_sp = TaggedUWord();

  while (true) {
    if (DEBUG_MAIN) {
      char buf[300];
      mLog("\n");
#if defined(GP_ARCH_amd64) || defined(GP_ARCH_x86)
      SprintfLiteral(
          buf, "LoopTop: rip %d/%llx  rsp %d/%llx  rbp %d/%llx\n",
          (int)regs.xip.Valid(), (unsigned long long int)regs.xip.Value(),
          (int)regs.xsp.Valid(), (unsigned long long int)regs.xsp.Value(),
          (int)regs.xbp.Valid(), (unsigned long long int)regs.xbp.Value());
      buf[sizeof(buf) - 1] = 0;
      mLog(buf);
#elif defined(GP_ARCH_arm)
      SprintfLiteral(
          buf,
          "LoopTop: r15 %d/%llx  r7 %d/%llx  r11 %d/%llx"
          "  r12 %d/%llx  r13 %d/%llx  r14 %d/%llx\n",
          (int)regs.r15.Valid(), (unsigned long long int)regs.r15.Value(),
          (int)regs.r7.Valid(), (unsigned long long int)regs.r7.Value(),
          (int)regs.r11.Valid(), (unsigned long long int)regs.r11.Value(),
          (int)regs.r12.Valid(), (unsigned long long int)regs.r12.Value(),
          (int)regs.r13.Valid(), (unsigned long long int)regs.r13.Value(),
          (int)regs.r14.Valid(), (unsigned long long int)regs.r14.Value());
      buf[sizeof(buf) - 1] = 0;
      mLog(buf);
#elif defined(GP_ARCH_arm64)
      SprintfLiteral(
          buf,
          "LoopTop: pc %d/%llx  x29 %d/%llx  x30 %d/%llx"
          "  sp %d/%llx\n",
          (int)regs.pc.Valid(), (unsigned long long int)regs.pc.Value(),
          (int)regs.x29.Valid(), (unsigned long long int)regs.x29.Value(),
          (int)regs.x30.Valid(), (unsigned long long int)regs.x30.Value(),
          (int)regs.sp.Valid(), (unsigned long long int)regs.sp.Value());
      buf[sizeof(buf) - 1] = 0;
      mLog(buf);
#elif defined(GP_ARCH_mips64)
      SprintfLiteral(
          buf, "LoopTop: pc %d/%llx  sp %d/%llx  fp %d/%llx\n",
          (int)regs.pc.Valid(), (unsigned long long int)regs.pc.Value(),
          (int)regs.sp.Valid(), (unsigned long long int)regs.sp.Value(),
          (int)regs.fp.Valid(), (unsigned long long int)regs.fp.Value());
      buf[sizeof(buf) - 1] = 0;
      mLog(buf);
#else
#  error "Unsupported arch"
#endif
    }

#if defined(GP_ARCH_amd64) || defined(GP_ARCH_x86)
    TaggedUWord ia = regs.xip;
    TaggedUWord sp = regs.xsp;
#elif defined(GP_ARCH_arm)
    TaggedUWord ia = (*aFramesUsed == 0 ? regs.r15 : regs.r14);
    TaggedUWord sp = regs.r13;
#elif defined(GP_ARCH_arm64)
    TaggedUWord ia = (*aFramesUsed == 0 ? regs.pc : regs.x30);
    TaggedUWord sp = regs.sp;
#elif defined(GP_ARCH_mips64)
    TaggedUWord ia = regs.pc;
    TaggedUWord sp = regs.sp;
#else
#  error "Unsupported arch"
#endif

    if (*aFramesUsed >= aFramesAvail) {
      break;
    }

    // If we don't have a valid value for the PC, give up.
    if (!ia.Valid()) {
      break;
    }

    // If this is the innermost frame, record the SP value, which
    // presumably is valid.  If this isn't the innermost frame, and we
    // have a valid SP value, check that its SP value isn't less that
    // the one we've seen so far, so as to catch potential SP value
    // cycles.
    if (*aFramesUsed == 0) {
      last_valid_sp = sp;
    } else {
      MOZ_ASSERT(last_valid_sp.Valid());
      if (sp.Valid()) {
        if (sp.Value() < last_valid_sp.Value()) {
          // Hmm, SP going in the wrong direction.  Let's stop.
          break;
        }
        // Remember where we got to.
        last_valid_sp = sp;
      }
    }

    // For the innermost frame, the IA value is what we need.  For all
    // other frames, it's actually the return address, so back up one
    // byte so as to get it into the calling instruction.
    aFramePCs[*aFramesUsed] = ia.Value() - (*aFramesUsed == 0 ? 0 : 1);
    aFrameSPs[*aFramesUsed] = sp.Valid() ? sp.Value() : 0;
    (*aFramesUsed)++;

    // Find the RuleSet for the current IA, if any.  This will also
    // query the backing (secondary) maps if it isn't found in the
    // thread-local cache.

    // If this isn't the innermost frame, back up into the calling insn.
    if (*aFramesUsed > 1) {
      ia = ia + TaggedUWord((uintptr_t)(-1));
    }

    pair<const RuleSet*, const vector<PfxInstr>*> ruleset_and_pfxinstrs =
        mPriMap->Lookup(ia.Value());
    const RuleSet* ruleset = ruleset_and_pfxinstrs.first;
    const vector<PfxInstr>* pfxinstrs = ruleset_and_pfxinstrs.second;

    if (DEBUG_MAIN) {
      char buf[100];
      SprintfLiteral(buf, "ruleset for 0x%llx = %p\n",
                     (unsigned long long int)ia.Value(), ruleset);
      buf[sizeof(buf) - 1] = 0;
      mLog(buf);
    }

#if defined(GP_PLAT_x86_android) || defined(GP_PLAT_x86_linux)
    /////////////////////////////////////////////
    ////
    // On 32 bit x86-linux, syscalls are often done via the VDSO
    // function __kernel_vsyscall, which doesn't have a corresponding
    // object that we can read debuginfo from.  That effectively kills
    // off all stack traces for threads blocked in syscalls.  Hence
    // special-case by looking at the code surrounding the program
    // counter.
    //
    // 0xf7757420 <__kernel_vsyscall+0>:	push   %ecx
    // 0xf7757421 <__kernel_vsyscall+1>:	push   %edx
    // 0xf7757422 <__kernel_vsyscall+2>:	push   %ebp
    // 0xf7757423 <__kernel_vsyscall+3>:	mov    %esp,%ebp
    // 0xf7757425 <__kernel_vsyscall+5>:	sysenter
    // 0xf7757427 <__kernel_vsyscall+7>:	nop
    // 0xf7757428 <__kernel_vsyscall+8>:	nop
    // 0xf7757429 <__kernel_vsyscall+9>:	nop
    // 0xf775742a <__kernel_vsyscall+10>:	nop
    // 0xf775742b <__kernel_vsyscall+11>:	nop
    // 0xf775742c <__kernel_vsyscall+12>:	nop
    // 0xf775742d <__kernel_vsyscall+13>:	nop
    // 0xf775742e <__kernel_vsyscall+14>:	int    $0x80
    // 0xf7757430 <__kernel_vsyscall+16>:	pop    %ebp
    // 0xf7757431 <__kernel_vsyscall+17>:	pop    %edx
    // 0xf7757432 <__kernel_vsyscall+18>:	pop    %ecx
    // 0xf7757433 <__kernel_vsyscall+19>:	ret
    //
    // In cases where the sampled thread is blocked in a syscall, its
    // program counter will point at "pop %ebp".  Hence we look for
    // the sequence "int $0x80; pop %ebp; pop %edx; pop %ecx; ret", and
    // the corresponding register-recovery actions are:
    //    new_ebp = *(old_esp + 0)
    //    new eip = *(old_esp + 12)
    //    new_esp = old_esp + 16
    //
    // It may also be the case that the program counter points two
    // nops before the "int $0x80", viz, is __kernel_vsyscall+12, in
    // the case where the syscall has been restarted but the thread
    // hasn't been rescheduled.  The code below doesn't handle that;
    // it could easily be made to.
    //
    if (!ruleset && *aFramesUsed == 1 && ia.Valid() && sp.Valid()) {
      uintptr_t insns_min, insns_max;
      uintptr_t eip = ia.Value();
      bool b = mSegArray->getBoundingCodeSegment(&insns_min, &insns_max, eip);
      if (b && eip - 2 >= insns_min && eip + 3 <= insns_max) {
        uint8_t* eipC = (uint8_t*)eip;
        if (eipC[-2] == 0xCD && eipC[-1] == 0x80 && eipC[0] == 0x5D &&
            eipC[1] == 0x5A && eipC[2] == 0x59 && eipC[3] == 0xC3) {
          TaggedUWord sp_plus_0 = sp;
          TaggedUWord sp_plus_12 = sp;
          TaggedUWord sp_plus_16 = sp;
          sp_plus_12 = sp_plus_12 + TaggedUWord(12);
          sp_plus_16 = sp_plus_16 + TaggedUWord(16);
          TaggedUWord new_ebp = DerefTUW(sp_plus_0, aStackImg);
          TaggedUWord new_eip = DerefTUW(sp_plus_12, aStackImg);
          TaggedUWord new_esp = sp_plus_16;
          if (new_ebp.Valid() && new_eip.Valid() && new_esp.Valid()) {
            regs.xbp = new_ebp;
            regs.xip = new_eip;
            regs.xsp = new_esp;
            continue;
          }
        }
      }
    }
    ////
    /////////////////////////////////////////////
#endif  // defined(GP_PLAT_x86_android) || defined(GP_PLAT_x86_linux)

    // So, do we have a ruleset for this address?  If so, use it now.
    if (ruleset) {
      if (DEBUG_MAIN) {
        ruleset->Print(mLog);
        mLog("\n");
      }
      // Use the RuleSet to compute the registers for the previous
      // frame.  |regs| is modified in-place.
      UseRuleSet(&regs, aStackImg, ruleset, pfxinstrs);
      continue;
    }

#if defined(GP_PLAT_amd64_linux) || defined(GP_PLAT_x86_linux) ||     \
    defined(GP_PLAT_amd64_android) || defined(GP_PLAT_x86_android) || \
    defined(GP_PLAT_amd64_freebsd)
    // There's no RuleSet for the specified address.  On amd64/x86_linux, see if
    // it's possible to recover the caller's frame by using the frame pointer.

    // We seek to compute (new_IP, new_SP, new_BP) from (old_BP, stack image),
    // and assume the following layout:
    //
    //                 <--- new_SP
    //   +----------+
    //   |  new_IP  |  (return address)
    //   +----------+
    //   |  new_BP  |  <--- old_BP
    //   +----------+
    //   |   ....   |
    //   |   ....   |
    //   |   ....   |
    //   +----------+  <---- old_SP (arbitrary, but must be <= old_BP)

    const size_t wordSzB = sizeof(uintptr_t);
    TaggedUWord old_xsp = regs.xsp;

    // points at new_BP ?
    TaggedUWord old_xbp = regs.xbp;
    // points at new_IP ?
    TaggedUWord old_xbp_plus1 = regs.xbp + TaggedUWord(1 * wordSzB);
    // is the new_SP ?
    TaggedUWord old_xbp_plus2 = regs.xbp + TaggedUWord(2 * wordSzB);

    if (old_xbp.Valid() && old_xbp.IsAligned() && old_xsp.Valid() &&
        old_xsp.IsAligned() && old_xsp.Value() <= old_xbp.Value()) {
      // We don't need to do any range, alignment or validity checks for
      // addresses passed to DerefTUW, since that performs them itself, and
      // returns an invalid value on failure.  Any such value will poison
      // subsequent uses, and we do a final check for validity before putting
      // the computed values into |regs|.
      TaggedUWord new_xbp = DerefTUW(old_xbp, aStackImg);
      if (new_xbp.Valid() && new_xbp.IsAligned() &&
          old_xbp.Value() < new_xbp.Value()) {
        TaggedUWord new_xip = DerefTUW(old_xbp_plus1, aStackImg);
        TaggedUWord new_xsp = old_xbp_plus2;
        if (new_xbp.Valid() && new_xip.Valid() && new_xsp.Valid()) {
          regs.xbp = new_xbp;
          regs.xip = new_xip;
          regs.xsp = new_xsp;
          (*aFramePointerFramesAcquired)++;
          continue;
        }
      }
    }
#elif defined(GP_ARCH_arm64)
    // Here is an example of generated code for prologue and epilogue..
    //
    // stp     x29, x30, [sp, #-16]!
    // mov     x29, sp
    // ...
    // ldp     x29, x30, [sp], #16
    // ret
    //
    // Next is another example of generated code.
    //
    // stp     x20, x19, [sp, #-32]!
    // stp     x29, x30, [sp, #16]
    // add     x29, sp, #0x10
    // ...
    // ldp     x29, x30, [sp, #16]
    // ldp     x20, x19, [sp], #32
    // ret
    //
    // Previous x29 and x30 register are stored in the address of x29 register.
    // But since sp register value depends on local variables, we cannot compute
    // previous sp register from current sp/fp/lr register and there is no
    // regular rule for sp register in prologue. But since return address is lr
    // register, if x29 is valid, we will get return address without sp
    // register.
    //
    // So we assume the following layout that if no rule set. x29 is frame
    // pointer, so we will be able to compute x29 and x30 .
    //
    //   +----------+  <--- new_sp (cannot compute)
    //   |   ....   |
    //   +----------+
    //   |  new_lr  |  (return address)
    //   +----------+
    //   |  new_fp  |  <--- old_fp
    //   +----------+
    //   |   ....   |
    //   |   ....   |
    //   +----------+  <---- old_sp (arbitrary, but unused)

    TaggedUWord old_fp = regs.x29;
    if (old_fp.Valid() && old_fp.IsAligned() && last_valid_sp.Valid() &&
        last_valid_sp.Value() <= old_fp.Value()) {
      TaggedUWord new_fp = DerefTUW(old_fp, aStackImg);
      if (new_fp.Valid() && new_fp.IsAligned() &&
          old_fp.Value() < new_fp.Value()) {
        TaggedUWord old_fp_plus1 = old_fp + TaggedUWord(8);
        TaggedUWord new_lr = DerefTUW(old_fp_plus1, aStackImg);
        if (new_lr.Valid()) {
          regs.x29 = new_fp;
          regs.x30 = new_lr;
          // When using frame pointer to walk stack, we cannot compute sp
          // register since we cannot compute sp register from fp/lr/sp
          // register, and there is no regular rule to compute previous sp
          // register. So mark as invalid.
          regs.sp = TaggedUWord();
          (*aFramePointerFramesAcquired)++;
          continue;
        }
      }
    }
#endif  // defined(GP_PLAT_amd64_linux) || defined(GP_PLAT_x86_linux) ||
        // defined(GP_PLAT_amd64_android) || defined(GP_PLAT_x86_android)

    // We failed to recover a frame either using CFI or FP chasing, and we
    // have no other ways to recover the frame.  So we have to give up.
    break;

  }  // top level unwind loop

  // END UNWIND
  /////////////////////////////////////////////////////////
}

////////////////////////////////////////////////////////////////
// LUL Unit Testing                                           //
////////////////////////////////////////////////////////////////

static const int LUL_UNIT_TEST_STACK_SIZE = 32768;

#if defined(GP_ARCH_mips64)
static __attribute__((noinline)) unsigned long __getpc(void) {
  unsigned long rtaddr;
  __asm__ volatile("move %0, $31" : "=r"(rtaddr));
  return rtaddr;
}
#endif

// This function is innermost in the test call sequence.  It uses LUL
// to unwind, and compares the result with the sequence specified in
// the director string.  These need to agree in order for the test to
// pass.  In order not to screw up the results, this function needs
// to have a not-very big stack frame, since we're only presenting
// the innermost LUL_UNIT_TEST_STACK_SIZE bytes of stack to LUL, and
// that chunk unavoidably includes the frame for this function.
//
// This function must not be inlined into its callers.  Doing so will
// cause the expected-vs-actual backtrace consistency checking to
// fail.  Prints summary results to |aLUL|'s logging sink and also
// returns a boolean indicating whether or not the test passed.
static __attribute__((noinline)) bool GetAndCheckStackTrace(
    LUL* aLUL, const char* dstring) {
  // Get hold of the current unwind-start registers.
  UnwindRegs startRegs;
  memset(&startRegs, 0, sizeof(startRegs));
#if defined(GP_ARCH_amd64)
  volatile uintptr_t block[3];
  MOZ_ASSERT(sizeof(block) == 24);
  __asm__ __volatile__(
      "leaq 0(%%rip), %%r15"
      "\n\t"
      "movq %%r15, 0(%0)"
      "\n\t"
      "movq %%rsp, 8(%0)"
      "\n\t"
      "movq %%rbp, 16(%0)"
      "\n"
      :
      : "r"(&block[0])
      : "memory", "r15");
  startRegs.xip = TaggedUWord(block[0]);
  startRegs.xsp = TaggedUWord(block[1]);
  startRegs.xbp = TaggedUWord(block[2]);
  const uintptr_t REDZONE_SIZE = 128;
  uintptr_t start = block[1] - REDZONE_SIZE;
#elif defined(GP_PLAT_x86_linux) || defined(GP_PLAT_x86_android)
  volatile uintptr_t block[3];
  MOZ_ASSERT(sizeof(block) == 12);
  __asm__ __volatile__(
      ".byte 0xE8,0x00,0x00,0x00,0x00" /*call next insn*/
      "\n\t"
      "popl %%edi"
      "\n\t"
      "movl %%edi, 0(%0)"
      "\n\t"
      "movl %%esp, 4(%0)"
      "\n\t"
      "movl %%ebp, 8(%0)"
      "\n"
      :
      : "r"(&block[0])
      : "memory", "edi");
  startRegs.xip = TaggedUWord(block[0]);
  startRegs.xsp = TaggedUWord(block[1]);
  startRegs.xbp = TaggedUWord(block[2]);
  const uintptr_t REDZONE_SIZE = 0;
  uintptr_t start = block[1] - REDZONE_SIZE;
#elif defined(GP_PLAT_arm_linux) || defined(GP_PLAT_arm_android)
  volatile uintptr_t block[6];
  MOZ_ASSERT(sizeof(block) == 24);
  __asm__ __volatile__(
      "mov r0, r15"
      "\n\t"
      "str r0,  [%0, #0]"
      "\n\t"
      "str r14, [%0, #4]"
      "\n\t"
      "str r13, [%0, #8]"
      "\n\t"
      "str r12, [%0, #12]"
      "\n\t"
      "str r11, [%0, #16]"
      "\n\t"
      "str r7,  [%0, #20]"
      "\n"
      :
      : "r"(&block[0])
      : "memory", "r0");
  startRegs.r15 = TaggedUWord(block[0]);
  startRegs.r14 = TaggedUWord(block[1]);
  startRegs.r13 = TaggedUWord(block[2]);
  startRegs.r12 = TaggedUWord(block[3]);
  startRegs.r11 = TaggedUWord(block[4]);
  startRegs.r7 = TaggedUWord(block[5]);
  const uintptr_t REDZONE_SIZE = 0;
  uintptr_t start = block[1] - REDZONE_SIZE;
#elif defined(GP_ARCH_arm64)
  volatile uintptr_t block[4];
  MOZ_ASSERT(sizeof(block) == 32);
  __asm__ __volatile__(
      "adr x0, . \n\t"
      "str x0, [%0, #0] \n\t"
      "str x29, [%0, #8] \n\t"
      "str x30, [%0, #16] \n\t"
      "mov x0, sp \n\t"
      "str x0, [%0, #24] \n\t"
      :
      : "r"(&block[0])
      : "memory", "x0");
  startRegs.pc = TaggedUWord(block[0]);
  startRegs.x29 = TaggedUWord(block[1]);
  startRegs.x30 = TaggedUWord(block[2]);
  startRegs.sp = TaggedUWord(block[3]);
  const uintptr_t REDZONE_SIZE = 0;
  uintptr_t start = block[1] - REDZONE_SIZE;
#elif defined(GP_ARCH_mips64)
  volatile uintptr_t block[3];
  MOZ_ASSERT(sizeof(block) == 24);
  __asm__ __volatile__(
      "sd $29, 8(%0)     \n"
      "sd $30, 16(%0)    \n"
      :
      : "r"(block)
      : "memory");
  block[0] = __getpc();
  startRegs.pc = TaggedUWord(block[0]);
  startRegs.sp = TaggedUWord(block[1]);
  startRegs.fp = TaggedUWord(block[2]);
  const uintptr_t REDZONE_SIZE = 0;
  uintptr_t start = block[1] - REDZONE_SIZE;
#else
#  error "Unsupported platform"
#endif

  // Get hold of the innermost LUL_UNIT_TEST_STACK_SIZE bytes of the
  // stack.
  uintptr_t end = start + LUL_UNIT_TEST_STACK_SIZE;
  uintptr_t ws = sizeof(void*);
  start &= ~(ws - 1);
  end &= ~(ws - 1);
  uintptr_t nToCopy = end - start;
  if (nToCopy > lul::N_STACK_BYTES) {
    nToCopy = lul::N_STACK_BYTES;
  }
  MOZ_ASSERT(nToCopy <= lul::N_STACK_BYTES);
  StackImage* stackImg = new StackImage();
  stackImg->mLen = nToCopy;
  stackImg->mStartAvma = start;
  if (nToCopy > 0) {
    MOZ_MAKE_MEM_DEFINED((void*)start, nToCopy);
    memcpy(&stackImg->mContents[0], (void*)start, nToCopy);
  }

  // Unwind it.
  const int MAX_TEST_FRAMES = 64;
  uintptr_t framePCs[MAX_TEST_FRAMES];
  uintptr_t frameSPs[MAX_TEST_FRAMES];
  size_t framesAvail = mozilla::ArrayLength(framePCs);
  size_t framesUsed = 0;
  size_t framePointerFramesAcquired = 0;
  aLUL->Unwind(&framePCs[0], &frameSPs[0], &framesUsed,
               &framePointerFramesAcquired, framesAvail, &startRegs, stackImg);

  delete stackImg;

  // if (0) {
  //  // Show what we have.
  //  fprintf(stderr, "Got %d frames:\n", (int)framesUsed);
  //  for (size_t i = 0; i < framesUsed; i++) {
  //    fprintf(stderr, "  [%2d]   SP %p   PC %p\n",
  //            (int)i, (void*)frameSPs[i], (void*)framePCs[i]);
  //  }
  //  fprintf(stderr, "\n");
  //}

  // Check to see if there's a consistent binding between digits in
  // the director string ('1' .. '8') and the PC values acquired by
  // the unwind.  If there isn't, the unwinding has failed somehow.
  uintptr_t binding[8];  // binding for '1' .. binding for '8'
  memset((void*)binding, 0, sizeof(binding));

  // The general plan is to work backwards along the director string
  // and forwards along the framePCs array.  Doing so corresponds to
  // working outwards from the innermost frame of the recursive test set.
  const char* cursor = dstring;

  // Find the end.  This leaves |cursor| two bytes past the first
  // character we want to look at -- see comment below.
  while (*cursor) cursor++;

  // Counts the number of consistent frames.
  size_t nConsistent = 0;

  // Iterate back to the start of the director string.  The starting
  // points are a bit complex.  We can't use framePCs[0] because that
  // contains the PC in this frame (above).  We can't use framePCs[1]
  // because that will contain the PC at return point in the recursive
  // test group (TestFn[1-8]) for their call "out" to this function,
  // GetAndCheckStackTrace.  Although LUL will compute a correct
  // return address, that will not be the same return address as for a
  // recursive call out of the the function to another function in the
  // group.  Hence we can only start consistency checking at
  // framePCs[2].
  //
  // To be consistent, then, we must ignore the last element in the
  // director string as that corresponds to framePCs[1].  Hence the
  // start points are: framePCs[2] and the director string 2 bytes
  // before the terminating zero.
  //
  // Also as a result of this, the number of consistent frames counted
  // will always be one less than the length of the director string
  // (not including its terminating zero).
  size_t frameIx;
  for (cursor = cursor - 2, frameIx = 2;
       cursor >= dstring && frameIx < framesUsed; cursor--, frameIx++) {
    char c = *cursor;
    uintptr_t pc = framePCs[frameIx];
    // If this doesn't hold, the director string is ill-formed.
    MOZ_ASSERT(c >= '1' && c <= '8');
    int n = ((int)c) - ((int)'1');
    if (binding[n] == 0) {
      // There's no binding for |c| yet, so install |pc| and carry on.
      binding[n] = pc;
      nConsistent++;
      continue;
    }
    // There's a pre-existing binding for |c|.  Check it's consistent.
    if (binding[n] != pc) {
      // Not consistent.  Give up now.
      break;
    }
    // Consistent.  Keep going.
    nConsistent++;
  }

  // So, did we succeed?
  bool passed = nConsistent + 1 == strlen(dstring);

  // Show the results.
  char buf[200];
  SprintfLiteral(buf, "LULUnitTest:   dstring = %s\n", dstring);
  buf[sizeof(buf) - 1] = 0;
  aLUL->mLog(buf);
  SprintfLiteral(buf, "LULUnitTest:     %d consistent, %d in dstring: %s\n",
                 (int)nConsistent, (int)strlen(dstring),
                 passed ? "PASS" : "FAIL");
  buf[sizeof(buf) - 1] = 0;
  aLUL->mLog(buf);

  return passed;
}

// Macro magic to create a set of 8 mutually recursive functions with
// varying frame sizes.  These will recurse amongst themselves as
// specified by |strP|, the directory string, and call
// GetAndCheckStackTrace when the string becomes empty, passing it the
// original value of the string.  This checks the result, printing
// results on |aLUL|'s logging sink, and also returns a boolean
// indicating whether or not the results are acceptable (correct).

#define DECL_TEST_FN(NAME) \
  bool NAME(LUL* aLUL, const char* strPorig, const char* strP);

#define GEN_TEST_FN(NAME, FRAMESIZE)                                          \
  bool NAME(LUL* aLUL, const char* strPorig, const char* strP) {              \
    /* Create a frame of size (at least) FRAMESIZE, so that the */            \
    /* 8 functions created by this macro offer some variation in frame */     \
    /* sizes.  This isn't as simple as it might seem, since a clever */       \
    /* optimizing compiler (eg, clang-5) detects that the array is unused */  \
    /* and removes it.  We try to defeat this by passing it to a function */  \
    /* in a different compilation unit, and hoping that clang does not */     \
    /* notice that the call is a no-op. */                                    \
    char space[FRAMESIZE];                                                    \
    Unused << write(1, space, 0); /* write zero bytes of |space| to stdout */ \
                                                                              \
    if (*strP == '\0') {                                                      \
      /* We've come to the end of the director string. */                     \
      /* Take a stack snapshot. */                                            \
      return GetAndCheckStackTrace(aLUL, strPorig);                           \
    } else {                                                                  \
      /* Recurse onwards.  This is a bit subtle.  The obvious */              \
      /* thing to do here is call onwards directly, from within the */        \
      /* arms of the case statement.  That gives a problem in that */         \
      /* there will be multiple return points inside each function when */    \
      /* unwinding, so it will be difficult to check for consistency */       \
      /* against the director string.  Instead, we make an indirect */        \
      /* call, so as to guarantee that there is only one call site */         \
      /* within each function.  This does assume that the compiler */         \
      /* won't transform it back to the simple direct-call form. */           \
      /* To discourage it from doing so, the call is bracketed with */        \
      /* __asm__ __volatile__ sections so as to make it not-movable. */       \
      bool (*nextFn)(LUL*, const char*, const char*) = NULL;                  \
      switch (*strP) {                                                        \
        case '1':                                                             \
          nextFn = TestFn1;                                                   \
          break;                                                              \
        case '2':                                                             \
          nextFn = TestFn2;                                                   \
          break;                                                              \
        case '3':                                                             \
          nextFn = TestFn3;                                                   \
          break;                                                              \
        case '4':                                                             \
          nextFn = TestFn4;                                                   \
          break;                                                              \
        case '5':                                                             \
          nextFn = TestFn5;                                                   \
          break;                                                              \
        case '6':                                                             \
          nextFn = TestFn6;                                                   \
          break;                                                              \
        case '7':                                                             \
          nextFn = TestFn7;                                                   \
          break;                                                              \
        case '8':                                                             \
          nextFn = TestFn8;                                                   \
          break;                                                              \
        default:                                                              \
          nextFn = TestFn8;                                                   \
          break;                                                              \
      }                                                                       \
      /* "use" |space| immediately after the recursive call, */               \
      /* so as to dissuade clang from deallocating the space while */         \
      /* the call is active, or otherwise messing with the stack frame. */    \
      __asm__ __volatile__("" ::: "cc", "memory");                            \
      bool passed = nextFn(aLUL, strPorig, strP + 1);                         \
      Unused << write(1, space, 0);                                           \
      __asm__ __volatile__("" ::: "cc", "memory");                            \
      return passed;                                                          \
    }                                                                         \
  }

// The test functions are mutually recursive, so it is necessary to
// declare them before defining them.
DECL_TEST_FN(TestFn1)
DECL_TEST_FN(TestFn2)
DECL_TEST_FN(TestFn3)
DECL_TEST_FN(TestFn4)
DECL_TEST_FN(TestFn5)
DECL_TEST_FN(TestFn6)
DECL_TEST_FN(TestFn7)
DECL_TEST_FN(TestFn8)

GEN_TEST_FN(TestFn1, 123)
GEN_TEST_FN(TestFn2, 456)
GEN_TEST_FN(TestFn3, 789)
GEN_TEST_FN(TestFn4, 23)
GEN_TEST_FN(TestFn5, 47)
GEN_TEST_FN(TestFn6, 117)
GEN_TEST_FN(TestFn7, 1)
GEN_TEST_FN(TestFn8, 99)

// This starts the test sequence going.  Call here to generate a
// sequence of calls as directed by the string |dstring|.  The call
// sequence will, from its innermost frame, finish by calling
// GetAndCheckStackTrace() and passing it |dstring|.
// GetAndCheckStackTrace() will unwind the stack, check consistency
// of those results against |dstring|, and print a pass/fail message
// to aLUL's logging sink.  It also updates the counters in *aNTests
// and aNTestsPassed.
__attribute__((noinline)) void TestUnw(/*OUT*/ int* aNTests,
                                       /*OUT*/ int* aNTestsPassed, LUL* aLUL,
                                       const char* dstring) {
  // Ensure that the stack has at least this much space on it.  This
  // makes it safe to saw off the top LUL_UNIT_TEST_STACK_SIZE bytes
  // and hand it to LUL.  Safe in the sense that no segfault can
  // happen because the stack is at least this big.  This is all
  // somewhat dubious in the sense that a sufficiently clever compiler
  // (clang, for one) can figure out that space[] is unused and delete
  // it from the frame.  Hence the somewhat elaborate hoop jumping to
  // fill it up before the call and to at least appear to use the
  // value afterwards.
  int i;
  volatile char space[LUL_UNIT_TEST_STACK_SIZE];
  for (i = 0; i < LUL_UNIT_TEST_STACK_SIZE; i++) {
    space[i] = (char)(i & 0x7F);
  }

  // Really run the test.
  bool passed = TestFn1(aLUL, dstring, dstring);

  // Appear to use space[], by visiting the value to compute some kind
  // of checksum, and then (apparently) using the checksum.
  int sum = 0;
  for (i = 0; i < LUL_UNIT_TEST_STACK_SIZE; i++) {
    // If this doesn't fool LLVM, I don't know what will.
    sum += space[i] - 3 * i;
  }
  __asm__ __volatile__("" : : "r"(sum));

  // Update the counters.
  (*aNTests)++;
  if (passed) {
    (*aNTestsPassed)++;
  }
}

void RunLulUnitTests(/*OUT*/ int* aNTests, /*OUT*/ int* aNTestsPassed,
                     LUL* aLUL) {
  aLUL->mLog(":\n");
  aLUL->mLog("LULUnitTest: BEGIN\n");
  *aNTests = *aNTestsPassed = 0;
  TestUnw(aNTests, aNTestsPassed, aLUL, "11111111");
  TestUnw(aNTests, aNTestsPassed, aLUL, "11222211");
  TestUnw(aNTests, aNTestsPassed, aLUL, "111222333");
  TestUnw(aNTests, aNTestsPassed, aLUL, "1212121231212331212121212121212");
  TestUnw(aNTests, aNTestsPassed, aLUL, "31415827271828325332173258");
  TestUnw(aNTests, aNTestsPassed, aLUL,
          "123456781122334455667788777777777777777777777");
  aLUL->mLog("LULUnitTest: END\n");
  aLUL->mLog(":\n");
}

}  // namespace lul