mozglue/baseprofiler/public/leb128iterator.h
author James Teh <jteh@mozilla.com>
Wed, 01 Feb 2023 05:02:01 +0000
changeset 651150 dd0fdd1daa69783be36acd5c50544f3694eaa8f9
parent 522221 8a9798def63e3284728a4a604d6981eee6272353
permissions -rw-r--r--
Bug 1813980: Check IsDoc before Parent in RemoteAccessibleBase::ApplyCrossDocOffset. r=morgan We call this function on every ancestor when calculating bounds. RemoteParent() currently requires a hash lookup, so it's more efficient to early return for !IsDoc() first. This is a micro-optimisation, but it might have some impact given that we call this on every ancestor, especially when hit testing, where we call Bounds() a lot. As a bit of drive-by cleanup, use RemoteParent() rather than calling Parent() and IsRemote/AsRemote(). Differential Revision: https://phabricator.services.mozilla.com/D168346

/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
/* vim: set ts=8 sts=2 et sw=2 tw=80: */
/* This Source Code Form is subject to the terms of the Mozilla Public
 * License, v. 2.0. If a copy of the MPL was not distributed with this
 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */

// LEB128 utilities that can read/write unsigned LEB128 numbers from/to
// iterators.
//
// LEB128 = Little Endian Base 128, where small numbers take few bytes, but
// large numbers are still allowed, which is ideal when serializing numbers that
// are likely to be small.
// Each byte contains 7 bits from the number, starting at the "little end", the
// top bit is 0 for the last byte, 1 otherwise.
// Numbers 0-127 only take 1 byte. 128-16383 take 2 bytes. Etc.
//
// Iterators only need to provide:
// - `*it` to return a reference to the next byte to be read from or written to.
// - `++it` to advance the iterator after a byte is written.
//
// The caller must always provide sufficient space to write any number, by:
// - pre-allocating a large enough buffer, or
// - allocating more space when `++it` reaches the end and/or `*it` is invoked
//   after the end, or
// - moving the underlying pointer to an appropriate location (e.g., wrapping
//   around a circular buffer).
// The caller must also provide enough bytes to read a full value (i.e., at
// least one byte should have its top bit unset), and a type large enough to
// hold the stored value.
//
// Note: There are insufficient checks for validity! These functions are
// intended to be used together, i.e., the user should only `ReadULEB128()` from
// a sufficiently-large buffer that the same user filled with `WriteULEB128()`.
// Using with externally-sourced data (e.g., DWARF) is *not* recommended.
//
// https://en.wikipedia.org/wiki/LEB128

#ifndef leb128iterator_h
#define leb128iterator_h

#include "mozilla/Assertions.h"
#include "mozilla/Likely.h"

#include <climits>
#include <cstdint>
#include <limits>
#include <type_traits>

namespace mozilla {

// Number of bytes needed to represent `aValue`.
template <typename T>
constexpr uint_fast8_t ULEB128Size(T aValue) {
  static_assert(!std::numeric_limits<T>::is_signed,
                "ULEB128Size only takes unsigned types");
  // We need one output byte per 7 bits of non-zero value. So we just remove
  // 7 least significant bits at a time until the value becomes zero.
  // Note the special case of 0, which still needs 1 output byte; this is done
  // by starting the first loop before we check for 0.
  uint_fast8_t size = 0;
  for (;;) {
    size += 1;
    aValue >>= 7;
    // Expecting small values, so it should be more likely that `aValue == 0`.
    if (MOZ_LIKELY(aValue == 0)) {
      return size;
    }
  }
}

// Maximum number of bytes needed to represent any value of type `T`.
template <typename T>
constexpr uint_fast8_t ULEB128MaxSize() {
  return ULEB128Size<T>(std::numeric_limits<T>::max());
}

// Write `aValue` in LEB128 to `aIterator`.
// The iterator will be moved past the last byte.
template <typename T, typename It>
void WriteULEB128(T aValue, It& aIterator) {
  static_assert(!std::numeric_limits<T>::is_signed,
                "WriteULEB128 only takes unsigned types");
  using IteratorValue = std::remove_reference_t<decltype(*aIterator)>;
  static_assert(sizeof(IteratorValue) == 1,
                "WriteULEB128 expects an iterator to single bytes");
  // 0. Don't test for 0 yet, as we want to output one byte for it.
  for (;;) {
    // 1. Extract the 7 least significant bits.
    const uint_fast8_t byte = aValue & 0x7Fu;
    // 2. Remove them from `aValue`.
    aValue >>= 7;
    // 3. Write the 7 bits, and set the 8th bit if `aValue` is not 0 yet
    // (meaning there will be more bytes after this one.)
    // Expecting small values, so it should be more likely that `aValue == 0`.
    // Note: No absolute need to force-cast to IteratorValue, because we have
    // only changed the bottom 8 bits above. However the compiler could warn
    // about a narrowing conversion from potentially-multibyte uint_fast8_t down
    // to whatever single-byte type `*iterator* expects, so we make it explicit.
    *aIterator = static_cast<IteratorValue>(
        MOZ_LIKELY(aValue == 0) ? byte : (byte | 0x80u));
    // 4. Always advance the iterator to the next byte.
    ++aIterator;
    // 5. We're done if `aValue` is 0.
    // Expecting small values, so it should be more likely that `aValue == 0`.
    if (MOZ_LIKELY(aValue == 0)) {
      return;
    }
  }
}

// Read an LEB128 value from `aIterator`.
// The iterator will be moved past the last byte.
template <typename T, typename It>
T ReadULEB128(It& aIterator) {
  static_assert(!std::numeric_limits<T>::is_signed,
                "ReadULEB128 must return an unsigned type");
  using IteratorValue = std::remove_reference_t<decltype(*aIterator)>;
  static_assert(sizeof(IteratorValue) == 1,
                "ReadULEB128 expects an iterator to single bytes");
  // Incoming bits will be added to `result`...
  T result = 0;
  // ... starting with the least significant bits.
  uint_fast8_t shift = 0;
  for (;;) {
    // 1. Read one byte from the iterator.
    // `static_cast` just in case IteratorValue is not implicitly convertible to
    // uint_fast8_t. It wouldn't matter if the sign was extended, we're only
    // dealing with the bottom 8 bits below.
    const uint_fast8_t byte = static_cast<uint_fast8_t>(*aIterator);
    // 2. Always advance the iterator.
    ++aIterator;
    // 3. Extract the 7 bits of value, and shift them in place into `result`.
    result |= static_cast<T>(byte & 0x7fu) << shift;
    // 4. If the 8th bit is *not* set, this was the last byte.
    // Expecting small values, so it should be more likely that the bit is off.
    if (MOZ_LIKELY((byte & 0x80u) == 0)) {
      return result;
    }
    // There are more bytes to read.
    // 5. Next byte will contain more significant bits above the past 7.
    shift += 7;
    // Safety check that we're not going to shift by >= than the type size,
    // which is Undefined Behavior in C++.
    MOZ_ASSERT(shift < CHAR_BIT * sizeof(T));
  }
}

// constexpr ULEB128 reader class.
// Mostly useful when dealing with non-trivial byte feeds.
template <typename T>
class ULEB128Reader {
  static_assert(!std::numeric_limits<T>::is_signed,
                "ULEB128Reader must handle an unsigned type");

 public:
  constexpr ULEB128Reader() = default;

  // Don't allow copy/assignment, it doesn't make sense for a stateful parser.
  constexpr ULEB128Reader(const ULEB128Reader&) = delete;
  constexpr ULEB128Reader& operator=(const ULEB128Reader&) = delete;

  // Feed a byte into the parser.
  // Returns true if this was the last byte.
  [[nodiscard]] constexpr bool FeedByteIsComplete(unsigned aByte) {
    MOZ_ASSERT(!IsComplete());
    // Extract the 7 bits of value, and shift them in place into the value.
    mValue |= static_cast<T>(aByte & 0x7fu) << mShift;
    // If the 8th bit is *not* set, this was the last byte.
    // Expecting small values, so it should be more likely that the bit is off.
    if (MOZ_LIKELY((aByte & 0x80u) == 0)) {
      mShift = mCompleteShift;
      return true;
    }
    // There are more bytes to read.
    // Next byte will contain more significant bits above the past 7.
    mShift += 7;
    // Safety check that we're not going to shift by >= than the type size,
    // which is Undefined Behavior in C++.
    MOZ_ASSERT(mShift < CHAR_BIT * sizeof(T));
    return false;
  }

  constexpr void Reset() {
    mValue = 0;
    mShift = 0;
  }

  [[nodiscard]] constexpr bool IsComplete() const {
    return mShift == mCompleteShift;
  }

  [[nodiscard]] constexpr T Value() const {
    MOZ_ASSERT(IsComplete());
    return mValue;
  }

 private:
  // Special value of `mShift` indicating that parsing is complete.
  constexpr static unsigned mCompleteShift = 0x10000u;

  T mValue = 0;
  unsigned mShift = 0;
};

}  // namespace mozilla

#endif  // leb128iterator_h