Bug 1581052 - [BinAST] Make getHuffmanLookup faster by inverting bits after reading data r=Yoric
authorAyrton Munoz <a.munoz3327@gmail.com>
Mon, 30 Sep 2019 18:23:08 +0000
changeset 495716 b33ee74d78c35dd93537846207596ac86b673797
parent 495715 853ea993c4f578e93deffa6156cff56fbc04b3e0
child 495717 28a5e5744f1f8e3d14237fa3738afd73fd88f630
push id114140
push userdvarga@mozilla.com
push dateWed, 02 Oct 2019 18:04:51 +0000
treeherdermozilla-inbound@32eb0ea893f3 [default view] [failures only]
perfherder[talos] [build metrics] [platform microbench] (compared to previous push)
reviewersYoric
bugs1581052
milestone71.0a1
first release with
nightly linux32
nightly linux64
nightly mac
nightly win32
nightly win64
last release without
nightly linux32
nightly linux64
nightly mac
nightly win32
nightly win64
Bug 1581052 - [BinAST] Make getHuffmanLookup faster by inverting bits after reading data r=Yoric Reading new data into `newBits` then doing the per-byte bit inversion in place and combining with `this->bits` reduces time spent in `getHuffmanLookup()` by about 15%. Differential Revision: https://phabricator.services.mozilla.com/D46559
js/src/frontend/BinASTTokenReaderContext.cpp
--- a/js/src/frontend/BinASTTokenReaderContext.cpp
+++ b/js/src/frontend/BinASTTokenReaderContext.cpp
@@ -1251,49 +1251,58 @@ JS::Result<HuffmanLookup> BinASTTokenRea
   // `0b_XXXX_XXXX__XXXX_XXXX__ABCD_EFGH__IJKL_MNOP`, where `X` are bits that
   // are beyond `this->bitLength`
 
   if (this->bitLength <= MAX_PREFIX_BIT_LENGTH) {
     // Keys can be up to MAX_PREFIX_BIT_LENGTH bits long. If we have fewer bits
     // available, it's time to reload. We'll try and get as close to 64 bits as
     // possible.
 
+    uint64_t newBits = 0;
     while (this->bitLength <= BIT_BUFFER_SIZE - BIT_BUFFER_READ_UNIT) {
       // 1. Let's try and pull one byte.
       uint8_t byte;
       uint32_t readLen = 1;
       MOZ_TRY((owner.readBuf<Compression::No, EndOfFilePolicy::BestEffort>(
           &byte, readLen)));
       if (readLen < 1) {
         // Ok, nothing left to read.
         break;
       }
 
-      // 2. We have just read to `byte`, here `0b_XWVU_TSRQ`. Let's reverse
-      // `byte` into `0b_QRST_UVWX`.
-      const uint8_t reversedByte =
-          (byte & 0b10000000) >> 7 | (byte & 0b01000000) >> 5 |
-          (byte & 0b00100000) >> 3 | (byte & 0b00010000) >> 1 |
-          (byte & 0b00001000) << 1 | (byte & 0b00000100) << 3 |
-          (byte & 0b00000010) << 5 | (byte & 0b00000001) << 7;
-
-      // 3. Make space for these bits at the end of the stream
+      // 2. Make space for these bits at the end of `bits`
       // so shift `bits` into
-      // `0b_XXXX_XXXX__XXXD_EFGH__IJKL_MNOP__0000_0000`.
+      // `0b_XXXX_XXXX__ABCD_EFGH__IJKL_MNOP__0000_0000`.
       this->bits <<= BIT_BUFFER_READ_UNIT;
 
-      // 4. Finally, combine into.
-      // `0b_XXXX_XXXX__XXXD_EFGH__IJKL_MNOP__QRST_UVWX`.
-      this->bits += reversedByte;
+      // 3. Read in another new byte from the stream
+      // so `newBits` becomes
+      // `0B_0000_0000__0000_0000__0000_0000__XWVU_TSRQ`
+      newBits <<= BIT_BUFFER_READ_UNIT;
+      newBits += byte;
+
       this->bitLength += BIT_BUFFER_READ_UNIT;
       MOZ_ASSERT_IF(this->bitLength != 64 /* >> 64 is UB for a uint64_t */,
                     this->bits >> this->bitLength == 0);
 
-      // 5. Continue as long as we don't have enough bits.
+      // 4. Continue as long as we don't have enough bits.
     }
+    // 5. Let's reverse each of the 8 bytes in `newBits` in place
+    // First swap alternating bits
+    newBits = ((newBits >> 1) & 0x5555555555555555) |
+              ((newBits & 0x5555555555555555) << 1);
+    // Then swap alternating groups of 2 bits
+    newBits = ((newBits >> 2) & 0x3333333333333333) |
+              ((newBits & 0x3333333333333333) << 2);
+    // Finally swap alternating nibbles
+    newBits = ((newBits >> 4) & 0x0F0F0F0F0F0F0F0F) |
+              ((newBits & 0x0F0F0F0F0F0F0F0F) << 4);
+    // 6. Finally, combine `newBits` with `this->bits` to get
+    // `0b_XXXX_XXXX__ABCD_EFGH__IJKL_MNOP__QRST_UVWX`
+    this->bits += newBits;
   }
 
   // Now, we may prepare a `HuffmanLookup` with up to 32 bits.
   if (this->bitLength <= MAX_PREFIX_BIT_LENGTH) {
     return HuffmanLookup(this->bits, this->bitLength);
   }
   // Keep only 32 bits. We perform the operation on 64 bits to avoid any
   // arithmetics surprise.
@@ -2039,17 +2048,19 @@ JS::Result<Ok> SingleLookupHuffmanTable<
   }
   this->largestBitLength = 0;
   return Ok();
 }
 
 template <typename T>
 JS::Result<Ok> SingleLookupHuffmanTable<T>::initStart(
     JSContext* cx, size_t numberOfSymbols, uint8_t largestBitLength) {
-  MOZ_ASSERT_IF(largestBitLength != 32, (uint32_t(1) << largestBitLength) - 1 <= mozilla::MaxValue<InternalIndex>::value);
+  MOZ_ASSERT_IF(largestBitLength != 32,
+                (uint32_t(1) << largestBitLength) - 1 <=
+                    mozilla::MaxValue<InternalIndex>::value);
   MOZ_ASSERT(values.empty());  // Make sure that we're initializing.
 
   this->largestBitLength = largestBitLength;
 
   if (!values.initCapacity(numberOfSymbols)) {
     return cx->alreadyReportedError();
   }
   const size_t saturatedLength = 1 << largestBitLength;