Bug 1242904 - Update Brotli to latest upstream revision ; now at 33aa40220b96cf95ad2b9ba61dc8d7fd2f964f2c. r=mcmanus
author Frédéric Wang <fred.wang@free.fr>
Mon, 08 Feb 2016 12:01:21 +0100
changeset 283485 ad43c7344bdb41ecca4cd6a2c157c8a9267bec64
parent 283484 faf39373fc660fb0416ad1d4ab6786074cd982ac
child 283486 cdbec1a7065c95a9181684eb5e362c0886d3caac
push id 29986
push user cbook@mozilla.com
push date Tue, 09 Feb 2016 11:03:17 +0000
treeherder mozilla-central@2dfb45d74f42 [default view] [failures only]
perfherder [talos] [build metrics] [platform microbench] (compared to previous push)
reviewers mcmanus
bugs 1242904
milestone 47.0a1
first release with
nightly linux32
nightly linux64
nightly mac
nightly win32
nightly win64
last release without
nightly linux32
nightly linux64
nightly mac
nightly win32
nightly win64
Bug 1242904 - Update Brotli to latest upstream revision ; now at 33aa40220b96cf95ad2b9ba61dc8d7fd2f964f2c. r=mcmanus
modules/brotli/README.mozilla
modules/brotli/dec/Makefile
modules/brotli/dec/bit_reader.c
modules/brotli/dec/bit_reader.h
modules/brotli/dec/context.h
modules/brotli/dec/decode.c
modules/brotli/dec/decode.h
modules/brotli/dec/dictionary.c
modules/brotli/dec/dictionary.h
modules/brotli/dec/huffman.c
modules/brotli/dec/huffman.h
modules/brotli/dec/port.h
modules/brotli/dec/prefix.h
modules/brotli/dec/state.c
modules/brotli/dec/state.h
modules/brotli/dec/streams.c
modules/brotli/dec/streams.h
modules/brotli/dec/transform.h
modules/brotli/dec/types.h
modules/brotli/moz.build
modules/brotli/update.sh
netwerk/streamconv/converters/nsHTTPCompressConv.cpp
--- a/modules/brotli/README.mozilla
+++ b/modules/brotli/README.mozilla
@@ -9,64 +9,9 @@ Upstream code can be viewed at
 
 and cloned by
   git clone https://github.com/google/brotli
 
 The in-tree copy is updated by running
   sh update.sh
 from within the modules/brotli directory.
 
-Current version: [commit 933bb9bd800c8f5f7f6a02382d33c902a98ef73a].
-
-this trivial patch is added to preserve no-warnings behavior in code
-that includes the brotli interface. future imports are expected to
-have an equivalent change made upstream already.
-
-diff --git a/modules/brotli/dec/bit_reader.h b/modules/brotli/dec/bit_reader.h
---- a/modules/brotli/dec/bit_reader.h
-+++ b/modules/brotli/dec/bit_reader.h
-@@ -284,17 +284,17 @@ static BROTLI_INLINE int BrotliPeekByte(
-   int bytes_left = (int)(sizeof(br->val_) - (br->bit_pos_ >> 3));
-   if (br->bit_pos_ & 7) {
-     return -1;
-   }
-   if (offset < bytes_left) {
-     return (br->val_ >> (br->bit_pos_ + (unsigned)(offset << 3))) & 0xFF;
-   }
-   offset -= bytes_left;
--  if (offset < br->avail_in) {
-+  if (offset < (long)br->avail_in) {
-     return br->next_in[offset];
-   }
-   return -1;
- }
- 
- /* Copies remaining input bytes stored in the bit reader to the output. Value
-    num may not be larger than BrotliGetRemainingBytes. The bit reader must be
-    warmed up again after this. */
-
-
-This patch fixes a use-before declare error on big endian platforms
-in bit_reader.h.  Upstream has already fixed this error by re-arranging 
-some functions in the file.
-
-diff --git a/modules/brotli/dec/bit_reader.h b/modules/brotli/dec/bit_reader.h
---- a/modules/brotli/dec/bit_reader.h
-+++ b/modules/brotli/dec/bit_reader.h
-@@ -58,17 +58,17 @@ typedef struct {
- /* Initializes the bitreader fields. */
- void BrotliInitBitReader(BrotliBitReader* const br, BrotliInput input);
- 
- /* Ensures that accumulator is not empty. May consume one byte of input.
-    Returns 0 if data is required but there is no input available.
-    For BROTLI_BUILD_PORTABLE this function also prepares bit reader for aligned
-    reading. */
- int BrotliWarmupBitReader(BrotliBitReader* const br);
--
-+static BROTLI_INLINE void BrotliPullByte(BrotliBitReader* const br);
- /* Pulls data from the input to the the read buffer.
- 
-    Returns 0 if one of:
-     - the input callback returned an error, or
-     - there is no more input and the position is past the end of the stream.
-     - finish is false and less than BROTLI_READ_SIZE are available - a next call
-       when more data is available makes it continue including the partially read
-       data
+Current version: [commit 33aa40220b96cf95ad2b9ba61dc8d7fd2f964f2c].
--- a/modules/brotli/dec/Makefile
+++ b/modules/brotli/dec/Makefile
@@ -1,12 +1,12 @@
 #brotli/dec
 
 include ../shared.mk
 
 CFLAGS += -Wall
 
-OBJS = bit_reader.o decode.o dictionary.o huffman.o state.o streams.o
+OBJS = bit_reader.o decode.o dictionary.o huffman.o state.o
 
 all : $(OBJS)
 
 clean :
 	rm -f $(OBJS)
--- a/modules/brotli/dec/bit_reader.c
+++ b/modules/brotli/dec/bit_reader.c
@@ -1,58 +1,48 @@
 /* Copyright 2013 Google Inc. All Rights Reserved.
 
-   Licensed under the Apache License, Version 2.0 (the "License");
-   you may not use this file except in compliance with the License.
-   You may obtain a copy of the License at
-
-   http://www.apache.org/licenses/LICENSE-2.0
-
-   Unless required by applicable law or agreed to in writing, software
-   distributed under the License is distributed on an "AS IS" BASIS,
-   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-   See the License for the specific language governing permissions and
-   limitations under the License.
+   Distributed under MIT license.
+   See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
 */
 
 /* Bit reading helpers */
 
-#include <stdlib.h>
+#include "./bit_reader.h"
 
-#include "./bit_reader.h"
 #include "./port.h"
+#include "./types.h"
 
 #if defined(__cplusplus) || defined(c_plusplus)
 extern "C" {
 #endif
 
-void BrotliInitBitReader(BrotliBitReader* const br, BrotliInput input) {
-  BROTLI_DCHECK(br != NULL);
-
-  br->input_ = input;
+void BrotliInitBitReader(BrotliBitReader* const br) {
   br->val_ = 0;
   br->bit_pos_ = sizeof(br->val_) << 3;
-  br->avail_in = 0;
-  br->eos_ = 0;
-  br->next_in = br->buf_;
 }
 
 int BrotliWarmupBitReader(BrotliBitReader* const br) {
   size_t aligned_read_mask = (sizeof(br->val_) >> 1) - 1;
   /* Fixing alignment after unaligned BrotliFillWindow would result accumulator
      overflow. If unalignment is caused by BrotliSafeReadBits, then there is
      enough space in accumulator to fix aligment. */
   if (!BROTLI_ALIGNED_READ) {
     aligned_read_mask = 0;
   }
-  while (br->bit_pos_ == (sizeof(br->val_) << 3) ||
-      (((size_t)br->next_in) & aligned_read_mask) != 0) {
-    if (!br->avail_in) {
+  if (BrotliGetAvailableBits(br) == 0) {
+    if (!BrotliPullByte(br)) {
       return 0;
     }
-    BrotliPullByte(br);
+  }
+
+  while ((((size_t)br->next_in) & aligned_read_mask) != 0) {
+    if (!BrotliPullByte(br)) {
+      /* If we consumed all the input, we don't care about the alignment. */
+      return 1;
+    }
   }
   return 1;
 }
 
 #if defined(__cplusplus) || defined(c_plusplus)
 }    /* extern "C" */
 #endif
--- a/modules/brotli/dec/bit_reader.h
+++ b/modules/brotli/dec/bit_reader.h
@@ -1,331 +1,387 @@
 /* Copyright 2013 Google Inc. All Rights Reserved.
 
-   Licensed under the Apache License, Version 2.0 (the "License");
-   you may not use this file except in compliance with the License.
-   You may obtain a copy of the License at
-
-   http://www.apache.org/licenses/LICENSE-2.0
-
-   Unless required by applicable law or agreed to in writing, software
-   distributed under the License is distributed on an "AS IS" BASIS,
-   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-   See the License for the specific language governing permissions and
-   limitations under the License.
+   Distributed under MIT license.
+   See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
 */
 
 /* Bit reading helpers */
 
 #ifndef BROTLI_DEC_BIT_READER_H_
 #define BROTLI_DEC_BIT_READER_H_
 
-#include <string.h>
+#include <string.h>  /* memcpy */
+
 #include "./port.h"
-#include "./streams.h"
 #include "./types.h"
 
 #if defined(__cplusplus) || defined(c_plusplus)
 extern "C" {
 #endif
 
-#define BROTLI_READ_SIZE          1024
-/* 128 bytes, plus 8 bytes slack for valid 128-byte BrotliCheckInputAmount with
-   some bytes read in val_ of bit reader. */
-#define BROTLI_IMPLICIT_ZEROES    136
-#define BROTLI_IBUF_SIZE          (BROTLI_READ_SIZE + BROTLI_IMPLICIT_ZEROES)
-#define BROTLI_IBUF_MASK          (BROTLI_READ_SIZE - 1)
+#if (BROTLI_64_BITS)
+#define BROTLI_SHORT_FILL_BIT_WINDOW_READ 4
+typedef uint64_t reg_t;
+#else
+#define BROTLI_SHORT_FILL_BIT_WINDOW_READ 2
+typedef uint32_t reg_t;
+#endif
 
-/* Masking with this expression turns to a single "Unsigned Bit Field Extract"
-   UBFX instruction on ARM. */
-static BROTLI_INLINE uint32_t BitMask(int n) { return ~((0xffffffff) << n); }
+static const uint32_t kBitMask[33] = { 0x0000,
+    0x00000001, 0x00000003, 0x00000007, 0x0000000F,
+    0x0000001F, 0x0000003F, 0x0000007F, 0x000000FF,
+    0x000001FF, 0x000003FF, 0x000007FF, 0x00000FFF,
+    0x00001FFF, 0x00003FFF, 0x00007FFF, 0x0000FFFF,
+    0x0001FFFF, 0x0003FFFF, 0x0007FFFF, 0x000FFFFF,
+    0x001FFFFF, 0x003FFFFF, 0x007FFFFF, 0x00FFFFFF,
+    0x01FFFFFF, 0x03FFFFFF, 0x07FFFFFF, 0x0FFFFFFF,
+    0x1FFFFFFF, 0x3FFFFFFF, 0x7FFFFFFF, 0xFFFFFFFF
+};
+
+static BROTLI_INLINE uint32_t BitMask(uint32_t n) {
+  if (IS_CONSTANT(n) || BROTLI_HAS_UBFX) {
+    /* Masking with this expression turns to a single
+       "Unsigned Bit Field Extract" UBFX instruction on ARM. */
+    return ~((0xffffffffU) << n);
+  } else {
+    return kBitMask[n];
+  }
+}
 
 typedef struct {
-#if (BROTLI_64_BITS_LITTLE_ENDIAN)
-  uint64_t    val_;          /* pre-fetched bits */
-#else
-  uint32_t    val_;          /* pre-fetched bits */
-#endif
+  reg_t       val_;          /* pre-fetched bits */
   uint32_t    bit_pos_;      /* current bit-reading position in val_ */
-  uint8_t*    next_in;       /* the byte we're reading from */
-  uint32_t    avail_in;
-  int         eos_;          /* input stream is finished */
-  BrotliInput input_;        /* input callback */
-
-  /* Input byte buffer, consist of a ringbuffer and a "slack" region where */
-  /* bytes from the start of the ringbuffer are copied. */
-  uint8_t buf_[BROTLI_IBUF_SIZE];
+  const uint8_t* next_in;    /* the byte we're reading from */
+  size_t      avail_in;
 } BrotliBitReader;
 
+typedef struct {
+  reg_t    val_;
+  uint32_t bit_pos_;
+  const uint8_t* next_in;
+  size_t   avail_in;
+} BrotliBitReaderState;
+
 /* Initializes the bitreader fields. */
-void BrotliInitBitReader(BrotliBitReader* const br, BrotliInput input);
+void BrotliInitBitReader(BrotliBitReader* const br);
 
 /* Ensures that accumulator is not empty. May consume one byte of input.
    Returns 0 if data is required but there is no input available.
    For BROTLI_BUILD_PORTABLE this function also prepares bit reader for aligned
    reading. */
 int BrotliWarmupBitReader(BrotliBitReader* const br);
-static BROTLI_INLINE void BrotliPullByte(BrotliBitReader* const br);
-/* Pulls data from the input to the the read buffer.
 
-   Returns 0 if one of:
-    - the input callback returned an error, or
-    - there is no more input and the position is past the end of the stream.
-    - finish is false and less than BROTLI_READ_SIZE are available - a next call
-      when more data is available makes it continue including the partially read
-      data
+static BROTLI_INLINE void BrotliBitReaderSaveState(
+    BrotliBitReader* const from, BrotliBitReaderState* to) {
+  to->val_ = from->val_;
+  to->bit_pos_ = from->bit_pos_;
+  to->next_in = from->next_in;
+  to->avail_in = from->avail_in;
+}
 
-   If finish is true and the end of the stream is reached,
-   BROTLI_IMPLICIT_ZEROES additional zero bytes are copied to the ringbuffer.
-*/
-static BROTLI_INLINE int BrotliReadInput(
-    BrotliBitReader* const br, int finish) {
-  if (PREDICT_FALSE(br->eos_)) {
-    return 0;
-  } else {
-    size_t i;
-    int bytes_read;
-    if (br->next_in != br->buf_) {
-      for (i = 0; i < br->avail_in; i++) {
-        br->buf_[i] = br->next_in[i];
-      }
-      br->next_in = br->buf_;
-    }
-    bytes_read = BrotliRead(br->input_, br->next_in + br->avail_in,
-        (size_t)(BROTLI_READ_SIZE - br->avail_in));
-    if (bytes_read < 0) {
-      return 0;
-    }
-    br->avail_in += (uint32_t)bytes_read;
-    if (br->avail_in < BROTLI_READ_SIZE) {
-      if (!finish) {
-        return 0;
-      }
-      br->eos_ = 1;
-      /* Store BROTLI_IMPLICIT_ZEROES bytes of zero after the stream end. */
-      memset(br->next_in + br->avail_in, 0, BROTLI_IMPLICIT_ZEROES);
-      br->avail_in += BROTLI_IMPLICIT_ZEROES;
-    }
-    return 1;
-  }
+static BROTLI_INLINE void BrotliBitReaderRestoreState(
+    BrotliBitReader* const to, BrotliBitReaderState* from) {
+  to->val_ = from->val_;
+  to->bit_pos_ = from->bit_pos_;
+  to->next_in = from->next_in;
+  to->avail_in = from->avail_in;
+}
+
+static BROTLI_INLINE uint32_t BrotliGetAvailableBits(
+    const BrotliBitReader* br) {
+  return (BROTLI_64_BITS ? 64 : 32) - br->bit_pos_;
 }
 
 /* Returns amount of unread bytes the bit reader still has buffered from the
    BrotliInput, including whole bytes in br->val_. */
 static BROTLI_INLINE size_t BrotliGetRemainingBytes(BrotliBitReader* br) {
-  size_t result = br->avail_in + sizeof(br->val_) - (br->bit_pos_ >> 3);
-  if (!br->eos_) {
-    return result;
-  }
-  if (result <= BROTLI_IMPLICIT_ZEROES) {
-    return 0;
-  }
-  return result - BROTLI_IMPLICIT_ZEROES;
+  return br->avail_in + (BrotliGetAvailableBits(br) >> 3);
 }
 
 /* Checks if there is at least num bytes left in the input ringbuffer (excluding
-   the bits remaining in br->val_). The maximum value for num is
-   BROTLI_IMPLICIT_ZEROES bytes. */
+   the bits remaining in br->val_). */
 static BROTLI_INLINE int BrotliCheckInputAmount(
     BrotliBitReader* const br, size_t num) {
   return br->avail_in >= num;
 }
 
+static BROTLI_INLINE uint16_t BrotliLoad16LE(const uint8_t* in) {
+  if (BROTLI_LITTLE_ENDIAN) {
+    return *((const uint16_t*)in);
+  } else if (BROTLI_BIG_ENDIAN) {
+    uint16_t value = *((const uint16_t*)in);
+    return (uint16_t)(
+        ((value & 0xFFU) << 8) |
+        ((value & 0xFF00U) >> 8));
+  } else {
+    return (uint16_t)(in[0] | (in[1] << 8));
+  }
+}
+
+static BROTLI_INLINE uint32_t BrotliLoad32LE(const uint8_t* in) {
+  if (BROTLI_LITTLE_ENDIAN) {
+    return *((const uint32_t*)in);
+  } else if (BROTLI_BIG_ENDIAN) {
+    uint32_t value = *((const uint32_t*)in);
+    return ((value & 0xFFU) << 24) | ((value & 0xFF00U) << 8) |
+        ((value & 0xFF0000U) >> 8) | ((value & 0xFF000000U) >> 24);
+  } else {
+    uint32_t value = (uint32_t)(*(in++));
+    value |= (uint32_t)(*(in++)) << 8;
+    value |= (uint32_t)(*(in++)) << 16;
+    value |= (uint32_t)(*(in++)) << 24;
+    return value;
+  }
+}
+
+#if (BROTLI_64_BITS)
+static BROTLI_INLINE uint64_t BrotliLoad64LE(const uint8_t* in) {
+  if (BROTLI_LITTLE_ENDIAN) {
+    return *((const uint64_t*)in);
+  } else if (BROTLI_BIG_ENDIAN) {
+    uint64_t value = *((const uint64_t*)in);
+    return
+        ((value & 0xFFU) << 56) |
+        ((value & 0xFF00U) << 40) |
+        ((value & 0xFF0000U) << 24) |
+        ((value & 0xFF000000U) << 8) |
+        ((value & 0xFF00000000U) >> 8) |
+        ((value & 0xFF0000000000U) >> 24) |
+        ((value & 0xFF000000000000U) >> 40) |
+        ((value & 0xFF00000000000000U) >> 56);
+  } else {
+    uint64_t value = (uint64_t)(*(in++));
+    value |= (uint64_t)(*(in++)) << 8;
+    value |= (uint64_t)(*(in++)) << 16;
+    value |= (uint64_t)(*(in++)) << 24;
+    value |= (uint64_t)(*(in++)) << 32;
+    value |= (uint64_t)(*(in++)) << 40;
+    value |= (uint64_t)(*(in++)) << 48;
+    value |= (uint64_t)(*(in++)) << 56;
+    return value;
+  }
+}
+#endif
+
 /* Guarantees that there are at least n_bits + 1 bits in accumulator.
    Precondition: accumulator contains at least 1 bit.
    n_bits should be in the range [1..24] for regular build. For portable
    non-64-bit little endian build only 16 bits are safe to request. */
 static BROTLI_INLINE void BrotliFillBitWindow(
-    BrotliBitReader* const br, int n_bits) {
-#if (BROTLI_64_BITS_LITTLE_ENDIAN)
+    BrotliBitReader* const br, uint32_t n_bits) {
+#if (BROTLI_64_BITS)
   if (!BROTLI_ALIGNED_READ && IS_CONSTANT(n_bits) && (n_bits <= 8)) {
     if (br->bit_pos_ >= 56) {
       br->val_ >>= 56;
       br->bit_pos_ ^= 56;  /* here same as -= 56 because of the if condition */
-      br->val_ |= (*(const uint64_t*)(br->next_in)) << 8;
+      br->val_ |= BrotliLoad64LE(br->next_in) << 8;
       br->avail_in -= 7;
       br->next_in += 7;
     }
   } else if (!BROTLI_ALIGNED_READ && IS_CONSTANT(n_bits) && (n_bits <= 16)) {
     if (br->bit_pos_ >= 48) {
       br->val_ >>= 48;
       br->bit_pos_ ^= 48;  /* here same as -= 48 because of the if condition */
-      br->val_ |= (*(const uint64_t*)(br->next_in)) << 16;
+      br->val_ |= BrotliLoad64LE(br->next_in) << 16;
       br->avail_in -= 6;
       br->next_in += 6;
     }
   } else {
     if (br->bit_pos_ >= 32) {
       br->val_ >>= 32;
       br->bit_pos_ ^= 32;  /* here same as -= 32 because of the if condition */
-      br->val_ |= ((uint64_t)(*(const uint32_t*)(br->next_in))) << 32;
-      br->avail_in -= 4;
-      br->next_in += 4;
+      br->val_ |= ((uint64_t)BrotliLoad32LE(br->next_in)) << 32;
+      br->avail_in -= BROTLI_SHORT_FILL_BIT_WINDOW_READ;
+      br->next_in += BROTLI_SHORT_FILL_BIT_WINDOW_READ;
     }
   }
-#elif (BROTLI_LITTLE_ENDIAN)
+#else
   if (!BROTLI_ALIGNED_READ && IS_CONSTANT(n_bits) && (n_bits <= 8)) {
     if (br->bit_pos_ >= 24) {
       br->val_ >>= 24;
       br->bit_pos_ ^= 24;  /* here same as -= 24 because of the if condition */
-      br->val_ |= (*(const uint32_t*)(br->next_in)) << 8;
+      br->val_ |= BrotliLoad32LE(br->next_in) << 8;
       br->avail_in -= 3;
       br->next_in += 3;
     }
   } else {
     if (br->bit_pos_ >= 16) {
       br->val_ >>= 16;
       br->bit_pos_ ^= 16;  /* here same as -= 16 because of the if condition */
-      br->val_ |= ((uint32_t)(*(const uint16_t*)(br->next_in))) << 16;
-      br->avail_in -= 2;
-      br->next_in += 2;
+      br->val_ |= ((uint32_t)BrotliLoad16LE(br->next_in)) << 16;
+      br->avail_in -= BROTLI_SHORT_FILL_BIT_WINDOW_READ;
+      br->next_in += BROTLI_SHORT_FILL_BIT_WINDOW_READ;
     }
   }
-#else
-  while (br->bit_pos_ >= 16) {
-    BrotliPullByte(br);
-  }
 #endif
 }
 
+/* Mostly like BrotliFillBitWindow, but guarantees only 16 bits and reads no
+   more than BROTLI_SHORT_FILL_BIT_WINDOW_READ bytes of input. */
+static BROTLI_INLINE void BrotliFillBitWindow16(BrotliBitReader* const br) {
+  BrotliFillBitWindow(br, 17);
+}
+
 /* Pulls one byte of input to accumulator. */
-static BROTLI_INLINE void BrotliPullByte(BrotliBitReader* const br) {
+static BROTLI_INLINE int BrotliPullByte(BrotliBitReader* const br) {
+  if (br->avail_in == 0) {
+    return 0;
+  }
   br->val_ >>= 8;
-#if (BROTLI_64_BITS_LITTLE_ENDIAN)
+#if (BROTLI_64_BITS)
     br->val_ |= ((uint64_t)*br->next_in) << 56;
 #else
     br->val_ |= ((uint32_t)*br->next_in) << 24;
 #endif
   br->bit_pos_ -= 8;
   --br->avail_in;
   ++br->next_in;
+  return 1;
 }
 
-/* Like BrotliGetBits, but does not mask the result, it is only guaranteed
-that it has minimum n_bits. */
-static BROTLI_INLINE uint32_t BrotliGetBitsUnmasked(
-    BrotliBitReader* const br, int n_bits) {
-  BrotliFillBitWindow(br, n_bits);
-  return (uint32_t)(br->val_ >> br->bit_pos_);
+/* Returns currently available bits.
+   The number of valid bits can be calculated by BrotliGetAvailableBits. */
+static BROTLI_INLINE reg_t BrotliGetBitsUnmasked(BrotliBitReader* const br) {
+  return br->val_ >> br->bit_pos_;
+}
+
+/* Like BrotliGetBits, but does not mask the result.
+   The result contains at least 16 valid bits. */
+static BROTLI_INLINE uint32_t BrotliGet16BitsUnmasked(
+    BrotliBitReader* const br) {
+  BrotliFillBitWindow(br, 16);
+  return (uint32_t)BrotliGetBitsUnmasked(br);
 }
 
 /* Returns the specified number of bits from br without advancing bit pos. */
 static BROTLI_INLINE uint32_t BrotliGetBits(
-    BrotliBitReader* const br, int n_bits) {
+    BrotliBitReader* const br, uint32_t n_bits) {
   BrotliFillBitWindow(br, n_bits);
-  return (uint32_t)(br->val_ >> br->bit_pos_) & BitMask(n_bits);
+  return (uint32_t)BrotliGetBitsUnmasked(br) & BitMask(n_bits);
+}
+
+/* Tries to peek the specified amount of bits. Returns 0, if there is not
+   enough input. */
+static BROTLI_INLINE int BrotliSafeGetBits(
+  BrotliBitReader* const br, uint32_t n_bits, uint32_t* val) {
+  while (BrotliGetAvailableBits(br) < n_bits) {
+    if (!BrotliPullByte(br)) {
+      return 0;
+    }
+  }
+  *val = (uint32_t)BrotliGetBitsUnmasked(br) & BitMask(n_bits);
+  return 1;
 }
 
 /* Advances the bit pos by n_bits. */
 static BROTLI_INLINE void BrotliDropBits(
-    BrotliBitReader* const br, int n_bits) {
-  br->bit_pos_ += (uint32_t)n_bits;
+    BrotliBitReader* const br, uint32_t n_bits) {
+  br->bit_pos_ += n_bits;
+}
+
+static BROTLI_INLINE void BrotliBitReaderUnload(BrotliBitReader* br) {
+  uint32_t unused_bytes = BrotliGetAvailableBits(br) >> 3;
+  uint32_t unused_bits = unused_bytes << 3;
+  br->avail_in += unused_bytes;
+  br->next_in -= unused_bytes;
+  if (unused_bits == sizeof(br->val_) << 3) {
+    br->val_ = 0;
+  } else {
+    br->val_ <<= unused_bits;
+  }
+  br->bit_pos_ += unused_bits;
 }
 
 /* Reads the specified number of bits from br and advances the bit pos.
    Precondition: accumulator MUST contain at least n_bits. */
 static BROTLI_INLINE void BrotliTakeBits(
-  BrotliBitReader* const br, int n_bits, uint32_t* val) {
-  *val = (uint32_t)(br->val_ >> br->bit_pos_) & BitMask(n_bits);
+  BrotliBitReader* const br, uint32_t n_bits, uint32_t* val) {
+  *val = (uint32_t)BrotliGetBitsUnmasked(br) & BitMask(n_bits);
 #ifdef BROTLI_DECODE_DEBUG
   printf("[BrotliReadBits]  %d %d %d val: %6x\n",
          (int)br->avail_in, (int)br->bit_pos_, n_bits, (int)*val);
 #endif
-  br->bit_pos_ += (uint32_t)n_bits;
+  BrotliDropBits(br, n_bits);
 }
 
 /* Reads the specified number of bits from br and advances the bit pos.
    Assumes that there is enough input to perform BrotliFillBitWindow. */
 static BROTLI_INLINE uint32_t BrotliReadBits(
-    BrotliBitReader* const br, int n_bits) {
-  if (BROTLI_64_BITS_LITTLE_ENDIAN || (n_bits <= 16)) {
+    BrotliBitReader* const br, uint32_t n_bits) {
+  if (BROTLI_64_BITS || (n_bits <= 16)) {
     uint32_t val;
     BrotliFillBitWindow(br, n_bits);
     BrotliTakeBits(br, n_bits, &val);
     return val;
   } else {
     uint32_t low_val;
     uint32_t high_val;
     BrotliFillBitWindow(br, 16);
     BrotliTakeBits(br, 16, &low_val);
     BrotliFillBitWindow(br, 8);
     BrotliTakeBits(br, n_bits - 16, &high_val);
     return low_val | (high_val << 16);
   }
 }
 
 /* Tries to read the specified amount of bits. Returns 0, if there is not
-   enough input. */
+   enough input. n_bits MUST be positive. */
 static BROTLI_INLINE int BrotliSafeReadBits(
-  BrotliBitReader* const br, int n_bits, uint32_t* val) {
-  while (br->bit_pos_ + (uint32_t)n_bits > (sizeof(br->val_) << 3)) {
-    if (br->avail_in == 0) {
+  BrotliBitReader* const br, uint32_t n_bits, uint32_t* val) {
+  while (BrotliGetAvailableBits(br) < n_bits) {
+    if (!BrotliPullByte(br)) {
       return 0;
     }
-    BrotliPullByte(br);
   }
   BrotliTakeBits(br, n_bits, val);
   return 1;
 }
 
 /* Advances the bit reader position to the next byte boundary and verifies
    that any skipped bits are set to zero. */
 static BROTLI_INLINE int BrotliJumpToByteBoundary(BrotliBitReader* br) {
-  int pad_bits_count = (64 - (int)br->bit_pos_) & 0x7;
+  uint32_t pad_bits_count = BrotliGetAvailableBits(br) & 0x7;
   uint32_t pad_bits = 0;
   if (pad_bits_count != 0) {
     BrotliTakeBits(br, pad_bits_count, &pad_bits);
   }
   return pad_bits == 0;
 }
 
 /* Peeks a byte at specified offset.
    Precondition: bit reader is parked to a byte boundary.
    Returns -1 if operation is not feasible. */
-static BROTLI_INLINE int BrotliPeekByte(BrotliBitReader* br, int offset) {
-  int bytes_left = (int)(sizeof(br->val_) - (br->bit_pos_ >> 3));
-  if (br->bit_pos_ & 7) {
-    return -1;
-  }
+static BROTLI_INLINE int BrotliPeekByte(BrotliBitReader* br, size_t offset) {
+  uint32_t available_bits = BrotliGetAvailableBits(br);
+  size_t bytes_left = available_bits >> 3;
+  BROTLI_DCHECK((available_bits & 7) == 0);
   if (offset < bytes_left) {
-    return (br->val_ >> (br->bit_pos_ + (unsigned)(offset << 3))) & 0xFF;
+    return (BrotliGetBitsUnmasked(br) >> (unsigned)(offset << 3)) & 0xFF;
   }
   offset -= bytes_left;
-  if (offset < (long)br->avail_in) {
+  if (offset < br->avail_in) {
     return br->next_in[offset];
   }
   return -1;
 }
 
 /* Copies remaining input bytes stored in the bit reader to the output. Value
    num may not be larger than BrotliGetRemainingBytes. The bit reader must be
    warmed up again after this. */
 static BROTLI_INLINE void BrotliCopyBytes(uint8_t* dest,
                                           BrotliBitReader* br, size_t num) {
-  while (br->bit_pos_ + 8 <= (BROTLI_64_BITS_LITTLE_ENDIAN ? 64 : 32)
-      && num > 0) {
-    *dest = (uint8_t)(br->val_ >> br->bit_pos_);
-    br->bit_pos_ += 8;
+  while (BrotliGetAvailableBits(br) >= 8 && num > 0) {
+    *dest = (uint8_t)BrotliGetBitsUnmasked(br);
+    BrotliDropBits(br, 8);
     ++dest;
     --num;
   }
   memcpy(dest, br->next_in, num);
-  br->avail_in -= (uint32_t)num;
+  br->avail_in -= num;
   br->next_in += num;
 }
 
-/* Checks that bit reader hasn't read after the end of input.
-   Returns 0 if bit reader has used implicit zeroes after the end of input. */
-static BROTLI_INLINE int BrotliIsBitReaderOK(BrotliBitReader* br) {
-  size_t remaining_bytes =
-      br->avail_in + sizeof(br->val_) - (br->bit_pos_ >> 3);
-  return !br->eos_ || (remaining_bytes >= BROTLI_IMPLICIT_ZEROES);
-}
-
-#undef BROTLI_IMPLICIT_ZEROES
-#undef BROTLI_IBUF_SIZE
-#undef BROTLI_IBUF_MASK
-
 #if defined(__cplusplus) || defined(c_plusplus)
 }    /* extern "C" */
 #endif
 
 #endif  /* BROTLI_DEC_BIT_READER_H_ */
--- a/modules/brotli/dec/context.h
+++ b/modules/brotli/dec/context.h
@@ -1,21 +1,12 @@
 /* Copyright 2013 Google Inc. All Rights Reserved.
 
-   Licensed under the Apache License, Version 2.0 (the "License");
-   you may not use this file except in compliance with the License.
-   You may obtain a copy of the License at
-
-   http://www.apache.org/licenses/LICENSE-2.0
-
-   Unless required by applicable law or agreed to in writing, software
-   distributed under the License is distributed on an "AS IS" BASIS,
-   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-   See the License for the specific language governing permissions and
-   limitations under the License.
+   Distributed under MIT license.
+   See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
 */
 
 /* Lookup table to map the previous two bytes to a context id.
 
    There are four different context modeling modes defined here:
      CONTEXT_LSB6: context id is the least significant 6 bits of the last byte,
      CONTEXT_MSB6: context id is the most significant 6 bits of the last byte,
      CONTEXT_UTF8: second-order context model tuned for UTF8-encoded text,
--- a/modules/brotli/dec/decode.c
+++ b/modules/brotli/dec/decode.c
@@ -1,65 +1,72 @@
 /* Copyright 2013 Google Inc. All Rights Reserved.
 
-   Licensed under the Apache License, Version 2.0 (the "License");
-   you may not use this file except in compliance with the License.
-   You may obtain a copy of the License at
-
-   http://www.apache.org/licenses/LICENSE-2.0
-
-   Unless required by applicable law or agreed to in writing, software
-   distributed under the License is distributed on an "AS IS" BASIS,
-   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-   See the License for the specific language governing permissions and
-   limitations under the License.
+   Distributed under MIT license.
+   See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
 */
 
-#include <stdlib.h>
-#include <stdio.h>
-#include <string.h>
-#include "./bit_reader.h"
-#include "./context.h"
 #include "./decode.h"
-#include "./dictionary.h"
-#include "./port.h"
-#include "./transform.h"
-#include "./huffman.h"
-#include "./prefix.h"
 
 #ifdef __ARM_NEON__
 #include <arm_neon.h>
 #endif
 
+#include <stdio.h>  /* printf (debug output) */
+#include <stdlib.h>  /* free, malloc */
+#include <string.h>  /* memcpy, memset */
+
+#include "./bit_reader.h"
+#include "./context.h"
+#include "./dictionary.h"
+#include "./huffman.h"
+#include "./port.h"
+#include "./prefix.h"
+#include "./transform.h"
+
 #if defined(__cplusplus) || defined(c_plusplus)
 extern "C" {
 #endif
 
+/* BROTLI_FAILURE macro unwraps to BROTLI_RESULT_ERROR in non-debug build. */
+/* In debug build it dumps file name, line and pretty function name. */
+#if defined(_MSC_VER) || !defined(BROTLI_DEBUG)
+#define BROTLI_FAILURE() BROTLI_RESULT_ERROR
+#else
+#define BROTLI_FAILURE() \
+    BrotliFailure(__FILE__, __LINE__, __PRETTY_FUNCTION__)
+static inline BrotliResult BrotliFailure(const char *f, int l, const char *fn) {
+  fprintf(stderr, "ERROR at %s:%d (%s)\n", f, l, fn);
+  fflush(stderr);
+  return BROTLI_RESULT_ERROR;
+}
+#endif
+
 #ifdef BROTLI_DECODE_DEBUG
 #define BROTLI_LOG_UINT(name)                                    \
   printf("[%s] %s = %lu\n", __func__, #name, (unsigned long)(name))
 #define BROTLI_LOG_ARRAY_INDEX(array_name, idx)                  \
   printf("[%s] %s[%lu] = %lu\n", __func__, #array_name, \
          (unsigned long)(idx), (unsigned long)array_name[idx])
 #define BROTLI_LOG(x) printf x
 #else
 #define BROTLI_LOG_UINT(name)
 #define BROTLI_LOG_ARRAY_INDEX(array_name, idx)
 #define BROTLI_LOG(x)
 #endif
 
-static const uint8_t kDefaultCodeLength = 8;
-static const uint8_t kCodeLengthRepeatCode = 16;
-static const int kNumLiteralCodes = 256;
-static const int kNumInsertAndCopyCodes = 704;
-static const int kNumBlockLengthCodes = 26;
+static const uint32_t kDefaultCodeLength = 8;
+static const uint32_t kCodeLengthRepeatCode = 16;
+static const uint32_t kNumLiteralCodes = 256;
+static const uint32_t kNumInsertAndCopyCodes = 704;
+static const uint32_t kNumBlockLengthCodes = 26;
 static const int kLiteralContextBits = 6;
 static const int kDistanceContextBits = 2;
 
-#define HUFFMAN_TABLE_BITS      8
+#define HUFFMAN_TABLE_BITS      8U
 #define HUFFMAN_TABLE_MASK      0xff
 
 #define CODE_LENGTH_CODES 18
 static const uint8_t kCodeLengthCodeOrder[CODE_LENGTH_CODES] = {
   1, 2, 3, 4, 0, 5, 17, 6, 16, 7, 8, 9, 10, 11, 12, 13, 14, 15,
 };
 
 /* Static prefix code for the complex code length code lengths. */
@@ -68,16 +75,44 @@ static const uint8_t kCodeLengthPrefixLe
 };
 
 static const uint8_t kCodeLengthPrefixValue[16] = {
   0, 4, 3, 2, 0, 4, 3, 1, 0, 4, 3, 2, 0, 4, 3, 5,
 };
 
 #define NUM_DISTANCE_SHORT_CODES 16
 
+BrotliState* BrotliCreateState(
+    brotli_alloc_func alloc_func, brotli_free_func free_func, void* opaque) {
+  BrotliState* state = 0;
+  if (!alloc_func && !free_func) {
+    state = (BrotliState*)malloc(sizeof(BrotliState));
+  } else if (alloc_func && free_func) {
+    state = (BrotliState*)alloc_func(opaque, sizeof(BrotliState));
+  }
+  if (state == 0) {
+    (void)BROTLI_FAILURE();
+    return 0;
+  }
+  BrotliStateInitWithCustomAllocators(state, alloc_func, free_func, opaque);
+  return state;
+}
+
+/* Deinitializes and frees BrotliState instance. */
+void BrotliDestroyState(BrotliState* state) {
+  if (!state) {
+    return;
+  } else {
+    brotli_free_func free_func = state->free_func;
+    void* opaque = state->memory_manager_opaque;
+    BrotliStateCleanup(state);
+    free_func(opaque, state);
+  }
+}
+
 /* Decodes a number in the range [9..24], by reading 1 - 7 bits.
    Precondition: bit-reader accumulator has at least 7 bits. */
 static uint32_t DecodeWindowBits(BrotliBitReader* br) {
   uint32_t n;
   BrotliTakeBits(br, 1, &n);
   if (n == 0) {
     return 16;
   }
@@ -87,40 +122,30 @@ static uint32_t DecodeWindowBits(BrotliB
   }
   BrotliTakeBits(br, 3, &n);
   if (n != 0) {
     return 8 + n;
   }
   return 17;
 }
 
-static BROTLI_INLINE BROTLI_NO_ASAN void memmove16(
+static BROTLI_INLINE void memmove16(
     uint8_t* dst, uint8_t* src) {
-#if BROTLI_SAFE_MEMMOVE
-  /* For x86 this compiles to the same binary as signle memcpy.
-     On ARM memcpy is not inlined, so it works slower.
-     This implementation makes decompression 1% slower than regular one,
-     and 2% slower than NEON implementation.
-   */
+#if defined(__ARM_NEON__)
+  vst1q_u8(dst, vld1q_u8(src));
+#else
   uint32_t buffer[4];
   memcpy(buffer, src, 16);
   memcpy(dst, buffer, 16);
-#elif defined(__ARM_NEON__)
-  vst1q_u8(dst, vld1q_u8(src));
-#else
-  /* memcpy is unsafe for overlapping regions and ASAN detects this.
-     But, because of optimizations, it works exactly as memmove:
-     copies data to registers first, and then stores them to dst. */
-  memcpy(dst, src, 16);
 #endif
 }
 
 /* Decodes a number in the range [0..255], by reading 1 - 11 bits. */
 static BROTLI_NOINLINE BrotliResult DecodeVarLenUint8(BrotliState* s,
-    BrotliBitReader* br, int* value) {
+    BrotliBitReader* br, uint32_t* value) {
   uint32_t bits;
   switch (s->substate_decode_uint8) {
     case BROTLI_STATE_DECODE_UINT8_NONE:
       if (PREDICT_FALSE(!BrotliSafeReadBits(br, 1, &bits))) {
         return BROTLI_RESULT_NEEDS_MORE_INPUT;
       }
       if (bits == 0) {
         *value = 0;
@@ -134,25 +159,25 @@ static BROTLI_NOINLINE BrotliResult Deco
         return BROTLI_RESULT_NEEDS_MORE_INPUT;
       }
       if (bits == 0) {
         *value = 1;
         s->substate_decode_uint8 = BROTLI_STATE_DECODE_UINT8_NONE;
         return BROTLI_RESULT_SUCCESS;
       }
       /* Use output value as a temporary storage. It MUST be persisted. */
-      *value = (int)bits;
+      *value = bits;
       /* No break, transit to the next state. */
 
     case BROTLI_STATE_DECODE_UINT8_LONG:
       if (PREDICT_FALSE(!BrotliSafeReadBits(br, *value, &bits))) {
         s->substate_decode_uint8 = BROTLI_STATE_DECODE_UINT8_LONG;
         return BROTLI_RESULT_NEEDS_MORE_INPUT;
       }
-      *value = (1 << *value) + (int)bits;
+      *value = (1U << *value) + bits;
       s->substate_decode_uint8 = BROTLI_STATE_DECODE_UINT8_NONE;
       return BROTLI_RESULT_SUCCESS;
 
     default:
       return BROTLI_FAILURE();
   }
 }
 
@@ -215,17 +240,17 @@ static BrotliResult BROTLI_NOINLINE Deco
           }
           s->meta_block_remaining_len |= (int)(bits << (i * 4));
         }
         s->substate_metablock_header =
             BROTLI_STATE_METABLOCK_HEADER_UNCOMPRESSED;
         /* No break, transit to the next state. */
 
       case BROTLI_STATE_METABLOCK_HEADER_UNCOMPRESSED:
-        if (!s->is_last_metablock && !s->is_metadata) {
+        if (!s->is_last_metablock) {
           if (!BrotliSafeReadBits(br, 1, &bits)) {
             return BROTLI_RESULT_NEEDS_MORE_INPUT;
           }
           s->is_uncompressed = (uint8_t)bits;
         }
         ++s->meta_block_remaining_len;
         s->substate_metablock_header = BROTLI_STATE_METABLOCK_HEADER_NONE;
         return BROTLI_RESULT_SUCCESS;
@@ -259,296 +284,562 @@ static BrotliResult BROTLI_NOINLINE Deco
             s->loop_counter = i;
             return BROTLI_RESULT_NEEDS_MORE_INPUT;
           }
           if (i + 1 == s->size_nibbles && s->size_nibbles > 1 && bits == 0) {
             return BROTLI_FAILURE();
           }
           s->meta_block_remaining_len |= (int)(bits << (i * 8));
         }
-        s->substate_metablock_header =
-            BROTLI_STATE_METABLOCK_HEADER_UNCOMPRESSED;
-        break;
+        ++s->meta_block_remaining_len;
+        s->substate_metablock_header = BROTLI_STATE_METABLOCK_HEADER_NONE;
+        return BROTLI_RESULT_SUCCESS;
 
       default:
         return BROTLI_FAILURE();
     }
   }
 }
 
-/* Decodes the next Huffman code from bit-stream. Reads 0 - 15 bits. */
-static BROTLI_INLINE int ReadSymbol(const HuffmanCode* table,
-                                    BrotliBitReader* br) {
-  /* Read the bits for two reads at once. */
-  uint32_t val = BrotliGetBitsUnmasked(br, 15);
-  table += val & HUFFMAN_TABLE_MASK;
+/* Decodes the Huffman code.
+   This method doesn't read data from the bit reader, BUT drops the amount of
+   bits that correspond to the decoded symbol.
+   bits MUST contain at least 15 (BROTLI_HUFFMAN_MAX_CODE_LENGTH) valid bits. */
+static BROTLI_INLINE uint32_t DecodeSymbol(uint32_t bits,
+                                           const HuffmanCode* table,
+                                           BrotliBitReader* br) {
+  table += bits & HUFFMAN_TABLE_MASK;
   if (table->bits > HUFFMAN_TABLE_BITS) {
-    int nbits = table->bits - HUFFMAN_TABLE_BITS;
+    uint32_t nbits = table->bits - HUFFMAN_TABLE_BITS;
     BrotliDropBits(br, HUFFMAN_TABLE_BITS);
     table += table->value;
-    table += (int)(val >> HUFFMAN_TABLE_BITS) & (int)BitMask(nbits);
+    table += (bits >> HUFFMAN_TABLE_BITS) & BitMask(nbits);
   }
   BrotliDropBits(br, table->bits);
   return table->value;
 }
 
-/* Makes a look-up in first level Huffman table. Peeks 8 bits. */
-static BROTLI_INLINE void PreloadSymbol(const HuffmanCode* table,
+/* Reads and decodes the next Huffman code from bit-stream.
+   This method peeks 16 bits of input and drops 0 - 15 of them. */
+static BROTLI_INLINE uint32_t ReadSymbol(const HuffmanCode* table,
+                                         BrotliBitReader* br) {
+  return DecodeSymbol(BrotliGet16BitsUnmasked(br), table, br);
+}
+
+/* Same as DecodeSymbol, but it is known that fewer than 15 bits of
+   input are currently available. */
+static BROTLI_NOINLINE int SafeDecodeSymbol(const HuffmanCode* table,
+                                            BrotliBitReader* br,
+                                            uint32_t* result) {
+  uint32_t val;
+  uint32_t available_bits = BrotliGetAvailableBits(br);
+  if (available_bits == 0) {
+    if (table->bits == 0) {
+      *result = table->value;
+      return 1;
+    }
+    return 0; /* No valid bits at all. */
+  }
+  val = (uint32_t)BrotliGetBitsUnmasked(br);
+  table += val & HUFFMAN_TABLE_MASK;
+  if (table->bits <= HUFFMAN_TABLE_BITS) {
+    if (table->bits <= available_bits) {
+      BrotliDropBits(br, table->bits);
+      *result = table->value;
+      return 1;
+    } else {
+      return 0; /* Not enough bits for the first level. */
+    }
+  }
+  if (available_bits <= HUFFMAN_TABLE_BITS) {
+    return 0; /* Not enough bits to move to the second level. */
+  }
+
+  /* Speculatively drop HUFFMAN_TABLE_BITS. */
+  val = (val & BitMask(table->bits)) >> HUFFMAN_TABLE_BITS;
+  available_bits -= HUFFMAN_TABLE_BITS;
+  table += table->value + val;
+  if (available_bits < table->bits) {
+    return 0; /* Not enough bits for the second level. */
+  }
+
+  BrotliDropBits(br, HUFFMAN_TABLE_BITS + table->bits);
+  *result = table->value;
+  return 1;
+}
+
+static BROTLI_INLINE int SafeReadSymbol(const HuffmanCode* table,
                                         BrotliBitReader* br,
-                                        unsigned* bits,
-                                        unsigned* value) {
+                                        uint32_t* result) {
+  uint32_t val;
+  if (PREDICT_TRUE(BrotliSafeGetBits(br, 15, &val))) {
+    *result = DecodeSymbol(val, table, br);
+    return 1;
+  }
+  return SafeDecodeSymbol(table, br, result);
+}
+
+
+/* Makes a look-up in first level Huffman table. Peeks 8 bits. */
+static BROTLI_INLINE void PreloadSymbol(int safe,
+                                        const HuffmanCode* table,
+                                        BrotliBitReader* br,
+                                        uint32_t* bits,
+                                        uint32_t* value) {
+  if (safe) {
+    return;
+  }
   table += BrotliGetBits(br, HUFFMAN_TABLE_BITS);
   *bits = table->bits;
   *value = table->value;
 }
 
 /* Decodes the next Huffman code using data prepared by PreloadSymbol.
    Reads 0 - 15 bits. Also peeks 8 following bits. */
-static BROTLI_INLINE unsigned ReadPreloadedSymbol(const HuffmanCode* table,
+static BROTLI_INLINE uint32_t ReadPreloadedSymbol(const HuffmanCode* table,
                                                   BrotliBitReader* br,
-                                                  unsigned* bits,
-                                                  unsigned* value) {
-  unsigned result = *value;
+                                                  uint32_t* bits,
+                                                  uint32_t* value) {
+  uint32_t result = *value;
   if (PREDICT_FALSE(*bits > HUFFMAN_TABLE_BITS)) {
-    uint32_t val = BrotliGetBitsUnmasked(br, 15);
+    uint32_t val = BrotliGet16BitsUnmasked(br);
     const HuffmanCode* ext = table + (val & HUFFMAN_TABLE_MASK) + *value;
-    int mask = (int)BitMask((int)(*bits - HUFFMAN_TABLE_BITS));
+    uint32_t mask = BitMask((*bits - HUFFMAN_TABLE_BITS));
     BrotliDropBits(br, HUFFMAN_TABLE_BITS);
-    ext += (int)(val >> HUFFMAN_TABLE_BITS) & mask;
+    ext += (val >> HUFFMAN_TABLE_BITS) & mask;
     BrotliDropBits(br, ext->bits);
     result = ext->value;
   } else {
-    BrotliDropBits(br, (int)*bits);
+    BrotliDropBits(br, *bits);
   }
-  PreloadSymbol(table, br, bits, value);
+  PreloadSymbol(0, table, br, bits, value);
   return result;
 }
 
-static BROTLI_INLINE int Log2Floor(int x) {
-  int result = 0;
+static BROTLI_INLINE uint32_t Log2Floor(uint32_t x) {
+  uint32_t result = 0;
   while (x) {
     x >>= 1;
     ++result;
   }
   return result;
 }
 
+/* Reads (s->symbol + 1) symbols.
+   In total, 1..4 symbols are read, 1..10 bits each.
+   The list of symbols MUST NOT contain duplicates.
+ */
+static BrotliResult ReadSimpleHuffmanSymbols(uint32_t alphabet_size,
+                                             BrotliState* s) {
+  /* max_bits == 1..10; symbol == 0..3; 1..40 bits will be read. */
+  BrotliBitReader* br = &s->br;
+  uint32_t max_bits = Log2Floor(alphabet_size - 1);
+  uint32_t i = s->sub_loop_counter;
+  uint32_t num_symbols = s->symbol;
+  while (i <= num_symbols) {
+    uint32_t v;
+    if (PREDICT_FALSE(!BrotliSafeReadBits(br, max_bits, &v))) {
+      s->sub_loop_counter = i;
+      s->substate_huffman = BROTLI_STATE_HUFFMAN_SIMPLE_READ;
+      return BROTLI_RESULT_NEEDS_MORE_INPUT;
+    }
+    if (v >= alphabet_size) {
+      return BROTLI_FAILURE();
+    }
+    s->symbols_lists_array[i] = (uint16_t)v;
+    BROTLI_LOG_UINT(s->symbols_lists_array[i]);
+    ++i;
+  }
+
+  for (i = 0; i < num_symbols; ++i) {
+    uint32_t k = i + 1;
+    for (; k <= num_symbols; ++k) {
+      if (s->symbols_lists_array[i] == s->symbols_lists_array[k]) {
+        return BROTLI_FAILURE();
+      }
+    }
+  }
+
+  return BROTLI_RESULT_SUCCESS;
+}
+
+/* Process single decoded symbol code length:
+    A) reset the repeat variable
+    B) remember code length (if it is not 0)
+    C) extend corresponding index-chain
+    D) reduce the huffman space
+    E) update the histogram
+ */
+static BROTLI_INLINE void ProcessSingleCodeLength(uint32_t code_len,
+    uint32_t* symbol, uint32_t* repeat, uint32_t* space,
+    uint32_t* prev_code_len, uint16_t* symbol_lists,
+    uint16_t* code_length_histo, int* next_symbol) {
+  *repeat = 0;
+  if (code_len != 0) { /* code_len == 1..15 */
+    symbol_lists[next_symbol[code_len]] = (uint16_t)(*symbol);
+    next_symbol[code_len] = (int)(*symbol);
+    *prev_code_len = code_len;
+    *space -= 32768U >> code_len;
+    code_length_histo[code_len]++;
+    BROTLI_LOG(("[ReadHuffmanCode] code_length[%d] = %d\n",
+                *symbol, code_len));
+  }
+  (*symbol)++;
+}
+
+/* Process repeated symbol code length.
+    A) Check if it is the extension of previous repeat sequence; if the decoded
+       value is not kCodeLengthRepeatCode, then it is a new symbol-skip
+    B) Update repeat variable
+    C) Check if operation is feasible (fits alphabet)
+    D) For each symbol do the same operations as in ProcessSingleCodeLength
+
+   PRECONDITION: code_len == kCodeLengthRepeatCode or kCodeLengthRepeatCode + 1
+ */
+static BROTLI_INLINE void ProcessRepeatedCodeLength(uint32_t code_len,
+    uint32_t repeat_delta, uint32_t alphabet_size, uint32_t* symbol,
+    uint32_t* repeat, uint32_t* space, uint32_t* prev_code_len,
+    uint32_t* repeat_code_len, uint16_t* symbol_lists,
+    uint16_t* code_length_histo, int* next_symbol) {
+  uint32_t old_repeat;
+  uint32_t new_len = 0;
+  if (code_len == kCodeLengthRepeatCode) {
+    new_len = *prev_code_len;
+  }
+  if (*repeat_code_len != new_len) {
+    *repeat = 0;
+    *repeat_code_len = new_len;
+  }
+  old_repeat = *repeat;
+  if (*repeat > 0) {
+    *repeat -= 2;
+    *repeat <<= code_len - 14U;
+  }
+  *repeat += repeat_delta + 3U;
+  repeat_delta = *repeat - old_repeat;
+  if (*symbol + repeat_delta > alphabet_size) {
+    (void)BROTLI_FAILURE();
+    *symbol = alphabet_size;
+    *space = 0xFFFFF;
+    return;
+  }
+  BROTLI_LOG(("[ReadHuffmanCode] code_length[%d..%d] = %d\n",
+              *symbol, *symbol + repeat_delta - 1, *repeat_code_len));
+  if (*repeat_code_len != 0) {
+    unsigned last = *symbol + repeat_delta;
+    int next = next_symbol[*repeat_code_len];
+    do {
+      symbol_lists[next] = (uint16_t)*symbol;
+      next = (int)*symbol;
+    } while (++(*symbol) != last);
+    next_symbol[*repeat_code_len] = next;
+    *space -= repeat_delta << (15 - *repeat_code_len);
+    code_length_histo[*repeat_code_len] = (uint16_t)
+        (code_length_histo[*repeat_code_len] + repeat_delta);
+  } else {
+    *symbol += repeat_delta;
+  }
+}
+
+/* Reads and decodes symbol codelengths. */
+static BrotliResult ReadSymbolCodeLengths(
+    uint32_t alphabet_size, BrotliState* s) {
+  BrotliBitReader* br = &s->br;
+  uint32_t symbol = s->symbol;
+  uint32_t repeat = s->repeat;
+  uint32_t space = s->space;
+  uint32_t prev_code_len = s->prev_code_len;
+  uint32_t repeat_code_len = s->repeat_code_len;
+  uint16_t* symbol_lists = s->symbol_lists;
+  uint16_t* code_length_histo = s->code_length_histo;
+  int* next_symbol = s->next_symbol;
+  if (!BrotliWarmupBitReader(br)) {
+    return BROTLI_RESULT_NEEDS_MORE_INPUT;
+  }
+  while (symbol < alphabet_size && space > 0) {
+    const HuffmanCode* p = s->table;
+    uint32_t code_len;
+    if (!BrotliCheckInputAmount(br, BROTLI_SHORT_FILL_BIT_WINDOW_READ)) {
+      s->symbol = symbol;
+      s->repeat = repeat;
+      s->prev_code_len = prev_code_len;
+      s->repeat_code_len = repeat_code_len;
+      s->space = space;
+      return BROTLI_RESULT_NEEDS_MORE_INPUT;
+    }
+    BrotliFillBitWindow16(br);
+    p += BrotliGetBitsUnmasked(br) &
+        BitMask(BROTLI_HUFFMAN_MAX_CODE_LENGTH_CODE_LENGTH);
+    BrotliDropBits(br, p->bits); /* Use 1..5 bits */
+    code_len = p->value; /* code_len == 0..17 */
+    if (code_len < kCodeLengthRepeatCode) {
+      ProcessSingleCodeLength(code_len, &symbol, &repeat, &space,
+          &prev_code_len, symbol_lists, code_length_histo, next_symbol);
+    } else { /* code_len == 16..17, extra_bits == 2..3 */
+      uint32_t repeat_delta =
+          (uint32_t)BrotliGetBitsUnmasked(br) & BitMask(code_len - 14U);
+      BrotliDropBits(br, code_len - 14U);
+      ProcessRepeatedCodeLength(code_len, repeat_delta, alphabet_size,
+          &symbol, &repeat, &space, &prev_code_len, &repeat_code_len,
+          symbol_lists, code_length_histo, next_symbol);
+    }
+  }
+  s->space = space;
+  return BROTLI_RESULT_SUCCESS;
+}
+
+static BrotliResult SafeReadSymbolCodeLengths(
+    uint32_t alphabet_size, BrotliState* s) {
+  BrotliBitReader* br = &s->br;
+  while (s->symbol < alphabet_size && s->space > 0) {
+    const HuffmanCode* p = s->table;
+    uint32_t code_len;
+    uint32_t bits = 0;
+    uint32_t available_bits = BrotliGetAvailableBits(br);
+    if (available_bits != 0) {
+      bits = (uint32_t)BrotliGetBitsUnmasked(br);
+    }
+    p += bits & BitMask(BROTLI_HUFFMAN_MAX_CODE_LENGTH_CODE_LENGTH);
+    if (p->bits > available_bits) goto pullMoreInput;
+    code_len = p->value; /* code_len == 0..17 */
+    if (code_len < kCodeLengthRepeatCode) {
+      BrotliDropBits(br, p->bits);
+      ProcessSingleCodeLength(code_len, &s->symbol, &s->repeat, &s->space,
+          &s->prev_code_len, s->symbol_lists, s->code_length_histo,
+          s->next_symbol);
+    } else { /* code_len == 16..17, extra_bits == 2..3 */
+      uint32_t extra_bits = code_len - 14U;
+      uint32_t repeat_delta = (bits >> p->bits) & BitMask(extra_bits);
+      if (available_bits < p->bits + extra_bits) goto pullMoreInput;
+      BrotliDropBits(br, p->bits + extra_bits);
+      ProcessRepeatedCodeLength(code_len, repeat_delta, alphabet_size,
+          &s->symbol, &s->repeat, &s->space, &s->prev_code_len,
+          &s->repeat_code_len, s->symbol_lists, s->code_length_histo,
+          s->next_symbol);
+    }
+    continue;
+
+pullMoreInput:
+    if (!BrotliPullByte(br)) {
+      return BROTLI_RESULT_NEEDS_MORE_INPUT;
+    }
+  }
+  return BROTLI_RESULT_SUCCESS;
+}
+
+/* Reads and decodes 15..18 codes using static prefix code.
+   Each code is 2..4 bits long. In total 30..72 bits are used. */
+static BrotliResult ReadCodeLengthCodeLengths(BrotliState* s) {
+  BrotliBitReader* br = &s->br;
+  uint32_t num_codes = s->repeat;
+  unsigned space = s->space;
+  uint32_t i = s->sub_loop_counter;
+  for (; i < CODE_LENGTH_CODES; ++i) {
+    const uint8_t code_len_idx = kCodeLengthCodeOrder[i];
+    uint32_t ix;
+    uint32_t v;
+    if (PREDICT_FALSE(!BrotliSafeGetBits(br, 4, &ix))) {
+      uint32_t available_bits = BrotliGetAvailableBits(br);
+      if (available_bits != 0) {
+        ix = BrotliGetBitsUnmasked(br) & 0xF;
+      } else {
+        ix = 0;
+      }
+      if (kCodeLengthPrefixLength[ix] > available_bits) {
+        s->sub_loop_counter = i;
+        s->repeat = num_codes;
+        s->space = space;
+        s->substate_huffman = BROTLI_STATE_HUFFMAN_COMPLEX;
+        return BROTLI_RESULT_NEEDS_MORE_INPUT;
+      }
+    }
+    v = kCodeLengthPrefixValue[ix];
+    BrotliDropBits(br, kCodeLengthPrefixLength[ix]);
+    s->code_length_code_lengths[code_len_idx] = (uint8_t)v;
+    BROTLI_LOG_ARRAY_INDEX(s->code_length_code_lengths, code_len_idx);
+    if (v != 0) {
+      space = space - (32U >> v);
+      ++num_codes;
+      ++s->code_length_histo[v];
+      if (space - 1U >= 32U) {
+        /* space is 0 or wrapped around */
+        break;
+      }
+    }
+  }
+  if (!(num_codes == 1 || space == 0)) {
+    return BROTLI_FAILURE();
+  }
+  return BROTLI_RESULT_SUCCESS;
+}
+
 /* Decodes the Huffman tables.
    There are 2 scenarios:
     A) Huffman code contains only few symbols (1..4). Those symbols are read
        directly; their code lengths are defined by the number of symbols.
        For this scenario 4 - 45 bits will be read.
 
     B) 2-phase decoding:
     B.1) Small Huffman table is decoded; it is specified with code lengths
          encoded with predefined entropy code. 32 - 74 bits are used.
     B.2) Decoded table is used to decode code lengths of symbols in resulting
          Huffman table. In worst case 3520 bits are read.
 */
-static BrotliResult ReadHuffmanCode(int alphabet_size,
+static BrotliResult ReadHuffmanCode(uint32_t alphabet_size,
                                     HuffmanCode* table,
-                                    int* opt_table_size,
+                                    uint32_t* opt_table_size,
                                     BrotliState* s) {
   BrotliBitReader* br = &s->br;
-  int i;
   /* Unnecessary masking, but might be good for safety. */
   alphabet_size &= 0x3ff;
   /* State machine */
   switch (s->substate_huffman) {
     case BROTLI_STATE_HUFFMAN_NONE:
-      if (!BrotliCheckInputAmount(br, 32)) {
+      if (!BrotliSafeReadBits(br, 2, &s->sub_loop_counter)) {
         return BROTLI_RESULT_NEEDS_MORE_INPUT;
       }
-      i = (int)BrotliReadBits(br, 2);
+      BROTLI_LOG_UINT(s->sub_loop_counter);
       /* The value is used as follows:
          1 for simple code;
          0 for no skipping, 2 skips 2 code lengths, 3 skips 3 code lengths */
-      BROTLI_LOG_UINT((unsigned)i);
-      if (i == 1) {
-        /* Read symbols, codes & code lengths directly. */
-        int max_bits = Log2Floor(alphabet_size - 1);
-        uint32_t num_symbols = BrotliReadBits(br, 2);
-        for (i = 0; i < 4; ++i) {
-          s->symbols_lists_array[i] = 0;
-        }
-        i = 0;
-        /* max_bits == 0..10; symbol == 0..3; 0..40 bits will be read. */
-        do {
-          uint32_t v = BrotliReadBits(br, max_bits);
-          if (v >= alphabet_size) {
-            return BROTLI_FAILURE();
-          }
-          s->symbols_lists_array[i] = (uint16_t)v;
-          BROTLI_LOG_UINT(s->symbols_lists_array[i]);
-        } while (++i <= num_symbols);
-        for (i = 0; i < num_symbols; ++i) {
-          int k = i + 1;
-          for (; k <= num_symbols; ++k) {
-            if (s->symbols_lists_array[i] == s->symbols_lists_array[k]) {
-              return BROTLI_FAILURE();
-            }
-          }
-        }
-        if (num_symbols == 3) {
-          num_symbols += BrotliReadBits(br, 1);
-        }
-        BROTLI_LOG_UINT(num_symbols);
-        i = BrotliBuildSimpleHuffmanTable(
-            table, HUFFMAN_TABLE_BITS, s->symbols_lists_array, num_symbols);
-        if (opt_table_size) {
-          *opt_table_size = i;
-        }
-        s->substate_huffman = BROTLI_STATE_HUFFMAN_NONE;
-        return BROTLI_RESULT_SUCCESS;
-      } else {  /* Decode Huffman-coded code lengths. */
-        int8_t num_codes = 0;
-        unsigned space = 32;
+      if (s->sub_loop_counter != 1) {
+        s->space = 32;
+        s->repeat = 0; /* num_codes */
         memset(&s->code_length_histo[0], 0, sizeof(s->code_length_histo[0]) *
             (BROTLI_HUFFMAN_MAX_CODE_LENGTH_CODE_LENGTH + 1));
         memset(&s->code_length_code_lengths[0], 0,
-               sizeof(s->code_length_code_lengths));
-        /* 15..18 codes will be read, 2..4 bits each; 30..72 bits totally. */
-        for (; i < CODE_LENGTH_CODES; ++i) {
-          const uint8_t code_len_idx = kCodeLengthCodeOrder[i];
-          uint8_t ix = (uint8_t)BrotliGetBits(br, 4);
-          uint8_t v = kCodeLengthPrefixValue[ix];
-          BrotliDropBits(br, kCodeLengthPrefixLength[ix]);
-          s->code_length_code_lengths[code_len_idx] = v;
-          BROTLI_LOG_ARRAY_INDEX(s->code_length_code_lengths, code_len_idx);
-          if (v != 0) {
-            space = space - (32U >> v);
-            ++num_codes;
-            ++s->code_length_histo[v];
-            if (space - 1U >= 32U) {
-              /* space is 0 or wrapped around */
-              break;
-            }
-          }
+            sizeof(s->code_length_code_lengths));
+        s->substate_huffman = BROTLI_STATE_HUFFMAN_COMPLEX;
+        goto Complex;
+      }
+      /* No break, transit to the next state. */
+
+    case BROTLI_STATE_HUFFMAN_SIMPLE_SIZE:
+      /* Read symbols, codes & code lengths directly. */
+      if (!BrotliSafeReadBits(br, 2, &s->symbol)) { /* num_symbols */
+        s->substate_huffman = BROTLI_STATE_HUFFMAN_SIMPLE_SIZE;
+        return BROTLI_RESULT_NEEDS_MORE_INPUT;
+      }
+      s->sub_loop_counter = 0;
+      /* No break, transit to the next state. */
+    case BROTLI_STATE_HUFFMAN_SIMPLE_READ: {
+      BrotliResult result = ReadSimpleHuffmanSymbols(alphabet_size, s);
+      if (result != BROTLI_RESULT_SUCCESS) {
+        return result;
+      }
+      /* No break, transit to the next state. */
+    }
+    case BROTLI_STATE_HUFFMAN_SIMPLE_BUILD: {
+      uint32_t table_size;
+      if (s->symbol == 3) {
+        uint32_t bits;
+        if (!BrotliSafeReadBits(br, 1, &bits)) {
+          s->substate_huffman = BROTLI_STATE_HUFFMAN_SIMPLE_BUILD;
+          return BROTLI_RESULT_NEEDS_MORE_INPUT;
         }
-        if (!(num_codes == 1 || space == 0)) {
-          return BROTLI_FAILURE();
-        }
+        s->symbol += bits;
+      }
+      BROTLI_LOG_UINT(s->symbol);
+      table_size = BrotliBuildSimpleHuffmanTable(
+          table, HUFFMAN_TABLE_BITS, s->symbols_lists_array, s->symbol);
+      if (opt_table_size) {
+        *opt_table_size = table_size;
+      }
+      s->substate_huffman = BROTLI_STATE_HUFFMAN_NONE;
+      return BROTLI_RESULT_SUCCESS;
+    }
+
+Complex: /* Decode Huffman-coded code lengths. */
+    case BROTLI_STATE_HUFFMAN_COMPLEX: {
+      uint32_t i;
+      BrotliResult result = ReadCodeLengthCodeLengths(s);
+      if (result != BROTLI_RESULT_SUCCESS) {
+        return result;
       }
       BrotliBuildCodeLengthsHuffmanTable(s->table,
                                          s->code_length_code_lengths,
                                          s->code_length_histo);
       memset(&s->code_length_histo[0], 0, sizeof(s->code_length_histo));
       for (i = 0; i <= BROTLI_HUFFMAN_MAX_CODE_LENGTH; ++i) {
-        s->next_symbol[i] = i - (BROTLI_HUFFMAN_MAX_CODE_LENGTH + 1);
-        s->symbol_lists[i - (BROTLI_HUFFMAN_MAX_CODE_LENGTH + 1)] = 0xFFFF;
+        s->next_symbol[i] = (int)i - (BROTLI_HUFFMAN_MAX_CODE_LENGTH + 1);
+        s->symbol_lists[(int)i - (BROTLI_HUFFMAN_MAX_CODE_LENGTH + 1)] = 0xFFFF;
       }
 
       s->symbol = 0;
       s->prev_code_len = kDefaultCodeLength;
       s->repeat = 0;
       s->repeat_code_len = 0;
       s->space = 32768;
       s->substate_huffman = BROTLI_STATE_HUFFMAN_LENGTH_SYMBOLS;
       /* No break, transit to the next state. */
+    }
     case BROTLI_STATE_HUFFMAN_LENGTH_SYMBOLS: {
-      uint32_t symbol = s->symbol;
-      uint32_t repeat = s->repeat;
-      uint32_t space = s->space;
-      uint8_t prev_code_len = s->prev_code_len;
-      uint8_t repeat_code_len = s->repeat_code_len;
-      uint16_t* symbol_lists = s->symbol_lists;
-      uint16_t* code_length_histo = s->code_length_histo;
-      int* next_symbol = s->next_symbol;
-      while (symbol < alphabet_size && space > 0) {
-        const HuffmanCode* p = s->table;
-        uint8_t code_len;
-        if (!BrotliCheckInputAmount(br, 8)) {
-          s->symbol = symbol;
-          s->repeat = repeat;
-          s->prev_code_len = prev_code_len;
-          s->repeat_code_len = repeat_code_len;
-          s->space = space;
-          return BROTLI_RESULT_NEEDS_MORE_INPUT;
-        }
-        p += BrotliGetBits(br, BROTLI_HUFFMAN_MAX_CODE_LENGTH_CODE_LENGTH);
-        BrotliDropBits(br, p->bits); /* Use 1..5 bits */
-        code_len = (uint8_t)p->value; /* code_len == 0..17 */
-        if (code_len < kCodeLengthRepeatCode) {
-          repeat = 0;
-          if (code_len != 0) { /* code_len == 1..15 */
-            symbol_lists[next_symbol[code_len]] = (uint16_t)symbol;
-            next_symbol[code_len] = (int)symbol;
-            prev_code_len = code_len;
-            space -= 32768U >> code_len;
-            code_length_histo[code_len]++;
-          }
-          symbol++;
-        } else { /* code_len == 16..17, extra_bits == 2..3 */
-          uint32_t repeat_delta = BrotliReadBits(br, code_len - 14);
-          uint32_t old_repeat;
-          uint8_t new_len = 0;
-          if (code_len == kCodeLengthRepeatCode) {
-            new_len = prev_code_len;
-          }
-          if (repeat_code_len != new_len) {
-            repeat = 0;
-            repeat_code_len = new_len;
-          }
-          old_repeat = repeat;
-          if (repeat > 0) {
-            repeat -= 2;
-            repeat <<= code_len - 14;
-          }
-          repeat += repeat_delta + 3;
-          repeat_delta = repeat - old_repeat; /* repeat_delta >= 3 */
-          /* So, for extra 2..3 bits we produce more than 2 symbols.
-             Consequently, at most 5 bits per symbol are used. */
-          if (symbol + repeat_delta > alphabet_size) {
-            return BROTLI_FAILURE();
-          }
-          if (repeat_code_len != 0) {
-            unsigned last = symbol + repeat_delta;
-            i = next_symbol[repeat_code_len];
-            do {
-              symbol_lists[i] = (uint16_t)symbol;
-              i = (int)symbol;
-            } while (++symbol != last);
-            next_symbol[repeat_code_len] = i;
-            space -= repeat_delta << (15 - repeat_code_len);
-            code_length_histo[repeat_code_len] = (uint16_t)
-                (code_length_histo[repeat_code_len] + repeat_delta);
-          } else {
-            symbol += repeat_delta;
-          }
-        }
+      uint32_t table_size;
+      BrotliResult result = ReadSymbolCodeLengths(alphabet_size, s);
+      if (result == BROTLI_RESULT_NEEDS_MORE_INPUT) {
+        result = SafeReadSymbolCodeLengths(alphabet_size, s);
       }
-      if (space != 0) {
-        BROTLI_LOG(("[ReadHuffmanCode] space = %d\n", space));
+      if (result != BROTLI_RESULT_SUCCESS) {
+        return result;
+      }
+
+      if (s->space != 0) {
+        BROTLI_LOG(("[ReadHuffmanCode] space = %d\n", s->space));
         return BROTLI_FAILURE();
       }
-      {
-        int table_size = BrotliBuildHuffmanTable(
-            table, HUFFMAN_TABLE_BITS, symbol_lists,
-            s->code_length_histo);
-        if (opt_table_size) {
-          *opt_table_size = table_size;
-        }
+      table_size = BrotliBuildHuffmanTable(table, HUFFMAN_TABLE_BITS,
+          s->symbol_lists, s->code_length_histo);
+      if (opt_table_size) {
+        *opt_table_size = table_size;
       }
       s->substate_huffman = BROTLI_STATE_HUFFMAN_NONE;
       return BROTLI_RESULT_SUCCESS;
     }
 
     default:
       return BROTLI_FAILURE();
   }
 }
 
 /* Decodes a block length by reading 3..39 bits. */
-static BROTLI_INLINE int ReadBlockLength(const HuffmanCode* table,
-                                         BrotliBitReader* br) {
-  int code;
-  int nbits;
+static BROTLI_INLINE uint32_t ReadBlockLength(const HuffmanCode* table,
+                                              BrotliBitReader* br) {
+  uint32_t code;
+  uint32_t nbits;
   code = ReadSymbol(table, br);
   nbits = kBlockLengthPrefixCode[code].nbits; /* nbits == 2..24 */
-  return kBlockLengthPrefixCode[code].offset + (int)BrotliReadBits(br, nbits);
+  return kBlockLengthPrefixCode[code].offset + BrotliReadBits(br, nbits);
+}
+
+/* WARNING: if state is not BROTLI_STATE_READ_BLOCK_LENGTH_NONE, then
+   reading can't be continued with ReadBlockLength. */
+static BROTLI_INLINE int SafeReadBlockLength(BrotliState* s,
+                                             uint32_t* result,
+                                             const HuffmanCode* table,
+                                             BrotliBitReader* br) {
+  uint32_t index;
+  if (s->substate_read_block_length == BROTLI_STATE_READ_BLOCK_LENGTH_NONE) {
+    if (!SafeReadSymbol(table, br, &index)) {
+      return 0;
+    }
+  } else {
+    index = s->block_length_index;
+  }
+  {
+    uint32_t bits;
+    uint32_t nbits = kBlockLengthPrefixCode[index].nbits; /* nbits == 2..24 */
+    if (!BrotliSafeReadBits(br, nbits, &bits)) {
+      s->block_length_index = index;
+      s->substate_read_block_length = BROTLI_STATE_READ_BLOCK_LENGTH_SUFFIX;
+      return 0;
+    }
+    *result = kBlockLengthPrefixCode[index].offset + bits;
+    s->substate_read_block_length = BROTLI_STATE_READ_BLOCK_LENGTH_NONE;
+    return 1;
+  }
 }
 
 /* Transform:
     1) initialize list L with values 0, 1,... 255
     2) For each input element X:
     2.1) let Y = L[X]
     2.2) remove X-th element from L
     2.3) prepend Y to L
@@ -556,21 +847,21 @@ static BROTLI_INLINE int ReadBlockLength
 
    In most cases max(Y) <= 7, so most of L remains intact.
    To reduce the cost of initialization, we reuse L, remember the upper bound
    of Y values, and reinitialize only first elements in L.
 
    Most of input values are 0 and 1. To reduce number of branches, we replace
    inner for loop with do-while.
  */
-static BROTLI_NOINLINE void InverseMoveToFrontTransform(uint8_t* v, int v_len,
-    BrotliState* state) {
+static BROTLI_NOINLINE void InverseMoveToFrontTransform(uint8_t* v,
+    uint32_t v_len, BrotliState* state) {
   /* Reinitialize elements that could have been changed. */
-  int i = 4;
-  int upper_bound = state->mtf_upper_bound;
+  uint32_t i = 4;
+  uint32_t upper_bound = state->mtf_upper_bound;
   uint8_t* mtf = state->mtf;
   /* Load endian-aware constant. */
   const uint8_t b0123[4] = {0, 1, 2, 3};
   uint32_t pattern;
   memcpy(&pattern, &b0123, 4);
 
   /* Initialize list using 4 consequent values pattern. */
   *(uint32_t*)mtf = pattern;
@@ -580,44 +871,39 @@ static BROTLI_NOINLINE void InverseMoveT
     i += 4;
   } while (i <= upper_bound);
 
   /* Transform the input. */
   upper_bound = 0;
   for (i = 0; i < v_len; ++i) {
     int index = v[i];
     uint8_t value = mtf[index];
+    upper_bound |= v[i];
     v[i] = value;
-    upper_bound |= index;
     do {
       index--;
       mtf[index + 1] = mtf[index];
     } while (index > 0);
     mtf[0] = value;
   }
   /* Remember amount of elements to be reinitialized. */
   state->mtf_upper_bound = upper_bound;
 }
 
-/* Expose function for testing. Will be removed by linker as unused. */
-void InverseMoveToFrontTransformForTesting(uint8_t* v, int l, BrotliState* s) {
-  InverseMoveToFrontTransform(v, l, s);
-}
-
 
 /* Decodes a series of Huffman table using ReadHuffmanCode function. */
 static BrotliResult HuffmanTreeGroupDecode(HuffmanTreeGroup* group,
                                            BrotliState* s) {
   if (s->substate_tree_group != BROTLI_STATE_TREE_GROUP_LOOP) {
     s->next = group->codes;
     s->htree_index = 0;
     s->substate_tree_group = BROTLI_STATE_TREE_GROUP_LOOP;
   }
   while (s->htree_index < group->num_htrees) {
-    int table_size;
+    uint32_t table_size;
     BrotliResult result =
         ReadHuffmanCode(group->alphabet_size, s->next, &table_size, s);
     if (result != BROTLI_RESULT_SUCCESS) return result;
     group->htrees[s->htree_index] = s->next;
     s->next += table_size;
     ++s->htree_index;
   }
   s->substate_tree_group = BROTLI_STATE_TREE_GROUP_NONE;
@@ -628,306 +914,392 @@ static BrotliResult HuffmanTreeGroupDeco
    Decoding is done in 4 phases:
     1) Read auxiliary information (6..16 bits) and allocate memory.
        In case of trivial context map, decoding is finished at this phase.
     2) Decode Huffman table using ReadHuffmanCode function.
        This table will be used for reading context map items.
     3) Read context map items; "0" values could be run-length encoded.
     4) Optionally, apply InverseMoveToFront transform to the resulting map.
  */
-static BrotliResult DecodeContextMap(int context_map_size,
-                                     int* num_htrees,
+static BrotliResult DecodeContextMap(uint32_t context_map_size,
+                                     uint32_t* num_htrees,
                                      uint8_t** context_map_arg,
                                      BrotliState* s) {
   BrotliBitReader* br = &s->br;
   BrotliResult result = BROTLI_RESULT_SUCCESS;
-  int use_rle_for_zeros;
 
   switch((int)s->substate_context_map) {
     case BROTLI_STATE_CONTEXT_MAP_NONE:
       result = DecodeVarLenUint8(s, br, num_htrees);
       if (result != BROTLI_RESULT_SUCCESS) {
         return result;
       }
       (*num_htrees)++;
       s->context_index = 0;
       BROTLI_LOG_UINT(context_map_size);
       BROTLI_LOG_UINT(*num_htrees);
-      *context_map_arg = (uint8_t*)malloc((size_t)context_map_size);
+      *context_map_arg = (uint8_t*)BROTLI_ALLOC(s, (size_t)context_map_size);
       if (*context_map_arg == 0) {
         return BROTLI_FAILURE();
       }
       if (*num_htrees <= 1) {
         memset(*context_map_arg, 0, (size_t)context_map_size);
         return BROTLI_RESULT_SUCCESS;
       }
       s->substate_context_map = BROTLI_STATE_CONTEXT_MAP_READ_PREFIX;
       /* No break, continue to next state. */
-    case BROTLI_STATE_CONTEXT_MAP_READ_PREFIX:
-      if (!BrotliWarmupBitReader(br) || !BrotliCheckInputAmount(br, 8)) {
+    case BROTLI_STATE_CONTEXT_MAP_READ_PREFIX: {
+      uint32_t bits; /* Bit 0: RLE flag; bits 1..4 used only if it is set. */
+      /* In next stage ReadHuffmanCode uses at least 4 bits, so it is safe
+         to peek 4 bits ahead. */
+      if (!BrotliSafeGetBits(br, 5, &bits)) {
         return BROTLI_RESULT_NEEDS_MORE_INPUT;
       }
-      use_rle_for_zeros = (int)BrotliReadBits(br, 1);
-      if (use_rle_for_zeros) {
-        s->max_run_length_prefix = (int)BrotliReadBits(br, 4) + 1;
+      if ((bits & 1) != 0) { /* Use RLE for zeroes. */
+        s->max_run_length_prefix = (bits >> 1) + 1; /* 4-bit field + 1. */
+        BrotliDropBits(br, 5); /* Flag bit and the 4-bit field. */
       } else {
         s->max_run_length_prefix = 0;
+        BrotliDropBits(br, 1); /* Flag bit only. */
       }
       BROTLI_LOG_UINT(s->max_run_length_prefix);
       s->substate_context_map = BROTLI_STATE_CONTEXT_MAP_HUFFMAN;
       /* No break, continue to next state. */
+    }
     case BROTLI_STATE_CONTEXT_MAP_HUFFMAN:
       result = ReadHuffmanCode(*num_htrees + s->max_run_length_prefix,
                                s->context_map_table, NULL, s);
       if (result != BROTLI_RESULT_SUCCESS) return result;
+      s->code = 0xFFFF; /* 0xFFFF: no RLE code awaiting its extra bits. */
       s->substate_context_map = BROTLI_STATE_CONTEXT_MAP_DECODE;
       /* No break, continue to next state. */
     case BROTLI_STATE_CONTEXT_MAP_DECODE: {
-      int context_index = s->context_index;
-      int max_run_length_prefix = s->max_run_length_prefix;
+      uint32_t context_index = s->context_index;
+      uint32_t max_run_length_prefix = s->max_run_length_prefix;
       uint8_t* context_map = *context_map_arg;
-      int code;
+      uint32_t code = s->code;
+      if (code != 0xFFFF) { /* Resume an RLE code saved by an earlier call. */
+        goto rleCode;
+      }
       while (context_index < context_map_size) {
-        if (!BrotliCheckInputAmount(br, 32)) {
+        if (!SafeReadSymbol(s->context_map_table, br, &code)) {
+          s->code = 0xFFFF; /* Nothing pending; reread the symbol later. */
           s->context_index = context_index;
           return BROTLI_RESULT_NEEDS_MORE_INPUT;
         }
-        code = ReadSymbol(s->context_map_table, br);
         BROTLI_LOG_UINT(code);
+
         if (code == 0) {
           context_map[context_index++] = 0;
-        } else if (code - max_run_length_prefix <= 0) {
-          int reps = (1 << code) + (int)BrotliReadBits(br, code);
+          continue;
+        }
+        if (code > max_run_length_prefix) {
+          context_map[context_index++] =
+              (uint8_t)(code - max_run_length_prefix);
+          continue;
+        }
+rleCode:
+        {
+          uint32_t reps;
+          if (!BrotliSafeReadBits(br, code, &reps)) {
+            s->code = code; /* Retry the extra bits on the next call. */
+            s->context_index = context_index;
+            return BROTLI_RESULT_NEEDS_MORE_INPUT;
+          }
+          reps += 1U << code; /* reps = (1 << code) + extra bits. */
           BROTLI_LOG_UINT(reps);
           if (context_index + reps > context_map_size) {
             return BROTLI_FAILURE();
           }
           do {
             context_map[context_index++] = 0;
           } while (--reps);
-        } else {
-          context_map[context_index++] =
-              (uint8_t)(code - max_run_length_prefix);
         }
       }
-      if (BrotliReadBits(br, 1)) {
-        InverseMoveToFrontTransform(context_map, context_map_size, s);
+      /* No break, continue to next state. */
+    }
+    case BROTLI_STATE_CONTEXT_MAP_TRANSFORM: {
+      uint32_t bits;
+      if (!BrotliSafeReadBits(br, 1, &bits)) {
+        s->substate_context_map = BROTLI_STATE_CONTEXT_MAP_TRANSFORM;
+        return BROTLI_RESULT_NEEDS_MORE_INPUT;
+      }
+      if (bits != 0) {
+        InverseMoveToFrontTransform(*context_map_arg, context_map_size, s);
       }
       s->substate_context_map = BROTLI_STATE_CONTEXT_MAP_NONE;
       return BROTLI_RESULT_SUCCESS;
     }
+    default:
+      return BROTLI_FAILURE(); /* Unknown substate. */
   }
-
-  return BROTLI_FAILURE();
 }
 
 /* Decodes a command or literal and updates block type ringbuffer.
-   Reads 0..15 bits. */
-static void DecodeBlockType(const int max_block_type,
-                            const HuffmanCode* trees,
-                            int tree_type,
-                            int* ringbuffers,
-                            BrotliBitReader* br) {
-  int* ringbuffer = ringbuffers + tree_type * 2;
-  int block_type =
-      ReadSymbol(&trees[tree_type * BROTLI_HUFFMAN_MAX_TABLE_SIZE], br) - 2;
-  if (block_type == -1) {
+   Reads 3..54 bits. Returns 0 only if safe != 0 and a Safe* read fails. */
+static BROTLI_INLINE int DecodeBlockTypeAndLength(int safe,
+    BrotliState* s, int tree_type) {
+  uint32_t max_block_type = s->num_block_types[tree_type];
+  const HuffmanCode* type_tree = &s->block_type_trees[
+      tree_type * BROTLI_HUFFMAN_MAX_SIZE_258];
+  const HuffmanCode* len_tree = &s->block_len_trees[
+      tree_type * BROTLI_HUFFMAN_MAX_SIZE_26];
+  BrotliBitReader* br = &s->br;
+  uint32_t* ringbuffer = &s->block_type_rb[tree_type * 2]; /* 2 slots each. */
+  uint32_t block_type;
+
+  /* Read 0..15 + 3..39 bits */
+  if (!safe) {
+    block_type = ReadSymbol(type_tree, br);
+    s->block_length[tree_type] = ReadBlockLength(len_tree, br);
+  } else {
+    BrotliBitReaderState memento; /* Lets the symbol read be undone. */
+    BrotliBitReaderSaveState(br, &memento);
+    if (!SafeReadSymbol(type_tree, br, &block_type)) return 0;
+    if (!SafeReadBlockLength(s, &s->block_length[tree_type], len_tree, br)) {
+      s->substate_read_block_length = BROTLI_STATE_READ_BLOCK_LENGTH_NONE;
+      BrotliBitReaderRestoreState(br, &memento);
+      return 0; /* Substate reset and reader restored: both reads are redone. */
+    }
+  }
+
+  if (block_type == 1) { /* Symbol 1: most recent block type plus one. */
     block_type = ringbuffer[1] + 1;
-  } else if (block_type == -2) {
+  } else if (block_type == 0) { /* Symbol 0: the block type before that. */
     block_type = ringbuffer[0];
+  } else { /* Symbols >= 2 carry the block type directly, biased by 2. */
+    block_type -= 2;
   }
   if (block_type >= max_block_type) {
     block_type -= max_block_type;
   }
   ringbuffer[0] = ringbuffer[1];
   ringbuffer[1] = block_type;
+  return 1; /* ringbuffer[1] now holds the new block type. */
 }
 
 /* Decodes the block type and updates the state for literal context.
-   Reads 18..54 bits. */
-static void DecodeBlockTypeWithContext(BrotliState* s,
-                                       BrotliBitReader* br) {
+   Reads 3..54 bits. Returns 0 if DecodeBlockTypeAndLength returned 0. */
+static BROTLI_INLINE int DecodeLiteralBlockSwitchInternal(int safe,
+    BrotliState* s) {
   uint8_t context_mode;
-  int context_offset;
-  DecodeBlockType(s->num_block_types[0], s->block_type_trees, 0,
-                  s->block_type_rb, br); /* Reads 0..15 bits. */
-  s->block_length[0] = ReadBlockLength(s->block_len_trees, br); /* 3..39 bits */
+  uint32_t context_offset;
+  if (!DecodeBlockTypeAndLength(safe, s, 0)) { /* 0: literal trees. */
+    return 0; /* The literal-context fields below are left unchanged. */
+  }
   context_offset = s->block_type_rb[1] << kLiteralContextBits;
   s->context_map_slice = s->context_map + context_offset;
   s->literal_htree_index = s->context_map_slice[0];
   s->literal_htree = s->literal_hgroup.htrees[s->literal_htree_index];
   context_mode = s->context_modes[s->block_type_rb[1]];
   s->context_lookup1 = &kContextLookup[kContextLookupOffsets[context_mode]];
   s->context_lookup2 = &kContextLookup[kContextLookupOffsets[context_mode + 1]];
+  return 1;
+}
+
+static void BROTLI_NOINLINE DecodeLiteralBlockSwitch(BrotliState* s) {
+  DecodeLiteralBlockSwitchInternal(0, s); /* safe = 0; result is ignored. */
+}
+
+static int BROTLI_NOINLINE SafeDecodeLiteralBlockSwitch(BrotliState* s) {
+  return DecodeLiteralBlockSwitchInternal(1, s); /* safe = 1; 0 on failure. */
+}
+
+/* Block switch for insert/copy length.
+   Reads 3..54 bits. block_type_rb[3] is the current command block type. */
+static BROTLI_INLINE int DecodeCommandBlockSwitchInternal(int safe,
+    BrotliState* s) {
+  if (!DecodeBlockTypeAndLength(safe, s, 1)) { /* 1: command trees. */
+    return 0; /* s->htree_command is left unchanged. */
+  }
+  s->htree_command = s->insert_copy_hgroup.htrees[s->block_type_rb[3]];
+  return 1;
+}
+
+static void BROTLI_NOINLINE DecodeCommandBlockSwitch(BrotliState* s) {
+  DecodeCommandBlockSwitchInternal(0, s); /* safe = 0; result is ignored. */
+}
+static int BROTLI_NOINLINE SafeDecodeCommandBlockSwitch(BrotliState* s) {
+  return DecodeCommandBlockSwitchInternal(1, s); /* safe = 1; 0 on failure. */
 }
 
-BrotliResult WriteRingBuffer(BrotliOutput output,
-                             BrotliState* s) {
-  int num_written;
+/* Block switch for distance codes.
+   Reads 3..54 bits. block_type_rb[5] is the current distance block type. */
+static BROTLI_INLINE int DecodeDistanceBlockSwitchInternal(int safe,
+    BrotliState* s) {
+  if (!DecodeBlockTypeAndLength(safe, s, 2)) { /* 2: distance trees. */
+    return 0; /* The distance context map fields are left unchanged. */
+  }
+  s->dist_context_map_slice =
+      s->dist_context_map + (s->block_type_rb[5] << kDistanceContextBits);
+  s->dist_htree_index = s->dist_context_map_slice[s->distance_context];
+  return 1;
+}
+
+static void BROTLI_NOINLINE DecodeDistanceBlockSwitch(BrotliState* s) {
+  DecodeDistanceBlockSwitchInternal(0, s); /* safe = 0; result is ignored. */
+}
+
+static int BROTLI_NOINLINE SafeDecodeDistanceBlockSwitch(BrotliState* s) {
+  return DecodeDistanceBlockSwitchInternal(1, s); /* safe = 1; 0 on failure. */
+}
+
+static BrotliResult WriteRingBuffer(size_t* available_out, uint8_t** next_out,
+    size_t* total_out, BrotliState* s) {
+  size_t pos = (s->pos > s->ringbuffer_size) ?
+      (size_t)s->ringbuffer_size : (size_t)(s->pos); /* Clamped to size. */
+  uint8_t* start = s->ringbuffer
+      + (s->partial_pos_out & (size_t)s->ringbuffer_mask); /* Copy source. */
+  size_t partial_pos_rb =
+      (s->rb_roundtrips * (size_t)s->ringbuffer_size) + pos;
+  size_t to_write = (partial_pos_rb - s->partial_pos_out); /* Not yet output. */
+  size_t num_written = *available_out; /* Capped to to_write just below. */
+  if (num_written > to_write) {
+    num_written = to_write;
+  }
   if (s->meta_block_remaining_len < 0) {
     return BROTLI_FAILURE();
   }
-  num_written = BrotliWrite(
-      output, s->ringbuffer + s->partially_written,
-      (size_t)(s->to_write - s->partially_written));
-  BROTLI_LOG_UINT(s->partially_written);
-  BROTLI_LOG_UINT(s->to_write);
+  memcpy(*next_out, start, num_written);
+  *next_out += num_written; /* Advance the caller's output cursor. */
+  *available_out -= num_written;
+  BROTLI_LOG_UINT(to_write);
   BROTLI_LOG_UINT(num_written);
-  if (num_written < 0) {
-    return BROTLI_FAILURE();
-  }
-  s->partially_written += num_written;
-  if (s->partially_written < s->to_write) {
+  s->partial_pos_out += num_written;
+  *total_out = s->partial_pos_out; /* Mirrors the updated partial_pos_out. */
+  if (num_written < to_write) { /* Output space ran out first. */
     return BROTLI_RESULT_NEEDS_MORE_OUTPUT;
   }
   return BROTLI_RESULT_SUCCESS;
 }
 
-BrotliResult BROTLI_NOINLINE CopyUncompressedBlockToOutput(BrotliOutput output,
-                                                           int pos,
-                                                           BrotliState* s) {
-  BrotliResult result;
-  int num_read;
-  int nbytes;
+/* Allocates the ring buffer. Returns 1 on success, 0 if allocation fails.
+
+   s->ringbuffer_size MUST be updated by BrotliCalculateRingBufferSize before
+   this function is called.
+
+   Last two bytes of ringbuffer are initialized to 0, so context calculation
+   could be done uniformly for the first two and all other positions.
+
+   Custom dictionary, if any, is copied to the end of ringbuffer.
+*/
+static int BROTLI_NOINLINE BrotliAllocateRingBuffer(BrotliState* s) {
+  /* We need the slack region for the following reasons:
+      - doing up to two 16-byte copies for fast backward copying
+      - inserting transformed dictionary word (5 prefix + 24 base + 8 suffix) */
+  static const int kRingBufferWriteAheadSlack = 42;
+  s->ringbuffer = (uint8_t*)BROTLI_ALLOC(s, (size_t)(s->ringbuffer_size +
+      kRingBufferWriteAheadSlack));
+  if (s->ringbuffer == 0) {
+    return 0; /* Allocation failed. */
+  }
+
+  s->ringbuffer_end = s->ringbuffer + s->ringbuffer_size; /* Slack follows. */
+
+  s->ringbuffer[s->ringbuffer_size - 2] = 0;
+  s->ringbuffer[s->ringbuffer_size - 1] = 0;
+
+  if (s->custom_dict) {
+    memcpy(&s->ringbuffer[(-s->custom_dict_size) & s->ringbuffer_mask],
+                          s->custom_dict, (size_t)s->custom_dict_size);
+  }
+
+  return 1;
+}
+
+static BrotliResult BROTLI_NOINLINE CopyUncompressedBlockToOutput(
+    size_t* available_out, uint8_t** next_out, size_t* total_out,
+    BrotliState* s) {
+  /* TODO: avoid allocation for single uncompressed block. */
+  if (!s->ringbuffer && !BrotliAllocateRingBuffer(s)) {
+    return BROTLI_FAILURE();
+  }
+
   /* State machine */
   for (;;) {
-    switch ((int)s->substate_uncompressed) {
-      case BROTLI_STATE_UNCOMPRESSED_NONE:
-        /* For short lengths copy byte-by-byte */
-        if (s->meta_block_remaining_len < 8 ||
-            s->meta_block_remaining_len < BrotliGetRemainingBytes(&s->br)) {
-          s->substate_uncompressed = BROTLI_STATE_UNCOMPRESSED_SHORT;
-          break;
+    switch (s->substate_uncompressed) {
+      case BROTLI_STATE_UNCOMPRESSED_NONE: {
+        int nbytes = (int)BrotliGetRemainingBytes(&s->br);
+        if (nbytes > s->meta_block_remaining_len) { /* Stay inside the block. */
+          nbytes = s->meta_block_remaining_len;
+        }
+        if (s->pos + nbytes > s->ringbuffer_size) { /* Stop at buffer end. */
+          nbytes = s->ringbuffer_size - s->pos;
         }
         /* Copy remaining bytes from s->br.buf_ to ringbuffer. */
-        nbytes = (int)BrotliGetRemainingBytes(&s->br);
-        BrotliCopyBytes(&s->ringbuffer[pos], &s->br, (size_t)nbytes);
-        pos += nbytes;
+        BrotliCopyBytes(&s->ringbuffer[s->pos], &s->br, (size_t)nbytes);
+        s->pos += nbytes;
         s->meta_block_remaining_len -= nbytes;
-        if (pos >= s->ringbuffer_size) {
-          s->to_write = s->ringbuffer_size;
-          s->partially_written = 0;
-          s->substate_uncompressed = BROTLI_STATE_UNCOMPRESSED_WRITE;
-          break;
-        }
-        s->substate_uncompressed = BROTLI_STATE_UNCOMPRESSED_COPY;
-        break;
-      case BROTLI_STATE_UNCOMPRESSED_SHORT:
-        if (!BrotliWarmupBitReader(&s->br)) {
+        if (s->pos < s->ringbuffer_size) { /* Ring buffer is not full yet. */
+          if (s->meta_block_remaining_len == 0) {
+            return BROTLI_RESULT_SUCCESS; /* Whole block has been copied. */
+          }
           return BROTLI_RESULT_NEEDS_MORE_INPUT;
         }
-        while (s->meta_block_remaining_len > 0) {
-          if (!BrotliCheckInputAmount(&s->br, 8)) {
-            return BROTLI_RESULT_NEEDS_MORE_INPUT;
-          }
-          s->ringbuffer[pos++] = (uint8_t)BrotliReadBits(&s->br, 8);
-          s->meta_block_remaining_len--;
-        }
-        if (pos >= s->ringbuffer_size) {
-          s->to_write = s->ringbuffer_size;
-          s->partially_written = 0;
-          s->substate_uncompressed = BROTLI_STATE_UNCOMPRESSED_WRITE;
-        } else {
-          s->substate_uncompressed = BROTLI_STATE_UNCOMPRESSED_NONE;
-          return BROTLI_RESULT_SUCCESS;
-        }
-        /* No break, if state is updated, continue to next state */
-      case BROTLI_STATE_UNCOMPRESSED_WRITE:
-        result = WriteRingBuffer(output, s);
+        s->substate_uncompressed = BROTLI_STATE_UNCOMPRESSED_WRITE;
+        /* No break, continue to next state */
+      }
+      case BROTLI_STATE_UNCOMPRESSED_WRITE: {
+        BrotliResult result = WriteRingBuffer(
+            available_out, next_out, total_out, s);
         if (result != BROTLI_RESULT_SUCCESS) {
           return result;
         }
-        pos &= s->ringbuffer_mask;
+        s->pos = 0; /* Buffer was written out; refill from its start. */
+        s->rb_roundtrips++;
         s->max_distance = s->max_backward_distance;
-        /* If we wrote past the logical end of the ringbuffer, copy the tail
-           of the ringbuffer to its beginning and flush the ringbuffer to the
-           output. */
-        memcpy(s->ringbuffer, s->ringbuffer_end, (size_t)pos);
-        s->substate_uncompressed = BROTLI_STATE_UNCOMPRESSED_COPY;
-        /* No break, continue to next state */
-      case BROTLI_STATE_UNCOMPRESSED_COPY:
-        /* Copy straight from the input onto the ringbuffer. The ringbuffer will
-           be flushed to the output at a later time. */
-        nbytes = s->meta_block_remaining_len;
-        if (pos + nbytes > s->ringbuffer_size) {
-          nbytes = s->ringbuffer_size - pos;
-        }
-        num_read = BrotliRead(s->br.input_, &s->ringbuffer[pos],
-                              (size_t)nbytes);
-        pos += num_read;
-        s->meta_block_remaining_len -= num_read;
-        if (num_read < nbytes) {
-          if (num_read < 0) return BROTLI_FAILURE();
-          return BROTLI_RESULT_NEEDS_MORE_INPUT;
-        }
-        if (pos == s->ringbuffer_size) {
-          s->to_write = s->ringbuffer_size;
-          s->partially_written = 0;
-          s->substate_uncompressed = BROTLI_STATE_UNCOMPRESSED_WRITE;
-          break;
-        }
         s->substate_uncompressed = BROTLI_STATE_UNCOMPRESSED_NONE;
-        return BROTLI_RESULT_SUCCESS;
+        break; /* Loop again in the NONE state to copy further input. */
+      }
     }
   }
   return BROTLI_FAILURE();
 }
 
 int BrotliDecompressedSize(size_t encoded_size,
                            const uint8_t* encoded_buffer,
                            size_t* decoded_size) {
-  BrotliMemInput memin;
-  BrotliInput in = BrotliInitMemInput(encoded_buffer, encoded_size, &memin);
-  BrotliBitReader br;
   BrotliState s;
   int next_block_header;
-  int offset;
   BrotliStateInit(&s);
-  BrotliInitBitReader(&br, in);
-  if (!BrotliReadInput(&br, 1) || !BrotliWarmupBitReader(&br)) {
+  s.br.next_in = encoded_buffer; /* Bit reader reads the caller's buffer. */
+  s.br.avail_in = encoded_size;
+  if (!BrotliWarmupBitReader(&s.br)) {
     return 0;
   }
-  DecodeWindowBits(&br);
-  if (DecodeMetaBlockLength(&s, &br) != BROTLI_RESULT_SUCCESS) {
+  DecodeWindowBits(&s.br); /* Return value is not used here. */
+  if (DecodeMetaBlockLength(&s, &s.br) != BROTLI_RESULT_SUCCESS) {
     return 0;
   }
   *decoded_size = (size_t)s.meta_block_remaining_len;
   if (s.is_last_metablock) {
     return 1;
   }
-  if (!s.is_uncompressed || !BrotliJumpToByteBoundary(&br)) {
+  if (!s.is_uncompressed || !BrotliJumpToByteBoundary(&s.br)) {
     return 0;
   }
-  next_block_header = BrotliPeekByte(&br, s.meta_block_remaining_len);
-  if (next_block_header != -1) {
-    return (next_block_header & 3) == 3;
-  }
-  /* Currently bit reader can't peek outside of its buffer... */
-  offset = BROTLI_READ_SIZE - (int)BrotliGetRemainingBytes(&br);
-  offset += s.meta_block_remaining_len;
-  return (offset < encoded_size) && ((encoded_buffer[offset] & 3) == 3);
+  next_block_header = BrotliPeekByte(&s.br, (size_t)s.meta_block_remaining_len);
+  return (next_block_header != -1) && ((next_block_header & 3) == 3);
 }
 
-/* Allocates the smallest feasible ring buffer.
+/* Calculates the smallest feasible ring buffer.
 
    If we know the data size is small, do not allocate more ringbuffer
    size than needed to reduce memory usage.
 
-   This method is called before the first non-empty non-metadata block is
-   processed. When this method is called, metablock size and flags MUST be
-   decoded.
+   When this method is called, metablock size and flags MUST be decoded.
 */
-int BROTLI_NOINLINE BrotliAllocateRingBuffer(BrotliState* s,
+static void BROTLI_NOINLINE BrotliCalculateRingBufferSize(BrotliState* s,
     BrotliBitReader* br) {
-  static const int kRingBufferWriteAheadSlack = BROTLI_READ_SIZE;
   int is_last = s->is_last_metablock;
   s->ringbuffer_size = 1 << s->window_bits;
 
   if (s->is_uncompressed) {
-    int next_block_header = BrotliPeekByte(br, s->meta_block_remaining_len);
+    int next_block_header = BrotliPeekByte(br,
+        (size_t)s->meta_block_remaining_len);
     if (next_block_header != -1) { /* Peek succeeded */
       if ((next_block_header & 3) == 3) { /* ISLAST and ISEMPTY */
         is_last = 1;
       }
     }
   }
 
   /* We need at least 2 bytes of ring buffer size to get the last two
@@ -940,120 +1312,634 @@ int BROTLI_NOINLINE BrotliAllocateRingBu
   }
 
   /* But make it fit the custom dictionary if there is one. */
   while (s->ringbuffer_size < s->custom_dict_size) {
     s->ringbuffer_size <<= 1;
   }
 
   s->ringbuffer_mask = s->ringbuffer_size - 1;
-  s->ringbuffer = (uint8_t*)malloc((size_t)(s->ringbuffer_size +
-                                         kRingBufferWriteAheadSlack +
-                                         kBrotliMaxDictionaryWordLength));
-  if (!s->ringbuffer) {
-    return 0;
+}
+
+/* Reads 1..256 2-bit context modes. */
+static BrotliResult ReadContextModes(BrotliState* s) {
+  BrotliBitReader* br = &s->br;
+  int i = s->loop_counter; /* Resume index; saved below when a read fails. */
+
+  while (i < (int)s->num_block_types[0]) {
+    uint32_t bits;
+    if (!BrotliSafeReadBits(br, 2, &bits)) {
+      s->loop_counter = i;
+      return BROTLI_RESULT_NEEDS_MORE_INPUT;
+    }
+    s->context_modes[i] = (uint8_t)(bits << 1); /* Stored as mode * 2. */
+    BROTLI_LOG_ARRAY_INDEX(s->context_modes, i);
+    i++;
+  }
+  return BROTLI_RESULT_SUCCESS;
+}
+
+static BROTLI_INLINE void TakeDistanceFromRingBuffer(BrotliState* s) {
+  if (s->distance_code == 0) { /* Code 0: step back one slot in dist_rb. */
+    --s->dist_rb_idx;
+    s->distance_code = s->dist_rb[s->dist_rb_idx & 3];
+  } else {
+    int distance_code = s->distance_code << 1; /* Bit offset: 2 bits per code. */
+    /* kDistanceShortCodeIndexOffset has 2-bit values from LSB: */
+    /* 3, 2, 1, 0, 3, 3, 3, 3, 3, 3, 2, 2, 2, 2, 2, 2 */
+    const uint32_t kDistanceShortCodeIndexOffset = 0xaaafff1b;
+    /* kDistanceShortCodeValueOffset has 2-bit values from LSB: */
+    /*-0, 0,-0, 0,-1, 1,-2, 2,-3, 3,-1, 1,-2, 2,-3, 3 */
+    const uint32_t kDistanceShortCodeValueOffset = 0xfa5fa500;
+    int v = (s->dist_rb_idx +
+        (int)(kDistanceShortCodeIndexOffset >> distance_code)) & 0x3;
+    s->distance_code = s->dist_rb[v];
+    v = (int)(kDistanceShortCodeValueOffset >> distance_code) & 0x3;
+    if ((distance_code & 0x3) != 0) { /* Original code was odd: add. */
+      s->distance_code += v;
+    } else { /* Original code was even: subtract. */
+      s->distance_code -= v;
+      if (s->distance_code <= 0) {
+        /* A huge distance will cause a BROTLI_FAILURE() soon. */
+        /* This is a little faster than failing here. */
+        s->distance_code = 0x0fffffff;
+      }
+    }
+  }
+}
+
+static BROTLI_INLINE int SafeReadBits(
+    BrotliBitReader* const br, uint32_t n_bits, uint32_t* val) {
+  if (n_bits != 0) {
+    return BrotliSafeReadBits(br, n_bits, val);
+  } else { /* Zero-width read: succeed without calling BrotliSafeReadBits. */
+    *val = 0;
+    return 1;
+  }
+}
+
+/* Precondition: s->distance_code < 0 */
+static BROTLI_INLINE int ReadDistanceInternal(int safe,
+    BrotliState* s, BrotliBitReader* br) {
+  int distval;
+  BrotliBitReaderState memento; /* Written and read only when safe != 0. */
+  HuffmanCode* distance_tree = s->distance_hgroup.htrees[s->dist_htree_index];
+  if (!safe) {
+    s->distance_code = (int)ReadSymbol(distance_tree, br);
+  } else {
+    uint32_t code;
+    BrotliBitReaderSaveState(br, &memento);
+    if (!SafeReadSymbol(distance_tree, br, &code)) {
+      return 0; /* s->distance_code has not been touched yet. */
+    }
+    s->distance_code = (int)code;
+  }
+  /* Convert the distance code to the actual distance by possibly */
+  /* looking up past distances from s->dist_rb. */
+  if ((s->distance_code & ~0xf) == 0) { /* Codes 0..15. */
+    TakeDistanceFromRingBuffer(s);
+    --s->block_length[2];
+    return 1;
   }
-  s->ringbuffer_end = s->ringbuffer + s->ringbuffer_size;
-  s->ringbuffer[s->ringbuffer_size - 2] = 0;
-  s->ringbuffer[s->ringbuffer_size - 1] = 0;
-  if (s->custom_dict) {
-    memcpy(&s->ringbuffer[(-s->custom_dict_size) & s->ringbuffer_mask],
-                          s->custom_dict, (size_t)s->custom_dict_size);
+  distval = s->distance_code - (int)s->num_direct_distance_codes;
+  if (distval >= 0) { /* Code needs extra bits from the stream. */
+    uint32_t nbits;
+    int postfix;
+    int offset;
+    if (!safe && (s->distance_postfix_bits == 0)) {
+      nbits = ((uint32_t)distval >> 1) + 1;
+      offset = ((2 + (distval & 1)) << nbits) - 4;
+      s->distance_code = (int)s->num_direct_distance_codes +
+          offset + (int)BrotliReadBits(br, nbits);
+    } else {
+      /* This branch also works well when s->distance_postfix_bits == 0 */
+      uint32_t bits;
+      postfix = distval & s->distance_postfix_mask;
+      distval >>= s->distance_postfix_bits;
+      nbits = ((uint32_t)distval >> 1) + 1;
+      if (safe) {
+        if (!SafeReadBits(br, nbits, &bits)) {
+          s->distance_code = -1; /* Restore precondition. */
+          BrotliBitReaderRestoreState(br, &memento);
+          return 0;
+        }
+      } else {
+        bits = BrotliReadBits(br, nbits);
+      }
+      offset = ((2 + (distval & 1)) << nbits) - 4;
+      s->distance_code = (int)s->num_direct_distance_codes +
+          ((offset + (int)bits) << s->distance_postfix_bits) + postfix;
+    }
+  }
+  s->distance_code = s->distance_code - NUM_DISTANCE_SHORT_CODES + 1;
+  --s->block_length[2];
+  return 1;
+}
+
+static BROTLI_INLINE void ReadDistance(BrotliState* s, BrotliBitReader* br) {
+  ReadDistanceInternal(0, s, br); /* safe = 0; result is ignored. */
+}
+
+static BROTLI_INLINE int SafeReadDistance(BrotliState* s, BrotliBitReader* br) {
+  return ReadDistanceInternal(1, s, br); /* safe = 1; 0 on failure. */
+}
+
+static BROTLI_INLINE int ReadCommandInternal(int safe,
+    BrotliState* s, BrotliBitReader* br, int* insert_length) {
+  uint32_t cmd_code;
+  uint32_t insert_len_extra = 0;
+  uint32_t copy_length;
+  CmdLutElement v;
+  BrotliBitReaderState memento; /* Written and read only when safe != 0. */
+  if (!safe) {
+    cmd_code = ReadSymbol(s->htree_command, br);
+  } else {
+    BrotliBitReaderSaveState(br, &memento);
+    if (!SafeReadSymbol(s->htree_command, br, &cmd_code)) {
+      return 0;
+    }
+  }
+  v = kCmdLut[cmd_code]; /* Per-symbol offsets, extra-bit counts, context. */
+  s->distance_code = v.distance_code;
+  s->distance_context = v.context;
+  s->dist_htree_index = s->dist_context_map_slice[s->distance_context];
+  *insert_length = v.insert_len_offset;
+  if (!safe) {
+    if (PREDICT_FALSE(v.insert_len_extra_bits != 0)) {
+      insert_len_extra = BrotliReadBits(br, v.insert_len_extra_bits);
+    }
+    copy_length = BrotliReadBits(br, v.copy_len_extra_bits);
+  } else {
+    if (!SafeReadBits(br, v.insert_len_extra_bits, &insert_len_extra) ||
+        !SafeReadBits(br, v.copy_len_extra_bits, &copy_length)) {
+      BrotliBitReaderRestoreState(br, &memento); /* Un-read the symbol too. */
+      return 0; /* Note: the |s| fields assigned above stay modified. */
+    }
+  }
+  s->copy_length = (int)copy_length + v.copy_len_offset;
+  --s->block_length[1]; /* One command consumed from the current block. */
+  *insert_length += (int)insert_len_extra;
+  return 1;
+}
+
+static BROTLI_INLINE void ReadCommand(BrotliState* s, BrotliBitReader* br,
+    int* insert_length) {
+  ReadCommandInternal(0, s, br, insert_length); /* safe = 0; result ignored. */
+}
+
+static BROTLI_INLINE int SafeReadCommand(BrotliState* s, BrotliBitReader* br,
+    int* insert_length) {
+  return ReadCommandInternal(1, s, br, insert_length); /* safe = 1. */
+}
+
+static BROTLI_INLINE int CheckInputAmount(int safe,
+    BrotliBitReader* const br, size_t num) {
+  if (safe) { /* Safe mode skips the bulk check and always reports success. */
+    return 1;
+  }
+  return BrotliCheckInputAmount(br, num);
+}
+
+#define BROTLI_SAFE(METHOD) { /* Uses safe, result, saveStateAndReturn. */ \
+  if (safe) { \
+    if (! Safe ## METHOD ) { \
+      result = BROTLI_RESULT_NEEDS_MORE_INPUT; \
+      goto saveStateAndReturn; \
+    } \
+  } else { \
+    METHOD ; \
+  } \
+}
+
+/* Decodes the commands of the current meta-block, writing literals and copies
+   into s->ringbuffer. Execution resumes at the label that matches s->state
+   and normally leaves through saveStateAndReturn, which stores |pos| and |i|
+   back into s->pos and s->loop_counter; the BROTLI_FAILURE() returns skip
+   that write-back.
+   With |safe| == 0 the unchecked readers are used behind CheckInputAmount()
+   guards; with |safe| != 0 the Safe* readers are used and a failed read
+   returns BROTLI_RESULT_NEEDS_MORE_INPUT. */
+static BROTLI_INLINE BrotliResult ProcessCommandsInternal(int safe,
+    BrotliState* s) {
+  int pos = s->pos;
+  int i = s->loop_counter;
+  BrotliResult result = BROTLI_RESULT_SUCCESS;
+  BrotliBitReader* br = &s->br;
+
+  if (!CheckInputAmount(safe, br, 28)) {
+    result = BROTLI_RESULT_NEEDS_MORE_INPUT;
+    goto saveStateAndReturn;
+  }
+  if (!safe) {
+    BROTLI_UNUSED(BrotliWarmupBitReader(br));
+  }
+
+  /* Jump into state machine. */
+  if (s->state == BROTLI_STATE_COMMAND_BEGIN) {
+    goto CommandBegin;
+  } else if (s->state == BROTLI_STATE_COMMAND_INNER) {
+    goto CommandInner;
+  } else if (s->state == BROTLI_STATE_COMMAND_POST_DECODE_LITERALS) {
+    goto CommandPostDecodeLiterals;
+  } else if (s->state == BROTLI_STATE_COMMAND_POST_WRAP_COPY) {
+    goto CommandPostWrapCopy;
+  } else {
+    return BROTLI_FAILURE();
   }
 
-  return 1;
+CommandBegin:
+  /* In safe mode the resume state is recorded up front, because any Safe*
+     read below may bail out through saveStateAndReturn. */
+  if (safe) {
+    s->state = BROTLI_STATE_COMMAND_BEGIN;
+  }
+  if (!CheckInputAmount(safe, br, 28)) { /* 156 bits + 7 bytes */
+    s->state = BROTLI_STATE_COMMAND_BEGIN;
+    result = BROTLI_RESULT_NEEDS_MORE_INPUT;
+    goto saveStateAndReturn;
+  }
+  if (PREDICT_FALSE(s->block_length[1] == 0)) {
+    BROTLI_SAFE(DecodeCommandBlockSwitch(s));
+    goto CommandBegin;
+  }
+  /* Read the insert/copy length in the command */
+  BROTLI_SAFE(ReadCommand(s, br, &i));
+  BROTLI_LOG(("[ProcessCommandsInternal] pos = %d insert = %d copy = %d\n",
+              pos, i, s->copy_length));
+  if (i == 0) {
+    goto CommandPostDecodeLiterals;
+  }
+  s->meta_block_remaining_len -= i;
+
+CommandInner:
+  if (safe) {
+    s->state = BROTLI_STATE_COMMAND_INNER;
+  }
+  /* Read the literals in the command */
+  if (s->trivial_literal_context) {
+    uint32_t bits;
+    uint32_t value;
+    PreloadSymbol(safe, s->literal_htree, br, &bits, &value);
+    do {
+      if (!CheckInputAmount(safe, br, 28)) { /* 162 bits + 7 bytes */
+        s->state = BROTLI_STATE_COMMAND_INNER;
+        result = BROTLI_RESULT_NEEDS_MORE_INPUT;
+        goto saveStateAndReturn;
+      }
+      if (PREDICT_FALSE(s->block_length[0] == 0)) {
+        BROTLI_SAFE(DecodeLiteralBlockSwitch(s));
+        PreloadSymbol(safe, s->literal_htree, br, &bits, &value);
+      }
+      if (!safe) {
+        s->ringbuffer[pos] = (uint8_t)ReadPreloadedSymbol(
+            s->literal_htree, br, &bits, &value);
+      } else {
+        uint32_t literal;
+        if (!SafeReadSymbol(s->literal_htree, br, &literal)) {
+          result = BROTLI_RESULT_NEEDS_MORE_INPUT;
+          goto saveStateAndReturn;
+        }
+        s->ringbuffer[pos] = (uint8_t)literal;
+      }
+      --s->block_length[0];
+      BROTLI_LOG_UINT(s->literal_htree_index);
+      BROTLI_LOG_ARRAY_INDEX(s->ringbuffer, pos);
+      ++pos;
+      if (PREDICT_FALSE(pos == s->ringbuffer_size)) {
+        s->state = BROTLI_STATE_COMMAND_INNER_WRITE;
+        /* The goto skips the loop's own "--i", so account for the literal
+           just written before the counter is saved. */
+        --i;
+        goto saveStateAndReturn;
+      }
+    } while (--i != 0);
+  } else {
+    /* p1 and p2 are the two bytes preceding |pos|; they select the literal
+       context for each symbol. */
+    uint8_t p1 = s->ringbuffer[(pos - 1) & s->ringbuffer_mask];
+    uint8_t p2 = s->ringbuffer[(pos - 2) & s->ringbuffer_mask];
+    do {
+      const HuffmanCode* hc;
+      uint8_t context;
+      if (!CheckInputAmount(safe, br, 28)) { /* 162 bits + 7 bytes */
+        s->state = BROTLI_STATE_COMMAND_INNER;
+        result = BROTLI_RESULT_NEEDS_MORE_INPUT;
+        goto saveStateAndReturn;
+      }
+      if (PREDICT_FALSE(s->block_length[0] == 0)) {
+        BROTLI_SAFE(DecodeLiteralBlockSwitch(s));
+      }
+      context = s->context_lookup1[p1] | s->context_lookup2[p2];
+      BROTLI_LOG_UINT(context);
+      hc = s->literal_hgroup.htrees[s->context_map_slice[context]];
+      p2 = p1;
+      if (!safe) {
+        p1 = (uint8_t)ReadSymbol(hc, br);
+      } else {
+        uint32_t literal;
+        if (!SafeReadSymbol(hc, br, &literal)) {
+          result = BROTLI_RESULT_NEEDS_MORE_INPUT;
+          goto saveStateAndReturn;
+        }
+        p1 = (uint8_t)literal;
+      }
+      s->ringbuffer[pos] = p1;
+      --s->block_length[0];
+      BROTLI_LOG_UINT(s->context_map_slice[context]);
+      BROTLI_LOG_ARRAY_INDEX(s->ringbuffer, pos & s->ringbuffer_mask);
+      ++pos;
+      if (PREDICT_FALSE(pos == s->ringbuffer_size)) {
+        s->state = BROTLI_STATE_COMMAND_INNER_WRITE;
+        /* Same manual decrement as in the trivial-context loop above. */
+        --i;
+        goto saveStateAndReturn;
+      }
+    } while (--i != 0);
+  }
+  BROTLI_LOG_UINT(s->meta_block_remaining_len);
+  if (s->meta_block_remaining_len <= 0) {
+    s->state = BROTLI_STATE_METABLOCK_DONE;
+    goto saveStateAndReturn;
+  }
+
+CommandPostDecodeLiterals:
+  if (safe) {
+    s->state = BROTLI_STATE_COMMAND_POST_DECODE_LITERALS;
+  }
+  if (s->distance_code >= 0) {
+    --s->dist_rb_idx;
+    s->distance_code = s->dist_rb[s->dist_rb_idx & 3];
+    goto postReadDistance;  /* We already have the implicit distance */
+  }
+  /* Read distance code in the command, unless it was implicitly zero. */
+  if (PREDICT_FALSE(s->block_length[2] == 0)) {
+    BROTLI_SAFE(DecodeDistanceBlockSwitch(s));
+  }
+  BROTLI_SAFE(ReadDistance(s, br));
+postReadDistance:
+  BROTLI_LOG(("[ProcessCommandsInternal] pos = %d distance = %d\n",
+              pos, s->distance_code));
+  if (s->max_distance != s->max_backward_distance) {
+    if (pos < s->max_backward_distance_minus_custom_dict_size) {
+      s->max_distance = pos + s->custom_dict_size;
+    } else {
+      s->max_distance = s->max_backward_distance;
+    }
+  }
+  /* From here on |i| counts the bytes of the copy rather than literals. */
+  i = s->copy_length;
+  /* Apply copy of LZ77 back-reference, or static dictionary reference if
+  the distance is larger than the max LZ77 distance */
+  if (s->distance_code > s->max_distance) {
+    if (i >= kBrotliMinDictionaryWordLength &&
+        i <= kBrotliMaxDictionaryWordLength) {
+      int offset = (int)kBrotliDictionaryOffsetsByLength[i];
+      int word_id = s->distance_code - s->max_distance - 1;
+      uint32_t shift = kBrotliDictionarySizeBitsByLength[i];
+      int mask = (int)BitMask(shift);
+      /* Low |shift| bits of word_id pick the word, the rest the transform. */
+      int word_idx = word_id & mask;
+      int transform_idx = word_id >> shift;
+      offset += word_idx * i;
+      if (transform_idx < kNumTransforms) {
+        const uint8_t* word = &kBrotliDictionary[offset];
+        int len = i;
+        if (transform_idx == 0) {
+          memcpy(&s->ringbuffer[pos], word, (size_t)len);
+        } else {
+          len = TransformDictionaryWord(
+              &s->ringbuffer[pos], word, len, transform_idx);
+        }
+        pos += len;
+        s->meta_block_remaining_len -= len;
+        if (pos >= s->ringbuffer_size) {
+          /*s->partial_pos_rb += (size_t)s->ringbuffer_size;*/
+          s->state = BROTLI_STATE_COMMAND_POST_WRITE_1;
+          goto saveStateAndReturn;
+        }
+      } else {
+        BROTLI_LOG(("Invalid backward reference. pos: %d distance: %d "
+               "len: %d bytes left: %d\n",
+            pos, s->distance_code, i,
+            s->meta_block_remaining_len));
+        return BROTLI_FAILURE();
+      }
+    } else {
+      BROTLI_LOG(("Invalid backward reference. pos: %d distance: %d "
+             "len: %d bytes left: %d\n", pos, s->distance_code, i,
+             s->meta_block_remaining_len));
+      return BROTLI_FAILURE();
+    }
+  } else {
+    const uint8_t *ringbuffer_end_minus_copy_length =
+        s->ringbuffer_end - i;
+    uint8_t* copy_src = &s->ringbuffer[
+        (pos - s->distance_code) & s->ringbuffer_mask];
+    uint8_t* copy_dst = &s->ringbuffer[pos];
+    /* Check for possible underflow and clamp the pointer to 0. */
+    /* NOTE(review): this relies on arithmetic on a null pointer, which ISO C
+       leaves undefined - confirm it is acceptable on all supported
+       compilers. */
+    if (PREDICT_FALSE(s->ringbuffer_end < (const uint8_t*)0 + i)) {
+      ringbuffer_end_minus_copy_length = 0;
+    }
+    /* update the recent distances cache */
+    s->dist_rb[s->dist_rb_idx & 3] = s->distance_code;
+    ++s->dist_rb_idx;
+    s->meta_block_remaining_len -= i;
+    if (PREDICT_FALSE(s->meta_block_remaining_len < 0)) {
+      BROTLI_LOG(("Invalid backward reference. pos: %d distance: %d "
+             "len: %d bytes left: %d\n", pos, s->distance_code, i,
+             s->meta_block_remaining_len));
+      return BROTLI_FAILURE();
+    }
+    /* There is 128+ bytes of slack in the ringbuffer allocation.
+       Also, we have 16 short codes, that make these 16 bytes irrelevant
+       in the ringbuffer. Let's copy over them as a first guess.
+     */
+    memmove16(copy_dst, copy_src);
+    /* Now check if the copy extends over the ringbuffer end,
+       or if the copy overlaps with itself, if yes, do wrap-copy. */
+    if (copy_src < copy_dst) {
+      if (copy_dst >= ringbuffer_end_minus_copy_length) {
+        goto CommandPostWrapCopy;
+      }
+      if (copy_src + i > copy_dst) {
+        goto postSelfintersecting;
+      }
+    } else {
+      if (copy_src >= ringbuffer_end_minus_copy_length) {
+        goto CommandPostWrapCopy;
+      }
+      if (copy_dst + i > copy_src) {
+        goto postSelfintersecting;
+      }
+    }
+    /* Fast path: the first 16 bytes were already copied above. */
+    pos += i;
+    if (i > 16) {
+      if (i > 32) {
+        memcpy(copy_dst + 16, copy_src + 16, (size_t)(i - 16));
+      } else {
+        /* This branch covers about 45% cases.
+           Fixed size short copy allows more compiler optimizations. */
+        memmove16(copy_dst + 16, copy_src + 16);
+      }
+    }
+  }
+  BROTLI_LOG_UINT(s->meta_block_remaining_len);
+  if (s->meta_block_remaining_len <= 0) {
+    /* Next metablock, if any */
+    s->state = BROTLI_STATE_METABLOCK_DONE;
+    goto saveStateAndReturn;
+  } else {
+    goto CommandBegin;
+  }
+postSelfintersecting:
+  /* Source and destination ranges overlap: copy one byte at a time, masking
+     the source index with ringbuffer_mask. */
+  while (--i >= 0) {
+    s->ringbuffer[pos] =
+        s->ringbuffer[(pos - s->distance_code) & s->ringbuffer_mask];
+    ++pos;
+  }
+  if (s->meta_block_remaining_len <= 0) {
+    /* Next metablock, if any */
+    s->state = BROTLI_STATE_METABLOCK_DONE;
+    goto saveStateAndReturn;
+  } else {
+    goto CommandBegin;
+  }
+
+CommandPostWrapCopy:
+  /* Byte-wise copy that returns with BROTLI_STATE_COMMAND_POST_WRITE_2 as
+     soon as |pos| equals s->ringbuffer_size, leaving the remaining count in
+     |i| for saveStateAndReturn to store. */
+  s->state = BROTLI_STATE_COMMAND_POST_WRAP_COPY;
+  while (--i >= 0) {
+    s->ringbuffer[pos] =
+        s->ringbuffer[(pos - s->distance_code) & s->ringbuffer_mask];
+    ++pos;
+    if (pos == s->ringbuffer_size) {
+      /*s->partial_pos_rb += (size_t)s->ringbuffer_size;*/
+      s->state = BROTLI_STATE_COMMAND_POST_WRITE_2;
+      goto saveStateAndReturn;
+    }
+  }
+  if (s->meta_block_remaining_len <= 0) {
+    /* Next metablock, if any */
+    s->state = BROTLI_STATE_METABLOCK_DONE;
+    goto saveStateAndReturn;
+  } else {
+    goto CommandBegin;
+  }
+
+saveStateAndReturn:
+  /* Write the working copies back so a later call can resume from them. */
+  s->pos = pos;
+  s->loop_counter = i;
+  return result;
+}
+
+#undef BROTLI_SAFE
+
+/* Non-inlined entry point that runs ProcessCommandsInternal with its |safe|
+   argument set to 0. */
+static BROTLI_NOINLINE BrotliResult ProcessCommands(BrotliState* s) {
+  const int safe = 0;
+  return ProcessCommandsInternal(safe, s);
+}
+
+/* Non-inlined entry point that runs ProcessCommandsInternal with its |safe|
+   argument set to 1. */
+static BROTLI_NOINLINE BrotliResult SafeProcessCommands(BrotliState* s) {
+  const int safe = 1;
+  return ProcessCommandsInternal(safe, s);
 }
 
+/* One-shot decode of |encoded_size| bytes at |encoded_buffer| into
+   |decoded_buffer|. On entry *decoded_size is the available output space; on
+   return it is overwritten with the total_out count reported by
+   BrotliDecompressStream. A temporary BrotliState is initialised and cleaned
+   up inside the call. Any stream result other than BROTLI_RESULT_SUCCESS is
+   returned as BROTLI_RESULT_ERROR. */
 BrotliResult BrotliDecompressBuffer(size_t encoded_size,
                                     const uint8_t* encoded_buffer,
                                     size_t* decoded_size,
                                     uint8_t* decoded_buffer) {
-  BrotliMemInput memin;
-  BrotliInput in = BrotliInitMemInput(encoded_buffer, encoded_size, &memin);
-  BrotliMemOutput mout;
-  BrotliOutput out = BrotliInitMemOutput(decoded_buffer, *decoded_size, &mout);
-  BrotliResult success = BrotliDecompress(in, out);
-  *decoded_size = mout.pos;
-  return success;
-}
-
-BrotliResult BrotliDecompress(BrotliInput input, BrotliOutput output) {
   BrotliState s;
   BrotliResult result;
+  size_t total_out = 0;
+  size_t available_in = encoded_size;
+  const uint8_t* next_in = encoded_buffer;
+  size_t available_out = *decoded_size;
+  uint8_t* next_out = decoded_buffer;
   BrotliStateInit(&s);
-  result = BrotliDecompressStreaming(input, output, 1, &s);
-  if (result == BROTLI_RESULT_NEEDS_MORE_INPUT) {
-    /* Not ok: it didn't finish even though this is a non-streaming function. */
-    result = BROTLI_FAILURE();
+  result = BrotliDecompressStream(&available_in, &next_in, &available_out,
+      &next_out, &total_out, &s);
+  *decoded_size = total_out;
+  BrotliStateCleanup(&s);
+  /* A single call must finish the stream: "needs more input" and "needs more
+     output" are both folded into an error here. */
+  if (result != BROTLI_RESULT_SUCCESS) {
+    result = BROTLI_RESULT_ERROR;
   }
-  BrotliStateCleanup(&s);
   return result;
 }
 
-BrotliResult BrotliDecompressBufferStreaming(size_t* available_in,
-                                             const uint8_t** next_in,
-                                             int finish,
-                                             size_t* available_out,
-                                             uint8_t** next_out,
-                                             size_t* total_out,
-                                             BrotliState* s) {
-  BrotliMemInput memin;
-  BrotliInput in = BrotliInitMemInput(*next_in, *available_in, &memin);
-  BrotliMemOutput memout;
-  BrotliOutput out = BrotliInitMemOutput(*next_out, *available_out, &memout);
-  BrotliResult result = BrotliDecompressStreaming(in, out, finish, s);
-  /* The current implementation reads everything, so 0 bytes are available. */
-  *next_in += memin.pos;
-  *available_in -= memin.pos;
-  /* Update the output position to where we write next. */
-  *next_out += memout.pos;
-  *available_out -= memout.pos;
-  *total_out += memout.pos;
-  return result;
-}
-
-BrotliResult BrotliDecompressStreaming(BrotliInput input, BrotliOutput output,
-                                       int finish, BrotliState* s) {
-  uint8_t context;
-  int pos = s->pos;
-  int i = s->loop_counter;
+/* Invariant: input stream is never overconsumed:
+    * invalid input implies that the whole stream is invalid -> any amount of
+      input could be read and discarded
+    * when result is "needs more input", then at least one more byte is REQUIRED
+      to complete decoding; all input data MUST be consumed by decoder, so
+      client could swap the input buffer
+    * when result is "needs more output" decoder MUST ensure that it doesn't
+      hold more than 7 bits in bit reader; this saves client from swapping input
+      buffer ahead of time
+    * when result is "success" decoder MUST return all unused data back to input
+      buffer; this is possible because the invariant holds on entry
+*/
+BrotliResult BrotliDecompressStream(size_t* available_in,
+    const uint8_t** next_in, size_t* available_out, uint8_t** next_out,
+    size_t* total_out, BrotliState* s) {
   BrotliResult result = BROTLI_RESULT_SUCCESS;
   BrotliBitReader* br = &s->br;
-  int initial_remaining_len;
-  int bytes_copied;
-  uint8_t *copy_src;
-  uint8_t *copy_dst;
-  /* We need the slack region for the following reasons:
-       - doing up to two 16-byte copies for fast backward copying
-       - transforms
-       - flushing the input s->ringbuffer when decoding uncompressed blocks */
-  s->br.input_ = input;
+  if (s->buffer_length == 0) { /* Just connect bit reader to input stream. */
+    br->avail_in = *available_in;
+    br->next_in = *next_in;
+  } else {
+    /* At least one byte of input is required. More than one byte of input may
+       be required to complete the transaction -> reading more data must be
+       done in a loop -> do it in a main loop. */
+    result = BROTLI_RESULT_NEEDS_MORE_INPUT;
+    br->next_in = &s->buffer.u8[0];
+  }
   /* State machine */
   for (;;) {
-    if (result != BROTLI_RESULT_SUCCESS) {
+    if (result != BROTLI_RESULT_SUCCESS) { /* Error | needs more input/output */
       if (result == BROTLI_RESULT_NEEDS_MORE_INPUT) {
-        if (BrotliReadInput(br, finish)) {
-          result = BROTLI_RESULT_SUCCESS;
-          continue;
+        if (s->ringbuffer != 0) { /* Proactively push output. */
+          WriteRingBuffer(available_out, next_out, total_out, s);
         }
-        if (finish) {
-          BROTLI_LOG(("Unexpected end of input. State: %d\n", s->state));
-          result = BROTLI_FAILURE();
+        if (s->buffer_length != 0) { /* Used with internal buffer. */
+          if (br->avail_in == 0) { /* Successfully finished read transaction. */
+            /* Accamulator contains less than 8 bits, because internal buffer
+               is expanded byte-by-byte until it is enough to complete read. */
+            s->buffer_length = 0;
+            /* Switch to input stream and restart. */
+            result = BROTLI_RESULT_SUCCESS;
+            br->avail_in = *available_in;
+            br->next_in = *next_in;
+            continue;
+          } else if (*available_in != 0) {
+            /* Not enough data in buffer, but can take one more byte from
+               input stream. */
+            result = BROTLI_RESULT_SUCCESS;
+            s->buffer.u8[s->buffer_length] = **next_in;
+            s->buffer_length++;
+            br->avail_in = s->buffer_length;
+            (*next_in)++;
+            (*available_in)--;
+            /* Retry with more data in buffer. */
+            continue;
+          }
+          /* Can't finish reading and no more input.*/
+          break;
+        } else { /* Input stream doesn't contain enough input. */
+          /* Copy tail to internal buffer and return. */
+          *next_in = br->next_in;
+          *available_in = br->avail_in;
+          while (*available_in) {
+            s->buffer.u8[s->buffer_length] = **next_in;
+            s->buffer_length++;
+            (*next_in)++;
+            (*available_in)--;
+          }
+          break;
         }
+        /* Unreachable. */
       }
-      break;  /* Fail, or partial data. */
+
+      /* Fail or needs more output. */
+
+      if (s->buffer_length != 0) {
+        /* Just consumed the buffered input and produced some output. Otherwise
+           it would result in "needs more input". Reset internal buffer.*/
+        s->buffer_length = 0;
+      } else {
+        /* Using input stream in last iteration. When decoder switches to input
+           stream it has less than 8 bits in accamulator, so it is safe to
+           return unused accamulator bits there. */
+        BrotliBitReaderUnload(br);
+        *available_in = br->avail_in;
+        *next_in = br->next_in;
+      }
+      break;
     }
     switch (s->state) {
       case BROTLI_STATE_UNINITED:
-        pos = 0;
-        BrotliInitBitReader(br, input);
-
-        s->state = BROTLI_STATE_BITREADER_WARMUP;
-        /* No break, continue to next state */
-      case BROTLI_STATE_BITREADER_WARMUP:
         /* Prepare to the first read. */
         if (!BrotliWarmupBitReader(br)) {
           result = BROTLI_RESULT_NEEDS_MORE_INPUT;
           break;
         }
         /* Decode window size. */
         s->window_bits = DecodeWindowBits(br); /* Reads 1..7 bits. */
         BROTLI_LOG_UINT(s->window_bits);
@@ -1062,37 +1948,36 @@ BrotliResult BrotliDecompressStreaming(B
           result = BROTLI_FAILURE();
           break;
         }
         s->max_backward_distance = (1 << s->window_bits) - 16;
         s->max_backward_distance_minus_custom_dict_size =
             s->max_backward_distance - s->custom_dict_size;
 
         /* Allocate memory for both block_type_trees and block_len_trees. */
-        s->block_type_trees = (HuffmanCode*)malloc(
-            6 * BROTLI_HUFFMAN_MAX_TABLE_SIZE * sizeof(HuffmanCode));
-
-        if (s->block_type_trees == NULL) {
+        s->block_type_trees = (HuffmanCode*)BROTLI_ALLOC(s,
+            sizeof(HuffmanCode) * 3 *
+                (BROTLI_HUFFMAN_MAX_SIZE_258 + BROTLI_HUFFMAN_MAX_SIZE_26));
+        if (s->block_type_trees == 0) {
           result = BROTLI_FAILURE();
           break;
         }
         s->block_len_trees = s->block_type_trees +
-            3 * BROTLI_HUFFMAN_MAX_TABLE_SIZE;
+            3 * BROTLI_HUFFMAN_MAX_SIZE_258;
 
         s->state = BROTLI_STATE_METABLOCK_BEGIN;
         /* No break, continue to next state */
       case BROTLI_STATE_METABLOCK_BEGIN:
         BrotliStateMetablockBegin(s);
-        BROTLI_LOG_UINT(pos);
+        BROTLI_LOG_UINT(s->pos);
         s->state = BROTLI_STATE_METABLOCK_HEADER;
         /* No break, continue to next state */
       case BROTLI_STATE_METABLOCK_HEADER:
         result = DecodeMetaBlockLength(s, br); /* Reads 2 - 31 bits. */
         if (result != BROTLI_RESULT_SUCCESS) {
-          i = s->loop_counter; /* Has been updated in DecodeMetaBlockLength. */
           break;
         }
         BROTLI_LOG_UINT(s->is_last_metablock);
         BROTLI_LOG_UINT(s->meta_block_remaining_len);
         BROTLI_LOG_UINT(s->is_metadata);
         BROTLI_LOG_UINT(s->is_uncompressed);
         if (s->is_metadata || s->is_uncompressed) {
           if (!BrotliJumpToByteBoundary(br)) {
@@ -1104,597 +1989,271 @@ BrotliResult BrotliDecompressStreaming(B
           s->state = BROTLI_STATE_METADATA;
           break;
         }
         if (s->meta_block_remaining_len == 0) {
           s->state = BROTLI_STATE_METABLOCK_DONE;
           break;
         }
         if (!s->ringbuffer) {
-          if (!BrotliAllocateRingBuffer(s, br)) {
-            result = BROTLI_FAILURE();
-            break;
-          }
+          BrotliCalculateRingBufferSize(s, br);
         }
         if (s->is_uncompressed) {
           s->state = BROTLI_STATE_UNCOMPRESSED;
           break;
         }
-        i = 0;
+        s->loop_counter = 0;
         s->state = BROTLI_STATE_HUFFMAN_CODE_0;
         break;
-      case BROTLI_STATE_UNCOMPRESSED:
-        initial_remaining_len = s->meta_block_remaining_len;
-        /* pos is given as argument since s->pos is only updated at the end. */
-        result = CopyUncompressedBlockToOutput(output, pos, s);
-        bytes_copied = initial_remaining_len - s->meta_block_remaining_len;
-        pos = (pos + bytes_copied) & s->ringbuffer_mask;
+      case BROTLI_STATE_UNCOMPRESSED: {
+        int bytes_copied = s->meta_block_remaining_len;
+        result = CopyUncompressedBlockToOutput(
+            available_out, next_out, total_out, s);
+        bytes_copied -= s->meta_block_remaining_len;
         if (result != BROTLI_RESULT_SUCCESS) {
           break;
         }
         s->state = BROTLI_STATE_METABLOCK_DONE;
         break;
+      }
       case BROTLI_STATE_METADATA:
         for (; s->meta_block_remaining_len > 0; --s->meta_block_remaining_len) {
           uint32_t bits;
           /* Read one byte and ignore it. */
           if (!BrotliSafeReadBits(br, 8, &bits)) {
             result = BROTLI_RESULT_NEEDS_MORE_INPUT;
             break;
           }
         }
         if (result == BROTLI_RESULT_SUCCESS) {
           s->state = BROTLI_STATE_METABLOCK_DONE;
         }
         break;
       case BROTLI_STATE_HUFFMAN_CODE_0:
-        if (i >= 3) {
-          s->state = BROTLI_STATE_CONTEXT_MODES;
+        if (s->loop_counter >= 3) {
+          s->state = BROTLI_STATE_METABLOCK_HEADER_2;
           break;
         }
         /* Reads 1..11 bits. */
-        result = DecodeVarLenUint8(s, br, &s->num_block_types[i]);
+        result = DecodeVarLenUint8(s, br, &s->num_block_types[s->loop_counter]);
         if (result != BROTLI_RESULT_SUCCESS) {
           break;
         }
-        s->num_block_types[i]++;
-        BROTLI_LOG_UINT(s->num_block_types[i]);
+        s->num_block_types[s->loop_counter]++;
+        BROTLI_LOG_UINT(s->num_block_types[s->loop_counter]);
+        if (s->num_block_types[s->loop_counter] < 2) {
+          s->loop_counter++;
+          break;
+        }
         s->state = BROTLI_STATE_HUFFMAN_CODE_1;
         /* No break, continue to next state */
-      case BROTLI_STATE_HUFFMAN_CODE_1:
-        if (!BrotliWarmupBitReader(br)) {
+      case BROTLI_STATE_HUFFMAN_CODE_1: {
+        int tree_offset = s->loop_counter * BROTLI_HUFFMAN_MAX_SIZE_258;
+        result = ReadHuffmanCode(s->num_block_types[s->loop_counter] + 2,
+            &s->block_type_trees[tree_offset], NULL, s);
+        if (result != BROTLI_RESULT_SUCCESS) break;
+        s->state = BROTLI_STATE_HUFFMAN_CODE_2;
+        /* No break, continue to next state */
+      }
+      case BROTLI_STATE_HUFFMAN_CODE_2: {
+        int tree_offset = s->loop_counter * BROTLI_HUFFMAN_MAX_SIZE_26;
+        result = ReadHuffmanCode(kNumBlockLengthCodes,
+            &s->block_len_trees[tree_offset], NULL, s);
+        if (result != BROTLI_RESULT_SUCCESS) break;
+        s->state = BROTLI_STATE_HUFFMAN_CODE_3;
+        /* No break, continue to next state */
+      }
+      case BROTLI_STATE_HUFFMAN_CODE_3: {
+        int tree_offset = s->loop_counter * BROTLI_HUFFMAN_MAX_SIZE_26;
+        if (!SafeReadBlockLength(s, &s->block_length[s->loop_counter],
+            &s->block_len_trees[tree_offset], br)) {
           result = BROTLI_RESULT_NEEDS_MORE_INPUT;
           break;
         }
-        if (s->num_block_types[i] >= 2) {
-          result = ReadHuffmanCode(s->num_block_types[i] + 2,
-              &s->block_type_trees[i * BROTLI_HUFFMAN_MAX_TABLE_SIZE],
-              NULL, s);
-          if (result != BROTLI_RESULT_SUCCESS) break;
-          s->state = BROTLI_STATE_HUFFMAN_CODE_2;
-        } else {
-          i++;
-          s->state = BROTLI_STATE_HUFFMAN_CODE_0;
-          break;
-        }
-        /* No break, continue to next state */
-      case BROTLI_STATE_HUFFMAN_CODE_2:
-        result = ReadHuffmanCode(kNumBlockLengthCodes,
-            &s->block_len_trees[i * BROTLI_HUFFMAN_MAX_TABLE_SIZE],
-            NULL, s);
-        if (result != BROTLI_RESULT_SUCCESS) break;
-        s->state = BROTLI_STATE_HUFFMAN_CODE_3;
-        /* No break, continue to next state */
-      case BROTLI_STATE_HUFFMAN_CODE_3:
-        if (!BrotliCheckInputAmount(br, 8)) {
+        BROTLI_LOG_UINT(s->block_length[s->loop_counter]);
+        s->loop_counter++;
+        s->state = BROTLI_STATE_HUFFMAN_CODE_0;
+        break;
+      }
+      case BROTLI_STATE_METABLOCK_HEADER_2: {
+        uint32_t bits;
+        if (!BrotliSafeReadBits(br, 6, &bits)) {
           result = BROTLI_RESULT_NEEDS_MORE_INPUT;
           break;
         }
-        s->block_length[i] = ReadBlockLength( /* Reads 3..39 bits. */
-            &s->block_len_trees[i * BROTLI_HUFFMAN_MAX_TABLE_SIZE], br);
-        BROTLI_LOG_UINT(s->block_length[i]);
-        i++;
-        s->state = BROTLI_STATE_HUFFMAN_CODE_0;
-        break;
-      case BROTLI_STATE_CONTEXT_MODES:
-        /* We need up to 256 * 2 + 6 bits, this fits in 128 bytes. */
-        if (!BrotliCheckInputAmount(br, 128)) {
-          result = BROTLI_RESULT_NEEDS_MORE_INPUT;
-          break;
-        }
-        s->distance_postfix_bits = (int)BrotliReadBits(br, 2);
+        s->distance_postfix_bits = bits & BitMask(2);
+        bits >>= 2;
         s->num_direct_distance_codes = NUM_DISTANCE_SHORT_CODES +
-            ((int)BrotliReadBits(br, 4) << s->distance_postfix_bits);
+            (bits << s->distance_postfix_bits);
         BROTLI_LOG_UINT(s->num_direct_distance_codes);
         BROTLI_LOG_UINT(s->distance_postfix_bits);
         s->distance_postfix_mask = (int)BitMask(s->distance_postfix_bits);
-        s->context_modes = (uint8_t*)malloc((size_t)s->num_block_types[0]);
+        s->context_modes =
+            (uint8_t*)BROTLI_ALLOC(s, (size_t)s->num_block_types[0]);
         if (s->context_modes == 0) {
           result = BROTLI_FAILURE();
           break;
         }
-        for (i = 0; i < s->num_block_types[0]; ++i) {
-          s->context_modes[i] = (uint8_t)(BrotliReadBits(br, 2) << 1);
-          BROTLI_LOG_ARRAY_INDEX(s->context_modes, i);
+        s->loop_counter = 0;
+        s->state = BROTLI_STATE_CONTEXT_MODES;
+        /* No break, continue to next state */
+      }
+      case BROTLI_STATE_CONTEXT_MODES:
+        result = ReadContextModes(s);
+        if (result != BROTLI_RESULT_SUCCESS) {
+          break;
         }
         s->state = BROTLI_STATE_CONTEXT_MAP_1;
         /* No break, continue to next state */
-      case BROTLI_STATE_CONTEXT_MAP_1:
+      case BROTLI_STATE_CONTEXT_MAP_1: {
+        uint32_t j;
         result = DecodeContextMap(s->num_block_types[0] << kLiteralContextBits,
                                   &s->num_literal_htrees, &s->context_map, s);
         if (result != BROTLI_RESULT_SUCCESS) {
           break;
         }
         s->trivial_literal_context = 1;
-        for (i = 0; i < s->num_block_types[0] << kLiteralContextBits; i++) {
-          if (s->context_map[i] != i >> kLiteralContextBits) {
+        for (j = 0; j < s->num_block_types[0] << kLiteralContextBits; j++) {
+          if (s->context_map[j] != j >> kLiteralContextBits) {
             s->trivial_literal_context = 0;
             break;
           }
         }
         s->state = BROTLI_STATE_CONTEXT_MAP_2;
         /* No break, continue to next state */
+      }
       case BROTLI_STATE_CONTEXT_MAP_2:
         {
-          int num_distance_codes =
-              s->num_direct_distance_codes + (48 << s->distance_postfix_bits);
+          uint32_t num_distance_codes =
+              s->num_direct_distance_codes + (48U << s->distance_postfix_bits);
           result = DecodeContextMap(
               s->num_block_types[2] << kDistanceContextBits,
               &s->num_dist_htrees, &s->dist_context_map, s);
           if (result != BROTLI_RESULT_SUCCESS) {
             break;
           }
-          BrotliHuffmanTreeGroupInit(
-              &s->literal_hgroup, kNumLiteralCodes, s->num_literal_htrees);
-          BrotliHuffmanTreeGroupInit(
-              &s->insert_copy_hgroup, kNumInsertAndCopyCodes,
-              s->num_block_types[1]);
-          BrotliHuffmanTreeGroupInit(
-              &s->distance_hgroup, num_distance_codes, s->num_dist_htrees);
+          BrotliHuffmanTreeGroupInit(s, &s->literal_hgroup, kNumLiteralCodes,
+                                     s->num_literal_htrees);
+          BrotliHuffmanTreeGroupInit(s, &s->insert_copy_hgroup,
+                                     kNumInsertAndCopyCodes,
+                                     s->num_block_types[1]);
+          BrotliHuffmanTreeGroupInit(s, &s->distance_hgroup, num_distance_codes,
+                                     s->num_dist_htrees);
+          if (s->literal_hgroup.codes == 0 ||
+              s->insert_copy_hgroup.codes == 0 ||
+              s->distance_hgroup.codes == 0) {
+            return BROTLI_FAILURE();
+          }
         }
-        i = 0;
+        s->loop_counter = 0;
         s->state = BROTLI_STATE_TREE_GROUP;
         /* No break, continue to next state */
       case BROTLI_STATE_TREE_GROUP:
         {
           HuffmanTreeGroup* hgroup = NULL;
-          switch (i) {
+          switch (s->loop_counter) {
             case 0:
               hgroup = &s->literal_hgroup;
               break;
             case 1:
               hgroup = &s->insert_copy_hgroup;
               break;
             case 2:
               hgroup = &s->distance_hgroup;
               break;
           }
           result = HuffmanTreeGroupDecode(hgroup, s);
         }
         if (result != BROTLI_RESULT_SUCCESS) break;
-        i++;
-        if (i >= 3) {
+        s->loop_counter++;
+        if (s->loop_counter >= 3) {
           uint8_t context_mode = s->context_modes[s->block_type_rb[1]];
           s->context_map_slice = s->context_map;
           s->dist_context_map_slice = s->dist_context_map;
           s->context_lookup1 =
               &kContextLookup[kContextLookupOffsets[context_mode]];
           s->context_lookup2 =
               &kContextLookup[kContextLookupOffsets[context_mode + 1]];
           s->htree_command = s->insert_copy_hgroup.htrees[0];
           s->literal_htree = s->literal_hgroup.htrees[s->literal_htree_index];
+          if (!s->ringbuffer && !BrotliAllocateRingBuffer(s)) {
+            result = BROTLI_FAILURE();
+            break;
+          }
           s->state = BROTLI_STATE_COMMAND_BEGIN;
         }
         break;
       case BROTLI_STATE_COMMAND_BEGIN:
-        if (s->meta_block_remaining_len <= 0) {
-          /* Next metablock, if any */
-          s->state = BROTLI_STATE_METABLOCK_DONE;
-          break;
-        }
- /* Decoding of Brotli commands is the inner loop, jumping with goto makes it
-    3% faster */
- CommandBegin:
-        if (!BrotliCheckInputAmount(br, 32)) {
-          s->state = BROTLI_STATE_COMMAND_BEGIN;
-          result = BROTLI_RESULT_NEEDS_MORE_INPUT;
-          break;
-        }
-          /* Read the insert/copy length in the command */
-        if (s->block_length[1] == 0) {
-          /* Block switch for insert/copy length. Reads 0..15 bits. */
-          DecodeBlockType(s->num_block_types[1],
-                          s->block_type_trees, 1,
-                          s->block_type_rb, br);
-          s->htree_command = s->insert_copy_hgroup.htrees[s->block_type_rb[3]];
-          s->block_length[1] = ReadBlockLength( /* Reads 3..39 bits. */
-              &s->block_len_trees[BROTLI_HUFFMAN_MAX_TABLE_SIZE], br);
-        }
-        {
-          int cmd_code = ReadSymbol(s->htree_command, br);
-          int insert_len_extra = 0;
-          CmdLutElement v;
-          --s->block_length[1];
-          v = kCmdLut[cmd_code];
-          s->distance_code = v.distance_code;
-          s->distance_context = v.context;
-          s->dist_htree_index = s->dist_context_map_slice[s->distance_context];
-          i = v.insert_len_offset;
-          if (PREDICT_FALSE(v.insert_len_extra_bits != 0)) {
-            insert_len_extra = (int)BrotliReadBits(br, v.insert_len_extra_bits);
-          }
-          s->copy_length = (int)BrotliReadBits(br, v.copy_len_extra_bits) +
-                           v.copy_len_offset;
-          i += insert_len_extra;
-        }
-        BROTLI_LOG_UINT(i);
-        BROTLI_LOG_UINT(s->copy_length);
-        BROTLI_LOG_UINT(s->distance_code);
-        if (i == 0) {
-          goto postDecodeLiterals;
-        }
-        s->meta_block_remaining_len -= i;
-        /* No break, go to next state */
       case BROTLI_STATE_COMMAND_INNER:
-        /* Read the literals in the command */
-        if (s->trivial_literal_context) {
-          unsigned bits;
-          unsigned value;
-          PreloadSymbol(s->literal_htree, br, &bits, &value);
-          do {
-            if (!BrotliCheckInputAmount(br, 64)) {
-              s->state = BROTLI_STATE_COMMAND_INNER;
-              result = BROTLI_RESULT_NEEDS_MORE_INPUT;
-              break;
-            }
-            if (PREDICT_FALSE(s->block_length[0] == 0)) {
-              /* Block switch for literals */
-              DecodeBlockTypeWithContext(s, br);
-              PreloadSymbol(s->literal_htree, br, &bits, &value);
-            }
-            s->ringbuffer[pos] =
-                (uint8_t)ReadPreloadedSymbol(s->literal_htree,
-                                             br, &bits, &value);
-            --s->block_length[0];
-            BROTLI_LOG_UINT(s->literal_htree_index);
-            BROTLI_LOG_ARRAY_INDEX(s->ringbuffer, pos);
-            ++pos;
-            if (PREDICT_FALSE(pos == s->ringbuffer_size)) {
-              s->to_write = s->ringbuffer_size;
-              s->partially_written = 0;
-              s->state = BROTLI_STATE_COMMAND_INNER_WRITE;
-              --i;
-              goto innerWrite;
-            }
-          } while (--i != 0);
-        } else {
-          uint8_t p1 = s->ringbuffer[(pos - 1) & s->ringbuffer_mask];
-          uint8_t p2 = s->ringbuffer[(pos - 2) & s->ringbuffer_mask];
-          do {
-            const HuffmanCode* hc;
-            if (!BrotliCheckInputAmount(br, 64)) {
-              s->state = BROTLI_STATE_COMMAND_INNER;
-              result = BROTLI_RESULT_NEEDS_MORE_INPUT;
-              break;
-            }
-            if (PREDICT_FALSE(s->block_length[0] == 0)) {
-              /* Block switch for literals */
-              DecodeBlockTypeWithContext(s, br);
-            }
-            context = s->context_lookup1[p1] | s->context_lookup2[p2];
-            BROTLI_LOG_UINT(context);
-            hc = s->literal_hgroup.htrees[s->context_map_slice[context]];
-            --s->block_length[0];
-            p2 = p1;
-            p1 = (uint8_t)ReadSymbol(hc, br);
-            s->ringbuffer[pos] = p1;
-            BROTLI_LOG_UINT(s->context_map_slice[context]);
-            BROTLI_LOG_ARRAY_INDEX(s->ringbuffer, pos & s->ringbuffer_mask);
-            ++pos;
-            if (PREDICT_FALSE(pos == s->ringbuffer_size)) {
-              s->to_write = s->ringbuffer_size;
-              s->partially_written = 0;
-              s->state = BROTLI_STATE_COMMAND_INNER_WRITE;
-              --i;
-              goto innerWrite;
-            }
-          } while (--i != 0);
-        }
-        if (result != BROTLI_RESULT_SUCCESS) break;
-        if (s->meta_block_remaining_len <= 0) {
-          s->state = BROTLI_STATE_METABLOCK_DONE;
-          break;
-        }
-postDecodeLiterals:
-        if (s->distance_code >= 0) {
-          --s->dist_rb_idx;
-          s->distance_code = s->dist_rb[s->dist_rb_idx & 3];
-          goto postReadDistance;  /* We already have the implicit distance */
-        }
-        /* Read distance code in the command, unless it was implicitly zero. */
-        BROTLI_DCHECK(s->distance_code < 0);
-        if (s->block_length[2] == 0) {
-          /* Block switch for distance codes */
-          int dist_context_offset;
-          DecodeBlockType(s->num_block_types[2],
-                          s->block_type_trees, 2,
-                          s->block_type_rb, br); /* Reads 0..15 bits. */
-          s->block_length[2] = ReadBlockLength( /* Reads 3..39 bits. */
-              &s->block_len_trees[2 * BROTLI_HUFFMAN_MAX_TABLE_SIZE], br);
-          dist_context_offset = s->block_type_rb[5] << kDistanceContextBits;
-          s->dist_context_map_slice =
-              s->dist_context_map + dist_context_offset;
-          s->dist_htree_index = s->dist_context_map_slice[s->distance_context];
-        }
-        --s->block_length[2];
-        s->distance_code =
-            ReadSymbol(s->distance_hgroup.htrees[s->dist_htree_index], br);
-        /* Convert the distance code to the actual distance by possibly */
-        /* looking up past distances from the s->ringbuffer. */
-        if ((s->distance_code & ~0xf) == 0) {
-          if (s->distance_code == 0) {
-            --s->dist_rb_idx;
-            s->distance_code = s->dist_rb[s->dist_rb_idx & 3];
-          } else {
-            int distance_code = s->distance_code << 1;
-            /* kDistanceShortCodeIndexOffset has 2-bit values from LSB: */
-            /* 3, 2, 1, 0, 3, 3, 3, 3, 3, 3, 2, 2, 2, 2, 2, 2 */
-            const uint32_t kDistanceShortCodeIndexOffset = 0xaaafff1b;
-            /* kDistanceShortCodeValueOffset has 2-bit values from LSB: */
-            /* 0, 0, 0, 0, 1, 1, 2, 2, 3, 3, 1, 1, 2, 2, 3, 3 */
-            const uint32_t kDistanceShortCodeValueOffset = 0xfa5fa500;
-            int v = (s->dist_rb_idx +
-                (int)(kDistanceShortCodeIndexOffset >> distance_code)) & 0x3;
-            s->distance_code = s->dist_rb[v];
-            v = (int)(kDistanceShortCodeValueOffset >> distance_code) & 0x3;
-            if ((distance_code & 0x3) != 0) {
-              s->distance_code += v;
-            } else {
-              s->distance_code -= v;
-              if (s->distance_code <= 0) {
-                /* A huge distance will cause a BROTLI_FAILURE() soon. */
-                /* This is a little faster than failing here. */
-                s->distance_code = 0x0fffffff;
-              }
-            }
-          }
-        } else {
-          int distval = s->distance_code - s->num_direct_distance_codes;
-          if (distval >= 0) {
-            int nbits;
-            int postfix;
-            int offset;
-            if (s->distance_postfix_bits == 0) {
-              nbits = (distval >> 1) + 1;
-              offset = ((2 + (distval & 1)) << nbits) - 4;
-              s->distance_code = s->num_direct_distance_codes +
-                  offset + (int)BrotliReadBits(br, nbits);
-            } else {
-              postfix = distval & s->distance_postfix_mask;
-              distval >>= s->distance_postfix_bits;
-              nbits = (distval >> 1) + 1;
-              offset = ((2 + (distval & 1)) << nbits) - 4;
-              s->distance_code = s->num_direct_distance_codes +
-                  ((offset + (int)BrotliReadBits(br, nbits)) <<
-                   s->distance_postfix_bits) + postfix;
-            }
-          }
-          s->distance_code = s->distance_code - NUM_DISTANCE_SHORT_CODES + 1;
-        }
-postReadDistance:
-        BROTLI_LOG_UINT(s->distance_code);
-        if (s->max_distance != s->max_backward_distance) {
-          if (pos < s->max_backward_distance_minus_custom_dict_size) {
-            s->max_distance = pos + s->custom_dict_size;
-          } else {
-            s->max_distance = s->max_backward_distance;
-          }
-        }
-        i = s->copy_length;
-        /* Apply copy of LZ77 back-reference, or static dictionary reference if
-        the distance is larger than the max LZ77 distance */
-        if (s->distance_code > s->max_distance) {
-          if (i >= kBrotliMinDictionaryWordLength &&
-              i <= kBrotliMaxDictionaryWordLength) {
-            int offset = kBrotliDictionaryOffsetsByLength[i];
-            int word_id = s->distance_code - s->max_distance - 1;
-            int shift = kBrotliDictionarySizeBitsByLength[i];
-            int mask = (int)BitMask(shift);
-            int word_idx = word_id & mask;
-            int transform_idx = word_id >> shift;
-            offset += word_idx * i;
-            if (transform_idx < kNumTransforms) {
-              const uint8_t* word = &kBrotliDictionary[offset];
-              int len = i;
-              if (transform_idx == 0) {
-                memcpy(&s->ringbuffer[pos], word, (size_t)len);
-              } else {
-                len = TransformDictionaryWord(
-                    &s->ringbuffer[pos], word, len, transform_idx);
-              }
-              pos += len;
-              s->meta_block_remaining_len -= len;
-              if (pos >= s->ringbuffer_size) {
-                s->to_write = s->ringbuffer_size;
-                s->partially_written = 0;
-                s->state = BROTLI_STATE_COMMAND_POST_WRITE_1;
-                break;
-              }
-            } else {
-              BROTLI_LOG(("Invalid backward reference. pos: %d distance: %d "
-                     "len: %d bytes left: %d\n",
-                  pos, s->distance_code, i,
-                  s->meta_block_remaining_len));
-              result = BROTLI_FAILURE();
-              break;
-            }
-          } else {
-            BROTLI_LOG(("Invalid backward reference. pos: %d distance: %d "
-                   "len: %d bytes left: %d\n", pos, s->distance_code, i,
-                   s->meta_block_remaining_len));
-            result = BROTLI_FAILURE();
-            break;
-          }
-        } else {
-          const uint8_t *ringbuffer_end_minus_copy_length =
-              s->ringbuffer_end - i;
-          copy_src = &s->ringbuffer[(pos - s->distance_code) &
-                                    s->ringbuffer_mask];
-          copy_dst = &s->ringbuffer[pos];
-          /* update the recent distances cache */
-          s->dist_rb[s->dist_rb_idx & 3] = s->distance_code;
-          ++s->dist_rb_idx;
-          s->meta_block_remaining_len -= i;
-          if (PREDICT_FALSE(s->meta_block_remaining_len < 0)) {
-            BROTLI_LOG(("Invalid backward reference. pos: %d distance: %d "
-                   "len: %d bytes left: %d\n", pos, s->distance_code, i,
-                   s->meta_block_remaining_len));
-            result = BROTLI_FAILURE();
-            break;
-          }
-          /* There is 128+ bytes of slack in the ringbuffer allocation.
-             Also, we have 16 short codes, that make these 16 bytes irrelevant
-             in the ringbuffer. Let's copy over them as a first guess.
-           */
-          memmove16(copy_dst, copy_src);
-          /* Now check if the copy extends over the ringbuffer end,
-             or if the copy overlaps with itself, if yes, do wrap-copy. */
-          if (copy_src < copy_dst) {
-            if (copy_dst >= ringbuffer_end_minus_copy_length) {
-              goto postWrapCopy;
-            }
-            if (copy_src + i > copy_dst) {
-              goto postSelfintersecting;
-            }
-          } else {
-            if (copy_src >= ringbuffer_end_minus_copy_length) {
-              goto postWrapCopy;
-            }
-            if (copy_dst + i > copy_src) {
-              goto postSelfintersecting;
-            }
-          }
-          pos += i;
-          if (i > 16) {
-            if (i > 32) {
-              memcpy(copy_dst + 16, copy_src + 16, (size_t)(i - 16));
-            } else {
-              /* This branch covers about 45% cases.
-                 Fixed size short copy allows more compiler optimizations. */
-              memmove16(copy_dst + 16, copy_src + 16);
-            }
-          }
-        }
-        if (s->meta_block_remaining_len <= 0) {
-          /* Next metablock, if any */
-          s->state = BROTLI_STATE_METABLOCK_DONE;
-          break;
-        } else {
-          goto CommandBegin;
-        }
-      postSelfintersecting:
-        while (--i >= 0) {
-          s->ringbuffer[pos] =
-              s->ringbuffer[(pos - s->distance_code) & s->ringbuffer_mask];
-          ++pos;
-        }
-        if (s->meta_block_remaining_len <= 0) {
-          /* Next metablock, if any */
-          s->state = BROTLI_STATE_METABLOCK_DONE;
-          break;
-        } else {
-          goto CommandBegin;
-        }
-      postWrapCopy:
-        s->state = BROTLI_STATE_COMMAND_POST_WRAP_COPY;
-        /* No break, go to next state */
+      case BROTLI_STATE_COMMAND_POST_DECODE_LITERALS:
       case BROTLI_STATE_COMMAND_POST_WRAP_COPY:
-        while (--i >= 0) {
-          s->ringbuffer[pos] =
-              s->ringbuffer[(pos - s->distance_code) & s->ringbuffer_mask];
-          ++pos;
-          if (pos == s->ringbuffer_size) {
-            s->to_write = s->ringbuffer_size;
-            s->partially_written = 0;
-            s->state = BROTLI_STATE_COMMAND_POST_WRITE_2;
-            break;
-          }
-        }
-        if (s->state == BROTLI_STATE_COMMAND_POST_WRAP_COPY) {
-          if (s->meta_block_remaining_len <= 0) {
-            /* Next metablock, if any */
-            s->state = BROTLI_STATE_METABLOCK_DONE;
-            break;
-          } else {
-            goto CommandBegin;
-          }
+        result = ProcessCommands(s);
+        if (result == BROTLI_RESULT_NEEDS_MORE_INPUT) {
+          result = SafeProcessCommands(s);
         }
         break;
       case BROTLI_STATE_COMMAND_INNER_WRITE:
       case BROTLI_STATE_COMMAND_POST_WRITE_1:
       case BROTLI_STATE_COMMAND_POST_WRITE_2:
-innerWrite:
-        result = WriteRingBuffer(output, s);
+        result = WriteRingBuffer(available_out, next_out, total_out, s);
         if (result != BROTLI_RESULT_SUCCESS) {
           break;
         }
-        pos -= s->ringbuffer_size;
+        s->pos -= s->ringbuffer_size;
+        s->rb_roundtrips++;
         s->max_distance = s->max_backward_distance;
         if (s->state == BROTLI_STATE_COMMAND_POST_WRITE_1) {
-          memcpy(s->ringbuffer, s->ringbuffer_end, (size_t)pos);
-          if (s->meta_block_remaining_len <= 0) {
+          memcpy(s->ringbuffer, s->ringbuffer_end, (size_t)s->pos);
+          if (s->meta_block_remaining_len == 0) {
             /* Next metablock, if any */
             s->state = BROTLI_STATE_METABLOCK_DONE;
-            break;
           } else {
-            goto CommandBegin;
+            s->state = BROTLI_STATE_COMMAND_BEGIN;
           }
+          break;
         } else if (s->state == BROTLI_STATE_COMMAND_POST_WRITE_2) {
           s->state = BROTLI_STATE_COMMAND_POST_WRAP_COPY;
         } else {  /* BROTLI_STATE_COMMAND_INNER_WRITE */
-          if (i == 0) {
-            if (s->meta_block_remaining_len <= 0) {
+          if (s->loop_counter == 0) {
+            if (s->meta_block_remaining_len == 0) {
               s->state = BROTLI_STATE_METABLOCK_DONE;
-              break;
+            } else {
+              s->state = BROTLI_STATE_COMMAND_POST_DECODE_LITERALS;
             }
-            goto postDecodeLiterals;
+            break;
           }
           s->state = BROTLI_STATE_COMMAND_INNER;
         }
         break;
       case BROTLI_STATE_METABLOCK_DONE:
         BrotliStateCleanupAfterMetablock(s);
         if (!s->is_last_metablock) {
           s->state = BROTLI_STATE_METABLOCK_BEGIN;
           break;
         }
-        s->to_write = pos;
-        s->partially_written = 0;
+        if (!BrotliJumpToByteBoundary(br)) {
+          result = BROTLI_FAILURE();
+        }
+        if (s->buffer_length == 0) {
+          BrotliBitReaderUnload(br);
+          *available_in = br->avail_in;
+          *next_in = br->next_in;
+        }
         s->state = BROTLI_STATE_DONE;
         /* No break, continue to next state */
       case BROTLI_STATE_DONE:
         if (s->ringbuffer != 0) {
-          result = WriteRingBuffer(output, s);
+          result = WriteRingBuffer(available_out, next_out, total_out, s);
           if (result != BROTLI_RESULT_SUCCESS) {
             break;
           }
         }
-        if (!BrotliJumpToByteBoundary(br)) {
-          result = BROTLI_FAILURE();
-        }
-        if (!BrotliIsBitReaderOK(br)) {
-          /* The brotli input stream was too small, does not follow the spec.
-             NOTE: larger input is allowed, smaller not. */
-          result = BROTLI_FAILURE();
-        }
         return result;
     }
   }
-  s->pos = pos;
-  s->loop_counter = i;
   return result;
 }
 
 void BrotliSetCustomDictionary(
     size_t size, const uint8_t* dict, BrotliState* s) {
   s->custom_dict = dict;
   s->custom_dict_size = (int) size;
 }
--- a/modules/brotli/dec/decode.h
+++ b/modules/brotli/dec/decode.h
@@ -1,161 +1,96 @@
 /* Copyright 2013 Google Inc. All Rights Reserved.
 
-   Licensed under the Apache License, Version 2.0 (the "License");
-   you may not use this file except in compliance with the License.
-   You may obtain a copy of the License at
-
-   http://www.apache.org/licenses/LICENSE-2.0
-
-   Unless required by applicable law or agreed to in writing, software
-   distributed under the License is distributed on an "AS IS" BASIS,
-   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-   See the License for the specific language governing permissions and
-   limitations under the License.
+   Distributed under MIT license.
+   See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
 */
 
 /* API for Brotli decompression */
 
 #ifndef BROTLI_DEC_DECODE_H_
 #define BROTLI_DEC_DECODE_H_
 
 #include "./state.h"
-#include "./streams.h"
 #include "./types.h"
 
 #if defined(__cplusplus) || defined(c_plusplus)
 extern "C" {
 #endif
 
 typedef enum {
-  /* Decoding error, e.g. corrupt input or no memory */
+  /* Decoding error, e.g. corrupt input or memory allocation problem */
   BROTLI_RESULT_ERROR = 0,
-  /* Successfully completely done */
+  /* Decoding successfully completed */
   BROTLI_RESULT_SUCCESS = 1,
-  /* Partially done, but must be called again with more input */
+  /* Partially done; should be called again with more input */
   BROTLI_RESULT_NEEDS_MORE_INPUT = 2,
-  /* Partially done, but must be called again with more output */
+  /* Partially done; should be called again with more output */
   BROTLI_RESULT_NEEDS_MORE_OUTPUT = 3
 } BrotliResult;
 
-/* BROTLI_FAILURE macro unwraps to BROTLI_RESULT_ERROR in non-debug build. */
-/* In debug build it dumps file name, line and pretty function name. */
-#if defined(_MSC_VER) || !defined(BROTLI_DEBUG)
-#define BROTLI_FAILURE() BROTLI_RESULT_ERROR
-#else
-#define BROTLI_FAILURE() \
-    BrotliFailure(__FILE__, __LINE__, __PRETTY_FUNCTION__)
-static inline BrotliResult BrotliFailure(const char *f, int l, const char *fn) {
-  fprintf(stderr, "ERROR at %s:%d (%s)\n", f, l, fn);
-  fflush(stderr);
-  return BROTLI_RESULT_ERROR;
-}
-#endif
+/* Creates an instance of BrotliState and initializes it. |alloc_func| and
+   |free_func| MUST be both zero or both non-zero. In the case they are both
+   zero, default memory allocators are used. |opaque| is passed to |alloc_func|
+   and |free_func| when they are called. */
+BrotliState* BrotliCreateState(
+    brotli_alloc_func alloc_func, brotli_free_func free_func, void* opaque);
 
-/* Sets *decoded_size to the decompressed size of the given encoded stream. */
-/* This function only works if the encoded buffer has a single meta block, */
-/* or if it has two meta-blocks, where the first is uncompressed and the */
-/* second is empty. */
-/* Returns 1 on success, 0 on failure. */
+/* Deinitializes and frees the BrotliState instance. */
+void BrotliDestroyState(BrotliState* state);
+
+/* Sets |*decoded_size| to the decompressed size of the given encoded stream.
+   This function only works if the encoded buffer has a single meta block,
+   or if it has two meta-blocks, where the first is uncompressed and the
+   second is empty.
+   Returns 1 on success, 0 on failure. */
 int BrotliDecompressedSize(size_t encoded_size,
                            const uint8_t* encoded_buffer,
                            size_t* decoded_size);
 
-/* Decompresses the data in encoded_buffer into decoded_buffer, and sets */
-/* *decoded_size to the decompressed length. */
-/* Returns 0 if there was either a bit stream error or memory allocation */
-/* error, and 1 otherwise. */
-/* If decoded size is zero, returns 1 and keeps decoded_buffer unchanged. */
+/* Decompresses the data in |encoded_buffer| into |decoded_buffer|, and sets
+   |*decoded_size| to the decompressed length. */
 BrotliResult BrotliDecompressBuffer(size_t encoded_size,
                                     const uint8_t* encoded_buffer,
                                     size_t* decoded_size,
                                     uint8_t* decoded_buffer);
 
-/* Same as above, but uses the specified input and output callbacks instead */
-/* of reading from and writing to pre-allocated memory buffers. */
-BrotliResult BrotliDecompress(BrotliInput input, BrotliOutput output);
+/* Decompresses the data. Supports partial input and output.
 
-/* Same as above, but supports the caller to call the decoder repeatedly with
-   partial data to support streaming. The state must be initialized with
-   BrotliStateInit and reused with every call for the same stream.
-   Return values:
-   0: failure.
-   1: success, and done.
-   2: success so far, end not reached so should call again with more input.
-   The finish parameter is used as follows, for a series of calls with the
-   same state:
-   0: Every call except the last one must be called with finish set to 0. The
-      last call may have finish set to either 0 or 1. Only if finish is 0, can
-      the function return 2. It may also return 0 or 1, in that case no more
-      calls (even with finish 1) may be made.
-   1: Only the last call may have finish set to 1. It's ok to give empty input
-      if all input was already given to previous calls. It is also ok to have
-      only one single call in total, with finish 1, and with all input
-      available immediately. That matches the non-streaming case. If finish is
-      1, the function can only return 0 or 1, never 2. After a finish, no more
-      calls may be done.
-   After everything is done, the state must be cleaned with BrotliStateCleanup
-   to free allocated resources.
-   The given BrotliOutput must always accept all output and make enough space,
-   it returning a smaller value than the amount of bytes to write always results
-   in an error.
-*/
-BrotliResult BrotliDecompressStreaming(BrotliInput input, BrotliOutput output,
-                                       int finish, BrotliState* s);
+   Must be called with an allocated input buffer in |*next_in| and an allocated
+   output buffer in |*next_out|. The values |*available_in| and |*available_out|
+   must specify the allocated size in |*next_in| and |*next_out| respectively.
 
-/* Same as above, but with memory buffers.
-   Must be called with an allocated input buffer in *next_in and an allocated
-   output buffer in *next_out. The values *available_in and *available_out
-   must specify the allocated size in *next_in and *next_out respectively.
-   The value *total_out must be 0 initially, and will be summed with the
-   amount of output bytes written after each call, so that at the end it
-   gives the complete decoded size.
-   After each call, *available_in will be decremented by the amount of input
-   bytes consumed, and the *next_in pointer will be incremented by that amount.
-   Similarly, *available_out will be decremented by the amount of output
-   bytes written, and the *next_out pointer will be incremented by that
-   amount.
+   After each call, |*available_in| will be decremented by the amount of input
+   bytes consumed, and the |*next_in| pointer will be incremented by that
+   amount. Similarly, |*available_out| will be decremented by the amount of
+   output bytes written, and the |*next_out| pointer will be incremented by that
+   amount. |total_out| will be set to the number of bytes decompressed since
+   the last state initialization.
 
-   The input may be partial. With each next function call, *next_in and
-   *available_in must be updated to point to a next part of the compressed
-   input. The current implementation will always consume all input unless
-   an error occurs, so normally *available_in will always be 0 after
-   calling this function and the next adjacent part of input is desired.
-
-   In the current implementation, the function requires that there is enough
-   output buffer size to write all currently processed input, so
-   *available_out must be large enough. Since the function updates *next_out
-   each time, as long as the output buffer is large enough you can keep
-   reusing this variable. It is also possible to update *next_out and
-   *available_out yourself before a next call, e.g. to point to a new larger
-   buffer.
-*/
-BrotliResult BrotliDecompressBufferStreaming(size_t* available_in,
-                                             const uint8_t** next_in,
-                                             int finish,
-                                             size_t* available_out,
-                                             uint8_t** next_out,
-                                             size_t* total_out,
-                                             BrotliState* s);
+   Input is never overconsumed, so |next_in| and |available_in| could be passed
+   to the next consumer after decoding is complete. */
+BrotliResult BrotliDecompressStream(size_t* available_in,
+                                    const uint8_t** next_in,
+                                    size_t* available_out,
+                                    uint8_t** next_out,
+                                    size_t* total_out,
+                                    BrotliState* s);
 
 /* Fills the new state with a dictionary for LZ77, warming up the ringbuffer,
    e.g. for custom static dictionaries for data formats.
    Not to be confused with the built-in transformable dictionary of Brotli.
    The dictionary must exist in memory until decoding is done and is owned by
    the caller. To use:
-   -initialize state with BrotliStateInit
-   -use BrotliSetCustomDictionary
-   -use BrotliDecompressBufferStreaming
-   -clean up with BrotliStateCleanup
+    1) initialize state with BrotliStateInit
+    2) use BrotliSetCustomDictionary
+    3) use BrotliDecompressStream
+    4) clean up with BrotliStateCleanup
 */
 void BrotliSetCustomDictionary(
     size_t size, const uint8_t* dict, BrotliState* s);
 
 
-/* Escalate internal functions visibility; for testing purposes only. */
-void InverseMoveToFrontTransformForTesting(uint8_t* v, int l, BrotliState* s);
-
 #if defined(__cplusplus) || defined(c_plusplus)
 } /* extern "C" */
 #endif
 
 #endif  /* BROTLI_DEC_DECODE_H_ */
--- a/modules/brotli/dec/dictionary.c
+++ b/modules/brotli/dec/dictionary.c
@@ -1,25 +1,20 @@
 /* Copyright 2013 Google Inc. All Rights Reserved.
 
-   Licensed under the Apache License, Version 2.0 (the "License");
-   you may not use this file except in compliance with the License.
-   You may obtain a copy of the License at
-
-   http://www.apache.org/licenses/LICENSE-2.0
-
-   Unless required by applicable law or agreed to in writing, software
-   distributed under the License is distributed on an "AS IS" BASIS,
-   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-   See the License for the specific language governing permissions and
-   limitations under the License.
+   Distributed under MIT license.
+   See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
 */
 
 #include "./dictionary.h"
 
+#if defined(__cplusplus) || defined(c_plusplus)
+extern "C" {
+#endif
+
 /* In case of multiple definition linker error with dictionary.cc from the
   encoder: include only one of enc/dictionary.cc or dec/dictionary.c in a
   target using both enc and dec. */
 const uint8_t kBrotliDictionary[122784] = {
   0x74, 0x69, 0x6d, 0x65, 0x64, 0x6f, 0x77, 0x6e, 0x6c, 0x69, 0x66, 0x65, 0x6c,
   0x65, 0x66, 0x74, 0x62, 0x61, 0x63, 0x6b, 0x63, 0x6f, 0x64, 0x65, 0x64, 0x61,
   0x74, 0x61, 0x73, 0x68, 0x6f, 0x77, 0x6f, 0x6e, 0x6c, 0x79, 0x73, 0x69, 0x74,
   0x65, 0x63, 0x69, 0x74, 0x79, 0x6f, 0x70, 0x65, 0x6e, 0x6a, 0x75, 0x73, 0x74,
@@ -9460,8 +9455,12 @@ const uint8_t kBrotliDictionary[122784] 
   0xe0, 0xa4, 0xaa, 0xe0, 0xa5, 0x80, 0xe0, 0xa4, 0xb0, 0xe0, 0xa4, 0xbe, 0xe0,
   0xa4, 0x87, 0xe0, 0xa4, 0x9f, 0xe0, 0xa4, 0xb5, 0xe0, 0xa4, 0xbf, 0xe0, 0xa4,
   0x9c, 0xe0, 0xa5, 0x8d, 0xe0, 0xa4, 0x9e, 0xe0, 0xa4, 0xbe, 0xe0, 0xa4, 0xaa,
   0xe0, 0xa4, 0xa8, 0xe0, 0xa4, 0x95, 0xe0, 0xa4, 0xbe, 0xe0, 0xa4, 0xb0, 0xe0,
   0xa5, 0x8d, 0xe0, 0xa4, 0xb0, 0xe0, 0xa4, 0xb5, 0xe0, 0xa4, 0xbe, 0xe0, 0xa4,
   0x88, 0xe0, 0xa4, 0xb8, 0xe0, 0xa4, 0x95, 0xe0, 0xa5, 0x8d, 0xe0, 0xa4, 0xb0,
   0xe0, 0xa4, 0xbf, 0xe0, 0xa4, 0xaf, 0xe0, 0xa4, 0xa4, 0xe0, 0xa4, 0xbe,
 };
+
+#if defined(__cplusplus) || defined(c_plusplus)
+}    /* extern "C" */
+#endif
--- a/modules/brotli/dec/dictionary.h
+++ b/modules/brotli/dec/dictionary.h
@@ -1,43 +1,34 @@
 /* Copyright 2013 Google Inc. All Rights Reserved.
 
-   Licensed under the Apache License, Version 2.0 (the "License");
-   you may not use this file except in compliance with the License.
-   You may obtain a copy of the License at
-
-   http://www.apache.org/licenses/LICENSE-2.0
-
-   Unless required by applicable law or agreed to in writing, software
-   distributed under the License is distributed on an "AS IS" BASIS,
-   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-   See the License for the specific language governing permissions and
-   limitations under the License.
+   Distributed under MIT license.
+   See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
 */
 
 /* Collection of static dictionary words. */
 
 #ifndef BROTLI_DEC_DICTIONARY_H_
 #define BROTLI_DEC_DICTIONARY_H_
 
 #include "./types.h"
 
 #if defined(__cplusplus) || defined(c_plusplus)
 extern "C" {
 #endif
 
 extern const uint8_t kBrotliDictionary[122784];
 
-static const int kBrotliDictionaryOffsetsByLength[] = {
+static const uint32_t kBrotliDictionaryOffsetsByLength[] = {
      0,     0,     0,     0,     0,  4096,  9216, 21504, 35840, 44032,
  53248, 63488, 74752, 87040, 93696, 100864, 104704, 106752, 108928, 113536,
  115968, 118528, 119872, 121280, 122016,
 };
 
-static const int8_t kBrotliDictionarySizeBitsByLength[] = {
+static const uint8_t kBrotliDictionarySizeBitsByLength[] = {
   0,  0,  0,  0, 10, 10, 11, 11, 10, 10,
  10, 10, 10,  9,  9,  8,  7,  7,  8,  7,
   7,  6,  6,  5,  5,
 };
 
 static const int kBrotliMinDictionaryWordLength = 4;
 static const int kBrotliMaxDictionaryWordLength = 24;
 
--- a/modules/brotli/dec/huffman.c
+++ b/modules/brotli/dec/huffman.c
@@ -1,46 +1,80 @@
 /* Copyright 2013 Google Inc. All Rights Reserved.
 
-   Licensed under the Apache License, Version 2.0 (the "License");
-   you may not use this file except in compliance with the License.
-   You may obtain a copy of the License at
-
-   http://www.apache.org/licenses/LICENSE-2.0
-
-   Unless required by applicable law or agreed to in writing, software
-   distributed under the License is distributed on an "AS IS" BASIS,
-   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-   See the License for the specific language governing permissions and
-   limitations under the License.
+   Distributed under MIT license.
+   See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
 */
 
 /* Utilities for building Huffman decoding tables. */
 
-#include <stdlib.h>
-#include <stdio.h>
-#include <string.h>
 #include "./huffman.h"
+
+#include <string.h>  /* memcpy, memset */
+
 #include "./port.h"
+#include "./types.h"
 
 #if defined(__cplusplus) || defined(c_plusplus)
 extern "C" {
 #endif
 
-/* Returns reverse(reverse(key, len) + 1, len), where reverse(key, len) is the
-   bit-wise reversal of the len least significant bits of key. */
-static BROTLI_INLINE uint32_t GetNextKey(uint32_t key, int len) {
+#define BROTLI_REVERSE_BITS_MAX 8
+
 #ifdef BROTLI_RBIT
-  return BROTLI_RBIT(BROTLI_RBIT(key) + (1 << (8 * sizeof(unsigned) - len)));
+#define BROTLI_REVERSE_BITS_BASE (32 - BROTLI_REVERSE_BITS_MAX)
 #else
-  unsigned step = (unsigned)(1 << (len - 1));
-  while (key & step) {
-    step >>= 1;
-  }
-  return (key & (step - 1)) + step;
+#define BROTLI_REVERSE_BITS_BASE 0
+static const uint8_t kReverseBits[1 << BROTLI_REVERSE_BITS_MAX] = {
+    0x00, 0x80, 0x40, 0xC0, 0x20, 0xA0, 0x60, 0xE0,
+    0x10, 0x90, 0x50, 0xD0, 0x30, 0xB0, 0x70, 0xF0,
+    0x08, 0x88, 0x48, 0xC8, 0x28, 0xA8, 0x68, 0xE8,
+    0x18, 0x98, 0x58, 0xD8, 0x38, 0xB8, 0x78, 0xF8,
+    0x04, 0x84, 0x44, 0xC4, 0x24, 0xA4, 0x64, 0xE4,
+    0x14, 0x94, 0x54, 0xD4, 0x34, 0xB4, 0x74, 0xF4,
+    0x0C, 0x8C, 0x4C, 0xCC, 0x2C, 0xAC, 0x6C, 0xEC,
+    0x1C, 0x9C, 0x5C, 0xDC, 0x3C, 0xBC, 0x7C, 0xFC,
+    0x02, 0x82, 0x42, 0xC2, 0x22, 0xA2, 0x62, 0xE2,
+    0x12, 0x92, 0x52, 0xD2, 0x32, 0xB2, 0x72, 0xF2,
+    0x0A, 0x8A, 0x4A, 0xCA, 0x2A, 0xAA, 0x6A, 0xEA,
+    0x1A, 0x9A, 0x5A, 0xDA, 0x3A, 0xBA, 0x7A, 0xFA,
+    0x06, 0x86, 0x46, 0xC6, 0x26, 0xA6, 0x66, 0xE6,
+    0x16, 0x96, 0x56, 0xD6, 0x36, 0xB6, 0x76, 0xF6,
+    0x0E, 0x8E, 0x4E, 0xCE, 0x2E, 0xAE, 0x6E, 0xEE,
+    0x1E, 0x9E, 0x5E, 0xDE, 0x3E, 0xBE, 0x7E, 0xFE,
+    0x01, 0x81, 0x41, 0xC1, 0x21, 0xA1, 0x61, 0xE1,
+    0x11, 0x91, 0x51, 0xD1, 0x31, 0xB1, 0x71, 0xF1,
+    0x09, 0x89, 0x49, 0xC9, 0x29, 0xA9, 0x69, 0xE9,
+    0x19, 0x99, 0x59, 0xD9, 0x39, 0xB9, 0x79, 0xF9,
+    0x05, 0x85, 0x45, 0xC5, 0x25, 0xA5, 0x65, 0xE5,
+    0x15, 0x95, 0x55, 0xD5, 0x35, 0xB5, 0x75, 0xF5,
+    0x0D, 0x8D, 0x4D, 0xCD, 0x2D, 0xAD, 0x6D, 0xED,
+    0x1D, 0x9D, 0x5D, 0xDD, 0x3D, 0xBD, 0x7D, 0xFD,
+    0x03, 0x83, 0x43, 0xC3, 0x23, 0xA3, 0x63, 0xE3,
+    0x13, 0x93, 0x53, 0xD3, 0x33, 0xB3, 0x73, 0xF3,
+    0x0B, 0x8B, 0x4B, 0xCB, 0x2B, 0xAB, 0x6B, 0xEB,
+    0x1B, 0x9B, 0x5B, 0xDB, 0x3B, 0xBB, 0x7B, 0xFB,
+    0x07, 0x87, 0x47, 0xC7, 0x27, 0xA7, 0x67, 0xE7,
+    0x17, 0x97, 0x57, 0xD7, 0x37, 0xB7, 0x77, 0xF7,
+    0x0F, 0x8F, 0x4F, 0xCF, 0x2F, 0xAF, 0x6F, 0xEF,
+    0x1F, 0x9F, 0x5F, 0xDF, 0x3F, 0xBF, 0x7F, 0xFF
+};  /* kReverseBits[i] is i with its 8 bits reversed; read-only table */
+#endif /* BROTLI_RBIT */
+
+#define BROTLI_REVERSE_BITS_LOWEST \
+    (1U << (BROTLI_REVERSE_BITS_MAX - 1 + BROTLI_REVERSE_BITS_BASE))
+
+/* Returns reverse(num >> BROTLI_REVERSE_BITS_BASE, BROTLI_REVERSE_BITS_MAX),
+   where reverse(value, len) is the bit-wise reversal of the len least
+   significant bits of value. */
+static BROTLI_INLINE uint32_t BrotliReverseBits(uint32_t num) {
+#ifdef BROTLI_RBIT
+  return BROTLI_RBIT(num);
+#else
+  return kReverseBits[num];
 #endif
 }
 
 /* Stores code in table[0], table[step], table[2*step], ..., table[end] */
 /* Assumes that end is an integer multiple of step */
 static BROTLI_INLINE void ReplicateValue(HuffmanCode* table,
                                          int step, int end,
                                          HuffmanCode code) {
@@ -66,24 +100,27 @@ static BROTLI_INLINE int NextTableBitSiz
 }
 
 
 void BrotliBuildCodeLengthsHuffmanTable(HuffmanCode* table,
                                         const uint8_t* const code_lengths,
                                         uint16_t *count) {
   HuffmanCode code;    /* current table entry */
   int symbol;          /* symbol index in original or sorted table */
-  unsigned key;        /* reversed prefix code */
+  uint32_t key;        /* prefix code */
+  uint32_t key_step;   /* prefix code addend */
   int step;            /* step size to replicate values in current table */
   int table_size;      /* size of current table */
   int sorted[18];      /* symbols sorted by code length */
   /* offsets in sorted table for each length */
   int offset[BROTLI_HUFFMAN_MAX_CODE_LENGTH_CODE_LENGTH + 1];
   int bits;
   int bits_count;
+  BROTLI_DCHECK(
+      BROTLI_HUFFMAN_MAX_CODE_LENGTH_CODE_LENGTH <= BROTLI_REVERSE_BITS_MAX);
 
   /* generate offsets into sorted symbol table by code length */
   symbol = -1;
   bits = 1;
   BROTLI_REPEAT(BROTLI_HUFFMAN_MAX_CODE_LENGTH_CODE_LENGTH, {
     symbol += count[bits];
     offset[bits] = symbol;
     bits++;
@@ -101,127 +138,142 @@ void BrotliBuildCodeLengthsHuffmanTable(
   } while (symbol != 0);
 
   table_size = 1 << BROTLI_HUFFMAN_MAX_CODE_LENGTH_CODE_LENGTH;
 
   /* Special case: all symbols but one have 0 code length. */
   if (offset[0] == 0) {
     code.bits = 0;
     code.value = (uint16_t)sorted[0];
-    for (key = 0; key < table_size; ++key) {
+    for (key = 0; key < (uint32_t)table_size; ++key) {
       table[key] = code;
     }
     return;
   }
 
   /* fill in table */
   key = 0;
+  key_step = BROTLI_REVERSE_BITS_LOWEST;
   symbol = 0;
   bits = 1;
   step = 2;
   do {
     code.bits = (uint8_t)bits;
     for (bits_count = count[bits]; bits_count != 0; --bits_count) {
       code.value = (uint16_t)sorted[symbol++];
-      ReplicateValue(&table[key], step, table_size, code);
-      key = GetNextKey(key, bits);
+      ReplicateValue(&table[BrotliReverseBits(key)], step, table_size, code);
+      key += key_step;
     }
     step <<= 1;
+    key_step >>= 1;
   } while (++bits <= BROTLI_HUFFMAN_MAX_CODE_LENGTH_CODE_LENGTH);
 }
 
-int BrotliBuildHuffmanTable(HuffmanCode* root_table,
-                            int root_bits,
-                            const uint16_t* const symbol_lists,
-                            uint16_t *count) {
+uint32_t BrotliBuildHuffmanTable(HuffmanCode* root_table,
+                                 int root_bits,
+                                 const uint16_t* const symbol_lists,
+                                 uint16_t *count) {
   HuffmanCode code;    /* current table entry */
   HuffmanCode* table;  /* next available space in table */
   int len;             /* current code length */
   int symbol;          /* symbol index in original or sorted table */
-  unsigned key;        /* reversed prefix code */
+  uint32_t key;        /* prefix code */
+  uint32_t key_step;   /* prefix code addend */
+  uint32_t sub_key;    /* 2nd level table prefix code */
+  uint32_t sub_key_step;/* 2nd level table prefix code addend */
   int step;            /* step size to replicate values in current table */
-  unsigned low;        /* low bits for current root entry */
-  unsigned mask;       /* mask for low bits */
   int table_bits;      /* key length of current table */
   int table_size;      /* size of current table */
   int total_size;      /* sum of root table size and 2nd level table sizes */
   int max_length = -1;
   int bits;
   int bits_count;
 
+  BROTLI_DCHECK(root_bits <= BROTLI_REVERSE_BITS_MAX);
+  BROTLI_DCHECK(
+      BROTLI_HUFFMAN_MAX_CODE_LENGTH - root_bits <= BROTLI_REVERSE_BITS_MAX);
+
   while (symbol_lists[max_length] == 0xFFFF) max_length--;
   max_length += BROTLI_HUFFMAN_MAX_CODE_LENGTH + 1;
 
   table = root_table;
   table_bits = root_bits;
   table_size = 1 << table_bits;
   total_size = table_size;
 
   /* fill in root table */
   /* let's reduce the table size to a smaller size if possible, and */
   /* create the repetitions by memcpy if possible in the coming loop */
   if (table_bits > max_length) {
     table_bits = max_length;
     table_size = 1 << table_bits;
   }
   key = 0;
+  key_step = BROTLI_REVERSE_BITS_LOWEST;
   bits = 1;
   step = 2;
   do {
     code.bits = (uint8_t)bits;
     symbol = bits - (BROTLI_HUFFMAN_MAX_CODE_LENGTH + 1);
     for (bits_count = count[bits]; bits_count != 0; --bits_count) {
       symbol = symbol_lists[symbol];
       code.value = (uint16_t)symbol;
-      ReplicateValue(&table[key], step, table_size, code);
-      key = GetNextKey(key, bits);
+      ReplicateValue(&table[BrotliReverseBits(key)], step, table_size, code);
+      key += key_step;
     }
     step <<= 1;
+    key_step >>= 1;
   } while (++bits <= table_bits);
 
   /* if root_bits != table_bits we only created one fraction of the */
   /* table, and we need to replicate it now. */
   while (total_size != table_size) {
     memcpy(&table[table_size], &table[0],
            (size_t)table_size * sizeof(table[0]));
     table_size <<= 1;
   }
 
   /* fill in 2nd level tables and add pointers to root table */
-  mask = (unsigned)(total_size - 1);
-  low = (unsigned)-1;
-  for (len = root_bits + 1, step = 2; len <= max_length; ++len, step <<= 1) {
+  key_step = BROTLI_REVERSE_BITS_LOWEST >> (root_bits - 1);
+  sub_key = (BROTLI_REVERSE_BITS_LOWEST << 1);
+  sub_key_step = BROTLI_REVERSE_BITS_LOWEST;
+  for (len = root_bits + 1, step = 2; len <= max_length; ++len) {
     symbol = len - (BROTLI_HUFFMAN_MAX_CODE_LENGTH + 1);
     for (; count[len] != 0; --count[len]) {
-      if ((key & mask) != low) {
+      if (sub_key == (BROTLI_REVERSE_BITS_LOWEST << 1U)) {
         table += table_size;
         table_bits = NextTableBitSize(count, len, root_bits);
         table_size = 1 << table_bits;
         total_size += table_size;
-        low = key & mask;
-        root_table[low].bits = (uint8_t)(table_bits + root_bits);
-        root_table[low].value = (uint16_t)(
-            ((size_t)(table - root_table)) - low);
+        sub_key = BrotliReverseBits(key);
+        key += key_step;
+        root_table[sub_key].bits = (uint8_t)(table_bits + root_bits);
+        root_table[sub_key].value = (uint16_t)(
+            ((size_t)(table - root_table)) - sub_key);
+        sub_key = 0;
       }
       code.bits = (uint8_t)(len - root_bits);
       symbol = symbol_lists[symbol];
       code.value = (uint16_t)symbol;
-      ReplicateValue(&table[key >> root_bits], step, table_size, code);
-      key = GetNextKey(key, len);
+      ReplicateValue(
+          &table[BrotliReverseBits(sub_key)], step, table_size, code);
+      sub_key += sub_key_step;
     }
+    step <<= 1;
+    sub_key_step >>= 1;
   }
-  return total_size;
+  return (uint32_t)total_size;
 }
 
-int BrotliBuildSimpleHuffmanTable(HuffmanCode* table,
-                                  int root_bits,
-                                  uint16_t *val,
-                                  uint32_t num_symbols) {
-  int table_size = 1;
-  const int goal_size = 1 << root_bits;
+uint32_t BrotliBuildSimpleHuffmanTable(HuffmanCode* table,
+                                       int root_bits,
+                                       uint16_t *val,
+                                       uint32_t num_symbols) {
+  uint32_t table_size = 1;
+  const uint32_t goal_size = 1U << root_bits;
   switch (num_symbols) {
     case 0:
       table[0].bits = 0;
       table[0].value = val[0];
       break;
     case 1:
       table[0].bits = 1;
       table[1].bits = 1;
@@ -297,30 +349,11 @@ int BrotliBuildSimpleHuffmanTable(Huffma
   while (table_size != goal_size) {
     memcpy(&table[table_size], &table[0],
            (size_t)table_size * sizeof(table[0]));
     table_size <<= 1;
   }
   return goal_size;
 }
 
-void BrotliHuffmanTreeGroupInit(HuffmanTreeGroup* group, int alphabet_size,
-                                int ntrees) {
-  /* Pack two mallocs into one */
-  const size_t code_size =
-      sizeof(HuffmanCode) * (size_t)(ntrees * BROTLI_HUFFMAN_MAX_TABLE_SIZE);
-  const size_t htree_size = sizeof(HuffmanCode*) * (size_t)ntrees;
-  char *p = (char*)malloc(code_size + htree_size);
-  group->alphabet_size = (int16_t)alphabet_size;
-  group->num_htrees = (int16_t)ntrees;
-  group->codes = (HuffmanCode*)p;
-  group->htrees = (HuffmanCode**)(p + code_size);
-}
-
-void BrotliHuffmanTreeGroupRelease(HuffmanTreeGroup* group) {
-  if (group->codes) {
-    free(group->codes);
-  }
-}
-
 #if defined(__cplusplus) || defined(c_plusplus)
 }    /* extern "C" */
 #endif
--- a/modules/brotli/dec/huffman.h
+++ b/modules/brotli/dec/huffman.h
@@ -1,21 +1,12 @@
 /* Copyright 2013 Google Inc. All Rights Reserved.
 
-   Licensed under the Apache License, Version 2.0 (the "License");
-   you may not use this file except in compliance with the License.
-   You may obtain a copy of the License at
-
-   http://www.apache.org/licenses/LICENSE-2.0
-
-   Unless required by applicable law or agreed to in writing, software
-   distributed under the License is distributed on an "AS IS" BASIS,
-   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-   See the License for the specific language governing permissions and
-   limitations under the License.
+   Distributed under MIT license.
+   See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
 */
 
 /* Utilities for building Huffman decoding tables. */
 
 #ifndef BROTLI_DEC_HUFFMAN_H_
 #define BROTLI_DEC_HUFFMAN_H_
 
 #include "./types.h"
@@ -24,58 +15,59 @@
 extern "C" {
 #endif
 
 #define BROTLI_HUFFMAN_MAX_CODE_LENGTH 15
 
 /* For current format this constant equals to kNumInsertAndCopyCodes */
 #define BROTLI_HUFFMAN_MAX_CODE_LENGTHS_SIZE 704
 
-/* Maximum possible Huffman table size for an alphabet size of 704, max code
- * length 15 and root table bits 8. */
-#define BROTLI_HUFFMAN_MAX_TABLE_SIZE 1080
+/* Maximum possible Huffman table size for an alphabet size of (index * 32),
+ * max code length 15 and root table bits 8. */
+static const uint16_t kMaxHuffmanTableSize[] = {
+  256, 402, 436, 468, 500, 534, 566, 598, 630, 662, 694, 726, 758, 790, 822,
+  854, 886, 920, 952, 984, 1016, 1048, 1080};
+#define BROTLI_HUFFMAN_MAX_SIZE_26 396
+#define BROTLI_HUFFMAN_MAX_SIZE_258 632
+#define BROTLI_HUFFMAN_MAX_SIZE_272 646
 
 #define BROTLI_HUFFMAN_MAX_CODE_LENGTH_CODE_LENGTH 5
 
 typedef struct {
   uint8_t bits;     /* number of bits used for this symbol */
   uint16_t value;   /* symbol value or table offset */
 } HuffmanCode;
 
 
 /* Builds Huffman lookup table assuming code lengths are in symbol order. */
 void BrotliBuildCodeLengthsHuffmanTable(HuffmanCode* root_table,
                                         const uint8_t* const code_lengths,
                                         uint16_t *count);
 
 /* Builds Huffman lookup table assuming code lengths are in symbol order. */
 /* Returns size of resulting table. */
-int BrotliBuildHuffmanTable(HuffmanCode* root_table,
-                            int root_bits,
-                            const uint16_t* const symbol_lists,
-                            uint16_t *count_arg);
+uint32_t BrotliBuildHuffmanTable(HuffmanCode* root_table,
+                                 int root_bits,
+                                 const uint16_t* const symbol_lists,
+                                 uint16_t *count_arg);
 
 /* Builds a simple Huffman table. The num_symbols parameter is to be */
 /* interpreted as follows: 0 means 1 symbol, 1 means 2 symbols, 2 means 3 */
 /* symbols, 3 means 4 symbols with lengths 2,2,2,2, 4 means 4 symbols with */
 /* lengths 1,2,3,3. */
-int BrotliBuildSimpleHuffmanTable(HuffmanCode* table,
-                                  int root_bits,
-                                  uint16_t *symbols,
-                                  uint32_t num_symbols);
+uint32_t BrotliBuildSimpleHuffmanTable(HuffmanCode* table,
+                                       int root_bits,
+                                       uint16_t *symbols,
+                                       uint32_t num_symbols);
 
 /* Contains a collection of Huffman trees with the same alphabet size. */
 typedef struct {
   HuffmanCode** htrees;
   HuffmanCode* codes;
-  int16_t alphabet_size;
-  int16_t num_htrees;
+  uint16_t alphabet_size;
+  uint16_t num_htrees;
 } HuffmanTreeGroup;
 
-void BrotliHuffmanTreeGroupInit(HuffmanTreeGroup* group,
-                                int alphabet_size, int ntrees);
-void BrotliHuffmanTreeGroupRelease(HuffmanTreeGroup* group);
-
 #if defined(__cplusplus) || defined(c_plusplus)
 }    /* extern "C" */
 #endif
 
 #endif  /* BROTLI_DEC_HUFFMAN_H_ */
--- a/modules/brotli/dec/port.h
+++ b/modules/brotli/dec/port.h
@@ -1,26 +1,24 @@
 /* Copyright 2015 Google Inc. All Rights Reserved.
 
-   Licensed under the Apache License, Version 2.0 (the "License");
-   you may not use this file except in compliance with the License.
-   You may obtain a copy of the License at
-
-   http://www.apache.org/licenses/LICENSE-2.0
-
-   Unless required by applicable law or agreed to in writing, software
-   distributed under the License is distributed on an "AS IS" BASIS,
-   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-   See the License for the specific language governing permissions and
-   limitations under the License.
+   Distributed under MIT license.
+   See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
 */
 
 /* Macros for compiler / platform specific features and build options.
 
    Build options are:
+    * BROTLI_BUILD_32_BIT disables 64-bit optimizations
+    * BROTLI_BUILD_64_BIT forces the use of 64-bit optimizations
+    * BROTLI_BUILD_BIG_ENDIAN forces the use of big-endian optimizations
+    * BROTLI_BUILD_ENDIAN_NEUTRAL disables endian-aware optimizations
+    * BROTLI_BUILD_LITTLE_ENDIAN forces the use of little-endian optimizations
+    * BROTLI_BUILD_MODERN_COMPILER forces the use of modern compiler built-ins,
+      features and attributes
     * BROTLI_BUILD_PORTABLE disables dangerous optimizations, like unaligned
       read and overlapping memcpy; this reduces decompression speed by 5%
     * BROTLI_DEBUG dumps file name and line number when decoder detects stream
       or memory error
     * BROTLI_DECODE_DEBUG enables asserts and dumps various state information
  */
 
 #ifndef BROTLI_DEC_PORT_H_
@@ -36,26 +34,75 @@
 #ifndef __has_attribute
 #define __has_attribute(x) 0
 #endif
 
 #ifndef __has_feature
 #define __has_feature(x) 0
 #endif
 
+#if defined(__sparc)
+#define BROTLI_TARGET_SPARC
+#endif
+
+#if defined(__arm__) || defined(__thumb__) || \
+    defined(_M_ARM) || defined(_M_ARMT)
+#define BROTLI_TARGET_ARM
+#if (defined(__ARM_ARCH) && (__ARM_ARCH >= 7)) || \
+    (defined(_M_ARM) && (_M_ARM >= 7))
+#define BROTLI_TARGET_ARMV7
+#endif  /* ARMv7 */
+#endif  /* ARM */
+#if defined(__aarch64__)  /* tested on its own: not nested under 32-bit ARM */
+#define BROTLI_TARGET_ARMV8
+#endif  /* ARMv8 */
+
+#if defined(__x86_64__) || defined(_M_X64)
+#define BROTLI_TARGET_X64
+#endif
+
+#if defined(__PPC64__)
+#define BROTLI_TARGET_POWERPC64
+#endif
+
+#if defined(__GNUC__) && defined(__GNUC_MINOR__)
+#define BROTLI_GCC_VERSION (__GNUC__ * 100 + __GNUC_MINOR__)
+#else
+#define BROTLI_GCC_VERSION 0
+#endif
+
+#if defined(__ICC)
+#define BROTLI_ICC_VERSION __ICC
+#else
+#define BROTLI_ICC_VERSION 0
+#endif
+
+#if defined(BROTLI_BUILD_MODERN_COMPILER)
+#define BROTLI_MODERN_COMPILER 1
+#elif (BROTLI_GCC_VERSION > 300) || (BROTLI_ICC_VERSION >= 1600)
+#define BROTLI_MODERN_COMPILER 1
+#else
+#define BROTLI_MODERN_COMPILER 0
+#endif
+
+/* SPARC and ARMv6 don't support unaligned read.
+   Choose portable build for them. */
+#if !defined(BROTLI_BUILD_PORTABLE)
+#if defined(BROTLI_TARGET_SPARC) || \
+    (defined(BROTLI_TARGET_ARM) && !defined(BROTLI_TARGET_ARMV7))
+#define BROTLI_BUILD_PORTABLE
+#endif  /* SPARC or ARMv6 */
+#endif  /* portable build */
+
 #ifdef BROTLI_BUILD_PORTABLE
 #define BROTLI_ALIGNED_READ 1
-#define BROTLI_SAFE_MEMMOVE 1
 #else
 #define BROTLI_ALIGNED_READ 0
-#define BROTLI_SAFE_MEMMOVE 0
 #endif
 
-#define BROTLI_ASAN_BUILD __has_feature(address_sanitizer)
-
 /* Define "PREDICT_TRUE" and "PREDICT_FALSE" macros for capable compilers.
 
 To apply compiler hint, enclose the branching condition into macros, like this:
 
   if (PREDICT_TRUE(zero == 0)) {
     // main execution path
   } else {
     // compiler should place this code outside of main execution path
@@ -63,35 +110,32 @@ To apply compiler hint, enclose the bran
 
 OR:
 
   if (PREDICT_FALSE(something_rare_or_unexpected_happens)) {
     // compiler should place this code outside of main execution path
   }
 
 */
-#if (__GNUC__ > 2) || (__GNUC__ == 2 && __GNUC_MINOR__ > 95) || \
-    (defined(__llvm__) && __has_builtin(__builtin_expect))
+#if BROTLI_MODERN_COMPILER || __has_builtin(__builtin_expect)
 #define PREDICT_TRUE(x) (__builtin_expect(!!(x), 1))
 #define PREDICT_FALSE(x) (__builtin_expect(x, 0))
 #else
 #define PREDICT_FALSE(x) (x)
 #define PREDICT_TRUE(x) (x)
 #endif
 
 /* IS_CONSTANT macros returns true for compile-time constant expressions. */
-#if (__GNUC__ > 3) || (__GNUC__ == 3 && __GNUC_MINOR__ > 0) || \
-    (defined(__llvm__) && __has_builtin(__builtin_constant_p))
+#if BROTLI_MODERN_COMPILER || __has_builtin(__builtin_constant_p)
 #define IS_CONSTANT(x) __builtin_constant_p(x)
 #else
 #define IS_CONSTANT(x) 0
 #endif
 
-#if (__GNUC__ > 3) || (__GNUC__ == 3 && __GNUC_MINOR__ > 0) || \
-    (defined(__llvm__) && __has_attribute(always_inline))
+#if BROTLI_MODERN_COMPILER || __has_attribute(always_inline)
 #define ATTRIBUTE_ALWAYS_INLINE __attribute__ ((always_inline))
 #else
 #define ATTRIBUTE_ALWAYS_INLINE
 #endif
 
 #ifndef _MSC_VER
 #if defined(__cplusplus) || !defined(__STRICT_ANSI__) \
     || __STDC_VERSION__ >= 199901L
@@ -104,61 +148,83 @@ OR:
 #endif  /* _MSC_VER */
 
 #ifdef BROTLI_DECODE_DEBUG
 #define BROTLI_DCHECK(x) assert(x)
 #else
 #define BROTLI_DCHECK(x)
 #endif
 
-#if (defined(__x86_64__) || defined(_M_X64) || defined(__aarch64__) || \
-     defined(__PPC64__))
+#if defined(BROTLI_BUILD_64_BIT)
+#define BROTLI_64_BITS 1
+#elif defined(BROTLI_BUILD_32_BIT)
+#define BROTLI_64_BITS 0
+#elif defined(BROTLI_TARGET_X64) || defined(BROTLI_TARGET_ARMV8) || \
+    defined(BROTLI_TARGET_POWERPC64)
 #define BROTLI_64_BITS 1
 #else
 #define BROTLI_64_BITS 0
 #endif
 
-#if (defined(__BYTE_ORDER__) && (__BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__))
+#if defined(BROTLI_BUILD_BIG_ENDIAN)
+#define BROTLI_LITTLE_ENDIAN 0
+#define BROTLI_BIG_ENDIAN 1
+#elif defined(BROTLI_BUILD_LITTLE_ENDIAN)
 #define BROTLI_LITTLE_ENDIAN 1
+#define BROTLI_BIG_ENDIAN 0
+#elif defined(BROTLI_BUILD_ENDIAN_NEUTRAL)
+#define BROTLI_LITTLE_ENDIAN 0
+#define BROTLI_BIG_ENDIAN 0
+#elif defined(__BYTE_ORDER__) && (__BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__)
+#define BROTLI_LITTLE_ENDIAN 1
+#define BROTLI_BIG_ENDIAN 0
 #elif defined(_WIN32)
 /* Win32 can currently always be assumed to be little endian */
 #define BROTLI_LITTLE_ENDIAN 1
+#define BROTLI_BIG_ENDIAN 0
 #else
+#if (defined(__BYTE_ORDER__) && (__BYTE_ORDER__ == __ORDER_BIG_ENDIAN__))
+#define BROTLI_BIG_ENDIAN 1
+#else
+#define BROTLI_BIG_ENDIAN 0
+#endif
 #define BROTLI_LITTLE_ENDIAN 0
 #endif
 
-#if (BROTLI_64_BITS && BROTLI_LITTLE_ENDIAN)
-#define BROTLI_64_BITS_LITTLE_ENDIAN 1
-#else
-#define BROTLI_64_BITS_LITTLE_ENDIAN 0
-#endif
-
-#if (__GNUC__ > 3) || (__GNUC__ == 3 && __GNUC_MINOR__ >= 1) || \
-    (defined(__llvm__) && __has_attribute(noinline))
+#if BROTLI_MODERN_COMPILER || __has_attribute(noinline)
 #define BROTLI_NOINLINE __attribute__ ((noinline))
 #else
 #define BROTLI_NOINLINE
 #endif
 
-#if BROTLI_ASAN_BUILD && !defined(BROTLI_BUILD_PORTABLE)
-#define BROTLI_NO_ASAN __attribute__((no_sanitize("address"))) BROTLI_NOINLINE
-#else
-#define BROTLI_NO_ASAN
-#endif
-
 #define BROTLI_REPEAT(N, X) { \
   if ((N & 1) != 0) {X;} \
   if ((N & 2) != 0) {X; X;} \
   if ((N & 4) != 0) {X; X; X; X;} \
 }
 
-#if (__GNUC__ > 2) || defined(__llvm__)
-#if (defined(__ARM_ARCH) && (__ARM_ARCH >= 7))
+#if BROTLI_MODERN_COMPILER || defined(__llvm__)
+#if defined(BROTLI_TARGET_ARMV7)
 static BROTLI_INLINE unsigned BrotliRBit(unsigned input) {
   unsigned output;
   __asm__("rbit %0, %1\n" : "=r"(output) : "r"(input));
   return output;
 }
 #define BROTLI_RBIT(x) BrotliRBit(x)
 #endif  /* armv7 */
 #endif  /* gcc || clang */
 
+#if defined(BROTLI_TARGET_ARM)
+#define BROTLI_HAS_UBFX 1
+#else
+#define BROTLI_HAS_UBFX 0
+#endif
+
+#define BROTLI_ALLOC(S, L) (S)->alloc_func((S)->memory_manager_opaque, (L))
+
+#define BROTLI_FREE(S, X) { \
+  (S)->free_func((S)->memory_manager_opaque, (X)); \
+  (X) = NULL; \
+}  /* frees X via S's free_func, then NULLs X; S and X are evaluated twice */
+
+#define BROTLI_UNUSED(X) (void)(X)
+
 #endif  /* BROTLI_DEC_PORT_H_ */
--- a/modules/brotli/dec/prefix.h
+++ b/modules/brotli/dec/prefix.h
@@ -1,35 +1,28 @@
 /* Copyright 2013 Google Inc. All Rights Reserved.
 
-   Licensed under the Apache License, Version 2.0 (the "License");
-   you may not use this file except in compliance with the License.
-   You may obtain a copy of the License at
-
-   http://www.apache.org/licenses/LICENSE-2.0
-
-   Unless required by applicable law or agreed to in writing, software
-   distributed under the License is distributed on an "AS IS" BASIS,
-   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-   See the License for the specific language governing permissions and
-   limitations under the License.
+   Distributed under MIT license.
+   See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
 */
 
 /* Lookup tables to map prefix codes to value ranges. This is used during
    decoding of the block lengths, literal insertion lengths and copy lengths.
 */
 
 #ifndef BROTLI_DEC_PREFIX_H_
 #define BROTLI_DEC_PREFIX_H_
 
+#include "./types.h"
+
 /* Represents the range of values belonging to a prefix code: */
 /* [offset, offset + 2^nbits) */
 struct PrefixCodeRange {
-  int16_t offset;
-  int8_t nbits;
+  uint16_t offset;
+  uint8_t nbits;
 };
 
 static const struct PrefixCodeRange kBlockLengthPrefixCode[] = {
   {   1,  2}, {    5,  2}, {  9,   2}, {  13,  2},
   {  17,  3}, {   25,  3}, {  33,  3}, {  41,  3},
   {  49,  4}, {   65,  4}, {  81,  4}, {  97,  4},
   { 113,  5}, {  145,  5}, { 177,  5}, { 209,  5},
   { 241,  6}, {  305,  6}, { 369,  7}, { 497,  8},
--- a/modules/brotli/dec/state.c
+++ b/modules/brotli/dec/state.c
@@ -1,52 +1,79 @@
 /* Copyright 2015 Google Inc. All Rights Reserved.
 
-   Licensed under the Apache License, Version 2.0 (the "License");
-   you may not use this file except in compliance with the License.
-   You may obtain a copy of the License at
-
-   http://www.apache.org/licenses/LICENSE-2.0
-
-   Unless required by applicable law or agreed to in writing, software
-   distributed under the License is distributed on an "AS IS" BASIS,
-   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-   See the License for the specific language governing permissions and
-   limitations under the License.
+   Distributed under MIT license.
+   See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
 */
 
-#include "./huffman.h"
 #include "./state.h"
 
-#include <stdlib.h>
-#include <string.h>
+#include <stdlib.h>  /* free, malloc */
+
+#include "./huffman.h"
+#include "./types.h"
 
 #if defined(__cplusplus) || defined(c_plusplus)
 extern "C" {
 #endif
 
+static void* DefaultAllocFunc(void* opaque, size_t size) {
+  BROTLI_UNUSED(opaque);  /* the malloc-backed default needs no context */
+  return malloc(size);    /* result is returned as-is, possibly NULL */
+}
+
+static void DefaultFreeFunc(void* opaque, void* address) {
+  BROTLI_UNUSED(opaque);  /* the free-backed default needs no context */
+  free(address);          /* counterpart of DefaultAllocFunc's malloc */
+}
+
 void BrotliStateInit(BrotliState* s) {
+  BrotliStateInitWithCustomAllocators(s, 0, 0, 0);  /* 0 => default malloc/free */
+}
+
+void BrotliStateInitWithCustomAllocators(BrotliState* s,
+    brotli_alloc_func alloc_func, brotli_free_func free_func, void* opaque) {
+  if (!alloc_func) {
+    s->alloc_func = DefaultAllocFunc;
+    s->free_func = DefaultFreeFunc;
+    s->memory_manager_opaque = 0;
+  } else {
+    s->alloc_func = alloc_func;
+    s->free_func = free_func;
+    s->memory_manager_opaque = opaque;
+  }
+
+  BrotliInitBitReader(&s->br);
   s->state = BROTLI_STATE_UNINITED;
   s->substate_metablock_header = BROTLI_STATE_METABLOCK_HEADER_NONE;
   s->substate_tree_group = BROTLI_STATE_TREE_GROUP_NONE;
   s->substate_context_map = BROTLI_STATE_CONTEXT_MAP_NONE;
   s->substate_uncompressed = BROTLI_STATE_UNCOMPRESSED_NONE;
   s->substate_huffman = BROTLI_STATE_HUFFMAN_NONE;
   s->substate_decode_uint8 = BROTLI_STATE_DECODE_UINT8_NONE;
+  s->substate_read_block_length = BROTLI_STATE_READ_BLOCK_LENGTH_NONE;
+
+  s->buffer_length = 0;
+  s->loop_counter = 0;
+  s->pos = 0;
+  s->rb_roundtrips = 0;
+  s->partial_pos_out = 0;
 
   s->block_type_trees = NULL;
   s->block_len_trees = NULL;
   s->ringbuffer = NULL;
 
   s->context_map = NULL;
   s->context_modes = NULL;
   s->dist_context_map = NULL;
   s->context_map_slice = NULL;
   s->dist_context_map_slice = NULL;
 
+  s->sub_loop_counter = 0;
+
   s->literal_hgroup.codes = NULL;
   s->literal_hgroup.htrees = NULL;
   s->insert_copy_hgroup.codes = NULL;
   s->insert_copy_hgroup.htrees = NULL;
   s->distance_hgroup.codes = NULL;
   s->distance_hgroup.htrees = NULL;
 
 
@@ -67,19 +94,19 @@ void BrotliStateInit(BrotliState* s) {
   /* Make small negative indexes addressable. */
   s->symbol_lists = &s->symbols_lists_array[BROTLI_HUFFMAN_MAX_CODE_LENGTH + 1];
 
   s->mtf_upper_bound = 255;
 }
 
 void BrotliStateMetablockBegin(BrotliState* s) {
   s->meta_block_remaining_len = 0;
-  s->block_length[0] = 1 << 28;
-  s->block_length[1] = 1 << 28;
-  s->block_length[2] = 1 << 28;
+  s->block_length[0] = 1U << 28;
+  s->block_length[1] = 1U << 28;
+  s->block_length[2] = 1U << 28;
   s->num_block_types[0] = 1;
   s->num_block_types[1] = 1;
   s->num_block_types[2] = 1;
   s->block_type_rb[0] = 1;
   s->block_type_rb[1] = 0;
   s->block_type_rb[2] = 1;
   s->block_type_rb[3] = 0;
   s->block_type_rb[4] = 1;
@@ -98,57 +125,54 @@ void BrotliStateMetablockBegin(BrotliSta
   s->literal_hgroup.htrees = NULL;
   s->insert_copy_hgroup.codes = NULL;
   s->insert_copy_hgroup.htrees = NULL;
   s->distance_hgroup.codes = NULL;
   s->distance_hgroup.htrees = NULL;
 }
 
 void BrotliStateCleanupAfterMetablock(BrotliState* s) {
-  if (s->context_modes != 0) {
-    free(s->context_modes);
-    s->context_modes = NULL;
-  }
-  if (s->context_map != 0) {
-    free(s->context_map);
-    s->context_map = NULL;
-  }
-  if (s->dist_context_map != 0) {
-    free(s->dist_context_map);
-    s->dist_context_map = NULL;
-  }
+  BROTLI_FREE(s, s->context_modes);
+  BROTLI_FREE(s, s->context_map);
+  BROTLI_FREE(s, s->dist_context_map);
 
-  BrotliHuffmanTreeGroupRelease(&s->literal_hgroup);
-  BrotliHuffmanTreeGroupRelease(&s->insert_copy_hgroup);
-  BrotliHuffmanTreeGroupRelease(&s->distance_hgroup);
-  s->literal_hgroup.codes = NULL;
-  s->literal_hgroup.htrees = NULL;
-  s->insert_copy_hgroup.codes = NULL;
-  s->insert_copy_hgroup.htrees = NULL;
-  s->distance_hgroup.codes = NULL;
-  s->distance_hgroup.htrees = NULL;
+  BrotliHuffmanTreeGroupRelease(s, &s->literal_hgroup);
+  BrotliHuffmanTreeGroupRelease(s, &s->insert_copy_hgroup);
+  BrotliHuffmanTreeGroupRelease(s, &s->distance_hgroup);
 }
 
 void BrotliStateCleanup(BrotliState* s) {
-  if (s->context_modes != 0) {
-    free(s->context_modes);
-  }
-  if (s->context_map != 0) {
-    free(s->context_map);
-  }
-  if (s->dist_context_map != 0) {
-    free(s->dist_context_map);
-  }
-  BrotliHuffmanTreeGroupRelease(&s->literal_hgroup);
-  BrotliHuffmanTreeGroupRelease(&s->insert_copy_hgroup);
-  BrotliHuffmanTreeGroupRelease(&s->distance_hgroup);
+  BrotliStateCleanupAfterMetablock(s);
+
+  BROTLI_FREE(s, s->ringbuffer);
+  BROTLI_FREE(s, s->block_type_trees);
+}
+
+int BrotliStateIsStreamStart(const BrotliState* s) {
+  return (s->state == BROTLI_STATE_UNINITED &&  /* still in the initial state */
+      BrotliGetAvailableBits(&s->br) == 0);     /* and no bits are buffered */
+}
+
+int BrotliStateIsStreamEnd(const BrotliState* s) {
+  return s->state == BROTLI_STATE_DONE;  /* non-zero only in the DONE state */
+}
 
-  if (s->ringbuffer != 0) {
-    free(s->ringbuffer);
-  }
-  if (s->block_type_trees != 0) {
-    free(s->block_type_trees);
-  }
+void BrotliHuffmanTreeGroupInit(BrotliState* s, HuffmanTreeGroup* group,
+    uint32_t alphabet_size, uint32_t ntrees) {
+  /* Single allocation: ntrees code tables, then ntrees table pointers. */
+  const size_t max_table_size = kMaxHuffmanTableSize[(alphabet_size + 31) >> 5];
+  const size_t code_size = sizeof(HuffmanCode) * ntrees * max_table_size;
+  const size_t htree_size = sizeof(HuffmanCode*) * ntrees;
+  char *p = (char*)BROTLI_ALLOC(s, code_size + htree_size);
+  group->alphabet_size = (uint16_t)alphabet_size;
+  group->num_htrees = (uint16_t)ntrees;
+  group->codes = (HuffmanCode*)p;  /* stays NULL if the allocation failed */
+  group->htrees = p ? (HuffmanCode**)(p + code_size) : NULL;
+}
+
+void BrotliHuffmanTreeGroupRelease(BrotliState* s, HuffmanTreeGroup* group) {
+  BROTLI_FREE(s, group->codes);  /* frees the block and sets codes to NULL */
+  group->htrees = NULL;          /* htrees pointed into the freed block */
 }
 
 #if defined(__cplusplus) || defined(c_plusplus)
 } /* extern "C" */
 #endif
--- a/modules/brotli/dec/state.h
+++ b/modules/brotli/dec/state.h
@@ -1,53 +1,43 @@
 /* Copyright 2015 Google Inc. All Rights Reserved.
 
-   Licensed under the Apache License, Version 2.0 (the "License");
-   you may not use this file except in compliance with the License.
-   You may obtain a copy of the License at
-
-   http://www.apache.org/licenses/LICENSE-2.0
-
-   Unless required by applicable law or agreed to in writing, software
-   distributed under the License is distributed on an "AS IS" BASIS,
-   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-   See the License for the specific language governing permissions and
-   limitations under the License.
+   Distributed under MIT license.
+   See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
 */
 
 /* Brotli state for partial streaming decoding. */
 
 #ifndef BROTLI_DEC_STATE_H_
 #define BROTLI_DEC_STATE_H_
 
-#include <stdio.h>
 #include "./bit_reader.h"
 #include "./huffman.h"
-#include "./streams.h"
 #include "./types.h"
 
 #if defined(__cplusplus) || defined(c_plusplus)
 extern "C" {
 #endif
 
 typedef enum {
   BROTLI_STATE_UNINITED,
-  BROTLI_STATE_BITREADER_WARMUP,
   BROTLI_STATE_METABLOCK_BEGIN,
   BROTLI_STATE_METABLOCK_HEADER,
+  BROTLI_STATE_METABLOCK_HEADER_2,
   BROTLI_STATE_CONTEXT_MODES,
   BROTLI_STATE_COMMAND_BEGIN,
   BROTLI_STATE_COMMAND_INNER,
+  BROTLI_STATE_COMMAND_POST_DECODE_LITERALS,
+  BROTLI_STATE_COMMAND_POST_WRAP_COPY,
   BROTLI_STATE_UNCOMPRESSED,
   BROTLI_STATE_METADATA,
   BROTLI_STATE_COMMAND_INNER_WRITE,
   BROTLI_STATE_METABLOCK_DONE,
   BROTLI_STATE_COMMAND_POST_WRITE_1,
   BROTLI_STATE_COMMAND_POST_WRITE_2,
-  BROTLI_STATE_COMMAND_POST_WRAP_COPY,
   BROTLI_STATE_HUFFMAN_CODE_0,
   BROTLI_STATE_HUFFMAN_CODE_1,
   BROTLI_STATE_HUFFMAN_CODE_2,
   BROTLI_STATE_HUFFMAN_CODE_3,
   BROTLI_STATE_CONTEXT_MAP_1,
   BROTLI_STATE_CONTEXT_MAP_2,
   BROTLI_STATE_TREE_GROUP,
   BROTLI_STATE_DONE
@@ -61,98 +51,123 @@ typedef enum {
   BROTLI_STATE_METABLOCK_HEADER_UNCOMPRESSED,
   BROTLI_STATE_METABLOCK_HEADER_RESERVED,
   BROTLI_STATE_METABLOCK_HEADER_BYTES,
   BROTLI_STATE_METABLOCK_HEADER_METADATA
 } BrotliRunningMetablockHeaderState;
 
 typedef enum {
   BROTLI_STATE_UNCOMPRESSED_NONE,
-  BROTLI_STATE_UNCOMPRESSED_SHORT,
-  BROTLI_STATE_UNCOMPRESSED_COPY,
   BROTLI_STATE_UNCOMPRESSED_WRITE
 } BrotliRunningUncompressedState;
 
 typedef enum {
   BROTLI_STATE_TREE_GROUP_NONE,
   BROTLI_STATE_TREE_GROUP_LOOP
 } BrotliRunningTreeGroupState;
 
 typedef enum {
   BROTLI_STATE_CONTEXT_MAP_NONE,
   BROTLI_STATE_CONTEXT_MAP_READ_PREFIX,
   BROTLI_STATE_CONTEXT_MAP_HUFFMAN,
-  BROTLI_STATE_CONTEXT_MAP_DECODE
+  BROTLI_STATE_CONTEXT_MAP_DECODE,
+  BROTLI_STATE_CONTEXT_MAP_TRANSFORM
 } BrotliRunningContextMapState;
 
 typedef enum {
   BROTLI_STATE_HUFFMAN_NONE,
+  BROTLI_STATE_HUFFMAN_SIMPLE_SIZE,
+  BROTLI_STATE_HUFFMAN_SIMPLE_READ,
+  BROTLI_STATE_HUFFMAN_SIMPLE_BUILD,
+  BROTLI_STATE_HUFFMAN_COMPLEX,
   BROTLI_STATE_HUFFMAN_LENGTH_SYMBOLS
 } BrotliRunningHuffmanState;
 
 typedef enum {
   BROTLI_STATE_DECODE_UINT8_NONE,
   BROTLI_STATE_DECODE_UINT8_SHORT,
   BROTLI_STATE_DECODE_UINT8_LONG
 } BrotliRunningDecodeUint8State;
 
-typedef struct {
+typedef enum {
+  BROTLI_STATE_READ_BLOCK_LENGTH_NONE,
+  BROTLI_STATE_READ_BLOCK_LENGTH_SUFFIX
+} BrotliRunningReadBlockLengthState;
+
+struct BrotliStateStruct {
   BrotliRunningState state;
+
   /* This counter is reused for several disjoint loops. */
+  int loop_counter;
+
   BrotliBitReader br;
-  int loop_counter;
+
+  brotli_alloc_func alloc_func;
+  brotli_free_func free_func;
+  void* memory_manager_opaque;
+
+  /* Temporary storage for remaining input. */
+  union {
+    uint64_t u64;
+    uint8_t u8[8];
+  } buffer;
+  uint32_t buffer_length;
+
   int pos;
   int max_backward_distance;
   int max_backward_distance_minus_custom_dict_size;
   int max_distance;
   int ringbuffer_size;
   int ringbuffer_mask;
   int dist_rb_idx;
   int dist_rb[4];
   uint8_t* ringbuffer;
   uint8_t* ringbuffer_end;
   HuffmanCode* htree_command;
   const uint8_t* context_lookup1;
   const uint8_t* context_lookup2;
   uint8_t* context_map_slice;
   uint8_t* dist_context_map_slice;
 
+  uint32_t sub_loop_counter;
+
   /* This ring buffer holds a few past copy distances that will be used by */
   /* some special distance codes. */
   HuffmanTreeGroup literal_hgroup;
   HuffmanTreeGroup insert_copy_hgroup;
   HuffmanTreeGroup distance_hgroup;
   HuffmanCode* block_type_trees;
   HuffmanCode* block_len_trees;
   /* This is true if the literal context map histogram type always matches the
   block type. It is then not needed to keep the context (faster decoding). */
   int trivial_literal_context;
   int distance_context;
   int meta_block_remaining_len;
-  int block_length[3];
-  int num_block_types[3];
-  int block_type_rb[6];
-  int distance_postfix_bits;
-  int num_direct_distance_codes;
+  uint32_t block_length_index;
+  uint32_t block_length[3];
+  uint32_t num_block_types[3];
+  uint32_t block_type_rb[6];
+  uint32_t distance_postfix_bits;
+  uint32_t num_direct_distance_codes;
   int distance_postfix_mask;
-  int num_dist_htrees;
+  uint32_t num_dist_htrees;
   uint8_t* dist_context_map;
   HuffmanCode *literal_htree;
   uint8_t literal_htree_index;
   uint8_t dist_htree_index;
-  uint8_t repeat_code_len;
-  uint8_t prev_code_len;
+  uint32_t repeat_code_len;
+  uint32_t prev_code_len;
 
 
   int copy_length;
   int distance_code;
 
   /* For partial write operations */
-  int to_write;
-  int partially_written;
+  size_t rb_roundtrips;  /* How many times we went around the ringbuffer */
+  size_t partial_pos_out;  /* How much output to the user in total (<= rb) */
 
   /* For ReadHuffmanCode */
   uint32_t symbol;
   uint32_t repeat;
   uint32_t space;
 
   HuffmanCode table[32];
   /* List of of symbol chains. */
@@ -166,50 +181,70 @@ typedef struct {
   /* Population counts for the code lengths */
   uint16_t code_length_histo[16];
 
   /* For HuffmanTreeGroupDecode */
   int htree_index;
   HuffmanCode* next;
 
   /* For DecodeContextMap */
-  int context_index;
-  int max_run_length_prefix;
-  HuffmanCode context_map_table[BROTLI_HUFFMAN_MAX_TABLE_SIZE];
+  uint32_t context_index;
+  uint32_t max_run_length_prefix;
+  uint32_t code;
+  HuffmanCode context_map_table[BROTLI_HUFFMAN_MAX_SIZE_272];
 
   /* For InverseMoveToFrontTransform */
-  int mtf_upper_bound;
+  uint32_t mtf_upper_bound;
   uint8_t mtf[256];
 
   /* For custom dictionaries */
   const uint8_t* custom_dict;
   int custom_dict_size;
 
   /* less used attributes are in the end of this struct */
   /* States inside function calls */
   BrotliRunningMetablockHeaderState substate_metablock_header;
   BrotliRunningTreeGroupState substate_tree_group;
   BrotliRunningContextMapState substate_context_map;
   BrotliRunningUncompressedState substate_uncompressed;
   BrotliRunningHuffmanState substate_huffman;
   BrotliRunningDecodeUint8State substate_decode_uint8;
+  BrotliRunningReadBlockLengthState substate_read_block_length;
 
   uint8_t is_last_metablock;
   uint8_t is_uncompressed;
   uint8_t is_metadata;
   uint8_t size_nibbles;
   uint32_t window_bits;
 
-  int num_literal_htrees;
+  uint32_t num_literal_htrees;
   uint8_t* context_map;
   uint8_t* context_modes;
-} BrotliState;
+};
+
+typedef struct BrotliStateStruct BrotliState;
 
 void BrotliStateInit(BrotliState* s);
+void BrotliStateInitWithCustomAllocators(BrotliState* s,
+                                         brotli_alloc_func alloc_func,
+                                         brotli_free_func free_func,
+                                         void* opaque);
 void BrotliStateCleanup(BrotliState* s);
 void BrotliStateMetablockBegin(BrotliState* s);
 void BrotliStateCleanupAfterMetablock(BrotliState* s);
+void BrotliHuffmanTreeGroupInit(BrotliState* s, HuffmanTreeGroup* group,
+                                uint32_t alphabet_size, uint32_t ntrees);
+void BrotliHuffmanTreeGroupRelease(BrotliState* s, HuffmanTreeGroup* group);
+
+/* Returns 1 if s is in a state where we have not read any input bytes yet,
+   and 0 otherwise. */
+int BrotliStateIsStreamStart(const BrotliState* s);
+
+/* Returns 1 if s is in a state where we reached the end of the input and
+   produced all of the output, and 0 otherwise. */
+int BrotliStateIsStreamEnd(const BrotliState* s);
+
 
 #if defined(__cplusplus) || defined(c_plusplus)
 } /* extern "C" */
 #endif
 
 #endif  /* BROTLI_DEC_STATE_H_ */
--- a/modules/brotli/dec/streams.c
+++ b/modules/brotli/dec/streams.c
@@ -1,21 +1,12 @@
 /* Copyright 2013 Google Inc. All Rights Reserved.
 
-   Licensed under the Apache License, Version 2.0 (the "License");
-   you may not use this file except in compliance with the License.
-   You may obtain a copy of the License at
-
-   http://www.apache.org/licenses/LICENSE-2.0
-
-   Unless required by applicable law or agreed to in writing, software
-   distributed under the License is distributed on an "AS IS" BASIS,
-   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-   See the License for the specific language governing permissions and
-   limitations under the License.
+   Distributed under MIT license.
+   See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
 */
 
 /* Functions for streaming input and output. */
 
 #include <string.h>
 #ifndef _WIN32
 #include <unistd.h>
 #endif
@@ -88,21 +79,23 @@ int BrotliFileOutputFunction(void* data,
 
 BrotliOutput BrotliFileOutput(FILE* f) {
   BrotliOutput out;
   out.cb_ = BrotliFileOutputFunction;
   out.data_ = f;
   return out;
 }
 
-int BrotliNullOutputFunction(void* data, const uint8_t* buf, size_t count) {
+int BrotliNullOutputFunction(void* data , const uint8_t* buf, size_t count) {
+  BROTLI_UNUSED(data);
+  BROTLI_UNUSED(buf);
   return (int)count;
 }
 
-BrotliOutput BrotliNullOutput() {
+BrotliOutput BrotliNullOutput(void) {
   BrotliOutput out;
   out.cb_ = BrotliNullOutputFunction;
   out.data_ = NULL;
   return out;
 }
 
 #if defined(__cplusplus) || defined(c_plusplus)
 }    /* extern "C" */
--- a/modules/brotli/dec/streams.h
+++ b/modules/brotli/dec/streams.h
@@ -1,21 +1,12 @@
 /* Copyright 2013 Google Inc. All Rights Reserved.
 
-   Licensed under the Apache License, Version 2.0 (the "License");
-   you may not use this file except in compliance with the License.
-   You may obtain a copy of the License at
-
-   http://www.apache.org/licenses/LICENSE-2.0
-
-   Unless required by applicable law or agreed to in writing, software
-   distributed under the License is distributed on an "AS IS" BASIS,
-   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-   See the License for the specific language governing permissions and
-   limitations under the License.
+   Distributed under MIT license.
+   See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
 */
 
 /* Functions for streaming input and output. */
 
 #ifndef BROTLI_DEC_STREAMS_H_
 #define BROTLI_DEC_STREAMS_H_
 
 #include <stdio.h>
@@ -90,15 +81,15 @@ int BrotliFileInputFunction(void* data, 
 BrotliInput BrotliFileInput(FILE* f);
 
 /* Output callback that writes to a file. */
 int BrotliFileOutputFunction(void* data, const uint8_t* buf, size_t count);
 BrotliOutput BrotliFileOutput(FILE* f);
 
 /* Output callback that does nothing, always consumes the whole input. */
 int BrotliNullOutputFunction(void* data, const uint8_t* buf, size_t count);
-BrotliOutput BrotliNullOutput();
+BrotliOutput BrotliNullOutput(void);
 
 #if defined(__cplusplus) || defined(c_plusplus)
 }    /* extern "C" */
 #endif
 
 #endif  /* BROTLI_DEC_STREAMS_H_ */
--- a/modules/brotli/dec/transform.h
+++ b/modules/brotli/dec/transform.h
@@ -1,30 +1,19 @@
 /* Copyright 2013 Google Inc. All Rights Reserved.
 
-   Licensed under the Apache License, Version 2.0 (the "License");
-   you may not use this file except in compliance with the License.
-   You may obtain a copy of the License at
-
-   http://www.apache.org/licenses/LICENSE-2.0
-
-   Unless required by applicable law or agreed to in writing, software
-   distributed under the License is distributed on an "AS IS" BASIS,
-   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-   See the License for the specific language governing permissions and
-   limitations under the License.
+   Distributed under MIT license.
+   See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
 */
 
 /* Transformations on dictionary words. */
 
 #ifndef BROTLI_DEC_TRANSFORM_H_
 #define BROTLI_DEC_TRANSFORM_H_
 
-#include <stdio.h>
-#include <ctype.h>
 #include "./port.h"
 #include "./types.h"
 
 #if defined(__cplusplus) || defined(c_plusplus)
 extern "C" {
 #endif
 
 enum WordTransformType {
@@ -273,32 +262,29 @@ static BROTLI_NOINLINE int TransformDict
     uint8_t* dst, const uint8_t* word, int len, int transform) {
   int idx = 0;
   {
     const char* prefix = &kPrefixSuffix[kTransforms[transform].prefix_id];
     while (*prefix) { dst[idx++] = (uint8_t)*prefix++; }
   }
   {
     const int t = kTransforms[transform].transform;
-    int skip = t < kOmitFirst1 ? 0 : t - (kOmitFirst1 - 1);
     int i = 0;
-    uint8_t* uppercase;
-    if (skip > len) {
-      skip = len;
-    }
-    word += skip;
-    len -= skip;
-    if (t <= kOmitLast9) {
+    int skip = t - (kOmitFirst1 - 1);
+    if (skip > 0) {
+      word += skip;
+      len -= skip;
+    } else if (t <= kOmitLast9) {
       len -= t;
     }
     while (i < len) { dst[idx++] = word[i++]; }
-    uppercase = &dst[idx - len];
     if (t == kUppercaseFirst) {
-      ToUpperCase(uppercase);
+      ToUpperCase(&dst[idx - len]);
     } else if (t == kUppercaseAll) {
+      uint8_t* uppercase = &dst[idx - len];
       while (len > 0) {
         int step = ToUpperCase(uppercase);
         uppercase += step;
         len -= step;
       }
     }
   }
   {
--- a/modules/brotli/dec/types.h
+++ b/modules/brotli/dec/types.h
@@ -1,36 +1,38 @@
 /* Copyright 2013 Google Inc. All Rights Reserved.
 
-   Licensed under the Apache License, Version 2.0 (the "License");
-   you may not use this file except in compliance with the License.
-   You may obtain a copy of the License at
-
-   http://www.apache.org/licenses/LICENSE-2.0
-
-   Unless required by applicable law or agreed to in writing, software
-   distributed under the License is distributed on an "AS IS" BASIS,
-   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-   See the License for the specific language governing permissions and
-   limitations under the License.
+   Distributed under MIT license.
+   See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
 */
 
 /* Common types */
 
 #ifndef BROTLI_DEC_TYPES_H_
 #define BROTLI_DEC_TYPES_H_
 
 #include <stddef.h>  /* for size_t */
 
 #if defined(_MSC_VER) && (_MSC_VER < 1600)
-typedef signed   char int8_t;
-typedef unsigned char uint8_t;
-typedef signed   short int16_t;
-typedef unsigned short uint16_t;
-typedef signed   int int32_t;
-typedef unsigned int uint32_t;
-typedef unsigned long long int uint64_t;
-typedef long long int int64_t;
+typedef __int8 int8_t;
+typedef unsigned __int8 uint8_t;
+typedef __int16 int16_t;
+typedef unsigned __int16 uint16_t;
+typedef __int32 int32_t;
+typedef unsigned __int32 uint32_t;
+typedef unsigned __int64 uint64_t;
+typedef __int64 int64_t;
 #else
 #include <stdint.h>
 #endif  /* defined(_MSC_VER) && (_MSC_VER < 1600) */
 
+/* Allocating function pointer. Function MUST return 0 in the case of failure.
+   Otherwise it MUST return a valid pointer to a memory region of at least
+   size bytes. The size argument is not allowed to be 0.
+   The opaque argument is a pointer provided by the client and can be used to
+   bind the function to a specific object (e.g. a memory pool). */
+typedef void* (*brotli_alloc_func) (void* opaque, size_t size);
+
+/* Deallocating function pointer. Function SHOULD be a no-op if the address
+   is 0. */
+typedef void  (*brotli_free_func)  (void* opaque, void* address);
+
 #endif  /* BROTLI_DEC_TYPES_H_ */
--- a/modules/brotli/moz.build
+++ b/modules/brotli/moz.build
@@ -5,27 +5,25 @@
 # file, You can obtain one at http://mozilla.org/MPL/2.0/.
 
 EXPORTS += [
     'dec/bit_reader.h',
     'dec/decode.h',
     'dec/huffman.h',
     'dec/port.h',
     'dec/state.h',
-    'dec/streams.h',
     'dec/types.h',
 ]
 
 UNIFIED_SOURCES += [
     'dec/bit_reader.c',
     'dec/decode.c',
     'dec/dictionary.c',
     'dec/huffman.c',
     'dec/state.c',
-    'dec/streams.c',
 ]
 
 # We allow warnings for third-party code that can be updated from upstream.
 ALLOW_COMPILER_WARNINGS = True
 
 CFLAGS += ['-DBROTLI_BUILD_PORTABLE']
 
 Library('brotli')
--- a/modules/brotli/update.sh
+++ b/modules/brotli/update.sh
@@ -1,14 +1,14 @@
 #!/bin/sh
 
 # Script to update the mozilla in-tree copy of the Brotli decompressor.
 # Run this within the /modules/brotli directory of the source tree.
 
-MY_TEMP_DIR=`mktemp -d -t brotli_update` || exit 1
+MY_TEMP_DIR=`mktemp -d -t brotli_update.XXXXXX` || exit 1
 
 git clone https://github.com/google/brotli ${MY_TEMP_DIR}/brotli
 
 COMMIT=`(cd ${MY_TEMP_DIR}/brotli && git log | head -n 1)`
 perl -p -i -e "s/\[commit [0-9a-f]{40}\]/[${COMMIT}]/" README.mozilla;
 
 rm -rf dec
 mv ${MY_TEMP_DIR}/brotli/dec dec
--- a/netwerk/streamconv/converters/nsHTTPCompressConv.cpp
+++ b/netwerk/streamconv/converters/nsHTTPCompressConv.cpp
@@ -131,40 +131,42 @@ nsHTTPCompressConv::OnStopRequest(nsIReq
   // by content sniffing but only via header.
   if (!mStreamEnded && NS_SUCCEEDED(status) &&
       (mFailUncleanStops && (mMode == HTTP_COMPRESS_GZIP)) ) {
     // This is not a clean end of gzip stream: the transfer is incomplete.
     status = NS_ERROR_NET_PARTIAL_TRANSFER;
     LOG(("nsHttpCompresssConv %p onstop partial gzip\n", this));
   }
   if (NS_SUCCEEDED(status) && mMode == HTTP_COMPRESS_BROTLI) {
-    uint32_t waste;
     nsCOMPtr<nsIForcePendingChannel> fpChannel = do_QueryInterface(request);
     bool isPending = false;
     if (request) {
       request->IsPending(&isPending);
     }
     if (fpChannel && !isPending) {
       fpChannel->ForcePending(true);
     }
-    status = BrotliHandler(nullptr, this, nullptr, 0, 0, &waste);
+    if (mBrotli->mTotalOut == 0 && !BrotliStateIsStreamEnd(&mBrotli->mState)) {
+      status = NS_ERROR_INVALID_CONTENT_ENCODING;
+    }
     LOG(("nsHttpCompresssConv %p onstop brotlihandler rv %x\n", this, status));
     if (fpChannel && !isPending) {
       fpChannel->ForcePending(false);
     }
   }
   return mListener->OnStopRequest(request, aContext, status);
 }
 
 
 // static
 NS_METHOD
 nsHTTPCompressConv::BrotliHandler(nsIInputStream *stream, void *closure, const char *dataIn,
                                   uint32_t, uint32_t aAvail, uint32_t *countRead)
 {
+  MOZ_ASSERT(stream);
   nsHTTPCompressConv *self = static_cast<nsHTTPCompressConv *>(closure);
   *countRead = 0;
 
   const uint32_t kOutSize = 128 * 1024; // just a chunk size, we call in a loop
   unsigned char outBuffer[kOutSize];
   unsigned char *outPtr;
   size_t outSize;
   size_t avail = aAvail;
@@ -174,38 +176,40 @@ nsHTTPCompressConv::BrotliHandler(nsIInp
     *countRead = aAvail;
     return NS_OK;
   }
 
   do {
     outSize = kOutSize;
     outPtr = outBuffer;
 
-    // brotli api is documented in brotli/dec/decode.h
-    LOG(("nsHttpCompresssConv %p brotlihandler decompress %d finish %d\n",
-         self, avail, !stream));
-    res = ::BrotliDecompressBufferStreaming(
-      &avail, reinterpret_cast<const unsigned char **>(&dataIn), stream ? 0 : 1,
+    // brotli api is documented in brotli/dec/decode.h and brotli/dec/decode.c
+    LOG(("nsHttpCompresssConv %p brotlihandler decompress %d\n", self, avail));
+    res = ::BrotliDecompressStream(
+      &avail, reinterpret_cast<const unsigned char **>(&dataIn),
       &outSize, &outPtr, &self->mBrotli->mTotalOut, &self->mBrotli->mState);
     outSize = kOutSize - outSize;
     LOG(("nsHttpCompresssConv %p brotlihandler decompress rv=%x out=%d\n",
          self, res, outSize));
 
     if (res == BROTLI_RESULT_ERROR) {
       LOG(("nsHttpCompressConv %p marking invalid encoding", self));
       self->mBrotli->mStatus = NS_ERROR_INVALID_CONTENT_ENCODING;
       return self->mBrotli->mStatus;
     }
 
-    // in 'the current implementation' brotli consumes all input on success
-    MOZ_ASSERT(!avail);
-    if (avail) {
-      LOG(("nsHttpCompressConv %p did not consume all input", self));
-      self->mBrotli->mStatus = NS_ERROR_UNEXPECTED;
-      return self->mBrotli->mStatus;
+    // in 'the current implementation' brotli must consume everything before
+    // asking for more input
+    if (res == BROTLI_RESULT_NEEDS_MORE_INPUT) {
+      MOZ_ASSERT(!avail);
+      if (avail) {
+        LOG(("nsHttpCompressConv %p did not consume all input", self));
+        self->mBrotli->mStatus = NS_ERROR_UNEXPECTED;
+        return self->mBrotli->mStatus;
+      }
     }
     if (outSize > 0) {
       nsresult rv = self->do_OnDataAvailable(self->mBrotli->mRequest,
                                              self->mBrotli->mContext,
                                              self->mBrotli->mSourceOffset,
                                              reinterpret_cast<const char *>(outBuffer),
                                              outSize);
       LOG(("nsHttpCompressConv %p BrotliHandler ODA rv=%x", self, rv));