Update (#914)

 * slimmer stack frames in encoder
 * fix MSAN problem in hasher_composite
   (not dangerous, only in large_window mode)
 * fix JNI decoder wrapper - power-of-two payloads fail to decode sometimes
 * reformat polyfil.js and decode_test.js
diff --git a/c/enc/backward_references_hq.c b/c/enc/backward_references_hq.c
index 5651cae..61b7ff1 100644
--- a/c/enc/backward_references_hq.c
+++ b/c/enc/backward_references_hq.c
@@ -73,6 +73,14 @@
   return ZopfliNodeCopyLength(self) + (self->dcode_insert_length & 0x7FFFFFF);
 }
 
+/* Temporary data for ZopfliCostModelSetFromCommands. */
+typedef struct ZopfliCostModelArena {
+  uint32_t histogram_literal[BROTLI_NUM_LITERAL_SYMBOLS];
+  uint32_t histogram_cmd[BROTLI_NUM_COMMAND_SYMBOLS];
+  uint32_t histogram_dist[BROTLI_MAX_EFFECTIVE_DISTANCE_ALPHABET_SIZE];
+  float cost_literal[BROTLI_NUM_LITERAL_SYMBOLS];
+} ZopfliCostModelArena;
+
 /* Histogram based cost model for zopflification. */
 typedef struct ZopfliCostModel {
   /* The insert and copy length symbols. */
@@ -83,6 +91,12 @@
   float* literal_costs_;
   float min_cost_cmd_;
   size_t num_bytes_;
+
+  /* Temporary data. */
+  union {
+    size_t literal_histograms[3 * 256];
+    ZopfliCostModelArena arena;
+  };
 } ZopfliCostModel;
 
 static void InitZopfliCostModel(
@@ -139,18 +153,15 @@
                                            const Command* commands,
                                            size_t num_commands,
                                            size_t last_insert_len) {
-  uint32_t histogram_literal[BROTLI_NUM_LITERAL_SYMBOLS];
-  uint32_t histogram_cmd[BROTLI_NUM_COMMAND_SYMBOLS];
-  uint32_t histogram_dist[BROTLI_MAX_EFFECTIVE_DISTANCE_ALPHABET_SIZE];
-  float cost_literal[BROTLI_NUM_LITERAL_SYMBOLS];
+  ZopfliCostModelArena* arena = &self->arena;
   size_t pos = position - last_insert_len;
   float min_cost_cmd = kInfinity;
   size_t i;
   float* cost_cmd = self->cost_cmd_;
 
-  memset(histogram_literal, 0, sizeof(histogram_literal));
-  memset(histogram_cmd, 0, sizeof(histogram_cmd));
-  memset(histogram_dist, 0, sizeof(histogram_dist));
+  memset(arena->histogram_literal, 0, sizeof(arena->histogram_literal));
+  memset(arena->histogram_cmd, 0, sizeof(arena->histogram_cmd));
+  memset(arena->histogram_dist, 0, sizeof(arena->histogram_dist));
 
   for (i = 0; i < num_commands; i++) {
     size_t inslength = commands[i].insert_len_;
@@ -159,21 +170,21 @@
     size_t cmdcode = commands[i].cmd_prefix_;
     size_t j;
 
-    histogram_cmd[cmdcode]++;
-    if (cmdcode >= 128) histogram_dist[distcode]++;
+    arena->histogram_cmd[cmdcode]++;
+    if (cmdcode >= 128) arena->histogram_dist[distcode]++;
 
     for (j = 0; j < inslength; j++) {
-      histogram_literal[ringbuffer[(pos + j) & ringbuffer_mask]]++;
+      arena->histogram_literal[ringbuffer[(pos + j) & ringbuffer_mask]]++;
     }
 
     pos += inslength + copylength;
   }
 
-  SetCost(histogram_literal, BROTLI_NUM_LITERAL_SYMBOLS, BROTLI_TRUE,
-          cost_literal);
-  SetCost(histogram_cmd, BROTLI_NUM_COMMAND_SYMBOLS, BROTLI_FALSE,
+  SetCost(arena->histogram_literal, BROTLI_NUM_LITERAL_SYMBOLS, BROTLI_TRUE,
+          arena->cost_literal);
+  SetCost(arena->histogram_cmd, BROTLI_NUM_COMMAND_SYMBOLS, BROTLI_FALSE,
           cost_cmd);
-  SetCost(histogram_dist, self->distance_histogram_size, BROTLI_FALSE,
+  SetCost(arena->histogram_dist, self->distance_histogram_size, BROTLI_FALSE,
           self->cost_dist_);
 
   for (i = 0; i < BROTLI_NUM_COMMAND_SYMBOLS; ++i) {
@@ -188,7 +199,7 @@
     literal_costs[0] = 0.0;
     for (i = 0; i < num_bytes; ++i) {
       literal_carry +=
-          cost_literal[ringbuffer[(position + i) & ringbuffer_mask]];
+          arena->cost_literal[ringbuffer[(position + i) & ringbuffer_mask]];
       literal_costs[i + 1] = literal_costs[i] + literal_carry;
       literal_carry -= literal_costs[i + 1] - literal_costs[i];
     }
@@ -206,7 +217,8 @@
   size_t num_bytes = self->num_bytes_;
   size_t i;
   BrotliEstimateBitCostsForLiterals(position, num_bytes, ringbuffer_mask,
-                                    ringbuffer, &literal_costs[1]);
+                                    ringbuffer, self->literal_histograms,
+                                    &literal_costs[1]);
   literal_costs[0] = 0.0;
   for (i = 0; i < num_bytes; ++i) {
     literal_carry += literal_costs[i + 1];
@@ -661,21 +673,25 @@
   const size_t stream_offset = params->stream_offset;
   const size_t max_backward_limit = BROTLI_MAX_BACKWARD_LIMIT(params->lgwin);
   const size_t max_zopfli_len = MaxZopfliLen(params);
-  ZopfliCostModel model;
   StartPosQueue queue;
-  BackwardMatch matches[2 * (MAX_NUM_MATCHES_H10 + 64)];
+  BackwardMatch* BROTLI_RESTRICT matches =
+      BROTLI_ALLOC(m, BackwardMatch, 2 * (MAX_NUM_MATCHES_H10 + 64));
   const size_t store_end = num_bytes >= StoreLookaheadH10() ?
       position + num_bytes - StoreLookaheadH10() + 1 : position;
   size_t i;
   size_t gap = 0;
   size_t lz_matches_offset = 0;
+  ZopfliCostModel* model = BROTLI_ALLOC(m, ZopfliCostModel, 1);
   BROTLI_UNUSED(literal_context_lut);
+  if (BROTLI_IS_OOM(m) || BROTLI_IS_NULL(model) || BROTLI_IS_NULL(matches)) {
+    return 0;
+  }
   nodes[0].length = 0;
   nodes[0].u.cost = 0;
-  InitZopfliCostModel(m, &model, &params->dist, num_bytes);
+  InitZopfliCostModel(m, model, &params->dist, num_bytes);
   if (BROTLI_IS_OOM(m)) return 0;
   ZopfliCostModelSetFromLiteralCosts(
-      &model, position, ringbuffer, ringbuffer_mask);
+      model, position, ringbuffer, ringbuffer_mask);
   InitStartPosQueue(&queue);
   for (i = 0; i + HashTypeLengthH10() - 1 < num_bytes; i++) {
     const size_t pos = position + i;
@@ -694,7 +710,7 @@
       num_matches = 1;
     }
     skip = UpdateNodes(num_bytes, position, i, ringbuffer, ringbuffer_mask,
-        params, max_backward_limit, dist_cache, num_matches, matches, &model,
+        params, max_backward_limit, dist_cache, num_matches, matches, model,
         &queue, nodes);
     if (skip < BROTLI_LONG_COPY_QUICK_STEP) skip = 0;
     if (num_matches == 1 && BackwardMatchLength(&matches[0]) > max_zopfli_len) {
@@ -710,12 +726,14 @@
         i++;
         if (i + HashTypeLengthH10() - 1 >= num_bytes) break;
         EvaluateNode(position + stream_offset, i, max_backward_limit, gap,
-            dist_cache, &model, &queue, nodes);
+            dist_cache, model, &queue, nodes);
         skip--;
       }
     }
   }
-  CleanupZopfliCostModel(m, &model);
+  CleanupZopfliCostModel(m, model);
+  BROTLI_FREE(m, model);
+  BROTLI_FREE(m, matches);
   return ComputeShortestPathFromNodes(num_bytes, nodes);
 }
 
@@ -753,14 +771,14 @@
   size_t orig_last_insert_len;
   int orig_dist_cache[4];
   size_t orig_num_commands;
-  ZopfliCostModel model;
+  ZopfliCostModel* model = BROTLI_ALLOC(m, ZopfliCostModel, 1);
   ZopfliNode* nodes;
   BackwardMatch* matches = BROTLI_ALLOC(m, BackwardMatch, matches_size);
   size_t gap = 0;
   size_t shadow_matches = 0;
   BROTLI_UNUSED(literal_context_lut);
-  if (BROTLI_IS_OOM(m) || BROTLI_IS_NULL(num_matches) ||
-      BROTLI_IS_NULL(matches)) {
+  if (BROTLI_IS_OOM(m) || BROTLI_IS_NULL(model) ||
+      BROTLI_IS_NULL(num_matches) || BROTLI_IS_NULL(matches)) {
     return;
   }
   for (i = 0; i + HashTypeLengthH10() - 1 < num_bytes; ++i) {
@@ -810,15 +828,15 @@
   orig_num_commands = *num_commands;
   nodes = BROTLI_ALLOC(m, ZopfliNode, num_bytes + 1);
   if (BROTLI_IS_OOM(m) || BROTLI_IS_NULL(nodes)) return;
-  InitZopfliCostModel(m, &model, &params->dist, num_bytes);
+  InitZopfliCostModel(m, model, &params->dist, num_bytes);
   if (BROTLI_IS_OOM(m)) return;
   for (i = 0; i < 2; i++) {
     BrotliInitZopfliNodes(nodes, num_bytes + 1);
     if (i == 0) {
       ZopfliCostModelSetFromLiteralCosts(
-          &model, position, ringbuffer, ringbuffer_mask);
+          model, position, ringbuffer, ringbuffer_mask);
     } else {
-      ZopfliCostModelSetFromCommands(&model, position, ringbuffer,
+      ZopfliCostModelSetFromCommands(model, position, ringbuffer,
           ringbuffer_mask, commands, *num_commands - orig_num_commands,
           orig_last_insert_len);
     }
@@ -827,12 +845,13 @@
     *last_insert_len = orig_last_insert_len;
     memcpy(dist_cache, orig_dist_cache, 4 * sizeof(dist_cache[0]));
     *num_commands += ZopfliIterate(num_bytes, position, ringbuffer,
-        ringbuffer_mask, params, gap, dist_cache, &model, num_matches, matches,
+        ringbuffer_mask, params, gap, dist_cache, model, num_matches, matches,
         nodes);
     BrotliZopfliCreateCommands(num_bytes, position, nodes, dist_cache,
         last_insert_len, params, commands, num_literals);
   }
-  CleanupZopfliCostModel(m, &model);
+  CleanupZopfliCostModel(m, model);
+  BROTLI_FREE(m, model);
   BROTLI_FREE(m, nodes);
   BROTLI_FREE(m, matches);
   BROTLI_FREE(m, num_matches);
diff --git a/c/enc/block_splitter_inc.h b/c/enc/block_splitter_inc.h
index 169348d..0abc2f5 100644
--- a/c/enc/block_splitter_inc.h
+++ b/c/enc/block_splitter_inc.h
@@ -46,17 +46,17 @@
 static void FN(RefineEntropyCodes)(const DataType* data, size_t length,
                                    size_t stride,
                                    size_t num_histograms,
-                                   HistogramType* histograms) {
+                                   HistogramType* histograms,
+                                   HistogramType* tmp) {
   size_t iters =
       kIterMulForRefining * length / stride + kMinItersForRefining;
   uint32_t seed = 7;
   size_t iter;
   iters = ((iters + num_histograms - 1) / num_histograms) * num_histograms;
   for (iter = 0; iter < iters; ++iter) {
-    HistogramType sample;
-    FN(HistogramClear)(&sample);
-    FN(RandomSample)(&seed, data, length, stride, &sample);
-    FN(HistogramAddHistogram)(&histograms[iter % num_histograms], &sample);
+    FN(HistogramClear)(tmp);
+    FN(RandomSample)(&seed, data, length, stride, tmp);
+    FN(HistogramAddHistogram)(&histograms[iter % num_histograms], tmp);
   }
 }
 
@@ -204,7 +204,8 @@
                               uint8_t* block_ids,
                               BlockSplit* split) {
   uint32_t* histogram_symbols = BROTLI_ALLOC(m, uint32_t, num_blocks);
-  uint32_t* block_lengths = BROTLI_ALLOC(m, uint32_t, num_blocks);
+  uint32_t* u32 =
+      BROTLI_ALLOC(m, uint32_t, num_blocks + 4 * HISTOGRAMS_PER_BATCH);
   const size_t expected_num_clusters = CLUSTERS_PER_BATCH *
       (num_blocks + HISTOGRAMS_PER_BATCH - 1) / HISTOGRAMS_PER_BATCH;
   size_t all_histograms_size = 0;
@@ -227,19 +228,23 @@
   static const uint32_t kInvalidIndex = BROTLI_UINT32_MAX;
   uint32_t* new_index;
   size_t i;
-  uint32_t sizes[HISTOGRAMS_PER_BATCH] = { 0 };
-  uint32_t new_clusters[HISTOGRAMS_PER_BATCH] = { 0 };
-  uint32_t symbols[HISTOGRAMS_PER_BATCH] = { 0 };
-  uint32_t remap[HISTOGRAMS_PER_BATCH] = { 0 };
+  uint32_t* BROTLI_RESTRICT const sizes = u32 + 0 * HISTOGRAMS_PER_BATCH;
+  uint32_t* BROTLI_RESTRICT const new_clusters = u32 + 1 * HISTOGRAMS_PER_BATCH;
+  uint32_t* BROTLI_RESTRICT const symbols = u32 + 2 * HISTOGRAMS_PER_BATCH;
+  uint32_t* BROTLI_RESTRICT const remap = u32 + 3 * HISTOGRAMS_PER_BATCH;
+  uint32_t* BROTLI_RESTRICT const block_lengths =
+      u32 + 4 * HISTOGRAMS_PER_BATCH;
+  /* TODO: move to arena? */
+  HistogramType* tmp = BROTLI_ALLOC(m, HistogramType, 2);
 
   if (BROTLI_IS_OOM(m) || BROTLI_IS_NULL(histogram_symbols) ||
-      BROTLI_IS_NULL(block_lengths) || BROTLI_IS_NULL(all_histograms) ||
+      BROTLI_IS_NULL(u32) || BROTLI_IS_NULL(all_histograms) ||
       BROTLI_IS_NULL(cluster_size) || BROTLI_IS_NULL(histograms) ||
-      BROTLI_IS_NULL(pairs)) {
+      BROTLI_IS_NULL(pairs) || BROTLI_IS_NULL(tmp)) {
     return;
   }
 
-  memset(block_lengths, 0, num_blocks * sizeof(uint32_t));
+  memset(u32, 0, (num_blocks + 4 * HISTOGRAMS_PER_BATCH) * sizeof(uint32_t));
 
   /* Calculate block lengths (convert repeating values -> series length). */
   {
@@ -273,7 +278,7 @@
       sizes[j] = 1;
     }
     num_new_clusters = FN(BrotliHistogramCombine)(
-        histograms, sizes, symbols, new_clusters, pairs, num_to_combine,
+        histograms, tmp, sizes, symbols, new_clusters, pairs, num_to_combine,
         num_to_combine, HISTOGRAMS_PER_BATCH, max_num_pairs);
     BROTLI_ENSURE_CAPACITY(m, HistogramType, all_histograms,
         all_histograms_capacity, all_histograms_size + num_new_clusters);
@@ -308,7 +313,7 @@
     clusters[i] = (uint32_t)i;
   }
   num_final_clusters = FN(BrotliHistogramCombine)(
-      all_histograms, cluster_size, histogram_symbols, clusters, pairs,
+      all_histograms, tmp, cluster_size, histogram_symbols, clusters, pairs,
       num_clusters, num_blocks, BROTLI_MAX_NUMBER_OF_BLOCK_TYPES,
       max_num_pairs);
   BROTLI_FREE(m, pairs);
@@ -322,22 +327,21 @@
   {
     uint32_t next_index = 0;
     for (i = 0; i < num_blocks; ++i) {
-      HistogramType histo;
       size_t j;
       uint32_t best_out;
       double best_bits;
-      FN(HistogramClear)(&histo);
+      FN(HistogramClear)(tmp);
       for (j = 0; j < block_lengths[i]; ++j) {
-        FN(HistogramAdd)(&histo, data[pos++]);
+        FN(HistogramAdd)(tmp, data[pos++]);
       }
       /* Among equally good histograms prefer last used. */
       /* TODO: should we give a block-switch discount here? */
       best_out = (i == 0) ? histogram_symbols[0] : histogram_symbols[i - 1];
-      best_bits =
-          FN(BrotliHistogramBitCostDistance)(&histo, &all_histograms[best_out]);
+      best_bits = FN(BrotliHistogramBitCostDistance)(
+          tmp, &all_histograms[best_out], tmp + 1);
       for (j = 0; j < num_final_clusters; ++j) {
         const double cur_bits = FN(BrotliHistogramBitCostDistance)(
-            &histo, &all_histograms[clusters[j]]);
+            tmp, &all_histograms[clusters[j]], tmp + 1);
         if (cur_bits < best_bits) {
           best_bits = cur_bits;
           best_out = clusters[j];
@@ -349,6 +353,7 @@
       }
     }
   }
+  BROTLI_FREE(m, tmp);
   BROTLI_FREE(m, clusters);
   BROTLI_FREE(m, all_histograms);
   BROTLI_ENSURE_CAPACITY(
@@ -379,7 +384,7 @@
     split->num_types = (size_t)max_type + 1;
   }
   BROTLI_FREE(m, new_index);
-  BROTLI_FREE(m, block_lengths);
+  BROTLI_FREE(m, u32);
   BROTLI_FREE(m, histogram_symbols);
 }
 
@@ -399,6 +404,7 @@
                                 BlockSplit* split) {
   const size_t data_size = FN(HistogramDataSize)();
   HistogramType* histograms;
+  HistogramType* tmp;
   /* Calculate number of histograms; initial estimate is one histogram per
    * specified amount of symbols; however, this value is capped. */
   size_t num_histograms = length / symbols_per_histogram + 1;
@@ -424,7 +430,8 @@
     split->num_blocks++;
     return;
   }
-  histograms = BROTLI_ALLOC(m, HistogramType, num_histograms);
+  histograms = BROTLI_ALLOC(m, HistogramType, num_histograms + 1);
+  tmp = histograms + num_histograms;
   if (BROTLI_IS_OOM(m) || BROTLI_IS_NULL(histograms)) return;
   /* Find good entropy codes. */
   FN(InitialEntropyCodes)(data, length,
@@ -432,7 +439,7 @@
                           num_histograms, histograms);
   FN(RefineEntropyCodes)(data, length,
                          sampling_stride_length,
-                         num_histograms, histograms);
+                         num_histograms, histograms, tmp);
   {
     /* Find a good path through literals with the good entropy codes. */
     uint8_t* block_ids = BROTLI_ALLOC(m, uint8_t, length);
diff --git a/c/enc/brotli_bit_stream.c b/c/enc/brotli_bit_stream.c
index 9348a97..cbab3e2 100644
--- a/c/enc/brotli_bit_stream.c
+++ b/c/enc/brotli_bit_stream.c
@@ -286,6 +286,7 @@
   /* Write the Huffman tree into the brotli-representation.
      The command alphabet is the largest, so this allocation will fit all
      alphabets. */
+  /* TODO: fix me */
   uint8_t huffman_tree[BROTLI_NUM_COMMAND_SYMBOLS];
   uint8_t huffman_tree_extra_bits[BROTLI_NUM_COMMAND_SYMBOLS];
   size_t huffman_tree_size = 0;
@@ -400,7 +401,7 @@
   return TO_BROTLI_BOOL(v0->total_count_ < v1->total_count_);
 }
 
-void BrotliBuildAndStoreHuffmanTreeFast(MemoryManager* m,
+void BrotliBuildAndStoreHuffmanTreeFast(HuffmanTree* tree,
                                         const uint32_t* histogram,
                                         const size_t histogram_total,
                                         const size_t max_bits,
@@ -432,10 +433,7 @@
 
   memset(depth, 0, length * sizeof(depth[0]));
   {
-    const size_t max_tree_size = 2 * length + 1;
-    HuffmanTree* tree = BROTLI_ALLOC(m, HuffmanTree, max_tree_size);
     uint32_t count_limit;
-    if (BROTLI_IS_OOM(m) || BROTLI_IS_NULL(tree)) return;
     for (count_limit = 1; ; count_limit *= 2) {
       HuffmanTree* node = tree;
       size_t l;
@@ -500,7 +498,6 @@
         }
       }
     }
-    BROTLI_FREE(m, tree);
   }
   BrotliConvertBitDepthsToSymbols(depth, length, bits);
   if (count <= 4) {
@@ -677,7 +674,14 @@
 
 #define SYMBOL_BITS 9
 
+typedef struct EncodeContextMapArena {
+  uint32_t histogram[BROTLI_MAX_CONTEXT_MAP_SYMBOLS];
+  uint8_t depths[BROTLI_MAX_CONTEXT_MAP_SYMBOLS];
+  uint16_t bits[BROTLI_MAX_CONTEXT_MAP_SYMBOLS];
+} EncodeContextMapArena;
+
 static void EncodeContextMap(MemoryManager* m,
+                             EncodeContextMapArena* arena,
                              const uint32_t* context_map,
                              size_t context_map_size,
                              size_t num_clusters,
@@ -687,10 +691,10 @@
   uint32_t* rle_symbols;
   uint32_t max_run_length_prefix = 6;
   size_t num_rle_symbols = 0;
-  uint32_t histogram[BROTLI_MAX_CONTEXT_MAP_SYMBOLS];
+  uint32_t* BROTLI_RESTRICT const histogram = arena->histogram;
   static const uint32_t kSymbolMask = (1u << SYMBOL_BITS) - 1u;
-  uint8_t depths[BROTLI_MAX_CONTEXT_MAP_SYMBOLS];
-  uint16_t bits[BROTLI_MAX_CONTEXT_MAP_SYMBOLS];
+  uint8_t* BROTLI_RESTRICT const depths = arena->depths;
+  uint16_t* BROTLI_RESTRICT const bits = arena->bits;
 
   StoreVarLenUint8(num_clusters - 1, storage_ix, storage);
 
@@ -703,7 +707,7 @@
   MoveToFrontTransform(context_map, context_map_size, rle_symbols);
   RunLengthCodeZeros(context_map_size, rle_symbols,
                      &num_rle_symbols, &max_run_length_prefix);
-  memset(histogram, 0, sizeof(histogram));
+  memset(histogram, 0, sizeof(arena->histogram));
   for (i = 0; i < num_rle_symbols; ++i) {
     ++histogram[rle_symbols[i] & kSymbolMask];
   }
@@ -787,7 +791,8 @@
 }
 
 /* Stores a context map where the histogram type is always the block type. */
-static void StoreTrivialContextMap(size_t num_types,
+static void StoreTrivialContextMap(EncodeContextMapArena* arena,
+                                   size_t num_types,
                                    size_t context_bits,
                                    HuffmanTree* tree,
                                    size_t* storage_ix,
@@ -797,9 +802,9 @@
     size_t repeat_code = context_bits - 1u;
     size_t repeat_bits = (1u << repeat_code) - 1u;
     size_t alphabet_size = num_types + repeat_code;
-    uint32_t histogram[BROTLI_MAX_CONTEXT_MAP_SYMBOLS];
-    uint8_t depths[BROTLI_MAX_CONTEXT_MAP_SYMBOLS];
-    uint16_t bits[BROTLI_MAX_CONTEXT_MAP_SYMBOLS];
+    uint32_t* BROTLI_RESTRICT const histogram = arena->histogram;
+    uint8_t* BROTLI_RESTRICT const depths = arena->depths;
+    uint16_t* BROTLI_RESTRICT const bits = arena->bits;
     size_t i;
     memset(histogram, 0, alphabet_size * sizeof(histogram[0]));
     /* Write RLEMAX. */
@@ -932,6 +937,13 @@
   storage[*storage_ix >> 3] = 0;
 }
 
+typedef struct StoreMetablockArena {
+  BlockEncoder literal_enc;
+  BlockEncoder command_enc;
+  BlockEncoder distance_enc;
+  EncodeContextMapArena context_map_arena;
+} StoreMetablockArena;
+
 void BrotliStoreMetaBlock(MemoryManager* m,
     const uint8_t* input, size_t start_pos, size_t length, size_t mask,
     uint8_t prev_byte, uint8_t prev_byte2, BROTLI_BOOL is_last,
@@ -945,9 +957,10 @@
   uint32_t num_effective_distance_symbols = params->dist.alphabet_size_limit;
   HuffmanTree* tree;
   ContextLut literal_context_lut = BROTLI_CONTEXT_LUT(literal_context_mode);
-  BlockEncoder literal_enc;
-  BlockEncoder command_enc;
-  BlockEncoder distance_enc;
+  StoreMetablockArena* arena = NULL;
+  BlockEncoder* literal_enc = NULL;
+  BlockEncoder* command_enc = NULL;
+  BlockEncoder* distance_enc = NULL;
   const BrotliDistanceParams* dist = &params->dist;
   BROTLI_DCHECK(
       num_effective_distance_symbols <= BROTLI_NUM_HISTOGRAM_DISTANCE_SYMBOLS);
@@ -955,21 +968,24 @@
   StoreCompressedMetaBlockHeader(is_last, length, storage_ix, storage);
 
   tree = BROTLI_ALLOC(m, HuffmanTree, MAX_HUFFMAN_TREE_SIZE);
-  if (BROTLI_IS_OOM(m) || BROTLI_IS_NULL(tree)) return;
-  InitBlockEncoder(&literal_enc, BROTLI_NUM_LITERAL_SYMBOLS,
+  arena = BROTLI_ALLOC(m, StoreMetablockArena, 1);
+  if (BROTLI_IS_OOM(m) || BROTLI_IS_NULL(tree) || BROTLI_IS_NULL(arena)) return;
+  literal_enc = &arena->literal_enc;
+  command_enc = &arena->command_enc;
+  distance_enc = &arena->distance_enc;
+  InitBlockEncoder(literal_enc, BROTLI_NUM_LITERAL_SYMBOLS,
       mb->literal_split.num_types, mb->literal_split.types,
       mb->literal_split.lengths, mb->literal_split.num_blocks);
-  InitBlockEncoder(&command_enc, BROTLI_NUM_COMMAND_SYMBOLS,
+  InitBlockEncoder(command_enc, BROTLI_NUM_COMMAND_SYMBOLS,
       mb->command_split.num_types, mb->command_split.types,
       mb->command_split.lengths, mb->command_split.num_blocks);
-  InitBlockEncoder(&distance_enc, num_effective_distance_symbols,
+  InitBlockEncoder(distance_enc, num_effective_distance_symbols,
       mb->distance_split.num_types, mb->distance_split.types,
       mb->distance_split.lengths, mb->distance_split.num_blocks);
 
-  BuildAndStoreBlockSwitchEntropyCodes(&literal_enc, tree, storage_ix, storage);
-  BuildAndStoreBlockSwitchEntropyCodes(&command_enc, tree, storage_ix, storage);
-  BuildAndStoreBlockSwitchEntropyCodes(
-      &distance_enc, tree, storage_ix, storage);
+  BuildAndStoreBlockSwitchEntropyCodes(literal_enc, tree, storage_ix, storage);
+  BuildAndStoreBlockSwitchEntropyCodes(command_enc, tree, storage_ix, storage);
+  BuildAndStoreBlockSwitchEntropyCodes(distance_enc, tree, storage_ix, storage);
 
   BrotliWriteBits(2, dist->distance_postfix_bits, storage_ix, storage);
   BrotliWriteBits(
@@ -980,34 +996,36 @@
   }
 
   if (mb->literal_context_map_size == 0) {
-    StoreTrivialContextMap(mb->literal_histograms_size,
+    StoreTrivialContextMap(
+        &arena->context_map_arena, mb->literal_histograms_size,
         BROTLI_LITERAL_CONTEXT_BITS, tree, storage_ix, storage);
   } else {
-    EncodeContextMap(m,
+    EncodeContextMap(m, &arena->context_map_arena,
         mb->literal_context_map, mb->literal_context_map_size,
         mb->literal_histograms_size, tree, storage_ix, storage);
     if (BROTLI_IS_OOM(m)) return;
   }
 
   if (mb->distance_context_map_size == 0) {
-    StoreTrivialContextMap(mb->distance_histograms_size,
+    StoreTrivialContextMap(
+        &arena->context_map_arena, mb->distance_histograms_size,
         BROTLI_DISTANCE_CONTEXT_BITS, tree, storage_ix, storage);
   } else {
-    EncodeContextMap(m,
+    EncodeContextMap(m, &arena->context_map_arena,
         mb->distance_context_map, mb->distance_context_map_size,
         mb->distance_histograms_size, tree, storage_ix, storage);
     if (BROTLI_IS_OOM(m)) return;
   }
 
-  BuildAndStoreEntropyCodesLiteral(m, &literal_enc, mb->literal_histograms,
+  BuildAndStoreEntropyCodesLiteral(m, literal_enc, mb->literal_histograms,
       mb->literal_histograms_size, BROTLI_NUM_LITERAL_SYMBOLS, tree,
       storage_ix, storage);
   if (BROTLI_IS_OOM(m)) return;
-  BuildAndStoreEntropyCodesCommand(m, &command_enc, mb->command_histograms,
+  BuildAndStoreEntropyCodesCommand(m, command_enc, mb->command_histograms,
       mb->command_histograms_size, BROTLI_NUM_COMMAND_SYMBOLS, tree,
       storage_ix, storage);
   if (BROTLI_IS_OOM(m)) return;
-  BuildAndStoreEntropyCodesDistance(m, &distance_enc, mb->distance_histograms,
+  BuildAndStoreEntropyCodesDistance(m, distance_enc, mb->distance_histograms,
       mb->distance_histograms_size, num_distance_symbols, tree,
       storage_ix, storage);
   if (BROTLI_IS_OOM(m)) return;
@@ -1016,12 +1034,12 @@
   for (i = 0; i < n_commands; ++i) {
     const Command cmd = commands[i];
     size_t cmd_code = cmd.cmd_prefix_;
-    StoreSymbol(&command_enc, cmd_code, storage_ix, storage);
+    StoreSymbol(command_enc, cmd_code, storage_ix, storage);
     StoreCommandExtra(&cmd, storage_ix, storage);
     if (mb->literal_context_map_size == 0) {
       size_t j;
       for (j = cmd.insert_len_; j != 0; --j) {
-        StoreSymbol(&literal_enc, input[pos & mask], storage_ix, storage);
+        StoreSymbol(literal_enc, input[pos & mask], storage_ix, storage);
         ++pos;
       }
     } else {
@@ -1030,7 +1048,7 @@
         size_t context =
             BROTLI_CONTEXT(prev_byte, prev_byte2, literal_context_lut);
         uint8_t literal = input[pos & mask];
-        StoreSymbolWithContext(&literal_enc, literal, context,
+        StoreSymbolWithContext(literal_enc, literal, context,
             mb->literal_context_map, storage_ix, storage,
             BROTLI_LITERAL_CONTEXT_BITS);
         prev_byte2 = prev_byte;
@@ -1047,10 +1065,10 @@
         uint32_t distnumextra = cmd.dist_prefix_ >> 10;
         uint64_t distextra = cmd.dist_extra_;
         if (mb->distance_context_map_size == 0) {
-          StoreSymbol(&distance_enc, dist_code, storage_ix, storage);
+          StoreSymbol(distance_enc, dist_code, storage_ix, storage);
         } else {
           size_t context = CommandDistanceContext(&cmd);
-          StoreSymbolWithContext(&distance_enc, dist_code, context,
+          StoreSymbolWithContext(distance_enc, dist_code, context,
               mb->distance_context_map, storage_ix, storage,
               BROTLI_DISTANCE_CONTEXT_BITS);
         }
@@ -1058,9 +1076,10 @@
       }
     }
   }
-  CleanupBlockEncoder(m, &distance_enc);
-  CleanupBlockEncoder(m, &command_enc);
-  CleanupBlockEncoder(m, &literal_enc);
+  CleanupBlockEncoder(m, distance_enc);
+  CleanupBlockEncoder(m, command_enc);
+  CleanupBlockEncoder(m, literal_enc);
+  BROTLI_FREE(m, arena);
   if (is_last) {
     JumpToByteBoundary(storage_ix, storage);
   }
@@ -1131,54 +1150,60 @@
   }
 }
 
-void BrotliStoreMetaBlockTrivial(MemoryManager* m,
-    const uint8_t* input, size_t start_pos, size_t length, size_t mask,
-    BROTLI_BOOL is_last, const BrotliEncoderParams* params,
-    const Command* commands, size_t n_commands,
-    size_t* storage_ix, uint8_t* storage) {
+/* TODO: pull alloc/dealloc to caller? */
+typedef struct MetablockArena {
   HistogramLiteral lit_histo;
   HistogramCommand cmd_histo;
   HistogramDistance dist_histo;
+  /* TODO: merge bits and depth? */
   uint8_t lit_depth[BROTLI_NUM_LITERAL_SYMBOLS];
   uint16_t lit_bits[BROTLI_NUM_LITERAL_SYMBOLS];
   uint8_t cmd_depth[BROTLI_NUM_COMMAND_SYMBOLS];
   uint16_t cmd_bits[BROTLI_NUM_COMMAND_SYMBOLS];
   uint8_t dist_depth[MAX_SIMPLE_DISTANCE_ALPHABET_SIZE];
   uint16_t dist_bits[MAX_SIMPLE_DISTANCE_ALPHABET_SIZE];
-  HuffmanTree* tree;
+  HuffmanTree tree[MAX_HUFFMAN_TREE_SIZE];
+} MetablockArena;
+
+void BrotliStoreMetaBlockTrivial(MemoryManager* m,
+    const uint8_t* input, size_t start_pos, size_t length, size_t mask,
+    BROTLI_BOOL is_last, const BrotliEncoderParams* params,
+    const Command* commands, size_t n_commands,
+    size_t* storage_ix, uint8_t* storage) {
+  MetablockArena* arena = BROTLI_ALLOC(m, MetablockArena, 1);
   uint32_t num_distance_symbols = params->dist.alphabet_size_max;
+  if (BROTLI_IS_OOM(m) || BROTLI_IS_NULL(arena)) return;
 
   StoreCompressedMetaBlockHeader(is_last, length, storage_ix, storage);
 
-  HistogramClearLiteral(&lit_histo);
-  HistogramClearCommand(&cmd_histo);
-  HistogramClearDistance(&dist_histo);
+  HistogramClearLiteral(&arena->lit_histo);
+  HistogramClearCommand(&arena->cmd_histo);
+  HistogramClearDistance(&arena->dist_histo);
 
   BuildHistograms(input, start_pos, mask, commands, n_commands,
-                  &lit_histo, &cmd_histo, &dist_histo);
+                  &arena->lit_histo, &arena->cmd_histo, &arena->dist_histo);
 
   BrotliWriteBits(13, 0, storage_ix, storage);
 
-  tree = BROTLI_ALLOC(m, HuffmanTree, MAX_HUFFMAN_TREE_SIZE);
-  if (BROTLI_IS_OOM(m) || BROTLI_IS_NULL(tree)) return;
-  BuildAndStoreHuffmanTree(lit_histo.data_, BROTLI_NUM_LITERAL_SYMBOLS,
-                           BROTLI_NUM_LITERAL_SYMBOLS, tree,
-                           lit_depth, lit_bits,
+  BuildAndStoreHuffmanTree(arena->lit_histo.data_, BROTLI_NUM_LITERAL_SYMBOLS,
+                           BROTLI_NUM_LITERAL_SYMBOLS, arena->tree,
+                           arena->lit_depth, arena->lit_bits,
                            storage_ix, storage);
-  BuildAndStoreHuffmanTree(cmd_histo.data_, BROTLI_NUM_COMMAND_SYMBOLS,
-                           BROTLI_NUM_COMMAND_SYMBOLS, tree,
-                           cmd_depth, cmd_bits,
+  BuildAndStoreHuffmanTree(arena->cmd_histo.data_, BROTLI_NUM_COMMAND_SYMBOLS,
+                           BROTLI_NUM_COMMAND_SYMBOLS, arena->tree,
+                           arena->cmd_depth, arena->cmd_bits,
                            storage_ix, storage);
-  BuildAndStoreHuffmanTree(dist_histo.data_, MAX_SIMPLE_DISTANCE_ALPHABET_SIZE,
-                           num_distance_symbols, tree,
-                           dist_depth, dist_bits,
+  BuildAndStoreHuffmanTree(arena->dist_histo.data_,
+                           MAX_SIMPLE_DISTANCE_ALPHABET_SIZE,
+                           num_distance_symbols, arena->tree,
+                           arena->dist_depth, arena->dist_bits,
                            storage_ix, storage);
-  BROTLI_FREE(m, tree);
   StoreDataWithHuffmanCodes(input, start_pos, mask, commands,
-                            n_commands, lit_depth, lit_bits,
-                            cmd_depth, cmd_bits,
-                            dist_depth, dist_bits,
+                            n_commands, arena->lit_depth, arena->lit_bits,
+                            arena->cmd_depth, arena->cmd_bits,
+                            arena->dist_depth, arena->dist_bits,
                             storage_ix, storage);
+  BROTLI_FREE(m, arena);
   if (is_last) {
     JumpToByteBoundary(storage_ix, storage);
   }
@@ -1189,9 +1214,11 @@
     BROTLI_BOOL is_last, const BrotliEncoderParams* params,
     const Command* commands, size_t n_commands,
     size_t* storage_ix, uint8_t* storage) {
+  MetablockArena* arena = BROTLI_ALLOC(m, MetablockArena, 1);
   uint32_t num_distance_symbols = params->dist.alphabet_size_max;
   uint32_t distance_alphabet_bits =
       Log2FloorNonZero(num_distance_symbols - 1) + 1;
+  if (BROTLI_IS_OOM(m) || BROTLI_IS_NULL(arena)) return;
 
   StoreCompressedMetaBlockHeader(is_last, length, storage_ix, storage);
 
@@ -1202,8 +1229,6 @@
     size_t pos = start_pos;
     size_t num_literals = 0;
     size_t i;
-    uint8_t lit_depth[BROTLI_NUM_LITERAL_SYMBOLS];
-    uint16_t lit_bits[BROTLI_NUM_LITERAL_SYMBOLS];
     for (i = 0; i < n_commands; ++i) {
       const Command cmd = commands[i];
       size_t j;
@@ -1214,61 +1239,50 @@
       num_literals += cmd.insert_len_;
       pos += CommandCopyLen(&cmd);
     }
-    BrotliBuildAndStoreHuffmanTreeFast(m, histogram, num_literals,
+    BrotliBuildAndStoreHuffmanTreeFast(arena->tree, histogram, num_literals,
                                        /* max_bits = */ 8,
-                                       lit_depth, lit_bits,
+                                       arena->lit_depth, arena->lit_bits,
                                        storage_ix, storage);
-    if (BROTLI_IS_OOM(m)) return;
     StoreStaticCommandHuffmanTree(storage_ix, storage);
     StoreStaticDistanceHuffmanTree(storage_ix, storage);
     StoreDataWithHuffmanCodes(input, start_pos, mask, commands,
-                              n_commands, lit_depth, lit_bits,
+                              n_commands, arena->lit_depth, arena->lit_bits,
                               kStaticCommandCodeDepth,
                               kStaticCommandCodeBits,
                               kStaticDistanceCodeDepth,
                               kStaticDistanceCodeBits,
                               storage_ix, storage);
   } else {
-    HistogramLiteral lit_histo;
-    HistogramCommand cmd_histo;
-    HistogramDistance dist_histo;
-    uint8_t lit_depth[BROTLI_NUM_LITERAL_SYMBOLS];
-    uint16_t lit_bits[BROTLI_NUM_LITERAL_SYMBOLS];
-    uint8_t cmd_depth[BROTLI_NUM_COMMAND_SYMBOLS];
-    uint16_t cmd_bits[BROTLI_NUM_COMMAND_SYMBOLS];
-    uint8_t dist_depth[MAX_SIMPLE_DISTANCE_ALPHABET_SIZE];
-    uint16_t dist_bits[MAX_SIMPLE_DISTANCE_ALPHABET_SIZE];
-    HistogramClearLiteral(&lit_histo);
-    HistogramClearCommand(&cmd_histo);
-    HistogramClearDistance(&dist_histo);
+    HistogramClearLiteral(&arena->lit_histo);
+    HistogramClearCommand(&arena->cmd_histo);
+    HistogramClearDistance(&arena->dist_histo);
     BuildHistograms(input, start_pos, mask, commands, n_commands,
-                    &lit_histo, &cmd_histo, &dist_histo);
-    BrotliBuildAndStoreHuffmanTreeFast(m, lit_histo.data_,
-                                       lit_histo.total_count_,
+                    &arena->lit_histo, &arena->cmd_histo, &arena->dist_histo);
+    BrotliBuildAndStoreHuffmanTreeFast(arena->tree, arena->lit_histo.data_,
+                                       arena->lit_histo.total_count_,
                                        /* max_bits = */ 8,
-                                       lit_depth, lit_bits,
+                                       arena->lit_depth, arena->lit_bits,
                                        storage_ix, storage);
-    if (BROTLI_IS_OOM(m)) return;
-    BrotliBuildAndStoreHuffmanTreeFast(m, cmd_histo.data_,
-                                       cmd_histo.total_count_,
+    BrotliBuildAndStoreHuffmanTreeFast(arena->tree, arena->cmd_histo.data_,
+                                       arena->cmd_histo.total_count_,
                                        /* max_bits = */ 10,
-                                       cmd_depth, cmd_bits,
+                                       arena->cmd_depth, arena->cmd_bits,
                                        storage_ix, storage);
-    if (BROTLI_IS_OOM(m)) return;
-    BrotliBuildAndStoreHuffmanTreeFast(m, dist_histo.data_,
-                                       dist_histo.total_count_,
+    BrotliBuildAndStoreHuffmanTreeFast(arena->tree, arena->dist_histo.data_,
+                                       arena->dist_histo.total_count_,
                                        /* max_bits = */
                                        distance_alphabet_bits,
-                                       dist_depth, dist_bits,
+                                       arena->dist_depth, arena->dist_bits,
                                        storage_ix, storage);
-    if (BROTLI_IS_OOM(m)) return;
     StoreDataWithHuffmanCodes(input, start_pos, mask, commands,
-                              n_commands, lit_depth, lit_bits,
-                              cmd_depth, cmd_bits,
-                              dist_depth, dist_bits,
+                              n_commands, arena->lit_depth, arena->lit_bits,
+                              arena->cmd_depth, arena->cmd_bits,
+                              arena->dist_depth, arena->dist_bits,
                               storage_ix, storage);
   }
 
+  BROTLI_FREE(m, arena);
+
   if (is_last) {
     JumpToByteBoundary(storage_ix, storage);
   }
diff --git a/c/enc/brotli_bit_stream.h b/c/enc/brotli_bit_stream.h
index 2ed703b..70b118e 100644
--- a/c/enc/brotli_bit_stream.h
+++ b/c/enc/brotli_bit_stream.h
@@ -35,7 +35,7 @@
     HuffmanTree* tree, size_t* storage_ix, uint8_t* storage);
 
 BROTLI_INTERNAL void BrotliBuildAndStoreHuffmanTreeFast(
-    MemoryManager* m, const uint32_t* histogram, const size_t histogram_total,
+    HuffmanTree* tree, const uint32_t* histogram, const size_t histogram_total,
     const size_t max_bits, uint8_t* depth, uint16_t* bits, size_t* storage_ix,
     uint8_t* storage);
 
diff --git a/c/enc/cluster_inc.h b/c/enc/cluster_inc.h
index 3d4f40e..00b3539 100644
--- a/c/enc/cluster_inc.h
+++ b/c/enc/cluster_inc.h
@@ -12,8 +12,8 @@
 /* Computes the bit cost reduction by combining out[idx1] and out[idx2] and if
    it is below a threshold, stores the pair (idx1, idx2) in the *pairs queue. */
 BROTLI_INTERNAL void FN(BrotliCompareAndPushToQueue)(
-    const HistogramType* out, const uint32_t* cluster_size, uint32_t idx1,
-    uint32_t idx2, size_t max_num_pairs, HistogramPair* pairs,
+    const HistogramType* out, HistogramType* tmp, const uint32_t* cluster_size,
+    uint32_t idx1, uint32_t idx2, size_t max_num_pairs, HistogramPair* pairs,
     size_t* num_pairs) CODE({
   BROTLI_BOOL is_good_pair = BROTLI_FALSE;
   HistogramPair p;
@@ -42,10 +42,10 @@
   } else {
     double threshold = *num_pairs == 0 ? 1e99 :
         BROTLI_MAX(double, 0.0, pairs[0].cost_diff);
-    HistogramType combo = out[idx1];
     double cost_combo;
-    FN(HistogramAddHistogram)(&combo, &out[idx2]);
-    cost_combo = FN(BrotliPopulationCost)(&combo);
+    *tmp = out[idx1];
+    FN(HistogramAddHistogram)(tmp, &out[idx2]);
+    cost_combo = FN(BrotliPopulationCost)(tmp);
     if (cost_combo < threshold - p.cost_diff) {
       p.cost_combo = cost_combo;
       is_good_pair = BROTLI_TRUE;
@@ -68,6 +68,7 @@
 })
 
 BROTLI_INTERNAL size_t FN(BrotliHistogramCombine)(HistogramType* out,
+                                                  HistogramType* tmp,
                                                   uint32_t* cluster_size,
                                                   uint32_t* symbols,
                                                   uint32_t* clusters,
@@ -87,7 +88,7 @@
     for (idx1 = 0; idx1 < num_clusters; ++idx1) {
       size_t idx2;
       for (idx2 = idx1 + 1; idx2 < num_clusters; ++idx2) {
-        FN(BrotliCompareAndPushToQueue)(out, cluster_size, clusters[idx1],
+        FN(BrotliCompareAndPushToQueue)(out, tmp, cluster_size, clusters[idx1],
             clusters[idx2], max_num_pairs, &pairs[0], &num_pairs);
       }
     }
@@ -146,8 +147,8 @@
 
     /* Push new pairs formed with the combined histogram to the heap. */
     for (i = 0; i < num_clusters; ++i) {
-      FN(BrotliCompareAndPushToQueue)(out, cluster_size, best_idx1, clusters[i],
-                                      max_num_pairs, &pairs[0], &num_pairs);
+      FN(BrotliCompareAndPushToQueue)(out, tmp, cluster_size, best_idx1,
+          clusters[i], max_num_pairs, &pairs[0], &num_pairs);
     }
   }
   return num_clusters;
@@ -155,13 +156,14 @@
 
 /* What is the bit cost of moving histogram from cur_symbol to candidate. */
 BROTLI_INTERNAL double FN(BrotliHistogramBitCostDistance)(
-    const HistogramType* histogram, const HistogramType* candidate) CODE({
+    const HistogramType* histogram, const HistogramType* candidate,
+    HistogramType* tmp) CODE({
   if (histogram->total_count_ == 0) {
     return 0.0;
   } else {
-    HistogramType tmp = *histogram;
-    FN(HistogramAddHistogram)(&tmp, candidate);
-    return FN(BrotliPopulationCost)(&tmp) - candidate->bit_cost_;
+    *tmp = *histogram;
+    FN(HistogramAddHistogram)(tmp, candidate);
+    return FN(BrotliPopulationCost)(tmp) - candidate->bit_cost_;
   }
 })
 
@@ -171,16 +173,16 @@
    Note: we assume that out[]->bit_cost_ is already up-to-date. */
 BROTLI_INTERNAL void FN(BrotliHistogramRemap)(const HistogramType* in,
     size_t in_size, const uint32_t* clusters, size_t num_clusters,
-    HistogramType* out, uint32_t* symbols) CODE({
+    HistogramType* out, HistogramType* tmp, uint32_t* symbols) CODE({
   size_t i;
   for (i = 0; i < in_size; ++i) {
     uint32_t best_out = i == 0 ? symbols[0] : symbols[i - 1];
     double best_bits =
-        FN(BrotliHistogramBitCostDistance)(&in[i], &out[best_out]);
+        FN(BrotliHistogramBitCostDistance)(&in[i], &out[best_out], tmp);
     size_t j;
     for (j = 0; j < num_clusters; ++j) {
       const double cur_bits =
-          FN(BrotliHistogramBitCostDistance)(&in[i], &out[clusters[j]]);
+          FN(BrotliHistogramBitCostDistance)(&in[i], &out[clusters[j]], tmp);
       if (cur_bits < best_bits) {
         best_bits = cur_bits;
         best_out = clusters[j];
@@ -257,10 +259,12 @@
   size_t pairs_capacity = max_input_histograms * max_input_histograms / 2;
   /* For the first pass of clustering, we allow all pairs. */
   HistogramPair* pairs = BROTLI_ALLOC(m, HistogramPair, pairs_capacity + 1);
+  /* TODO: move to "persistent" arena? */
+  HistogramType* tmp = BROTLI_ALLOC(m, HistogramType, 1);
   size_t i;
 
   if (BROTLI_IS_OOM(m) || BROTLI_IS_NULL(cluster_size) ||
-      BROTLI_IS_NULL(clusters) || BROTLI_IS_NULL(pairs)) {
+      BROTLI_IS_NULL(clusters) || BROTLI_IS_NULL(pairs)|| BROTLI_IS_NULL(tmp)) {
     return;
   }
 
@@ -283,7 +287,7 @@
       clusters[num_clusters + j] = (uint32_t)(i + j);
     }
     num_new_clusters =
-        FN(BrotliHistogramCombine)(out, cluster_size,
+        FN(BrotliHistogramCombine)(out, tmp, cluster_size,
                                    &histogram_symbols[i],
                                    &clusters[num_clusters], pairs,
                                    num_to_combine, num_to_combine,
@@ -301,7 +305,7 @@
     if (BROTLI_IS_OOM(m)) return;
 
     /* Collapse similar histograms. */
-    num_clusters = FN(BrotliHistogramCombine)(out, cluster_size,
+    num_clusters = FN(BrotliHistogramCombine)(out, tmp, cluster_size,
                                               histogram_symbols, clusters,
                                               pairs, num_clusters, in_size,
                                               max_histograms, max_num_pairs);
@@ -310,7 +314,8 @@
   BROTLI_FREE(m, cluster_size);
   /* Find the optimal map from original histograms to the final ones. */
   FN(BrotliHistogramRemap)(in, in_size, clusters, num_clusters,
-                           out, histogram_symbols);
+                           out, tmp, histogram_symbols);
+  BROTLI_FREE(m, tmp);
   BROTLI_FREE(m, clusters);
   /* Convert the context map to a canonical form. */
   *out_size = FN(BrotliHistogramReindex)(m, out, histogram_symbols, in_size);
diff --git a/c/enc/compress_fragment.c b/c/enc/compress_fragment.c
index 9e50b20..a6a7e62 100644
--- a/c/enc/compress_fragment.c
+++ b/c/enc/compress_fragment.c
@@ -16,14 +16,12 @@
 
 #include <string.h>  /* memcmp, memcpy, memset */
 
-#include "../common/constants.h"
 #include "../common/platform.h"
 #include <brotli/types.h>
 #include "./brotli_bit_stream.h"
 #include "./entropy_encode.h"
 #include "./fast_log.h"
 #include "./find_match_length.h"
-#include "./memory.h"
 #include "./write_bits.h"
 
 #if defined(__cplusplus) || defined(c_plusplus)
@@ -69,16 +67,18 @@
    and thus have to assign a non-zero depth for each literal.
    Returns estimated compression ratio millibytes/char for encoding given input
    with generated code. */
-static size_t BuildAndStoreLiteralPrefixCode(MemoryManager* m,
+static size_t BuildAndStoreLiteralPrefixCode(BrotliOnePassArena* s,
                                              const uint8_t* input,
                                              const size_t input_size,
                                              uint8_t depths[256],
                                              uint16_t bits[256],
                                              size_t* storage_ix,
                                              uint8_t* storage) {
-  uint32_t histogram[256] = { 0 };
+  uint32_t* BROTLI_RESTRICT const histogram = s->histogram;
   size_t histogram_total;
   size_t i;
+  memset(histogram, 0, sizeof(s->histogram));
+
   if (input_size < (1 << 15)) {
     for (i = 0; i < input_size; ++i) {
       ++histogram[input[i]];
@@ -108,10 +108,9 @@
       histogram_total += adjust;
     }
   }
-  BrotliBuildAndStoreHuffmanTreeFast(m, histogram, histogram_total,
+  BrotliBuildAndStoreHuffmanTreeFast(s->tree, histogram, histogram_total,
                                      /* max_bits = */ 8,
                                      depths, bits, storage_ix, storage);
-  if (BROTLI_IS_OOM(m)) return 0;
   {
     size_t literal_ratio = 0;
     for (i = 0; i < 256; ++i) {
@@ -124,53 +123,56 @@
 
 /* Builds a command and distance prefix code (each 64 symbols) into "depth" and
    "bits" based on "histogram" and stores it into the bit stream. */
-static void BuildAndStoreCommandPrefixCode(const uint32_t histogram[128],
-    uint8_t depth[128], uint16_t bits[128], size_t* storage_ix,
-    uint8_t* storage) {
-  /* Tree size for building a tree over 64 symbols is 2 * 64 + 1. */
-  HuffmanTree tree[129];
-  uint8_t cmd_depth[BROTLI_NUM_COMMAND_SYMBOLS] = { 0 };
-  uint16_t cmd_bits[64];
+static void BuildAndStoreCommandPrefixCode(BrotliOnePassArena* s,
+    size_t* storage_ix, uint8_t* storage) {
+  const uint32_t* const histogram = s->cmd_histo;
+  uint8_t* const depth = s->cmd_depth;
+  uint16_t* const bits = s->cmd_bits;
+  uint8_t* BROTLI_RESTRICT const tmp_depth = s->tmp_depth;
+  uint16_t* BROTLI_RESTRICT const tmp_bits = s->tmp_bits;
+  /* TODO: do only once on initialization. */
+  memset(tmp_depth, 0, BROTLI_NUM_COMMAND_SYMBOLS);
 
-  BrotliCreateHuffmanTree(histogram, 64, 15, tree, depth);
-  BrotliCreateHuffmanTree(&histogram[64], 64, 14, tree, &depth[64]);
+  BrotliCreateHuffmanTree(histogram, 64, 15, s->tree, depth);
+  BrotliCreateHuffmanTree(&histogram[64], 64, 14, s->tree, &depth[64]);
   /* We have to jump through a few hoops here in order to compute
      the command bits because the symbols are in a different order than in
      the full alphabet. This looks complicated, but having the symbols
      in this order in the command bits saves a few branches in the Emit*
      functions. */
-  memcpy(cmd_depth, depth, 24);
-  memcpy(cmd_depth + 24, depth + 40, 8);
-  memcpy(cmd_depth + 32, depth + 24, 8);
-  memcpy(cmd_depth + 40, depth + 48, 8);
-  memcpy(cmd_depth + 48, depth + 32, 8);
-  memcpy(cmd_depth + 56, depth + 56, 8);
-  BrotliConvertBitDepthsToSymbols(cmd_depth, 64, cmd_bits);
-  memcpy(bits, cmd_bits, 48);
-  memcpy(bits + 24, cmd_bits + 32, 16);
-  memcpy(bits + 32, cmd_bits + 48, 16);
-  memcpy(bits + 40, cmd_bits + 24, 16);
-  memcpy(bits + 48, cmd_bits + 40, 16);
-  memcpy(bits + 56, cmd_bits + 56, 16);
+  memcpy(tmp_depth, depth, 24);
+  memcpy(tmp_depth + 24, depth + 40, 8);
+  memcpy(tmp_depth + 32, depth + 24, 8);
+  memcpy(tmp_depth + 40, depth + 48, 8);
+  memcpy(tmp_depth + 48, depth + 32, 8);
+  memcpy(tmp_depth + 56, depth + 56, 8);
+  BrotliConvertBitDepthsToSymbols(tmp_depth, 64, tmp_bits);
+  memcpy(bits, tmp_bits, 48);
+  memcpy(bits + 24, tmp_bits + 32, 16);
+  memcpy(bits + 32, tmp_bits + 48, 16);
+  memcpy(bits + 40, tmp_bits + 24, 16);
+  memcpy(bits + 48, tmp_bits + 40, 16);
+  memcpy(bits + 56, tmp_bits + 56, 16);
   BrotliConvertBitDepthsToSymbols(&depth[64], 64, &bits[64]);
   {
     /* Create the bit length array for the full command alphabet. */
     size_t i;
-    memset(cmd_depth, 0, 64);  /* only 64 first values were used */
-    memcpy(cmd_depth, depth, 8);
-    memcpy(cmd_depth + 64, depth + 8, 8);
-    memcpy(cmd_depth + 128, depth + 16, 8);
-    memcpy(cmd_depth + 192, depth + 24, 8);
-    memcpy(cmd_depth + 384, depth + 32, 8);
+    memset(tmp_depth, 0, 64);  /* only 64 first values were used */
+    memcpy(tmp_depth, depth, 8);
+    memcpy(tmp_depth + 64, depth + 8, 8);
+    memcpy(tmp_depth + 128, depth + 16, 8);
+    memcpy(tmp_depth + 192, depth + 24, 8);
+    memcpy(tmp_depth + 384, depth + 32, 8);
     for (i = 0; i < 8; ++i) {
-      cmd_depth[128 + 8 * i] = depth[40 + i];
-      cmd_depth[256 + 8 * i] = depth[48 + i];
-      cmd_depth[448 + 8 * i] = depth[56 + i];
+      tmp_depth[128 + 8 * i] = depth[40 + i];
+      tmp_depth[256 + 8 * i] = depth[48 + i];
+      tmp_depth[448 + 8 * i] = depth[56 + i];
     }
+    /* TODO: could/should full-length machinery be avoided? */
     BrotliStoreHuffmanTree(
-        cmd_depth, BROTLI_NUM_COMMAND_SYMBOLS, tree, storage_ix, storage);
+        tmp_depth, BROTLI_NUM_COMMAND_SYMBOLS, s->tree, storage_ix, storage);
   }
-  BrotliStoreHuffmanTree(&depth[64], 64, tree, storage_ix, storage);
+  BrotliStoreHuffmanTree(&depth[64], 64, s->tree, storage_ix, storage);
 }
 
 /* REQUIRES: insertlen < 6210 */
@@ -369,11 +371,12 @@
   *storage_ix = new_storage_ix;
 }
 
-static BROTLI_BOOL ShouldMergeBlock(
+static BROTLI_BOOL ShouldMergeBlock(BrotliOnePassArena* s,
     const uint8_t* data, size_t len, const uint8_t* depths) {
-  size_t histo[256] = { 0 };
+  uint32_t* BROTLI_RESTRICT const histo = s->histogram;
   static const size_t kSampleRate = 43;
   size_t i;
+  memset(histo, 0, sizeof(s->histogram));
   for (i = 0; i < len; i += kSampleRate) {
     ++histo[data[i]];
   }
@@ -423,11 +426,14 @@
 };
 
 static BROTLI_INLINE void BrotliCompressFragmentFastImpl(
-    MemoryManager* m, const uint8_t* input, size_t input_size,
-    BROTLI_BOOL is_last, int* table, size_t table_bits, uint8_t cmd_depth[128],
-    uint16_t cmd_bits[128], size_t* cmd_code_numbits, uint8_t* cmd_code,
+    BrotliOnePassArena* s, const uint8_t* input, size_t input_size,
+    BROTLI_BOOL is_last, int* table, size_t table_bits,
     size_t* storage_ix, uint8_t* storage) {
-  uint32_t cmd_histo[128];
+  uint8_t* BROTLI_RESTRICT const cmd_depth = s->cmd_depth;
+  uint16_t* BROTLI_RESTRICT const cmd_bits = s->cmd_bits;
+  uint32_t* BROTLI_RESTRICT const cmd_histo = s->cmd_histo;
+  uint8_t* BROTLI_RESTRICT const lit_depth = s->lit_depth;
+  uint16_t* BROTLI_RESTRICT const lit_bits = s->lit_bits;
   const uint8_t* ip_end;
 
   /* "next_emit" is a pointer to the first byte that is not covered by a
@@ -451,9 +457,6 @@
      we can update it later if we decide to extend this meta-block. */
   size_t mlen_storage_ix = *storage_ix + 3;
 
-  uint8_t lit_depth[256];
-  uint16_t lit_bits[256];
-
   size_t literal_ratio;
 
   const uint8_t* ip;
@@ -466,25 +469,24 @@
   BrotliWriteBits(13, 0, storage_ix, storage);
 
   literal_ratio = BuildAndStoreLiteralPrefixCode(
-      m, input, block_size, lit_depth, lit_bits, storage_ix, storage);
-  if (BROTLI_IS_OOM(m)) return;
+      s, input, block_size, s->lit_depth, s->lit_bits, storage_ix, storage);
 
   {
     /* Store the pre-compressed command and distance prefix codes. */
     size_t i;
-    for (i = 0; i + 7 < *cmd_code_numbits; i += 8) {
-      BrotliWriteBits(8, cmd_code[i >> 3], storage_ix, storage);
+    for (i = 0; i + 7 < s->cmd_code_numbits; i += 8) {
+      BrotliWriteBits(8, s->cmd_code[i >> 3], storage_ix, storage);
     }
   }
-  BrotliWriteBits(*cmd_code_numbits & 7, cmd_code[*cmd_code_numbits >> 3],
-                  storage_ix, storage);
+  BrotliWriteBits(s->cmd_code_numbits & 7,
+                  s->cmd_code[s->cmd_code_numbits >> 3], storage_ix, storage);
 
  emit_commands:
   /* Initialize the command and distance histograms. We will gather
      statistics of command and distance codes during the processing
      of this block and use it to update the command and distance
      prefix codes for the next block. */
-  memcpy(cmd_histo, kCmdHistoSeed, sizeof(kCmdHistoSeed));
+  memcpy(s->cmd_histo, kCmdHistoSeed, sizeof(kCmdHistoSeed));
 
   /* "ip" is the input pointer. */
   ip = input;
@@ -667,7 +669,7 @@
      last insert-only command. */
   if (input_size > 0 &&
       total_block_size + block_size <= (1 << 20) &&
-      ShouldMergeBlock(input, block_size, lit_depth)) {
+      ShouldMergeBlock(s, input, block_size, lit_depth)) {
     BROTLI_DCHECK(total_block_size > (1 << 16));
     /* Update the size of the current meta-block and continue emitting commands.
        We can do this because the current size and the new size both have 5
@@ -711,20 +713,17 @@
     /* No block splits, no contexts. */
     BrotliWriteBits(13, 0, storage_ix, storage);
     literal_ratio = BuildAndStoreLiteralPrefixCode(
-        m, input, block_size, lit_depth, lit_bits, storage_ix, storage);
-    if (BROTLI_IS_OOM(m)) return;
-    BuildAndStoreCommandPrefixCode(cmd_histo, cmd_depth, cmd_bits,
-                                   storage_ix, storage);
+        s, input, block_size, lit_depth, lit_bits, storage_ix, storage);
+    BuildAndStoreCommandPrefixCode(s, storage_ix, storage);
     goto emit_commands;
   }
 
   if (!is_last) {
     /* If this is not the last block, update the command and distance prefix
        codes for the next block and store the compressed forms. */
-    cmd_code[0] = 0;
-    *cmd_code_numbits = 0;
-    BuildAndStoreCommandPrefixCode(cmd_histo, cmd_depth, cmd_bits,
-                                   cmd_code_numbits, cmd_code);
+    s->cmd_code[0] = 0;
+    s->cmd_code_numbits = 0;
+    BuildAndStoreCommandPrefixCode(s, &s->cmd_code_numbits, s->cmd_code);
   }
 }
 
@@ -732,20 +731,17 @@
 
 #define BAKE_METHOD_PARAM_(B) \
 static BROTLI_NOINLINE void BrotliCompressFragmentFastImpl ## B(             \
-    MemoryManager* m, const uint8_t* input, size_t input_size,               \
-    BROTLI_BOOL is_last, int* table, uint8_t cmd_depth[128],                 \
-    uint16_t cmd_bits[128], size_t* cmd_code_numbits, uint8_t* cmd_code,     \
-    size_t* storage_ix, uint8_t* storage) {                                  \
-  BrotliCompressFragmentFastImpl(m, input, input_size, is_last, table, B,    \
-      cmd_depth, cmd_bits, cmd_code_numbits, cmd_code, storage_ix, storage); \
+    BrotliOnePassArena* s, const uint8_t* input, size_t input_size,          \
+    BROTLI_BOOL is_last, int* table, size_t* storage_ix, uint8_t* storage) { \
+  BrotliCompressFragmentFastImpl(s, input, input_size, is_last, table, B,    \
+      storage_ix, storage);                                                  \
 }
 FOR_TABLE_BITS_(BAKE_METHOD_PARAM_)
 #undef BAKE_METHOD_PARAM_
 
 void BrotliCompressFragmentFast(
-    MemoryManager* m, const uint8_t* input, size_t input_size,
-    BROTLI_BOOL is_last, int* table, size_t table_size, uint8_t cmd_depth[128],
-    uint16_t cmd_bits[128], size_t* cmd_code_numbits, uint8_t* cmd_code,
+    BrotliOnePassArena* s, const uint8_t* input, size_t input_size,
+    BROTLI_BOOL is_last, int* table, size_t table_size,
     size_t* storage_ix, uint8_t* storage) {
   const size_t initial_storage_ix = *storage_ix;
   const size_t table_bits = Log2FloorNonZero(table_size);
@@ -762,8 +758,7 @@
 #define CASE_(B)                                                     \
     case B:                                                          \
       BrotliCompressFragmentFastImpl ## B(                           \
-          m, input, input_size, is_last, table, cmd_depth, cmd_bits, \
-          cmd_code_numbits, cmd_code, storage_ix, storage);          \
+          s, input, input_size, is_last, table, storage_ix, storage);\
       break;
     FOR_TABLE_BITS_(CASE_)
 #undef CASE_
diff --git a/c/enc/compress_fragment.h b/c/enc/compress_fragment.h
index 80007f5..a7d6a6a 100644
--- a/c/enc/compress_fragment.h
+++ b/c/enc/compress_fragment.h
@@ -12,14 +12,42 @@
 #ifndef BROTLI_ENC_COMPRESS_FRAGMENT_H_
 #define BROTLI_ENC_COMPRESS_FRAGMENT_H_
 
+#include "../common/constants.h"
 #include "../common/platform.h"
 #include <brotli/types.h>
-#include "./memory.h"
+#include "./entropy_encode.h"
 
 #if defined(__cplusplus) || defined(c_plusplus)
 extern "C" {
 #endif
 
+typedef struct BrotliOnePassArena {
+  uint8_t lit_depth[256];
+  uint16_t lit_bits[256];
+
+  /* Command and distance prefix codes (each 64 symbols, stored back-to-back)
+     used for the next block. The command prefix code is over a smaller alphabet
+     with the following 64 symbols:
+        0 - 15: insert length code 0, copy length code 0 - 15, same distance
+       16 - 39: insert length code 0, copy length code 0 - 23
+       40 - 63: insert length code 0 - 23, copy length code 0
+     Note that symbols 16 and 40 represent the same code in the full alphabet,
+     but we do not use either of them. */
+  uint8_t cmd_depth[128];
+  uint16_t cmd_bits[128];
+  uint32_t cmd_histo[128];
+
+  /* The compressed form of the command and distance prefix codes for the next
+     block. */
+  uint8_t cmd_code[512];
+  size_t cmd_code_numbits;
+
+  HuffmanTree tree[2 * BROTLI_NUM_LITERAL_SYMBOLS + 1];
+  uint32_t histogram[256];
+  uint8_t tmp_depth[BROTLI_NUM_COMMAND_SYMBOLS];
+  uint16_t tmp_bits[64];
+} BrotliOnePassArena;
+
 /* Compresses "input" string to the "*storage" buffer as one or more complete
    meta-blocks, and updates the "*storage_ix" bit position.
 
@@ -42,15 +70,11 @@
    REQUIRES: "table_size" is an odd (9, 11, 13, 15) power of two
    OUTPUT: maximal copy distance <= |input_size|
    OUTPUT: maximal copy distance <= BROTLI_MAX_BACKWARD_LIMIT(18) */
-BROTLI_INTERNAL void BrotliCompressFragmentFast(MemoryManager* m,
+BROTLI_INTERNAL void BrotliCompressFragmentFast(BrotliOnePassArena* s,
                                                 const uint8_t* input,
                                                 size_t input_size,
                                                 BROTLI_BOOL is_last,
                                                 int* table, size_t table_size,
-                                                uint8_t cmd_depth[128],
-                                                uint16_t cmd_bits[128],
-                                                size_t* cmd_code_numbits,
-                                                uint8_t* cmd_code,
                                                 size_t* storage_ix,
                                                 uint8_t* storage);
 
diff --git a/c/enc/compress_fragment_two_pass.c b/c/enc/compress_fragment_two_pass.c
index ca68b38..4f14843 100644
--- a/c/enc/compress_fragment_two_pass.c
+++ b/c/enc/compress_fragment_two_pass.c
@@ -22,7 +22,6 @@
 #include "./entropy_encode.h"
 #include "./fast_log.h"
 #include "./find_match_length.h"
-#include "./memory.h"
 #include "./write_bits.h"
 
 #if defined(__cplusplus) || defined(c_plusplus)
@@ -66,53 +65,53 @@
 
 /* Builds a command and distance prefix code (each 64 symbols) into "depth" and
    "bits" based on "histogram" and stores it into the bit stream. */
-static void BuildAndStoreCommandPrefixCode(
-    const uint32_t histogram[128],
-    uint8_t depth[128], uint16_t bits[128],
-    size_t* storage_ix, uint8_t* storage) {
+static void BuildAndStoreCommandPrefixCode(BrotliTwoPassArena* s,
+                                           size_t* storage_ix,
+                                           uint8_t* storage) {
   /* Tree size for building a tree over 64 symbols is 2 * 64 + 1. */
-  HuffmanTree tree[129];
-  uint8_t cmd_depth[BROTLI_NUM_COMMAND_SYMBOLS] = { 0 };
-  uint16_t cmd_bits[64];
-  BrotliCreateHuffmanTree(histogram, 64, 15, tree, depth);
-  BrotliCreateHuffmanTree(&histogram[64], 64, 14, tree, &depth[64]);
+  /* TODO: initialize once. */
+  memset(s->tmp_depth, 0, sizeof(s->tmp_depth));
+  BrotliCreateHuffmanTree(s->cmd_histo, 64, 15, s->tmp_tree, s->cmd_depth);
+  BrotliCreateHuffmanTree(&s->cmd_histo[64], 64, 14, s->tmp_tree,
+                          &s->cmd_depth[64]);
   /* We have to jump through a few hoops here in order to compute
      the command bits because the symbols are in a different order than in
      the full alphabet. This looks complicated, but having the symbols
      in this order in the command bits saves a few branches in the Emit*
      functions. */
-  memcpy(cmd_depth, depth + 24, 24);
-  memcpy(cmd_depth + 24, depth, 8);
-  memcpy(cmd_depth + 32, depth + 48, 8);
-  memcpy(cmd_depth + 40, depth + 8, 8);
-  memcpy(cmd_depth + 48, depth + 56, 8);
-  memcpy(cmd_depth + 56, depth + 16, 8);
-  BrotliConvertBitDepthsToSymbols(cmd_depth, 64, cmd_bits);
-  memcpy(bits, cmd_bits + 24, 16);
-  memcpy(bits + 8, cmd_bits + 40, 16);
-  memcpy(bits + 16, cmd_bits + 56, 16);
-  memcpy(bits + 24, cmd_bits, 48);
-  memcpy(bits + 48, cmd_bits + 32, 16);
-  memcpy(bits + 56, cmd_bits + 48, 16);
-  BrotliConvertBitDepthsToSymbols(&depth[64], 64, &bits[64]);
+  memcpy(s->tmp_depth, s->cmd_depth + 24, 24);
+  memcpy(s->tmp_depth + 24, s->cmd_depth, 8);
+  memcpy(s->tmp_depth + 32, s->cmd_depth + 48, 8);
+  memcpy(s->tmp_depth + 40, s->cmd_depth + 8, 8);
+  memcpy(s->tmp_depth + 48, s->cmd_depth + 56, 8);
+  memcpy(s->tmp_depth + 56, s->cmd_depth + 16, 8);
+  BrotliConvertBitDepthsToSymbols(s->tmp_depth, 64, s->tmp_bits);
+  memcpy(s->cmd_bits, s->tmp_bits + 24, 16);
+  memcpy(s->cmd_bits + 8, s->tmp_bits + 40, 16);
+  memcpy(s->cmd_bits + 16, s->tmp_bits + 56, 16);
+  memcpy(s->cmd_bits + 24, s->tmp_bits, 48);
+  memcpy(s->cmd_bits + 48, s->tmp_bits + 32, 16);
+  memcpy(s->cmd_bits + 56, s->tmp_bits + 48, 16);
+  BrotliConvertBitDepthsToSymbols(&s->cmd_depth[64], 64, &s->cmd_bits[64]);
   {
     /* Create the bit length array for the full command alphabet. */
     size_t i;
-    memset(cmd_depth, 0, 64);  /* only 64 first values were used */
-    memcpy(cmd_depth, depth + 24, 8);
-    memcpy(cmd_depth + 64, depth + 32, 8);
-    memcpy(cmd_depth + 128, depth + 40, 8);
-    memcpy(cmd_depth + 192, depth + 48, 8);
-    memcpy(cmd_depth + 384, depth + 56, 8);
+    memset(s->tmp_depth, 0, 64); /* only 64 first values were used */
+    memcpy(s->tmp_depth, s->cmd_depth + 24, 8);
+    memcpy(s->tmp_depth + 64, s->cmd_depth + 32, 8);
+    memcpy(s->tmp_depth + 128, s->cmd_depth + 40, 8);
+    memcpy(s->tmp_depth + 192, s->cmd_depth + 48, 8);
+    memcpy(s->tmp_depth + 384, s->cmd_depth + 56, 8);
     for (i = 0; i < 8; ++i) {
-      cmd_depth[128 + 8 * i] = depth[i];
-      cmd_depth[256 + 8 * i] = depth[8 + i];
-      cmd_depth[448 + 8 * i] = depth[16 + i];
+      s->tmp_depth[128 + 8 * i] = s->cmd_depth[i];
+      s->tmp_depth[256 + 8 * i] = s->cmd_depth[8 + i];
+      s->tmp_depth[448 + 8 * i] = s->cmd_depth[16 + i];
     }
-    BrotliStoreHuffmanTree(
-        cmd_depth, BROTLI_NUM_COMMAND_SYMBOLS, tree, storage_ix, storage);
+    BrotliStoreHuffmanTree(s->tmp_depth, BROTLI_NUM_COMMAND_SYMBOLS,
+                           s->tmp_tree, storage_ix, storage);
   }
-  BrotliStoreHuffmanTree(&depth[64], 64, tree, storage_ix, storage);
+  BrotliStoreHuffmanTree(&s->cmd_depth[64], 64, s->tmp_tree, storage_ix,
+                         storage);
 }
 
 static BROTLI_INLINE void EmitInsertLen(
@@ -452,65 +451,64 @@
   }
 }
 
-static void StoreCommands(MemoryManager* m,
+static void StoreCommands(BrotliTwoPassArena* s,
                           const uint8_t* literals, const size_t num_literals,
                           const uint32_t* commands, const size_t num_commands,
                           size_t* storage_ix, uint8_t* storage) {
   static const uint32_t kNumExtraBits[128] = {
-    0, 0, 0, 0, 0, 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 7, 8, 9, 10, 12, 14, 24,
-    0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 2, 2, 3, 3, 4, 4,
-    0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 7, 8, 9, 10, 24,
-    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
-    1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7, 8, 8,
-    9, 9, 10, 10, 11, 11, 12, 12, 13, 13, 14, 14, 15, 15, 16, 16,
-    17, 17, 18, 18, 19, 19, 20, 20, 21, 21, 22, 22, 23, 23, 24, 24,
+      0,  0,  0,  0,  0,  0,  1,  1,  2,  2,  3,  3,  4,  4,  5,  5,
+      6,  7,  8,  9,  10, 12, 14, 24, 0,  0,  0,  0,  0,  0,  0,  0,
+      1,  1,  2,  2,  3,  3,  4,  4,  0,  0,  0,  0,  0,  0,  0,  0,
+      1,  1,  2,  2,  3,  3,  4,  4,  5,  5,  6,  7,  8,  9,  10, 24,
+      0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
+      1,  1,  2,  2,  3,  3,  4,  4,  5,  5,  6,  6,  7,  7,  8,  8,
+      9,  9,  10, 10, 11, 11, 12, 12, 13, 13, 14, 14, 15, 15, 16, 16,
+      17, 17, 18, 18, 19, 19, 20, 20, 21, 21, 22, 22, 23, 23, 24, 24,
   };
   static const uint32_t kInsertOffset[24] = {
-    0, 1, 2, 3, 4, 5, 6, 8, 10, 14, 18, 26, 34, 50, 66, 98, 130, 194, 322, 578,
-    1090, 2114, 6210, 22594,
+      0,  1,  2,  3,  4,   5,   6,   8,   10,   14,   18,   26,
+      34, 50, 66, 98, 130, 194, 322, 578, 1090, 2114, 6210, 22594,
   };
 
-  uint8_t lit_depths[256];
-  uint16_t lit_bits[256];
-  uint32_t lit_histo[256] = { 0 };
-  uint8_t cmd_depths[128] = { 0 };
-  uint16_t cmd_bits[128] = { 0 };
-  uint32_t cmd_histo[128] = { 0 };
   size_t i;
+  memset(s->lit_histo, 0, sizeof(s->lit_histo));
+  /* TODO: is that necessary? */
+  memset(s->cmd_depth, 0, sizeof(s->cmd_depth));
+  /* TODO: is that necessary? */
+  memset(s->cmd_bits, 0, sizeof(s->cmd_bits));
+  memset(s->cmd_histo, 0, sizeof(s->cmd_histo));
   for (i = 0; i < num_literals; ++i) {
-    ++lit_histo[literals[i]];
+    ++s->lit_histo[literals[i]];
   }
-  BrotliBuildAndStoreHuffmanTreeFast(m, lit_histo, num_literals,
-                                     /* max_bits = */ 8,
-                                     lit_depths, lit_bits,
-                                     storage_ix, storage);
-  if (BROTLI_IS_OOM(m)) return;
+  BrotliBuildAndStoreHuffmanTreeFast(s->tmp_tree, s->lit_histo, num_literals,
+                                     /* max_bits = */ 8, s->lit_depth,
+                                     s->lit_bits, storage_ix, storage);
 
   for (i = 0; i < num_commands; ++i) {
     const uint32_t code = commands[i] & 0xFF;
     BROTLI_DCHECK(code < 128);
-    ++cmd_histo[code];
+    ++s->cmd_histo[code];
   }
-  cmd_histo[1] += 1;
-  cmd_histo[2] += 1;
-  cmd_histo[64] += 1;
-  cmd_histo[84] += 1;
-  BuildAndStoreCommandPrefixCode(cmd_histo, cmd_depths, cmd_bits,
-                                 storage_ix, storage);
+  s->cmd_histo[1] += 1;
+  s->cmd_histo[2] += 1;
+  s->cmd_histo[64] += 1;
+  s->cmd_histo[84] += 1;
+  BuildAndStoreCommandPrefixCode(s, storage_ix, storage);
 
   for (i = 0; i < num_commands; ++i) {
     const uint32_t cmd = commands[i];
     const uint32_t code = cmd & 0xFF;
     const uint32_t extra = cmd >> 8;
     BROTLI_DCHECK(code < 128);
-    BrotliWriteBits(cmd_depths[code], cmd_bits[code], storage_ix, storage);
+    BrotliWriteBits(s->cmd_depth[code], s->cmd_bits[code], storage_ix, storage);
     BrotliWriteBits(kNumExtraBits[code], extra, storage_ix, storage);
     if (code < 24) {
       const uint32_t insert = kInsertOffset[code] + extra;
       uint32_t j;
       for (j = 0; j < insert; ++j) {
         const uint8_t lit = *literals;
-        BrotliWriteBits(lit_depths[lit], lit_bits[lit], storage_ix, storage);
+        BrotliWriteBits(s->lit_depth[lit], s->lit_bits[lit], storage_ix,
+                        storage);
         ++literals;
       }
     }
@@ -521,19 +519,19 @@
 #define MIN_RATIO 0.98
 #define SAMPLE_RATE 43
 
-static BROTLI_BOOL ShouldCompress(
+static BROTLI_BOOL ShouldCompress(BrotliTwoPassArena* s,
     const uint8_t* input, size_t input_size, size_t num_literals) {
   double corpus_size = (double)input_size;
   if ((double)num_literals < MIN_RATIO * corpus_size) {
     return BROTLI_TRUE;
   } else {
-    uint32_t literal_histo[256] = { 0 };
     const double max_total_bit_cost = corpus_size * 8 * MIN_RATIO / SAMPLE_RATE;
     size_t i;
+    memset(s->lit_histo, 0, sizeof(s->lit_histo));
     for (i = 0; i < input_size; i += SAMPLE_RATE) {
-      ++literal_histo[input[i]];
+      ++s->lit_histo[input[i]];
     }
-    return TO_BROTLI_BOOL(BitsEntropy(literal_histo, 256) < max_total_bit_cost);
+    return TO_BROTLI_BOOL(BitsEntropy(s->lit_histo, 256) < max_total_bit_cost);
   }
 }
 
@@ -555,7 +553,7 @@
 }
 
 static BROTLI_INLINE void BrotliCompressFragmentTwoPassImpl(
-    MemoryManager* m, const uint8_t* input, size_t input_size,
+    BrotliTwoPassArena* s, const uint8_t* input, size_t input_size,
     BROTLI_BOOL is_last, uint32_t* command_buf, uint8_t* literal_buf,
     int* table, size_t table_bits, size_t min_match,
     size_t* storage_ix, uint8_t* storage) {
@@ -573,14 +571,13 @@
     CreateCommands(input, block_size, input_size, base_ip, table,
                    table_bits, min_match, &literals, &commands);
     num_literals = (size_t)(literals - literal_buf);
-    if (ShouldCompress(input, block_size, num_literals)) {
+    if (ShouldCompress(s, input, block_size, num_literals)) {
       const size_t num_commands = (size_t)(commands - command_buf);
       BrotliStoreMetaBlockHeader(block_size, 0, storage_ix, storage);
       /* No block splits, no contexts. */
       BrotliWriteBits(13, 0, storage_ix, storage);
-      StoreCommands(m, literal_buf, num_literals, command_buf, num_commands,
+      StoreCommands(s, literal_buf, num_literals, command_buf, num_commands,
                     storage_ix, storage);
-      if (BROTLI_IS_OOM(m)) return;
     } else {
       /* Since we did not find many backward references and the entropy of
          the data is close to 8 bits, we can simply emit an uncompressed block.
@@ -597,18 +594,18 @@
 
 #define BAKE_METHOD_PARAM_(B)                                                  \
 static BROTLI_NOINLINE void BrotliCompressFragmentTwoPassImpl ## B(            \
-    MemoryManager* m, const uint8_t* input, size_t input_size,                 \
+    BrotliTwoPassArena* s, const uint8_t* input, size_t input_size,            \
     BROTLI_BOOL is_last, uint32_t* command_buf, uint8_t* literal_buf,          \
     int* table, size_t* storage_ix, uint8_t* storage) {                        \
   size_t min_match = (B <= 15) ? 4 : 6;                                        \
-  BrotliCompressFragmentTwoPassImpl(m, input, input_size, is_last, command_buf,\
+  BrotliCompressFragmentTwoPassImpl(s, input, input_size, is_last, command_buf,\
       literal_buf, table, B, min_match, storage_ix, storage);                  \
 }
 FOR_TABLE_BITS_(BAKE_METHOD_PARAM_)
 #undef BAKE_METHOD_PARAM_
 
 void BrotliCompressFragmentTwoPass(
-    MemoryManager* m, const uint8_t* input, size_t input_size,
+    BrotliTwoPassArena* s, const uint8_t* input, size_t input_size,
     BROTLI_BOOL is_last, uint32_t* command_buf, uint8_t* literal_buf,
     int* table, size_t table_size, size_t* storage_ix, uint8_t* storage) {
   const size_t initial_storage_ix = *storage_ix;
@@ -617,7 +614,7 @@
 #define CASE_(B)                                      \
     case B:                                           \
       BrotliCompressFragmentTwoPassImpl ## B(         \
-          m, input, input_size, is_last, command_buf, \
+          s, input, input_size, is_last, command_buf, \
           literal_buf, table, storage_ix, storage);   \
       break;
     FOR_TABLE_BITS_(CASE_)
diff --git a/c/enc/compress_fragment_two_pass.h b/c/enc/compress_fragment_two_pass.h
index 928677d..9c082fe 100644
--- a/c/enc/compress_fragment_two_pass.h
+++ b/c/enc/compress_fragment_two_pass.h
@@ -13,16 +13,33 @@
 #ifndef BROTLI_ENC_COMPRESS_FRAGMENT_TWO_PASS_H_
 #define BROTLI_ENC_COMPRESS_FRAGMENT_TWO_PASS_H_
 
+#include "../common/constants.h"
 #include "../common/platform.h"
 #include <brotli/types.h>
-#include "./memory.h"
+#include "./entropy_encode.h"
 
 #if defined(__cplusplus) || defined(c_plusplus)
 extern "C" {
 #endif
 
+/* TODO: turn to macro. */
 static const size_t kCompressFragmentTwoPassBlockSize = 1 << 17;
 
+typedef struct BrotliTwoPassArena {
+  uint32_t lit_histo[256];
+  uint8_t lit_depth[256];
+  uint16_t lit_bits[256];
+
+  uint32_t cmd_histo[128];
+  uint8_t cmd_depth[128];
+  uint16_t cmd_bits[128];
+
+  /* BuildAndStoreCommandPrefixCode */
+  HuffmanTree tmp_tree[2 * BROTLI_NUM_LITERAL_SYMBOLS + 1];
+  uint8_t tmp_depth[BROTLI_NUM_COMMAND_SYMBOLS];
+  uint16_t tmp_bits[64];
+} BrotliTwoPassArena;
+
 /* Compresses "input" string to the "*storage" buffer as one or more complete
    meta-blocks, and updates the "*storage_ix" bit position.
 
@@ -36,7 +53,7 @@
    REQUIRES: "table_size" is a power of two
    OUTPUT: maximal copy distance <= |input_size|
    OUTPUT: maximal copy distance <= BROTLI_MAX_BACKWARD_LIMIT(18) */
-BROTLI_INTERNAL void BrotliCompressFragmentTwoPass(MemoryManager* m,
+BROTLI_INTERNAL void BrotliCompressFragmentTwoPass(BrotliTwoPassArena* s,
                                                    const uint8_t* input,
                                                    size_t input_size,
                                                    BROTLI_BOOL is_last,
diff --git a/c/enc/encode.c b/c/enc/encode.c
index 0c49c64..23b1d3b 100644
--- a/c/enc/encode.c
+++ b/c/enc/encode.c
@@ -95,20 +95,10 @@
   int small_table_[1 << 10];  /* 4KiB */
   int* large_table_;          /* Allocated only when needed */
   size_t large_table_size_;
-  /* Command and distance prefix codes (each 64 symbols, stored back-to-back)
-     used for the next block in FAST_ONE_PASS_COMPRESSION_QUALITY. The command
-     prefix code is over a smaller alphabet with the following 64 symbols:
-        0 - 15: insert length code 0, copy length code 0 - 15, same distance
-       16 - 39: insert length code 0, copy length code 0 - 23
-       40 - 63: insert length code 0 - 23, copy length code 0
-     Note that symbols 16 and 40 represent the same code in the full alphabet,
-     but we do not use either of them in FAST_ONE_PASS_COMPRESSION_QUALITY. */
-  uint8_t cmd_depths_[128];
-  uint16_t cmd_bits_[128];
-  /* The compressed form of the command and distance prefix codes for the next
-     block in FAST_ONE_PASS_COMPRESSION_QUALITY. */
-  uint8_t cmd_code_[512];
-  size_t cmd_code_numbits_;
+
+  BrotliOnePassArena* one_pass_arena_;
+  BrotliTwoPassArena* two_pass_arena_;
+
   /* Command and literal buffers for FAST_TWO_PASS_COMPRESSION_QUALITY. */
   uint32_t* command_buf_;
   uint8_t* literal_buf_;
@@ -283,11 +273,9 @@
   }
 }
 
+/* TODO: move to compress_fragment.c? */
 /* Initializes the command and distance prefix codes for the first block. */
-static void InitCommandPrefixCodes(uint8_t cmd_depths[128],
-                                   uint16_t cmd_bits[128],
-                                   uint8_t cmd_code[512],
-                                   size_t* cmd_code_numbits) {
+static void InitCommandPrefixCodes(BrotliOnePassArena* s) {
   static const uint8_t kDefaultCommandDepths[128] = {
     0, 4, 4, 5, 6, 6, 7, 7, 7, 7, 7, 8, 8, 8, 8, 8,
     0, 0, 0, 4, 4, 4, 4, 4, 5, 5, 6, 6, 6, 6, 7, 7,
@@ -320,13 +308,13 @@
     0x46, 0xe1, 0xb0, 0xd0, 0x4e, 0xb2, 0xf7, 0x04, 0x00,
   };
   static const size_t kDefaultCommandCodeNumBits = 448;
-  COPY_ARRAY(cmd_depths, kDefaultCommandDepths);
-  COPY_ARRAY(cmd_bits, kDefaultCommandBits);
+  COPY_ARRAY(s->cmd_depth, kDefaultCommandDepths);
+  COPY_ARRAY(s->cmd_bits, kDefaultCommandBits);
 
   /* Initialize the pre-compressed form of the command and distance prefix
      codes. */
-  COPY_ARRAY(cmd_code, kDefaultCommandCode);
-  *cmd_code_numbits = kDefaultCommandCodeNumBits;
+  COPY_ARRAY(s->cmd_code, kDefaultCommandCode);
+  s->cmd_code_numbits = kDefaultCommandCodeNumBits;
 }
 
 /* Decide about the context map based on the ability of the prediction
@@ -401,7 +389,8 @@
    first 5 bits of literals. */
 static BROTLI_BOOL ShouldUseComplexStaticContextMap(const uint8_t* input,
     size_t start_pos, size_t length, size_t mask, int quality, size_t size_hint,
-    size_t* num_literal_contexts, const uint32_t** literal_context_map) {
+    size_t* num_literal_contexts, const uint32_t** literal_context_map,
+    uint32_t* arena) {
   static const uint32_t kStaticContextMapComplexUTF8[64] = {
     11, 11, 12, 12, /* 0 special */
     0, 0, 0, 0, /* 4 lf */
@@ -426,16 +415,17 @@
     return BROTLI_FALSE;
   } else {
     const size_t end_pos = start_pos + length;
-    /* To make entropy calculations faster and to fit on the stack, we collect
-       histograms over the 5 most significant bits of literals. One histogram
+    /* To make entropy calculations faster, we collect histograms
+       over the 5 most significant bits of literals. One histogram
        without context and 13 additional histograms for each context value. */
-    uint32_t combined_histo[32] = { 0 };
-    uint32_t context_histo[13][32] = { { 0 } };
+    uint32_t* BROTLI_RESTRICT const combined_histo = arena;
+    uint32_t* BROTLI_RESTRICT const context_histo = arena + 32;
     uint32_t total = 0;
     double entropy[3];
     size_t dummy;
     size_t i;
     ContextLut utf8_lut = BROTLI_CONTEXT_LUT(CONTEXT_UTF8);
+    memset(arena, 0, sizeof(arena[0]) * 32 * 14);
     for (; start_pos + 64 <= end_pos; start_pos += 4096) {
       const size_t stride_end_pos = start_pos + 64;
       uint8_t prev2 = input[start_pos & mask];
@@ -449,7 +439,7 @@
             BROTLI_CONTEXT(prev1, prev2, utf8_lut)];
         ++total;
         ++combined_histo[literal >> 3];
-        ++context_histo[context][literal >> 3];
+        ++context_histo[(context << 5) + (literal >> 3)];
         prev2 = prev1;
         prev1 = literal;
       }
@@ -457,7 +447,7 @@
     entropy[1] = ShannonEntropy(combined_histo, 32, &dummy);
     entropy[2] = 0;
     for (i = 0; i < 13; ++i) {
-      entropy[2] += ShannonEntropy(&context_histo[i][0], 32, &dummy);
+      entropy[2] += ShannonEntropy(context_histo + (i << 5), 32, &dummy);
     }
     entropy[0] = 1.0 / (double)total;
     entropy[1] *= entropy[0];
@@ -481,19 +471,21 @@
 
 static void DecideOverLiteralContextModeling(const uint8_t* input,
     size_t start_pos, size_t length, size_t mask, int quality, size_t size_hint,
-    size_t* num_literal_contexts, const uint32_t** literal_context_map) {
+    size_t* num_literal_contexts, const uint32_t** literal_context_map,
+    uint32_t* arena) {
   if (quality < MIN_QUALITY_FOR_CONTEXT_MODELING || length < 64) {
     return;
   } else if (ShouldUseComplexStaticContextMap(
       input, start_pos, length, mask, quality, size_hint,
-      num_literal_contexts, literal_context_map)) {
+      num_literal_contexts, literal_context_map, arena)) {
     /* Context map was already set, nothing else to do. */
   } else {
     /* Gather bi-gram data of the UTF8 byte prefixes. To make the analysis of
        UTF8 data faster we only examine 64 byte long strides at every 4kB
        intervals. */
     const size_t end_pos = start_pos + length;
-    uint32_t bigram_prefix_histo[9] = { 0 };
+    uint32_t* BROTLI_RESTRICT const bigram_prefix_histo = arena;
+    memset(bigram_prefix_histo, 0, sizeof(arena[0]) * 9);
     for (; start_pos + 64 <= end_pos; start_pos += 4096) {
       static const int lut[4] = { 0, 0, 1, 2 };
       const size_t stride_end_pos = start_pos + 64;
@@ -613,10 +605,14 @@
       size_t num_literal_contexts = 1;
       const uint32_t* literal_context_map = NULL;
       if (!params->disable_literal_context_modeling) {
+        /* TODO: pull to higher level and reuse. */
+        uint32_t* arena = BROTLI_ALLOC(m, uint32_t, 14 * 32);
+        if (BROTLI_IS_OOM(m) || BROTLI_IS_NULL(arena)) return;
         DecideOverLiteralContextModeling(
             data, wrapped_last_flush_pos, bytes, mask, params->quality,
             params->size_hint, &num_literal_contexts,
-            &literal_context_map);
+            &literal_context_map, arena);
+        BROTLI_FREE(m, arena);
       }
       BrotliBuildMetaBlockGreedy(m, data, wrapped_last_flush_pos, mask,
           prev_byte, prev_byte2, literal_context_lut, num_literal_contexts,
@@ -681,12 +677,13 @@
     }
   }
 
-  BrotliInitDistanceParams(
-      params, distance_postfix_bits, num_direct_distance_codes);
+  BrotliInitDistanceParams(&params->dist, distance_postfix_bits,
+                           num_direct_distance_codes, params->large_window);
 }
 
 static BROTLI_BOOL EnsureInitialized(BrotliEncoderState* s) {
-  if (BROTLI_IS_OOM(&s->memory_manager_)) return BROTLI_FALSE;
+  MemoryManager* m = &s->memory_manager_;
+  if (BROTLI_IS_OOM(m)) return BROTLI_FALSE;
   if (s->is_initialized_) return BROTLI_TRUE;
 
   s->last_bytes_bits_ = 0;
@@ -728,8 +725,12 @@
   }
 
   if (s->params.quality == FAST_ONE_PASS_COMPRESSION_QUALITY) {
-    InitCommandPrefixCodes(s->cmd_depths_, s->cmd_bits_,
-                           s->cmd_code_, &s->cmd_code_numbits_);
+    s->one_pass_arena_ = BROTLI_ALLOC(m, BrotliOnePassArena, 1);
+    if (BROTLI_IS_OOM(m)) return BROTLI_FALSE;
+    InitCommandPrefixCodes(s->one_pass_arena_);
+  } else if (s->params.quality == FAST_TWO_PASS_COMPRESSION_QUALITY) {
+    s->two_pass_arena_ = BROTLI_ALLOC(m, BrotliTwoPassArena, 1);
+    if (BROTLI_IS_OOM(m)) return BROTLI_FALSE;
   }
 
   s->is_initialized_ = BROTLI_TRUE;
@@ -769,7 +770,8 @@
   HasherInit(&s->hasher_);
   s->large_table_ = NULL;
   s->large_table_size_ = 0;
-  s->cmd_code_numbits_ = 0;
+  s->one_pass_arena_ = NULL;
+  s->two_pass_arena_ = NULL;
   s->command_buf_ = NULL;
   s->literal_buf_ = NULL;
   s->next_out_ = NULL;
@@ -796,13 +798,9 @@
 
 BrotliEncoderState* BrotliEncoderCreateInstance(
     brotli_alloc_func alloc_func, brotli_free_func free_func, void* opaque) {
-  BrotliEncoderState* state = 0;
-  if (!alloc_func && !free_func) {
-    state = (BrotliEncoderState*)malloc(sizeof(BrotliEncoderState));
-  } else if (alloc_func && free_func) {
-    state = (BrotliEncoderState*)alloc_func(opaque, sizeof(BrotliEncoderState));
-  }
-  if (state == 0) {
+  BrotliEncoderState* state = (BrotliEncoderState*)BrotliBootstrapAlloc(
+      sizeof(BrotliEncoderState), alloc_func, free_func, opaque);
+  if (state == NULL) {
     /* BROTLI_DUMP(); */
     return 0;
   }
@@ -823,6 +821,8 @@
   RingBufferFree(m, &s->ringbuffer_);
   DestroyHasher(m, &s->hasher_);
   BROTLI_FREE(m, s->large_table_);
+  BROTLI_FREE(m, s->one_pass_arena_);
+  BROTLI_FREE(m, s->two_pass_arena_);
   BROTLI_FREE(m, s->command_buf_);
   BROTLI_FREE(m, s->literal_buf_);
 }
@@ -832,11 +832,8 @@
   if (!state) {
     return;
   } else {
-    MemoryManager* m = &state->memory_manager_;
-    brotli_free_func free_func = m->free_func;
-    void* opaque = m->opaque;
     BrotliEncoderCleanupState(state);
-    free_func(opaque, state);
+    BrotliBootstrapFree(state, &state->memory_manager_);
   }
 }
 
@@ -1012,16 +1009,14 @@
     if (BROTLI_IS_OOM(m)) return BROTLI_FALSE;
     if (s->params.quality == FAST_ONE_PASS_COMPRESSION_QUALITY) {
       BrotliCompressFragmentFast(
-          m, &data[wrapped_last_processed_pos & mask],
+          s->one_pass_arena_, &data[wrapped_last_processed_pos & mask],
           bytes, is_last,
           table, table_size,
-          s->cmd_depths_, s->cmd_bits_,
-          &s->cmd_code_numbits_, s->cmd_code_,
           &storage_ix, storage);
       if (BROTLI_IS_OOM(m)) return BROTLI_FALSE;
     } else {
       BrotliCompressFragmentTwoPass(
-          m, &data[wrapped_last_processed_pos & mask],
+          s->two_pass_arena_, &data[wrapped_last_processed_pos & mask],
           bytes, is_last,
           s->command_buf_, s->literal_buf_,
           table, table_size,
@@ -1204,8 +1199,8 @@
 static BROTLI_BOOL BrotliCompressBufferQuality10(
     int lgwin, size_t input_size, const uint8_t* input_buffer,
     size_t* encoded_size, uint8_t* encoded_buffer) {
-  MemoryManager memory_manager;
-  MemoryManager* m = &memory_manager;
+  MemoryManager* m =
+      (MemoryManager*)BrotliBootstrapAlloc(sizeof(MemoryManager), 0, 0, 0);
 
   const size_t mask = BROTLI_SIZE_MAX >> 1;
   int dist_cache[4] = { 4, 11, 15, 16 };
@@ -1219,8 +1214,6 @@
   const size_t hasher_eff_size = BROTLI_MIN(size_t,
       input_size, BROTLI_MAX_BACKWARD_LIMIT(lgwin) + BROTLI_WINDOW_GAP);
 
-  BrotliEncoderParams params;
-
   const int lgmetablock = BROTLI_MIN(int, 24, lgwin + 1);
   size_t max_block_size;
   const size_t max_metablock_size = (size_t)1 << lgmetablock;
@@ -1230,25 +1223,35 @@
   uint8_t prev_byte = 0;
   uint8_t prev_byte2 = 0;
 
-  Hasher hasher;
-  HasherInit(&hasher);
+  BrotliEncoderParams* params = NULL;
+  Hasher* hasher = NULL;
 
-  BrotliEncoderInitParams(&params);
-  params.quality = 10;
-  params.lgwin = lgwin;
-  if (lgwin > BROTLI_MAX_WINDOW_BITS) {
-    params.large_window = BROTLI_TRUE;
-  }
-  SanitizeParams(&params);
-  params.lgblock = ComputeLgBlock(&params);
-  ChooseDistanceParams(&params);
-  max_block_size = (size_t)1 << params.lgblock;
-
+  if (m == NULL) return BROTLI_FALSE;
   BrotliInitMemoryManager(m, 0, 0, 0);
+  params = BROTLI_ALLOC(m, BrotliEncoderParams, 2);
+  hasher = BROTLI_ALLOC(m, Hasher, 1);
+  if (BROTLI_IS_OOM(m) || BROTLI_IS_NULL(params) || BROTLI_IS_NULL(hasher)) {
+    goto oom;
+  }
+  BrotliEncoderInitParams(params);
+  HasherInit(hasher);
+
+  params->quality = 10;
+  params->lgwin = lgwin;
+  if (lgwin > BROTLI_MAX_WINDOW_BITS) {
+    params->large_window = BROTLI_TRUE;
+  }
+  SanitizeParams(params);
+  params->lgblock = ComputeLgBlock(params);
+  ChooseDistanceParams(params);
+  max_block_size = (size_t)1 << params->lgblock;
+
+  /* Since default static dictionary is used we assume that
+   * params->quality < params->dictionary.max_quality. */
 
   BROTLI_DCHECK(input_size <= mask + 1);
-  EncodeWindowBits(lgwin, params.large_window, &last_bytes, &last_bytes_bits);
-  InitOrStitchToPreviousBlock(m, &hasher, input_buffer, mask, &params,
+  EncodeWindowBits(lgwin, params->large_window, &last_bytes, &last_bytes_bits);
+  InitOrStitchToPreviousBlock(m, hasher, input_buffer, mask, params,
       0, hasher_eff_size, BROTLI_TRUE);
   if (BROTLI_IS_OOM(m)) goto oom;
 
@@ -1267,7 +1270,7 @@
     uint8_t* storage;
     size_t storage_ix;
 
-    ContextType literal_context_mode = ChooseContextMode(&params,
+    ContextType literal_context_mode = ChooseContextMode(params,
         input_buffer, metablock_start, mask, metablock_end - metablock_start);
     ContextLut literal_context_lut = BROTLI_CONTEXT_LUT(literal_context_mode);
 
@@ -1280,10 +1283,10 @@
       size_t new_cmd_alloc_size;
       if (BROTLI_IS_OOM(m) || BROTLI_IS_NULL(nodes)) goto oom;
       BrotliInitZopfliNodes(nodes, block_size + 1);
-      StitchToPreviousBlockH10(&hasher.privat._H10, block_size, block_start,
+      StitchToPreviousBlockH10(&hasher->privat._H10, block_size, block_start,
                                input_buffer, mask);
       path_size = BrotliZopfliComputeShortestPath(m, block_size, block_start,
-          input_buffer, mask, literal_context_lut, &params, dist_cache, &hasher,
+          input_buffer, mask, literal_context_lut, params, dist_cache, hasher,
           nodes);
       if (BROTLI_IS_OOM(m)) goto oom;
       /* We allocate a command buffer in the first iteration of this loop that
@@ -1307,7 +1310,7 @@
         commands = new_commands;
       }
       BrotliZopfliCreateCommands(block_size, block_start, &nodes[0], dist_cache,
-          &last_insert_len, &params, &commands[num_commands], &num_literals);
+          &last_insert_len, params, &commands[num_commands], &num_literals);
       num_commands += path_size;
       block_start += block_size;
       metablock_size += block_size;
@@ -1349,10 +1352,11 @@
                                        &storage_ix, storage);
     } else {
       MetaBlockSplit mb;
-      BrotliEncoderParams block_params = params;
+      BrotliEncoderParams* block_params = params + 1;
+      *block_params = *params;  /* shallow copy */
       InitMetaBlockSplit(&mb);
       BrotliBuildMetaBlock(m, input_buffer, metablock_start, mask,
-                           &block_params,
+                           block_params,
                            prev_byte, prev_byte2,
                            commands, num_commands,
                            literal_context_mode,
@@ -1362,7 +1366,7 @@
         /* The number of distance symbols effectively used for distance
            histograms. It might be less than distance alphabet size
            for "Large Window Brotli" (32-bit). */
-        BrotliOptimizeHistograms(block_params.dist.alphabet_size_limit, &mb);
+        BrotliOptimizeHistograms(block_params->dist.alphabet_size_limit, &mb);
       }
       storage = BROTLI_ALLOC(m, uint8_t, 2 * metablock_size + 503);
       if (BROTLI_IS_OOM(m) || BROTLI_IS_NULL(storage)) goto oom;
@@ -1371,7 +1375,7 @@
       BrotliStoreMetaBlock(m, input_buffer, metablock_start, metablock_size,
                            mask, prev_byte, prev_byte2,
                            is_last,
-                           &block_params,
+                           block_params,
                            literal_context_mode,
                            commands, num_commands,
                            &mb,
@@ -1415,11 +1419,15 @@
   }
 
   *encoded_size = total_out_size;
-  DestroyHasher(m, &hasher);
+  DestroyHasher(m, hasher);
+  BROTLI_FREE(m, hasher);
+  BROTLI_FREE(m, params);
+  BrotliBootstrapFree(m, m);
   return ok;
 
 oom:
   BrotliWipeOutMemoryManager(m);
+  BrotliBootstrapFree(m, m);
   return BROTLI_FALSE;
 }
 
@@ -1677,13 +1685,12 @@
       if (BROTLI_IS_OOM(m)) return BROTLI_FALSE;
 
       if (s->params.quality == FAST_ONE_PASS_COMPRESSION_QUALITY) {
-        BrotliCompressFragmentFast(m, *next_in, block_size, is_last, table,
-            table_size, s->cmd_depths_, s->cmd_bits_, &s->cmd_code_numbits_,
-            s->cmd_code_, &storage_ix, storage);
+        BrotliCompressFragmentFast(s->one_pass_arena_, *next_in, block_size,
+            is_last, table, table_size, &storage_ix, storage);
         if (BROTLI_IS_OOM(m)) return BROTLI_FALSE;
       } else {
-        BrotliCompressFragmentTwoPass(m, *next_in, block_size, is_last,
-            command_buf, literal_buf, table, table_size,
+        BrotliCompressFragmentTwoPass(s->two_pass_arena_, *next_in, block_size,
+            is_last, command_buf, literal_buf, table, table_size,
             &storage_ix, storage);
         if (BROTLI_IS_OOM(m)) return BROTLI_FALSE;
       }
diff --git a/c/enc/hash.h b/c/enc/hash.h
index afe3f1a..114b621 100644
--- a/c/enc/hash.h
+++ b/c/enc/hash.h
@@ -445,13 +445,15 @@
     size_t alloc_size[4] = {0};
     size_t i;
     ChooseHasher(params, &params->hasher);
+    hasher->common.params = params->hasher;
+    hasher->common.dict_num_lookups = 0;
+    hasher->common.dict_num_matches = 0;
     HasherSize(params, one_shot, input_size, alloc_size);
     for (i = 0; i < 4; ++i) {
       if (alloc_size[i] == 0) continue;
       hasher->common.extra[i] = BROTLI_ALLOC(m, uint8_t, alloc_size[i]);
       if (BROTLI_IS_OOM(m) || BROTLI_IS_NULL(hasher->common.extra[i])) return;
     }
-    hasher->common.params = params->hasher;
     switch (hasher->common.params.type) {
 #define INITIALIZE_(N)                        \
       case N:                                 \
@@ -479,10 +481,6 @@
 #undef PREPARE_
       default: break;
     }
-    if (position == 0) {
-      hasher->common.dict_num_lookups = 0;
-      hasher->common.dict_num_matches = 0;
-    }
     hasher->common.is_prepared_ = BROTLI_TRUE;
   }
 }
diff --git a/c/enc/hash_composite_inc.h b/c/enc/hash_composite_inc.h
index 5664e6b..0941550 100644
--- a/c/enc/hash_composite_inc.h
+++ b/c/enc/hash_composite_inc.h
@@ -82,7 +82,7 @@
   size_t alloc_size_b[4] = {0};
   FN_A(HashMemAllocInBytes)(params, one_shot, input_size, alloc_size_a);
   FN_B(HashMemAllocInBytes)(params, one_shot, input_size, alloc_size_b);
-  // Should never happen.
+  /* Should never happen. */
   if (alloc_size_a[2] != 0 || alloc_size_a[3] != 0) exit(EXIT_FAILURE);
   if (alloc_size_b[2] != 0 || alloc_size_b[3] != 0) exit(EXIT_FAILURE);
   alloc_size[0] = alloc_size_a[0];
diff --git a/c/enc/literal_cost.c b/c/enc/literal_cost.c
index c231100..f410974 100644
--- a/c/enc/literal_cost.c
+++ b/c/enc/literal_cost.c
@@ -9,6 +9,8 @@
 
 #include "./literal_cost.h"
 
+#include <string.h>  /* memset */
+
 #include "../common/platform.h"
 #include <brotli/types.h>
 #include "./fast_log.h"
@@ -54,22 +56,23 @@
 }
 
 static void EstimateBitCostsForLiteralsUTF8(size_t pos, size_t len, size_t mask,
-                                            const uint8_t* data, float* cost) {
+                                            const uint8_t* data,
+                                            size_t* histogram, float* cost) {
   /* max_utf8 is 0 (normal ASCII single byte modeling),
      1 (for 2-byte UTF-8 modeling), or 2 (for 3-byte UTF-8 modeling). */
   const size_t max_utf8 = DecideMultiByteStatsLevel(pos, len, mask, data);
-  size_t histogram[3][256] = { { 0 } };
   size_t window_half = 495;
   size_t in_window = BROTLI_MIN(size_t, window_half, len);
   size_t in_window_utf8[3] = { 0 };
-
   size_t i;
+  memset(histogram, 0, 3 * 256 * sizeof(histogram[0]));
+
   {  /* Bootstrap histograms. */
     size_t last_c = 0;
     size_t utf8_pos = 0;
     for (i = 0; i < in_window; ++i) {
       size_t c = data[(pos + i) & mask];
-      ++histogram[utf8_pos][c];
+      ++histogram[256 * utf8_pos + c];
       ++in_window_utf8[utf8_pos];
       utf8_pos = UTF8Position(last_c, c, max_utf8);
       last_c = c;
@@ -85,7 +88,7 @@
       size_t last_c =
           i < window_half + 2 ? 0 : data[(pos + i - window_half - 2) & mask];
       size_t utf8_pos2 = UTF8Position(last_c, c, max_utf8);
-      --histogram[utf8_pos2][data[(pos + i - window_half) & mask]];
+      --histogram[256 * utf8_pos2 + data[(pos + i - window_half) & mask]];
       --in_window_utf8[utf8_pos2];
     }
     if (i + window_half < len) {
@@ -93,7 +96,7 @@
       size_t c = data[(pos + i + window_half - 1) & mask];
       size_t last_c = data[(pos + i + window_half - 2) & mask];
       size_t utf8_pos2 = UTF8Position(last_c, c, max_utf8);
-      ++histogram[utf8_pos2][data[(pos + i + window_half) & mask]];
+      ++histogram[256 * utf8_pos2 + data[(pos + i + window_half) & mask]];
       ++in_window_utf8[utf8_pos2];
     }
     {
@@ -101,7 +104,7 @@
       size_t last_c = i < 2 ? 0 : data[(pos + i - 2) & mask];
       size_t utf8_pos = UTF8Position(last_c, c, max_utf8);
       size_t masked_pos = (pos + i) & mask;
-      size_t histo = histogram[utf8_pos][data[masked_pos]];
+      size_t histo = histogram[256 * utf8_pos + data[masked_pos]];
       double lit_cost;
       if (histo == 0) {
         histo = 1;
@@ -125,17 +128,18 @@
 }
 
 void BrotliEstimateBitCostsForLiterals(size_t pos, size_t len, size_t mask,
-                                       const uint8_t* data, float* cost) {
+                                       const uint8_t* data,
+                                       size_t* histogram, float* cost) {
   if (BrotliIsMostlyUTF8(data, pos, mask, len, kMinUTF8Ratio)) {
-    EstimateBitCostsForLiteralsUTF8(pos, len, mask, data, cost);
+    EstimateBitCostsForLiteralsUTF8(pos, len, mask, data, histogram, cost);
     return;
   } else {
-    size_t histogram[256] = { 0 };
     size_t window_half = 2000;
     size_t in_window = BROTLI_MIN(size_t, window_half, len);
+    size_t i;
+    memset(histogram, 0, 256 * sizeof(histogram[0]));
 
     /* Bootstrap histogram. */
-    size_t i;
     for (i = 0; i < in_window; ++i) {
       ++histogram[data[(pos + i) & mask]];
     }
diff --git a/c/enc/literal_cost.h b/c/enc/literal_cost.h
index 8f53f39..efc8e17 100644
--- a/c/enc/literal_cost.h
+++ b/c/enc/literal_cost.h
@@ -21,7 +21,8 @@
    ring-buffer (data, mask) will take entropy coded and writes these estimates
    to the cost[0..len) array. */
 BROTLI_INTERNAL void BrotliEstimateBitCostsForLiterals(
-    size_t pos, size_t len, size_t mask, const uint8_t* data, float* cost);
+    size_t pos, size_t len, size_t mask, const uint8_t* data, size_t* histogram,
+    float* cost);
 
 #if defined(__cplusplus) || defined(c_plusplus)
 }  /* extern "C" */
diff --git a/c/enc/memory.c b/c/enc/memory.c
index f6ed7e3..7663d31 100644
--- a/c/enc/memory.c
+++ b/c/enc/memory.c
@@ -165,6 +165,28 @@
 
 #endif  /* BROTLI_ENCODER_EXIT_ON_OOM */
 
+void* BrotliBootstrapAlloc(size_t size,
+    brotli_alloc_func alloc_func, brotli_free_func free_func, void* opaque) {
+  if (!alloc_func && !free_func) {
+    return malloc(size);
+  } else if (alloc_func && free_func) {
+    return alloc_func(opaque, size);
+  }
+  return NULL;
+}
+
+void BrotliBootstrapFree(void* address, MemoryManager* m) {
+  if (!address) {
+    /* Should not happen! */
+    return;
+  } else {
+    /* Copy values, as those would be freed. */
+    brotli_free_func free_func = m->free_func;
+    void* opaque = m->opaque;
+    free_func(opaque, address);
+  }
+}
+
 #if defined(__cplusplus) || defined(c_plusplus)
 }  /* extern "C" */
 #endif
diff --git a/c/enc/memory.h b/c/enc/memory.h
index 832e7b2..13b23d4 100644
--- a/c/enc/memory.h
+++ b/c/enc/memory.h
@@ -107,6 +107,12 @@
   A[(S) - 1] = (V);                                       \
 }
 
+/* "Bootstrap" allocations are not tracked by memory manager; should be used
+   only to allocate MemoryManager itself (or structure containing it). */
+BROTLI_INTERNAL void* BrotliBootstrapAlloc(size_t size,
+    brotli_alloc_func alloc_func, brotli_free_func free_func, void* opaque);
+BROTLI_INTERNAL void BrotliBootstrapFree(void* address, MemoryManager* m);
+
 #if defined(__cplusplus) || defined(c_plusplus)
 }  /* extern "C" */
 #endif
diff --git a/c/enc/metablock.c b/c/enc/metablock.c
index 5aa4d4f..18a1d93 100644
--- a/c/enc/metablock.c
+++ b/c/enc/metablock.c
@@ -25,9 +25,8 @@
 extern "C" {
 #endif
 
-void BrotliInitDistanceParams(BrotliEncoderParams* params,
-    uint32_t npostfix, uint32_t ndirect) {
-  BrotliDistanceParams* dist_params = &params->dist;
+void BrotliInitDistanceParams(BrotliDistanceParams* dist_params,
+    uint32_t npostfix, uint32_t ndirect, BROTLI_BOOL large_window) {
   uint32_t alphabet_size_max;
   uint32_t alphabet_size_limit;
   uint32_t max_distance;
@@ -41,7 +40,7 @@
   max_distance = ndirect + (1U << (BROTLI_MAX_DISTANCE_BITS + npostfix + 2)) -
       (1U << (npostfix + 2));
 
-  if (params->large_window) {
+  if (large_window) {
     BrotliDistanceCodeLimit limit = BrotliCalculateDistanceCodeLimit(
         BROTLI_MAX_ALLOWED_DISTANCE, npostfix, ndirect);
     alphabet_size_max = BROTLI_DISTANCE_ALPHABET_SIZE(
@@ -83,14 +82,14 @@
                                        size_t num_commands,
                                        const BrotliDistanceParams* orig_params,
                                        const BrotliDistanceParams* new_params,
-                                       double* cost) {
+                                       double* cost,
+                                       HistogramDistance* tmp) {
   size_t i;
   BROTLI_BOOL equal_params = BROTLI_FALSE;
   uint16_t dist_prefix;
   uint32_t dist_extra;
   double extra_bits = 0.0;
-  HistogramDistance histo;
-  HistogramClearDistance(&histo);
+  HistogramClearDistance(tmp);
 
   if (orig_params->distance_postfix_bits == new_params->distance_postfix_bits &&
       orig_params->num_direct_distance_codes ==
@@ -114,12 +113,12 @@
                                  &dist_prefix,
                                  &dist_extra);
       }
-      HistogramAddDistance(&histo, dist_prefix & 0x3FF);
+      HistogramAddDistance(tmp, dist_prefix & 0x3FF);
       extra_bits += dist_prefix >> 10;
     }
   }
 
-  *cost = BrotliPopulationCostDistance(&histo) + extra_bits;
+  *cost = BrotliPopulationCostDistance(tmp) + extra_bits;
   return BROTLI_TRUE;
 }
 
@@ -147,43 +146,46 @@
   uint32_t ndirect_msb = 0;
   BROTLI_BOOL check_orig = BROTLI_TRUE;
   double best_dist_cost = 1e99;
-  BrotliEncoderParams orig_params = *params;
-  BrotliEncoderParams new_params = *params;
+  BrotliDistanceParams orig_params = params->dist;
+  BrotliDistanceParams new_params = params->dist;
+  HistogramDistance* tmp = BROTLI_ALLOC(m, HistogramDistance, 1);
+
+  if (BROTLI_IS_OOM(m) || BROTLI_IS_NULL(tmp)) return;
 
   for (npostfix = 0; npostfix <= BROTLI_MAX_NPOSTFIX; npostfix++) {
     for (; ndirect_msb < 16; ndirect_msb++) {
       uint32_t ndirect = ndirect_msb << npostfix;
       BROTLI_BOOL skip;
       double dist_cost;
-      BrotliInitDistanceParams(&new_params, npostfix, ndirect);
-      if (npostfix == orig_params.dist.distance_postfix_bits &&
-          ndirect == orig_params.dist.num_direct_distance_codes) {
+      BrotliInitDistanceParams(&new_params, npostfix, ndirect,
+                               params->large_window);
+      if (npostfix == orig_params.distance_postfix_bits &&
+          ndirect == orig_params.num_direct_distance_codes) {
         check_orig = BROTLI_FALSE;
       }
       skip = !ComputeDistanceCost(
-          cmds, num_commands,
-          &orig_params.dist, &new_params.dist, &dist_cost);
+          cmds, num_commands, &orig_params, &new_params, &dist_cost, tmp);
       if (skip || (dist_cost > best_dist_cost)) {
         break;
       }
       best_dist_cost = dist_cost;
-      params->dist = new_params.dist;
+      params->dist = new_params;
     }
     if (ndirect_msb > 0) ndirect_msb--;
     ndirect_msb /= 2;
   }
   if (check_orig) {
     double dist_cost;
-    ComputeDistanceCost(cmds, num_commands,
-                        &orig_params.dist, &orig_params.dist, &dist_cost);
+    ComputeDistanceCost(cmds, num_commands, &orig_params, &orig_params,
+                        &dist_cost, tmp);
     if (dist_cost < best_dist_cost) {
       /* NB: currently unused; uncomment when more param tuning is added. */
       /* best_dist_cost = dist_cost; */
-      params->dist = orig_params.dist;
+      params->dist = orig_params;
     }
   }
-  RecomputeDistancePrefixes(cmds, num_commands,
-                            &orig_params.dist, &params->dist);
+  BROTLI_FREE(m, tmp);
+  RecomputeDistancePrefixes(cmds, num_commands, &orig_params, &params->dist);
 
   BrotliSplitBlock(m, cmds, num_commands,
                    ringbuffer, pos, mask, params,
@@ -535,17 +537,21 @@
   }
 }
 
-static BROTLI_INLINE void BrotliBuildMetaBlockGreedyInternal(
-    MemoryManager* m, const uint8_t* ringbuffer, size_t pos, size_t mask,
-    uint8_t prev_byte, uint8_t prev_byte2, ContextLut literal_context_lut,
-    const size_t num_contexts, const uint32_t* static_context_map,
-    const Command* commands, size_t n_commands, MetaBlockSplit* mb) {
+typedef struct GreedyMetablockArena {
   union {
     BlockSplitterLiteral plain;
     ContextBlockSplitter ctx;
   } lit_blocks;
   BlockSplitterCommand cmd_blocks;
   BlockSplitterDistance dist_blocks;
+} GreedyMetablockArena;
+
+static BROTLI_INLINE void BrotliBuildMetaBlockGreedyInternal(
+    MemoryManager* m, GreedyMetablockArena* arena, const uint8_t* ringbuffer,
+    size_t pos, size_t mask, uint8_t prev_byte, uint8_t prev_byte2,
+    ContextLut literal_context_lut, const size_t num_contexts,
+    const uint32_t* static_context_map, const Command* commands,
+    size_t n_commands, MetaBlockSplit* mb) {
   size_t num_literals = 0;
   size_t i;
   for (i = 0; i < n_commands; ++i) {
@@ -553,20 +559,20 @@
   }
 
   if (num_contexts == 1) {
-    InitBlockSplitterLiteral(m, &lit_blocks.plain, 256, 512, 400.0,
+    InitBlockSplitterLiteral(m, &arena->lit_blocks.plain, 256, 512, 400.0,
         num_literals, &mb->literal_split, &mb->literal_histograms,
         &mb->literal_histograms_size);
   } else {
-    InitContextBlockSplitter(m, &lit_blocks.ctx, 256, num_contexts, 512, 400.0,
-        num_literals, &mb->literal_split, &mb->literal_histograms,
+    InitContextBlockSplitter(m, &arena->lit_blocks.ctx, 256, num_contexts, 512,
+        400.0, num_literals, &mb->literal_split, &mb->literal_histograms,
         &mb->literal_histograms_size);
   }
   if (BROTLI_IS_OOM(m)) return;
-  InitBlockSplitterCommand(m, &cmd_blocks, BROTLI_NUM_COMMAND_SYMBOLS, 1024,
-      500.0, n_commands, &mb->command_split, &mb->command_histograms,
+  InitBlockSplitterCommand(m, &arena->cmd_blocks, BROTLI_NUM_COMMAND_SYMBOLS,
+      1024, 500.0, n_commands, &mb->command_split, &mb->command_histograms,
       &mb->command_histograms_size);
   if (BROTLI_IS_OOM(m)) return;
-  InitBlockSplitterDistance(m, &dist_blocks, 64, 512, 100.0, n_commands,
+  InitBlockSplitterDistance(m, &arena->dist_blocks, 64, 512, 100.0, n_commands,
       &mb->distance_split, &mb->distance_histograms,
       &mb->distance_histograms_size);
   if (BROTLI_IS_OOM(m)) return;
@@ -574,15 +580,15 @@
   for (i = 0; i < n_commands; ++i) {
     const Command cmd = commands[i];
     size_t j;
-    BlockSplitterAddSymbolCommand(&cmd_blocks, cmd.cmd_prefix_);
+    BlockSplitterAddSymbolCommand(&arena->cmd_blocks, cmd.cmd_prefix_);
     for (j = cmd.insert_len_; j != 0; --j) {
       uint8_t literal = ringbuffer[pos & mask];
       if (num_contexts == 1) {
-        BlockSplitterAddSymbolLiteral(&lit_blocks.plain, literal);
+        BlockSplitterAddSymbolLiteral(&arena->lit_blocks.plain, literal);
       } else {
         size_t context =
             BROTLI_CONTEXT(prev_byte, prev_byte2, literal_context_lut);
-        ContextBlockSplitterAddSymbol(&lit_blocks.ctx, m, literal,
+        ContextBlockSplitterAddSymbol(&arena->lit_blocks.ctx, m, literal,
                                       static_context_map[context]);
         if (BROTLI_IS_OOM(m)) return;
       }
@@ -595,21 +601,24 @@
       prev_byte2 = ringbuffer[(pos - 2) & mask];
       prev_byte = ringbuffer[(pos - 1) & mask];
       if (cmd.cmd_prefix_ >= 128) {
-        BlockSplitterAddSymbolDistance(&dist_blocks, cmd.dist_prefix_ & 0x3FF);
+        BlockSplitterAddSymbolDistance(
+            &arena->dist_blocks, cmd.dist_prefix_ & 0x3FF);
       }
     }
   }
 
   if (num_contexts == 1) {
     BlockSplitterFinishBlockLiteral(
-        &lit_blocks.plain, /* is_final = */ BROTLI_TRUE);
+        &arena->lit_blocks.plain, /* is_final = */ BROTLI_TRUE);
   } else {
     ContextBlockSplitterFinishBlock(
-        &lit_blocks.ctx, m, /* is_final = */ BROTLI_TRUE);
+        &arena->lit_blocks.ctx, m, /* is_final = */ BROTLI_TRUE);
     if (BROTLI_IS_OOM(m)) return;
   }
-  BlockSplitterFinishBlockCommand(&cmd_blocks, /* is_final = */ BROTLI_TRUE);
-  BlockSplitterFinishBlockDistance(&dist_blocks, /* is_final = */ BROTLI_TRUE);
+  BlockSplitterFinishBlockCommand(
+      &arena->cmd_blocks, /* is_final = */ BROTLI_TRUE);
+  BlockSplitterFinishBlockDistance(
+      &arena->dist_blocks, /* is_final = */ BROTLI_TRUE);
 
   if (num_contexts > 1) {
     MapStaticContexts(m, num_contexts, static_context_map, mb);
@@ -628,14 +637,18 @@
                                 const Command* commands,
                                 size_t n_commands,
                                 MetaBlockSplit* mb) {
+  GreedyMetablockArena* arena = BROTLI_ALLOC(m, GreedyMetablockArena, 1);
+  if (BROTLI_IS_OOM(m) || BROTLI_IS_NULL(arena)) return;
   if (num_contexts == 1) {
-    BrotliBuildMetaBlockGreedyInternal(m, ringbuffer, pos, mask, prev_byte,
-        prev_byte2, literal_context_lut, 1, NULL, commands, n_commands, mb);
+    BrotliBuildMetaBlockGreedyInternal(m, arena, ringbuffer, pos, mask,
+        prev_byte, prev_byte2, literal_context_lut, 1, NULL, commands,
+        n_commands, mb);
   } else {
-    BrotliBuildMetaBlockGreedyInternal(m, ringbuffer, pos, mask, prev_byte,
-        prev_byte2, literal_context_lut, num_contexts, static_context_map,
-        commands, n_commands, mb);
+    BrotliBuildMetaBlockGreedyInternal(m, arena, ringbuffer, pos, mask,
+        prev_byte, prev_byte2, literal_context_lut, num_contexts,
+        static_context_map, commands, n_commands, mb);
   }
+  BROTLI_FREE(m, arena);
 }
 
 void BrotliOptimizeHistograms(uint32_t num_distance_codes,
diff --git a/c/enc/metablock.h b/c/enc/metablock.h
index 334a79a..4eb40f5 100644
--- a/c/enc/metablock.h
+++ b/c/enc/metablock.h
@@ -95,8 +95,8 @@
 BROTLI_INTERNAL void BrotliOptimizeHistograms(uint32_t num_distance_codes,
                                               MetaBlockSplit* mb);
 
-BROTLI_INTERNAL void BrotliInitDistanceParams(BrotliEncoderParams* params,
-    uint32_t npostfix, uint32_t ndirect);
+BROTLI_INTERNAL void BrotliInitDistanceParams(BrotliDistanceParams* params,
+    uint32_t npostfix, uint32_t ndirect, BROTLI_BOOL large_window);
 
 #if defined(__cplusplus) || defined(c_plusplus)
 }  /* extern "C" */
diff --git a/c/enc/metablock_inc.h b/c/enc/metablock_inc.h
index ed507ef..f939386 100644
--- a/c/enc/metablock_inc.h
+++ b/c/enc/metablock_inc.h
@@ -27,6 +27,9 @@
   HistogramType* histograms_;  /* not owned */
   size_t* histograms_size_;  /* not owned */
 
+  /* Temporary storage for BlockSplitterFinishBlock. */
+  HistogramType combined_histo[2];
+
   /* The number of symbols that we want to collect before deciding on whether
      or not to merge the block with a previous one or emit a new block. */
   size_t target_block_size_;
@@ -104,17 +107,16 @@
   } else if (self->block_size_ > 0) {
     double entropy = BitsEntropy(histograms[self->curr_histogram_ix_].data_,
                                  self->alphabet_size_);
-    HistogramType combined_histo[2];
     double combined_entropy[2];
     double diff[2];
     size_t j;
     for (j = 0; j < 2; ++j) {
       size_t last_histogram_ix = self->last_histogram_ix_[j];
-      combined_histo[j] = histograms[self->curr_histogram_ix_];
-      FN(HistogramAddHistogram)(&combined_histo[j],
+      self->combined_histo[j] = histograms[self->curr_histogram_ix_];
+      FN(HistogramAddHistogram)(&self->combined_histo[j],
           &histograms[last_histogram_ix]);
       combined_entropy[j] = BitsEntropy(
-          &combined_histo[j].data_[0], self->alphabet_size_);
+          &self->combined_histo[j].data_[0], self->alphabet_size_);
       diff[j] = combined_entropy[j] - entropy - last_entropy[j];
     }
 
@@ -141,7 +143,7 @@
       split->lengths[self->num_blocks_] = (uint32_t)self->block_size_;
       split->types[self->num_blocks_] = split->types[self->num_blocks_ - 2];
       BROTLI_SWAP(size_t, self->last_histogram_ix_, 0, 1);
-      histograms[self->last_histogram_ix_[0]] = combined_histo[1];
+      histograms[self->last_histogram_ix_[0]] = self->combined_histo[1];
       last_entropy[1] = last_entropy[0];
       last_entropy[0] = combined_entropy[1];
       ++self->num_blocks_;
@@ -152,7 +154,7 @@
     } else {
       /* Combine this block with last block. */
       split->lengths[self->num_blocks_ - 1] += (uint32_t)self->block_size_;
-      histograms[self->last_histogram_ix_[0]] = combined_histo[0];
+      histograms[self->last_histogram_ix_[0]] = self->combined_histo[0];
       last_entropy[0] = combined_entropy[0];
       if (split->num_types == 1) {
         last_entropy[1] = last_entropy[0];
diff --git a/java/org/brotli/wrapper/dec/Decoder.java b/java/org/brotli/wrapper/dec/Decoder.java
index 71b77e0..018317d 100644
--- a/java/org/brotli/wrapper/dec/Decoder.java
+++ b/java/org/brotli/wrapper/dec/Decoder.java
@@ -156,6 +156,15 @@
             totalOutputSize += chunk.length;
             break;
 
+          case NEEDS_MORE_INPUT:
+            // Give decoder a chance to process the remaining of the buffered byte.
+            decoder.push(0);
+            // If decoder still needs input, this means that stream is truncated.
+            if (decoder.getStatus() == DecoderJNI.Status.NEEDS_MORE_INPUT) {
+              throw new IOException("corrupted input");
+            }
+            break;
+
           default:
             throw new IOException("corrupted input");
         }
diff --git a/js/decode_test.js b/js/decode_test.js
index cd1cbcf..bbeb541 100644
--- a/js/decode_test.js
+++ b/js/decode_test.js
@@ -20,22 +20,25 @@
  * @return {!Int8Array}
  */
 function stringToBytes(str) {
-  var out = new Int8Array(str.length);
-  for (var i = 0; i < str.length; ++i) out[i] = str.charCodeAt(i);
+  let out = new Int8Array(str.length);
+  for (let i = 0; i < str.length; ++i) out[i] = str.charCodeAt(i);
   return out;
 }
 
 testSuite({
   testMetadata() {
-    assertEquals("", bytesToString(BrotliDecode(Int8Array.from([1, 11, 0, 42, 3]))));
+    assertEquals(
+        '', bytesToString(BrotliDecode(Int8Array.from([1, 11, 0, 42, 3]))));
   },
 
   testCompoundDictionary() {
-    var txt = "kot lomom kolol slona\n";
-    var dictionary = stringToBytes(txt);
-    var compressed = [0xa1, 0xa8, 0x00, 0xc0, 0x2f, 0x01, 0x10, 0xc4, 0x44, 0x09, 0x00];
+    const txt = 'kot lomom kolol slona\n';
+    const dictionary = stringToBytes(txt);
+    const compressed =
+        [0xa1, 0xa8, 0x00, 0xc0, 0x2f, 0x01, 0x10, 0xc4, 0x44, 0x09, 0x00];
     assertEquals(txt.length, compressed.length * 2);
-    var options = {"customDictionary": dictionary};
-    assertEquals(txt, bytesToString(BrotliDecode(Int8Array.from(compressed), options)));
+    const options = {'customDictionary': dictionary};
+    assertEquals(
+        txt, bytesToString(BrotliDecode(Int8Array.from(compressed), options)));
   }
 });
diff --git a/js/polyfill.js b/js/polyfill.js
index 069b1bc..9ff59cc 100644
--- a/js/polyfill.js
+++ b/js/polyfill.js
@@ -5,8 +5,8 @@
       if (!obj['length']) {
         return new this(0);
       }
-      var typed_array = new this(obj.length);
-      for(var i = 0; i < typed_array.length; i++) {
+      let typed_array = new this(obj.length);
+      for (let i = 0; i < typed_array.length; i++) {
         typed_array[i] = obj[i];
       }
       return typed_array;
@@ -16,12 +16,12 @@
 
 if (!Array.prototype.copyWithin) {
   Array.prototype.copyWithin = function(target, start, end) {
-    var O = Object(this);
-    var len = O.length >>> 0;
-    var to = target | 0;
-    var from = start | 0;
-    var count = Math.min(Math.min(end | 0, len) - from, len - to);
-    var direction = 1;
+    let O = Object(this);
+    let len = O.length >>> 0;
+    let to = target | 0;
+    let from = start | 0;
+    let count = Math.min(Math.min(end | 0, len) - from, len - to);
+    let direction = 1;
     if (from < to && to < (from + count)) {
       direction = -1;
       from += count - 1;
@@ -41,8 +41,8 @@
   Object.defineProperty(Array.prototype, 'fill', {
     value: function(value, start, end) {
       end = end | 0;
-      var O = Object(this);
-      var k = start | 0;
+      let O = Object(this);
+      let k = start | 0;
       while (k < end) {
         O[k] = value;
         k++;
@@ -66,9 +66,8 @@
 
 if (!Int8Array.prototype.slice) {
   Object.defineProperty(Int8Array.prototype, 'slice', {
-    value: function (begin, end)
-     {
-        return new Int8Array(Array.prototype.slice.call(this, begin, end));
-     }
+    value: function(begin, end) {
+      return new Int8Array(Array.prototype.slice.call(this, begin, end));
+    }
   });
 }