Introduce std/bzip2 decoder.flush_fast method
Compared to the recent commit 623290ef "Cache std/bzip2 Huffman tree
lookup":
name                             old speed      new speed      delta
wuffs_bzip2_decode_10k/clang11   60.5MB/s ± 0%  60.5MB/s ± 0%    ~     (p=1.000 n=5+5)
wuffs_bzip2_decode_100k/clang11  46.7MB/s ± 1%  45.7MB/s ± 1%  -2.05%  (p=0.008 n=5+5)
wuffs_bzip2_decode_10k/gcc10     58.7MB/s ± 0%  58.5MB/s ± 0%    ~     (p=0.222 n=5+5)
wuffs_bzip2_decode_100k/gcc10    46.9MB/s ± 0%  46.3MB/s ± 0%  -1.46%  (p=0.008 n=5+5)
diff --git a/release/c/wuffs-unsupported-snapshot.c b/release/c/wuffs-unsupported-snapshot.c
index 381e4b5..b5df137 100644
--- a/release/c/wuffs-unsupported-snapshot.c
+++ b/release/c/wuffs-unsupported-snapshot.c
@@ -6941,7 +6941,11 @@
uint32_t f_decode_huffman_ticks;
uint32_t f_decode_huffman_section;
uint32_t f_decode_huffman_run_shift;
+ uint32_t f_flush_pointer;
+ uint32_t f_flush_repeat_count;
+ uint8_t f_flush_prev;
uint32_t f_final_checksum_have;
+ uint32_t f_block_checksum_have;
uint32_t f_block_checksum_want;
uint32_t f_original_pointer;
uint32_t f_num_symbols;
@@ -6952,7 +6956,7 @@
uint32_t p_transform_io[1];
uint32_t p_prepare_block[1];
uint32_t p_read_code_lengths[1];
- uint32_t p_flush_block[1];
+ uint32_t p_flush_slow[1];
uint32_t p_decode_huffman_slow[1];
} private_impl;
@@ -6980,14 +6984,14 @@
uint32_t v_code_length;
} s_read_code_lengths[1];
struct {
- uint32_t v_i;
- uint32_t v_n;
- uint32_t v_repeat_count;
+ uint32_t v_flush_pointer;
+ uint32_t v_flush_repeat_count;
+ uint8_t v_flush_prev;
uint32_t v_block_checksum_have;
- uint8_t v_prev;
+ uint32_t v_block_size;
uint8_t v_curr;
uint64_t scratch;
- } s_flush_block[1];
+ } s_flush_slow[1];
struct {
uint32_t v_node_index;
} s_decode_huffman_slow[1];
@@ -24938,8 +24942,13 @@
wuffs_bzip2__decoder__invert_bwt(
wuffs_bzip2__decoder* self);
+static wuffs_base__empty_struct
+wuffs_bzip2__decoder__flush_fast(
+ wuffs_bzip2__decoder* self,
+ wuffs_base__io_buffer* a_dst);
+
static wuffs_base__status
-wuffs_bzip2__decoder__flush_block(
+wuffs_bzip2__decoder__flush_slow(
wuffs_bzip2__decoder* self,
wuffs_base__io_buffer* a_dst);
@@ -25246,11 +25255,32 @@
}
label__1__break:;
wuffs_bzip2__decoder__invert_bwt(self);
- WUFFS_BASE__COROUTINE_SUSPENSION_POINT(8);
- status = wuffs_bzip2__decoder__flush_block(self, a_dst);
- if (status.repr) {
- goto suspend;
+ self->private_impl.f_block_checksum_have = 4294967295;
+ if (self->private_impl.f_original_pointer >= self->private_impl.f_block_size) {
+ status = wuffs_base__make_status(wuffs_bzip2__error__bad_block_length);
+ goto exit;
}
+ self->private_impl.f_flush_pointer = (self->private_data.f_bwt[self->private_impl.f_original_pointer] >> 12);
+ self->private_impl.f_flush_repeat_count = 0;
+ self->private_impl.f_flush_prev = 0;
+ while (self->private_impl.f_block_size > 0) {
+ wuffs_bzip2__decoder__flush_fast(self, a_dst);
+ if (self->private_impl.f_block_size <= 0) {
+ goto label__2__break;
+ }
+ WUFFS_BASE__COROUTINE_SUSPENSION_POINT(8);
+ status = wuffs_bzip2__decoder__flush_slow(self, a_dst);
+ if (status.repr) {
+ goto suspend;
+ }
+ }
+ label__2__break:;
+ self->private_impl.f_block_checksum_have ^= 4294967295;
+ if (self->private_impl.f_block_checksum_have != self->private_impl.f_block_checksum_want) {
+ status = wuffs_base__make_status(wuffs_bzip2__error__bad_checksum);
+ goto exit;
+ }
+ self->private_impl.f_final_checksum_have = (self->private_impl.f_block_checksum_have ^ ((self->private_impl.f_final_checksum_have >> 31) | ((uint32_t)(self->private_impl.f_final_checksum_have << 1))));
}
label__0__break:;
v_final_checksum_want = 0;
@@ -25903,20 +25933,18 @@
return wuffs_base__make_empty_struct();
}
-// -------- func bzip2.decoder.flush_block
+// -------- func bzip2.decoder.flush_fast
-static wuffs_base__status
-wuffs_bzip2__decoder__flush_block(
+static wuffs_base__empty_struct
+wuffs_bzip2__decoder__flush_fast(
wuffs_bzip2__decoder* self,
wuffs_base__io_buffer* a_dst) {
- wuffs_base__status status = wuffs_base__make_status(NULL);
-
- uint32_t v_i = 0;
- uint32_t v_n = 0;
- uint32_t v_entry = 0;
- uint32_t v_repeat_count = 0;
+ uint32_t v_flush_pointer = 0;
+ uint32_t v_flush_repeat_count = 0;
+ uint8_t v_flush_prev = 0;
uint32_t v_block_checksum_have = 0;
- uint8_t v_prev = 0;
+ uint32_t v_block_size = 0;
+ uint32_t v_entry = 0;
uint8_t v_curr = 0;
uint8_t* iop_a_dst = NULL;
@@ -25933,89 +25961,163 @@
}
}
- uint32_t coro_susp_point = self->private_impl.p_flush_block[0];
+ v_flush_pointer = self->private_impl.f_flush_pointer;
+ v_flush_repeat_count = self->private_impl.f_flush_repeat_count;
+ v_flush_prev = self->private_impl.f_flush_prev;
+ v_block_checksum_have = self->private_impl.f_block_checksum_have;
+ v_block_size = self->private_impl.f_block_size;
+ while ((v_block_size > 0) && (((uint64_t)(io2_a_dst - iop_a_dst)) > 255)) {
+ v_entry = self->private_data.f_bwt[v_flush_pointer];
+ v_curr = ((uint8_t)((v_entry & 255)));
+ v_flush_pointer = (v_entry >> 12);
+ if (v_flush_repeat_count >= 4) {
+ v_flush_repeat_count = ((uint32_t)(v_curr));
+ while (v_flush_repeat_count > 0) {
+ v_block_checksum_have = (WUFFS_BZIP2__REV_CRC32_TABLE[(((uint8_t)((v_block_checksum_have >> 24))) ^ v_flush_prev)] ^ ((uint32_t)(v_block_checksum_have << 8)));
+ if (((uint64_t)(io2_a_dst - iop_a_dst)) > 0) {
+ (wuffs_base__poke_u8be__no_bounds_check(iop_a_dst, v_flush_prev), iop_a_dst += 1);
+ }
+ v_flush_repeat_count -= 1;
+ }
+ v_flush_repeat_count = 0;
+ } else if (v_curr != v_flush_prev) {
+ v_flush_repeat_count = 1;
+ v_block_checksum_have = (WUFFS_BZIP2__REV_CRC32_TABLE[(((uint8_t)((v_block_checksum_have >> 24))) ^ v_curr)] ^ ((uint32_t)(v_block_checksum_have << 8)));
+ (wuffs_base__poke_u8be__no_bounds_check(iop_a_dst, v_curr), iop_a_dst += 1);
+ } else {
+ v_flush_repeat_count += 1;
+ v_block_checksum_have = (WUFFS_BZIP2__REV_CRC32_TABLE[(((uint8_t)((v_block_checksum_have >> 24))) ^ v_curr)] ^ ((uint32_t)(v_block_checksum_have << 8)));
+ (wuffs_base__poke_u8be__no_bounds_check(iop_a_dst, v_curr), iop_a_dst += 1);
+ }
+ v_flush_prev = v_curr;
+ v_block_size -= 1;
+ }
+ self->private_impl.f_flush_pointer = v_flush_pointer;
+ self->private_impl.f_flush_repeat_count = v_flush_repeat_count;
+ self->private_impl.f_flush_prev = v_flush_prev;
+ self->private_impl.f_block_checksum_have = v_block_checksum_have;
+ if (v_block_size <= 900000) {
+ self->private_impl.f_block_size = v_block_size;
+ }
+ if (a_dst) {
+ a_dst->meta.wi = ((size_t)(iop_a_dst - a_dst->data.ptr));
+ }
+
+ return wuffs_base__make_empty_struct();
+}
+
+// -------- func bzip2.decoder.flush_slow
+
+static wuffs_base__status
+wuffs_bzip2__decoder__flush_slow(
+ wuffs_bzip2__decoder* self,
+ wuffs_base__io_buffer* a_dst) {
+ wuffs_base__status status = wuffs_base__make_status(NULL);
+
+ uint32_t v_flush_pointer = 0;
+ uint32_t v_flush_repeat_count = 0;
+ uint8_t v_flush_prev = 0;
+ uint32_t v_block_checksum_have = 0;
+ uint32_t v_block_size = 0;
+ uint32_t v_entry = 0;
+ uint8_t v_curr = 0;
+
+ uint8_t* iop_a_dst = NULL;
+ uint8_t* io0_a_dst WUFFS_BASE__POTENTIALLY_UNUSED = NULL;
+ uint8_t* io1_a_dst WUFFS_BASE__POTENTIALLY_UNUSED = NULL;
+ uint8_t* io2_a_dst WUFFS_BASE__POTENTIALLY_UNUSED = NULL;
+ if (a_dst) {
+ io0_a_dst = a_dst->data.ptr;
+ io1_a_dst = io0_a_dst + a_dst->meta.wi;
+ iop_a_dst = io1_a_dst;
+ io2_a_dst = io0_a_dst + a_dst->data.len;
+ if (a_dst->meta.closed) {
+ io2_a_dst = iop_a_dst;
+ }
+ }
+
+ uint32_t coro_susp_point = self->private_impl.p_flush_slow[0];
if (coro_susp_point) {
- v_i = self->private_data.s_flush_block[0].v_i;
- v_n = self->private_data.s_flush_block[0].v_n;
- v_repeat_count = self->private_data.s_flush_block[0].v_repeat_count;
- v_block_checksum_have = self->private_data.s_flush_block[0].v_block_checksum_have;
- v_prev = self->private_data.s_flush_block[0].v_prev;
- v_curr = self->private_data.s_flush_block[0].v_curr;
+ v_flush_pointer = self->private_data.s_flush_slow[0].v_flush_pointer;
+ v_flush_repeat_count = self->private_data.s_flush_slow[0].v_flush_repeat_count;
+ v_flush_prev = self->private_data.s_flush_slow[0].v_flush_prev;
+ v_block_checksum_have = self->private_data.s_flush_slow[0].v_block_checksum_have;
+ v_block_size = self->private_data.s_flush_slow[0].v_block_size;
+ v_curr = self->private_data.s_flush_slow[0].v_curr;
}
switch (coro_susp_point) {
WUFFS_BASE__COROUTINE_SUSPENSION_POINT_0;
- if (self->private_impl.f_original_pointer >= self->private_impl.f_block_size) {
- status = wuffs_base__make_status(wuffs_bzip2__error__bad_block_length);
- goto exit;
- }
- v_i = (self->private_data.f_bwt[self->private_impl.f_original_pointer] >> 12);
- v_block_checksum_have = 4294967295;
- v_n = 0;
- while (v_n < self->private_impl.f_block_size) {
- v_entry = self->private_data.f_bwt[v_i];
+ v_flush_pointer = self->private_impl.f_flush_pointer;
+ v_flush_repeat_count = self->private_impl.f_flush_repeat_count;
+ v_flush_prev = self->private_impl.f_flush_prev;
+ v_block_checksum_have = self->private_impl.f_block_checksum_have;
+ v_block_size = self->private_impl.f_block_size;
+ while ((v_block_size > 0) && ! (self->private_impl.p_flush_slow[0] != 0)) {
+ v_entry = self->private_data.f_bwt[v_flush_pointer];
v_curr = ((uint8_t)((v_entry & 255)));
- v_i = (v_entry >> 12);
- if (v_repeat_count >= 4) {
- v_repeat_count = ((uint32_t)(v_curr));
- while (v_repeat_count > 0) {
- v_block_checksum_have = (WUFFS_BZIP2__REV_CRC32_TABLE[(((uint8_t)((v_block_checksum_have >> 24))) ^ v_prev)] ^ ((uint32_t)(v_block_checksum_have << 8)));
- self->private_data.s_flush_block[0].scratch = v_prev;
+ v_flush_pointer = (v_entry >> 12);
+ if (v_flush_repeat_count >= 4) {
+ v_flush_repeat_count = ((uint32_t)(v_curr));
+ while (v_flush_repeat_count > 0) {
+ v_block_checksum_have = (WUFFS_BZIP2__REV_CRC32_TABLE[(((uint8_t)((v_block_checksum_have >> 24))) ^ v_flush_prev)] ^ ((uint32_t)(v_block_checksum_have << 8)));
+ self->private_data.s_flush_slow[0].scratch = v_flush_prev;
WUFFS_BASE__COROUTINE_SUSPENSION_POINT(1);
if (iop_a_dst == io2_a_dst) {
status = wuffs_base__make_status(wuffs_base__suspension__short_write);
goto suspend;
}
- *iop_a_dst++ = ((uint8_t)(self->private_data.s_flush_block[0].scratch));
- v_repeat_count -= 1;
+ *iop_a_dst++ = ((uint8_t)(self->private_data.s_flush_slow[0].scratch));
+ v_flush_repeat_count -= 1;
}
- v_repeat_count = 0;
- } else if (v_curr != v_prev) {
- v_repeat_count = 1;
+ v_flush_repeat_count = 0;
+ } else if (v_curr != v_flush_prev) {
+ v_flush_repeat_count = 1;
v_block_checksum_have = (WUFFS_BZIP2__REV_CRC32_TABLE[(((uint8_t)((v_block_checksum_have >> 24))) ^ v_curr)] ^ ((uint32_t)(v_block_checksum_have << 8)));
- self->private_data.s_flush_block[0].scratch = v_curr;
+ self->private_data.s_flush_slow[0].scratch = v_curr;
WUFFS_BASE__COROUTINE_SUSPENSION_POINT(2);
if (iop_a_dst == io2_a_dst) {
status = wuffs_base__make_status(wuffs_base__suspension__short_write);
goto suspend;
}
- *iop_a_dst++ = ((uint8_t)(self->private_data.s_flush_block[0].scratch));
+ *iop_a_dst++ = ((uint8_t)(self->private_data.s_flush_slow[0].scratch));
} else {
- v_repeat_count += 1;
+ v_flush_repeat_count += 1;
v_block_checksum_have = (WUFFS_BZIP2__REV_CRC32_TABLE[(((uint8_t)((v_block_checksum_have >> 24))) ^ v_curr)] ^ ((uint32_t)(v_block_checksum_have << 8)));
- self->private_data.s_flush_block[0].scratch = v_curr;
+ self->private_data.s_flush_slow[0].scratch = v_curr;
WUFFS_BASE__COROUTINE_SUSPENSION_POINT(3);
if (iop_a_dst == io2_a_dst) {
status = wuffs_base__make_status(wuffs_base__suspension__short_write);
goto suspend;
}
- *iop_a_dst++ = ((uint8_t)(self->private_data.s_flush_block[0].scratch));
+ *iop_a_dst++ = ((uint8_t)(self->private_data.s_flush_slow[0].scratch));
}
- v_prev = v_curr;
- v_n += 1;
+ v_flush_prev = v_curr;
+ v_block_size -= 1;
}
- v_block_checksum_have ^= 4294967295;
- if (v_block_checksum_have != self->private_impl.f_block_checksum_want) {
- status = wuffs_base__make_status(wuffs_bzip2__error__bad_checksum);
- goto exit;
+ self->private_impl.f_flush_pointer = v_flush_pointer;
+ self->private_impl.f_flush_repeat_count = v_flush_repeat_count;
+ self->private_impl.f_flush_prev = v_flush_prev;
+ self->private_impl.f_block_checksum_have = v_block_checksum_have;
+ if (v_block_size <= 900000) {
+ self->private_impl.f_block_size = v_block_size;
}
- self->private_impl.f_final_checksum_have = (v_block_checksum_have ^ ((self->private_impl.f_final_checksum_have >> 31) | ((uint32_t)(self->private_impl.f_final_checksum_have << 1))));
goto ok;
ok:
- self->private_impl.p_flush_block[0] = 0;
+ self->private_impl.p_flush_slow[0] = 0;
goto exit;
}
goto suspend;
suspend:
- self->private_impl.p_flush_block[0] = wuffs_base__status__is_suspension(&status) ? coro_susp_point : 0;
- self->private_data.s_flush_block[0].v_i = v_i;
- self->private_data.s_flush_block[0].v_n = v_n;
- self->private_data.s_flush_block[0].v_repeat_count = v_repeat_count;
- self->private_data.s_flush_block[0].v_block_checksum_have = v_block_checksum_have;
- self->private_data.s_flush_block[0].v_prev = v_prev;
- self->private_data.s_flush_block[0].v_curr = v_curr;
+ self->private_impl.p_flush_slow[0] = wuffs_base__status__is_suspension(&status) ? coro_susp_point : 0;
+ self->private_data.s_flush_slow[0].v_flush_pointer = v_flush_pointer;
+ self->private_data.s_flush_slow[0].v_flush_repeat_count = v_flush_repeat_count;
+ self->private_data.s_flush_slow[0].v_flush_prev = v_flush_prev;
+ self->private_data.s_flush_slow[0].v_block_checksum_have = v_block_checksum_have;
+ self->private_data.s_flush_slow[0].v_block_size = v_block_size;
+ self->private_data.s_flush_slow[0].v_curr = v_curr;
goto exit;
exit:
diff --git a/std/bzip2/decode_bzip2.wuffs b/std/bzip2/decode_bzip2.wuffs
index 64350c6..889c4dd 100644
--- a/std/bzip2/decode_bzip2.wuffs
+++ b/std/bzip2/decode_bzip2.wuffs
@@ -43,7 +43,12 @@
decode_huffman_section : base.u32,
decode_huffman_run_shift : base.u32[..= 23],
+ flush_pointer : base.u32[..= 1_048575],
+ flush_repeat_count : base.u32[..= 255],
+ flush_prev : base.u8,
+
final_checksum_have : base.u32,
+ block_checksum_have : base.u32,
block_checksum_want : base.u32,
original_pointer : base.u32,
num_symbols : base.u32[..= 258],
@@ -174,7 +179,31 @@
} endwhile
this.invert_bwt!()
- this.flush_block?(dst: args.dst)
+
+ this.block_checksum_have = 0xFFFF_FFFF
+ if this.original_pointer >= this.block_size {
+ return "#bad block length"
+ }
+ assert this.original_pointer < 900000 via "a < b: a < c; c <= b"(c: this.block_size)
+ this.flush_pointer = this.bwt[this.original_pointer] >> 12
+ this.flush_repeat_count = 0
+ this.flush_prev = 0
+
+ while this.block_size > 0 {
+ this.flush_fast!(dst: args.dst)
+ if this.block_size <= 0 {
+ break
+ }
+ this.flush_slow?(dst: args.dst)
+ } endwhile
+
+ this.block_checksum_have ^= 0xFFFF_FFFF
+ if this.block_checksum_have <> this.block_checksum_want {
+ return "#bad checksum"
+ }
+ this.final_checksum_have = this.block_checksum_have ^ (
+ (this.final_checksum_have >> 31) |
+ (this.final_checksum_have ~mod<< 1))
} endwhile
// Read the 32-bit final checksum.
diff --git a/std/bzip2/decode_flush_fast.wuffs b/std/bzip2/decode_flush_fast.wuffs
new file mode 100644
index 0000000..a12dfa0
--- /dev/null
+++ b/std/bzip2/decode_flush_fast.wuffs
@@ -0,0 +1,75 @@
+// Copyright 2022 The Wuffs Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// https://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+pri func decoder.flush_fast!(dst: base.io_writer) {  // Non-suspending flush: emits block bytes to dst until block_size reaches 0 or dst has 255 or fewer bytes of room.
+ var flush_pointer : base.u32[..= 1_048575]
+ var flush_repeat_count : base.u32[..= 255]
+ var flush_prev : base.u8
+ var block_checksum_have : base.u32
+ var block_size : base.u32
+
+ var entry : base.u32
+ var curr : base.u8
+ // Work on local copies of the decoder's flush state; they are written back after the loop.
+ flush_pointer = this.flush_pointer
+ flush_repeat_count = this.flush_repeat_count
+ flush_prev = this.flush_prev
+ block_checksum_have = this.block_checksum_have
+ block_size = this.block_size
+
+ while (block_size > 0) and (args.dst.length() > 255) {  // One iteration writes at most 255 bytes (curr is a u8), so > 255 free bytes always suffices.
+ entry = this.bwt[flush_pointer]  // Packed entry: the low 8 bits are the byte value, bits 12 and up index the next entry.
+ curr = (entry & 0xFF) as base.u8
+ flush_pointer = entry >> 12
+
+ if flush_repeat_count >= 4 {  // After 4 equal bytes in a row, curr is a count of further copies of flush_prev, not a literal byte.
+ flush_repeat_count = curr as base.u32
+ while flush_repeat_count > 0,
+ inv block_size > 0,
+ {
+ block_checksum_have =
+ REV_CRC32_TABLE[((block_checksum_have >> 24) as base.u8) ^ flush_prev] ^
+ (block_checksum_have ~mod<< 8)
+ if args.dst.length() > 0 {  // NOTE(review): always true given the outer "> 255" check; presumably kept so the compiler can prove the fast write is in bounds - confirm.
+ args.dst.write_u8_fast!(a: flush_prev)
+ }
+ flush_repeat_count -= 1
+ } endwhile
+ flush_repeat_count = 0  // The run is finished, so streak counting starts afresh.
+ } else if curr <> flush_prev {  // A byte that differs from the previous one starts a new streak of length 1.
+ flush_repeat_count = 1
+ block_checksum_have =
+ REV_CRC32_TABLE[((block_checksum_have >> 24) as base.u8) ^ curr] ^
+ (block_checksum_have ~mod<< 8)
+ args.dst.write_u8_fast!(a: curr)
+ } else {  // Same byte as the previous one: extend the current streak.
+ flush_repeat_count += 1
+ block_checksum_have =
+ REV_CRC32_TABLE[((block_checksum_have >> 24) as base.u8) ^ curr] ^
+ (block_checksum_have ~mod<< 8)
+ args.dst.write_u8_fast!(a: curr)
+ }
+
+ flush_prev = curr
+ block_size -= 1  // One bwt entry consumed, regardless of how many bytes it produced.
+ } endwhile
+
+ this.flush_pointer = flush_pointer
+ this.flush_repeat_count = flush_repeat_count
+ this.flush_prev = flush_prev
+ this.block_checksum_have = block_checksum_have
+ if block_size <= 900000 {  // NOTE(review): presumably satisfies a range bound on this.block_size declared elsewhere - confirm.
+ this.block_size = block_size
+ }
+}
diff --git a/std/bzip2/decode_flush_slow.wuffs b/std/bzip2/decode_flush_slow.wuffs
index 33b21d2..a0674c2 100644
--- a/std/bzip2/decode_flush_slow.wuffs
+++ b/std/bzip2/decode_flush_slow.wuffs
@@ -12,65 +12,62 @@
// See the License for the specific language governing permissions and
// limitations under the License.
-pri func decoder.flush_block?(dst: base.io_writer) {
- var i : base.u32[..= 1_048575]
- var n : base.u32
- var entry : base.u32
- var repeat_count : base.u32[..= 255]
+pri func decoder.flush_slow?(dst: base.io_writer) {
+ var flush_pointer : base.u32[..= 1_048575]
+ var flush_repeat_count : base.u32[..= 255]
+ var flush_prev : base.u8
var block_checksum_have : base.u32
- var prev : base.u8
- var curr : base.u8
+ var block_size : base.u32
- if this.original_pointer >= this.block_size {
- return "#bad block length"
- }
- assert this.original_pointer < 900000 via "a < b: a < c; c <= b"(c: this.block_size)
- i = this.bwt[this.original_pointer] >> 12
+ var entry : base.u32
+ var curr : base.u8
- block_checksum_have = 0xFFFF_FFFF
+ flush_pointer = this.flush_pointer
+ flush_repeat_count = this.flush_repeat_count
+ flush_prev = this.flush_prev
+ block_checksum_have = this.block_checksum_have
+ block_size = this.block_size
- n = 0
- while n < this.block_size {
- assert n < 900000 via "a < b: a < c; c <= b"(c: this.block_size)
- entry = this.bwt[i]
+ while (block_size > 0) and (not coroutine_resumed) {
+ entry = this.bwt[flush_pointer]
curr = (entry & 0xFF) as base.u8
- i = entry >> 12
+ flush_pointer = entry >> 12
- if repeat_count >= 4 {
- repeat_count = curr as base.u32
- while repeat_count > 0,
- inv n < 900000,
+ if flush_repeat_count >= 4 {
+ flush_repeat_count = curr as base.u32
+ while flush_repeat_count > 0,
+ inv block_size > 0,
{
block_checksum_have =
- REV_CRC32_TABLE[((block_checksum_have >> 24) as base.u8) ^ prev] ^
+ REV_CRC32_TABLE[((block_checksum_have >> 24) as base.u8) ^ flush_prev] ^
(block_checksum_have ~mod<< 8)
- args.dst.write_u8?(a: prev)
- repeat_count -= 1
+ args.dst.write_u8?(a: flush_prev)
+ flush_repeat_count -= 1
} endwhile
- repeat_count = 0
- } else if curr <> prev {
- repeat_count = 1
+ flush_repeat_count = 0
+ } else if curr <> flush_prev {
+ flush_repeat_count = 1
block_checksum_have =
REV_CRC32_TABLE[((block_checksum_have >> 24) as base.u8) ^ curr] ^
(block_checksum_have ~mod<< 8)
args.dst.write_u8?(a: curr)
} else {
- repeat_count += 1
+ flush_repeat_count += 1
block_checksum_have =
REV_CRC32_TABLE[((block_checksum_have >> 24) as base.u8) ^ curr] ^
(block_checksum_have ~mod<< 8)
args.dst.write_u8?(a: curr)
}
- prev = curr
- n += 1
+ flush_prev = curr
+ block_size -= 1
} endwhile
- block_checksum_have ^= 0xFFFF_FFFF
- if block_checksum_have <> this.block_checksum_want {
- return "#bad checksum"
+ this.flush_pointer = flush_pointer
+ this.flush_repeat_count = flush_repeat_count
+ this.flush_prev = flush_prev
+ this.block_checksum_have = block_checksum_have
+ if block_size <= 900000 {
+ this.block_size = block_size
}
- this.final_checksum_have = block_checksum_have ^ (
- (this.final_checksum_have >> 31) |
- (this.final_checksum_have ~mod<< 1))
}