| // Copyright 2023 The Wuffs Authors. |
| // |
| // Licensed under the Apache License, Version 2.0 (the "License"); |
| // you may not use this file except in compliance with the License. |
| // You may obtain a copy of the License at |
| // |
| // https://www.apache.org/licenses/LICENSE-2.0 |
| // |
| // Unless required by applicable law or agreed to in writing, software |
| // distributed under the License is distributed on an "AS IS" BASIS, |
| // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| // See the License for the specific language governing permissions and |
| // limitations under the License. |
| |
| pri func decoder.decode_idct!(dst_buffer: slice base.u8, dst_stride: base.u64, b: base.u32[..= 9], q: base.u32[..= 3]) { |
| // This method implements the same algorithm as libjpeg-turbo's jidctint.c. |
| |
| var bq0 : base.u32 |
| var bq2 : base.u32 |
| var bq4 : base.u32 |
| var bq6 : base.u32 |
| |
| var ca : base.u32 |
| |
| var cb2 : base.u32 |
| var cb6 : base.u32 |
| |
| var ccp : base.u32 |
| var ccm : base.u32 |
| |
| var cd0 : base.u32 |
| var cd1 : base.u32 |
| var cd2 : base.u32 |
| var cd3 : base.u32 |
| |
| var bq1 : base.u32 |
| var bq3 : base.u32 |
| var bq5 : base.u32 |
| var bq7 : base.u32 |
| |
| var ci51 : base.u32 |
| var ci53 : base.u32 |
| var ci71 : base.u32 |
| var ci73 : base.u32 |
| |
| var cj : base.u32 |
| |
| var ck1 : base.u32 |
| var ck3 : base.u32 |
| var ck5 : base.u32 |
| var ck7 : base.u32 |
| |
| var cl51 : base.u32 |
| var cl73 : base.u32 |
| |
| var in0 : base.u32 |
| var in2 : base.u32 |
| var in4 : base.u32 |
| var in6 : base.u32 |
| |
| var ra : base.u32 |
| |
| var rb2 : base.u32 |
| var rb6 : base.u32 |
| |
| var rcp : base.u32 |
| var rcm : base.u32 |
| |
| var rd0 : base.u32 |
| var rd1 : base.u32 |
| var rd2 : base.u32 |
| var rd3 : base.u32 |
| |
| var in1 : base.u32 |
| var in3 : base.u32 |
| var in5 : base.u32 |
| var in7 : base.u32 |
| |
| var ri51 : base.u32 |
| var ri53 : base.u32 |
| var ri71 : base.u32 |
| var ri73 : base.u32 |
| |
| var rj : base.u32 |
| |
| var rk1 : base.u32 |
| var rk3 : base.u32 |
| var rk5 : base.u32 |
| var rk7 : base.u32 |
| |
| var rl51 : base.u32 |
| var rl73 : base.u32 |
| |
| var intermediate : array[64] base.u32 |
| |
| if 8 > args.dst_stride { |
| return nothing |
| } |
| |
| // -------- BEGIN generated by script/print-jpeg-idct-code.go |
| |
| // p0_298631336 = 0x098E = 2446 |
| // p0_390180644 = 0x0C7C = 3196 |
| // p0_541196100 = 0x1151 = 4433 |
| // p0_765366865 = 0x187E = 6270 |
| // p0_899976223 = 0x1CCD = 7373 |
| // p1_175875602 = 0x25A1 = 9633 |
| // p1_501321110 = 0x300B = 12299 |
| // p1_847759065 = 0x3B21 = 15137 |
| // p1_961570560 = 0x3EC5 = 16069 |
| // p2_053119869 = 0x41B3 = 16819 |
| // p2_562915447 = 0x5203 = 20995 |
| // p3_072711026 = 0x6254 = 25172 |
| // |
| // m0_390180644 = 0xFFFFF384 = 4294964100 |
| // m0_899976223 = 0xFFFFE333 = 4294959923 |
| // m1_961570560 = 0xFFFFC13B = 4294951227 |
| // m2_562915447 = 0xFFFFADFD = 4294946301 |
| |
| // ==== First pass, column 0. |
| |
| // Even rows. |
| |
| bq2 = this.util.sign_extend_convert_u16_u32(a: this.mcu_blocks[args.b][0x10]) ~mod* (this.quant_tables[args.q][0x10] as base.u32) |
| bq6 = this.util.sign_extend_convert_u16_u32(a: this.mcu_blocks[args.b][0x30]) ~mod* (this.quant_tables[args.q][0x30] as base.u32) |
| |
| ca = (bq2 ~mod+ bq6) ~mod* 0x1151 |
| |
| cb2 = ca ~mod+ (bq2 ~mod* 0x187E) |
| cb6 = ca ~mod- (bq6 ~mod* 0x3B21) |
| |
| bq0 = this.util.sign_extend_convert_u16_u32(a: this.mcu_blocks[args.b][0x00]) ~mod* (this.quant_tables[args.q][0x00] as base.u32) |
| bq4 = this.util.sign_extend_convert_u16_u32(a: this.mcu_blocks[args.b][0x20]) ~mod* (this.quant_tables[args.q][0x20] as base.u32) |
| |
| ccp = (bq0 ~mod+ bq4) ~mod<< 13 |
| ccm = (bq0 ~mod- bq4) ~mod<< 13 |
| |
| cd0 = ccp ~mod+ cb2 |
| cd1 = ccm ~mod+ cb6 |
| cd2 = ccm ~mod- cb6 |
| cd3 = ccp ~mod- cb2 |
| |
| // Odd rows. |
| |
| bq1 = this.util.sign_extend_convert_u16_u32(a: this.mcu_blocks[args.b][0x08]) ~mod* (this.quant_tables[args.q][0x08] as base.u32) |
| bq3 = this.util.sign_extend_convert_u16_u32(a: this.mcu_blocks[args.b][0x18]) ~mod* (this.quant_tables[args.q][0x18] as base.u32) |
| bq5 = this.util.sign_extend_convert_u16_u32(a: this.mcu_blocks[args.b][0x28]) ~mod* (this.quant_tables[args.q][0x28] as base.u32) |
| bq7 = this.util.sign_extend_convert_u16_u32(a: this.mcu_blocks[args.b][0x38]) ~mod* (this.quant_tables[args.q][0x38] as base.u32) |
| |
| ci51 = bq5 ~mod+ bq1 |
| ci53 = bq5 ~mod+ bq3 |
| ci71 = bq7 ~mod+ bq1 |
| ci73 = bq7 ~mod+ bq3 |
| |
| cj = (ci73 ~mod+ ci51) ~mod* 0x25A1 |
| |
| ck1 = bq1 ~mod* 0x300B |
| ck3 = bq3 ~mod* 0x6254 |
| ck5 = bq5 ~mod* 0x41B3 |
| ck7 = bq7 ~mod* 0x098E |
| |
| ci51 ~mod*= 0xFFFF_F384 |
| ci53 ~mod*= 0xFFFF_ADFD |
| ci71 ~mod*= 0xFFFF_E333 |
| ci73 ~mod*= 0xFFFF_C13B |
| |
| cl51 = ci51 ~mod+ cj |
| cl73 = ci73 ~mod+ cj |
| |
| ck1 ~mod+= ci71 ~mod+ cl51 |
| ck3 ~mod+= ci53 ~mod+ cl73 |
| ck5 ~mod+= ci53 ~mod+ cl51 |
| ck7 ~mod+= ci71 ~mod+ cl73 |
| |
| // Combine rows. |
| |
| intermediate[0x00] = this.util.sign_extend_rshift_u32(a: (cd0 ~mod+ ck1) ~mod+ (1 << 10), n: 11) |
| intermediate[0x38] = this.util.sign_extend_rshift_u32(a: (cd0 ~mod- ck1) ~mod+ (1 << 10), n: 11) |
| intermediate[0x08] = this.util.sign_extend_rshift_u32(a: (cd1 ~mod+ ck3) ~mod+ (1 << 10), n: 11) |
| intermediate[0x30] = this.util.sign_extend_rshift_u32(a: (cd1 ~mod- ck3) ~mod+ (1 << 10), n: 11) |
| intermediate[0x10] = this.util.sign_extend_rshift_u32(a: (cd2 ~mod+ ck5) ~mod+ (1 << 10), n: 11) |
| intermediate[0x28] = this.util.sign_extend_rshift_u32(a: (cd2 ~mod- ck5) ~mod+ (1 << 10), n: 11) |
| intermediate[0x18] = this.util.sign_extend_rshift_u32(a: (cd3 ~mod+ ck7) ~mod+ (1 << 10), n: 11) |
| intermediate[0x20] = this.util.sign_extend_rshift_u32(a: (cd3 ~mod- ck7) ~mod+ (1 << 10), n: 11) |
| |
| // ==== First pass, column 1. |
| |
| // Even rows. |
| |
| bq2 = this.util.sign_extend_convert_u16_u32(a: this.mcu_blocks[args.b][0x11]) ~mod* (this.quant_tables[args.q][0x11] as base.u32) |
| bq6 = this.util.sign_extend_convert_u16_u32(a: this.mcu_blocks[args.b][0x31]) ~mod* (this.quant_tables[args.q][0x31] as base.u32) |
| |
| ca = (bq2 ~mod+ bq6) ~mod* 0x1151 |
| |
| cb2 = ca ~mod+ (bq2 ~mod* 0x187E) |
| cb6 = ca ~mod- (bq6 ~mod* 0x3B21) |
| |
| bq0 = this.util.sign_extend_convert_u16_u32(a: this.mcu_blocks[args.b][0x01]) ~mod* (this.quant_tables[args.q][0x01] as base.u32) |
| bq4 = this.util.sign_extend_convert_u16_u32(a: this.mcu_blocks[args.b][0x21]) ~mod* (this.quant_tables[args.q][0x21] as base.u32) |
| |
| ccp = (bq0 ~mod+ bq4) ~mod<< 13 |
| ccm = (bq0 ~mod- bq4) ~mod<< 13 |
| |
| cd0 = ccp ~mod+ cb2 |
| cd1 = ccm ~mod+ cb6 |
| cd2 = ccm ~mod- cb6 |
| cd3 = ccp ~mod- cb2 |
| |
| // Odd rows. |
| |
| bq1 = this.util.sign_extend_convert_u16_u32(a: this.mcu_blocks[args.b][0x09]) ~mod* (this.quant_tables[args.q][0x09] as base.u32) |
| bq3 = this.util.sign_extend_convert_u16_u32(a: this.mcu_blocks[args.b][0x19]) ~mod* (this.quant_tables[args.q][0x19] as base.u32) |
| bq5 = this.util.sign_extend_convert_u16_u32(a: this.mcu_blocks[args.b][0x29]) ~mod* (this.quant_tables[args.q][0x29] as base.u32) |
| bq7 = this.util.sign_extend_convert_u16_u32(a: this.mcu_blocks[args.b][0x39]) ~mod* (this.quant_tables[args.q][0x39] as base.u32) |
| |
| ci51 = bq5 ~mod+ bq1 |
| ci53 = bq5 ~mod+ bq3 |
| ci71 = bq7 ~mod+ bq1 |
| ci73 = bq7 ~mod+ bq3 |
| |
| cj = (ci73 ~mod+ ci51) ~mod* 0x25A1 |
| |
| ck1 = bq1 ~mod* 0x300B |
| ck3 = bq3 ~mod* 0x6254 |
| ck5 = bq5 ~mod* 0x41B3 |
| ck7 = bq7 ~mod* 0x098E |
| |
| ci51 ~mod*= 0xFFFF_F384 |
| ci53 ~mod*= 0xFFFF_ADFD |
| ci71 ~mod*= 0xFFFF_E333 |
| ci73 ~mod*= 0xFFFF_C13B |
| |
| cl51 = ci51 ~mod+ cj |
| cl73 = ci73 ~mod+ cj |
| |
| ck1 ~mod+= ci71 ~mod+ cl51 |
| ck3 ~mod+= ci53 ~mod+ cl73 |
| ck5 ~mod+= ci53 ~mod+ cl51 |
| ck7 ~mod+= ci71 ~mod+ cl73 |
| |
| // Combine rows. |
| |
| intermediate[0x01] = this.util.sign_extend_rshift_u32(a: (cd0 ~mod+ ck1) ~mod+ (1 << 10), n: 11) |
| intermediate[0x39] = this.util.sign_extend_rshift_u32(a: (cd0 ~mod- ck1) ~mod+ (1 << 10), n: 11) |
| intermediate[0x09] = this.util.sign_extend_rshift_u32(a: (cd1 ~mod+ ck3) ~mod+ (1 << 10), n: 11) |
| intermediate[0x31] = this.util.sign_extend_rshift_u32(a: (cd1 ~mod- ck3) ~mod+ (1 << 10), n: 11) |
| intermediate[0x11] = this.util.sign_extend_rshift_u32(a: (cd2 ~mod+ ck5) ~mod+ (1 << 10), n: 11) |
| intermediate[0x29] = this.util.sign_extend_rshift_u32(a: (cd2 ~mod- ck5) ~mod+ (1 << 10), n: 11) |
| intermediate[0x19] = this.util.sign_extend_rshift_u32(a: (cd3 ~mod+ ck7) ~mod+ (1 << 10), n: 11) |
| intermediate[0x21] = this.util.sign_extend_rshift_u32(a: (cd3 ~mod- ck7) ~mod+ (1 << 10), n: 11) |
| |
| // ==== First pass, column 2. |
| |
| // Even rows. |
| |
| bq2 = this.util.sign_extend_convert_u16_u32(a: this.mcu_blocks[args.b][0x12]) ~mod* (this.quant_tables[args.q][0x12] as base.u32) |
| bq6 = this.util.sign_extend_convert_u16_u32(a: this.mcu_blocks[args.b][0x32]) ~mod* (this.quant_tables[args.q][0x32] as base.u32) |
| |
| ca = (bq2 ~mod+ bq6) ~mod* 0x1151 |
| |
| cb2 = ca ~mod+ (bq2 ~mod* 0x187E) |
| cb6 = ca ~mod- (bq6 ~mod* 0x3B21) |
| |
| bq0 = this.util.sign_extend_convert_u16_u32(a: this.mcu_blocks[args.b][0x02]) ~mod* (this.quant_tables[args.q][0x02] as base.u32) |
| bq4 = this.util.sign_extend_convert_u16_u32(a: this.mcu_blocks[args.b][0x22]) ~mod* (this.quant_tables[args.q][0x22] as base.u32) |
| |
| ccp = (bq0 ~mod+ bq4) ~mod<< 13 |
| ccm = (bq0 ~mod- bq4) ~mod<< 13 |
| |
| cd0 = ccp ~mod+ cb2 |
| cd1 = ccm ~mod+ cb6 |
| cd2 = ccm ~mod- cb6 |
| cd3 = ccp ~mod- cb2 |
| |
| // Odd rows. |
| |
| bq1 = this.util.sign_extend_convert_u16_u32(a: this.mcu_blocks[args.b][0x0A]) ~mod* (this.quant_tables[args.q][0x0A] as base.u32) |
| bq3 = this.util.sign_extend_convert_u16_u32(a: this.mcu_blocks[args.b][0x1A]) ~mod* (this.quant_tables[args.q][0x1A] as base.u32) |
| bq5 = this.util.sign_extend_convert_u16_u32(a: this.mcu_blocks[args.b][0x2A]) ~mod* (this.quant_tables[args.q][0x2A] as base.u32) |
| bq7 = this.util.sign_extend_convert_u16_u32(a: this.mcu_blocks[args.b][0x3A]) ~mod* (this.quant_tables[args.q][0x3A] as base.u32) |
| |
| ci51 = bq5 ~mod+ bq1 |
| ci53 = bq5 ~mod+ bq3 |
| ci71 = bq7 ~mod+ bq1 |
| ci73 = bq7 ~mod+ bq3 |
| |
| cj = (ci73 ~mod+ ci51) ~mod* 0x25A1 |
| |
| ck1 = bq1 ~mod* 0x300B |
| ck3 = bq3 ~mod* 0x6254 |
| ck5 = bq5 ~mod* 0x41B3 |
| ck7 = bq7 ~mod* 0x098E |
| |
| ci51 ~mod*= 0xFFFF_F384 |
| ci53 ~mod*= 0xFFFF_ADFD |
| ci71 ~mod*= 0xFFFF_E333 |
| ci73 ~mod*= 0xFFFF_C13B |
| |
| cl51 = ci51 ~mod+ cj |
| cl73 = ci73 ~mod+ cj |
| |
| ck1 ~mod+= ci71 ~mod+ cl51 |
| ck3 ~mod+= ci53 ~mod+ cl73 |
| ck5 ~mod+= ci53 ~mod+ cl51 |
| ck7 ~mod+= ci71 ~mod+ cl73 |
| |
| // Combine rows. |
| |
| intermediate[0x02] = this.util.sign_extend_rshift_u32(a: (cd0 ~mod+ ck1) ~mod+ (1 << 10), n: 11) |
| intermediate[0x3A] = this.util.sign_extend_rshift_u32(a: (cd0 ~mod- ck1) ~mod+ (1 << 10), n: 11) |
| intermediate[0x0A] = this.util.sign_extend_rshift_u32(a: (cd1 ~mod+ ck3) ~mod+ (1 << 10), n: 11) |
| intermediate[0x32] = this.util.sign_extend_rshift_u32(a: (cd1 ~mod- ck3) ~mod+ (1 << 10), n: 11) |
| intermediate[0x12] = this.util.sign_extend_rshift_u32(a: (cd2 ~mod+ ck5) ~mod+ (1 << 10), n: 11) |
| intermediate[0x2A] = this.util.sign_extend_rshift_u32(a: (cd2 ~mod- ck5) ~mod+ (1 << 10), n: 11) |
| intermediate[0x1A] = this.util.sign_extend_rshift_u32(a: (cd3 ~mod+ ck7) ~mod+ (1 << 10), n: 11) |
| intermediate[0x22] = this.util.sign_extend_rshift_u32(a: (cd3 ~mod- ck7) ~mod+ (1 << 10), n: 11) |
| |
| // ==== First pass, column 3. |
| |
| // Even rows. |
| |
| bq2 = this.util.sign_extend_convert_u16_u32(a: this.mcu_blocks[args.b][0x13]) ~mod* (this.quant_tables[args.q][0x13] as base.u32) |
| bq6 = this.util.sign_extend_convert_u16_u32(a: this.mcu_blocks[args.b][0x33]) ~mod* (this.quant_tables[args.q][0x33] as base.u32) |
| |
| ca = (bq2 ~mod+ bq6) ~mod* 0x1151 |
| |
| cb2 = ca ~mod+ (bq2 ~mod* 0x187E) |
| cb6 = ca ~mod- (bq6 ~mod* 0x3B21) |
| |
| bq0 = this.util.sign_extend_convert_u16_u32(a: this.mcu_blocks[args.b][0x03]) ~mod* (this.quant_tables[args.q][0x03] as base.u32) |
| bq4 = this.util.sign_extend_convert_u16_u32(a: this.mcu_blocks[args.b][0x23]) ~mod* (this.quant_tables[args.q][0x23] as base.u32) |
| |
| ccp = (bq0 ~mod+ bq4) ~mod<< 13 |
| ccm = (bq0 ~mod- bq4) ~mod<< 13 |
| |
| cd0 = ccp ~mod+ cb2 |
| cd1 = ccm ~mod+ cb6 |
| cd2 = ccm ~mod- cb6 |
| cd3 = ccp ~mod- cb2 |
| |
| // Odd rows. |
| |
| bq1 = this.util.sign_extend_convert_u16_u32(a: this.mcu_blocks[args.b][0x0B]) ~mod* (this.quant_tables[args.q][0x0B] as base.u32) |
| bq3 = this.util.sign_extend_convert_u16_u32(a: this.mcu_blocks[args.b][0x1B]) ~mod* (this.quant_tables[args.q][0x1B] as base.u32) |
| bq5 = this.util.sign_extend_convert_u16_u32(a: this.mcu_blocks[args.b][0x2B]) ~mod* (this.quant_tables[args.q][0x2B] as base.u32) |
| bq7 = this.util.sign_extend_convert_u16_u32(a: this.mcu_blocks[args.b][0x3B]) ~mod* (this.quant_tables[args.q][0x3B] as base.u32) |
| |
| ci51 = bq5 ~mod+ bq1 |
| ci53 = bq5 ~mod+ bq3 |
| ci71 = bq7 ~mod+ bq1 |
| ci73 = bq7 ~mod+ bq3 |
| |
| cj = (ci73 ~mod+ ci51) ~mod* 0x25A1 |
| |
| ck1 = bq1 ~mod* 0x300B |
| ck3 = bq3 ~mod* 0x6254 |
| ck5 = bq5 ~mod* 0x41B3 |
| ck7 = bq7 ~mod* 0x098E |
| |
| ci51 ~mod*= 0xFFFF_F384 |
| ci53 ~mod*= 0xFFFF_ADFD |
| ci71 ~mod*= 0xFFFF_E333 |
| ci73 ~mod*= 0xFFFF_C13B |
| |
| cl51 = ci51 ~mod+ cj |
| cl73 = ci73 ~mod+ cj |
| |
| ck1 ~mod+= ci71 ~mod+ cl51 |
| ck3 ~mod+= ci53 ~mod+ cl73 |
| ck5 ~mod+= ci53 ~mod+ cl51 |
| ck7 ~mod+= ci71 ~mod+ cl73 |
| |
| // Combine rows. |
| |
| intermediate[0x03] = this.util.sign_extend_rshift_u32(a: (cd0 ~mod+ ck1) ~mod+ (1 << 10), n: 11) |
| intermediate[0x3B] = this.util.sign_extend_rshift_u32(a: (cd0 ~mod- ck1) ~mod+ (1 << 10), n: 11) |
| intermediate[0x0B] = this.util.sign_extend_rshift_u32(a: (cd1 ~mod+ ck3) ~mod+ (1 << 10), n: 11) |
| intermediate[0x33] = this.util.sign_extend_rshift_u32(a: (cd1 ~mod- ck3) ~mod+ (1 << 10), n: 11) |
| intermediate[0x13] = this.util.sign_extend_rshift_u32(a: (cd2 ~mod+ ck5) ~mod+ (1 << 10), n: 11) |
| intermediate[0x2B] = this.util.sign_extend_rshift_u32(a: (cd2 ~mod- ck5) ~mod+ (1 << 10), n: 11) |
| intermediate[0x1B] = this.util.sign_extend_rshift_u32(a: (cd3 ~mod+ ck7) ~mod+ (1 << 10), n: 11) |
| intermediate[0x23] = this.util.sign_extend_rshift_u32(a: (cd3 ~mod- ck7) ~mod+ (1 << 10), n: 11) |
| |
| // ==== First pass, column 4. |
| |
| // Even rows. |
| |
| bq2 = this.util.sign_extend_convert_u16_u32(a: this.mcu_blocks[args.b][0x14]) ~mod* (this.quant_tables[args.q][0x14] as base.u32) |
| bq6 = this.util.sign_extend_convert_u16_u32(a: this.mcu_blocks[args.b][0x34]) ~mod* (this.quant_tables[args.q][0x34] as base.u32) |
| |
| ca = (bq2 ~mod+ bq6) ~mod* 0x1151 |
| |
| cb2 = ca ~mod+ (bq2 ~mod* 0x187E) |
| cb6 = ca ~mod- (bq6 ~mod* 0x3B21) |
| |
| bq0 = this.util.sign_extend_convert_u16_u32(a: this.mcu_blocks[args.b][0x04]) ~mod* (this.quant_tables[args.q][0x04] as base.u32) |
| bq4 = this.util.sign_extend_convert_u16_u32(a: this.mcu_blocks[args.b][0x24]) ~mod* (this.quant_tables[args.q][0x24] as base.u32) |
| |
| ccp = (bq0 ~mod+ bq4) ~mod<< 13 |
| ccm = (bq0 ~mod- bq4) ~mod<< 13 |
| |
| cd0 = ccp ~mod+ cb2 |
| cd1 = ccm ~mod+ cb6 |
| cd2 = ccm ~mod- cb6 |
| cd3 = ccp ~mod- cb2 |
| |
| // Odd rows. |
| |
| bq1 = this.util.sign_extend_convert_u16_u32(a: this.mcu_blocks[args.b][0x0C]) ~mod* (this.quant_tables[args.q][0x0C] as base.u32) |
| bq3 = this.util.sign_extend_convert_u16_u32(a: this.mcu_blocks[args.b][0x1C]) ~mod* (this.quant_tables[args.q][0x1C] as base.u32) |
| bq5 = this.util.sign_extend_convert_u16_u32(a: this.mcu_blocks[args.b][0x2C]) ~mod* (this.quant_tables[args.q][0x2C] as base.u32) |
| bq7 = this.util.sign_extend_convert_u16_u32(a: this.mcu_blocks[args.b][0x3C]) ~mod* (this.quant_tables[args.q][0x3C] as base.u32) |
| |
| ci51 = bq5 ~mod+ bq1 |
| ci53 = bq5 ~mod+ bq3 |
| ci71 = bq7 ~mod+ bq1 |
| ci73 = bq7 ~mod+ bq3 |
| |
| cj = (ci73 ~mod+ ci51) ~mod* 0x25A1 |
| |
| ck1 = bq1 ~mod* 0x300B |
| ck3 = bq3 ~mod* 0x6254 |
| ck5 = bq5 ~mod* 0x41B3 |
| ck7 = bq7 ~mod* 0x098E |
| |
| ci51 ~mod*= 0xFFFF_F384 |
| ci53 ~mod*= 0xFFFF_ADFD |
| ci71 ~mod*= 0xFFFF_E333 |
| ci73 ~mod*= 0xFFFF_C13B |
| |
| cl51 = ci51 ~mod+ cj |
| cl73 = ci73 ~mod+ cj |
| |
| ck1 ~mod+= ci71 ~mod+ cl51 |
| ck3 ~mod+= ci53 ~mod+ cl73 |
| ck5 ~mod+= ci53 ~mod+ cl51 |
| ck7 ~mod+= ci71 ~mod+ cl73 |
| |
| // Combine rows. |
| |
| intermediate[0x04] = this.util.sign_extend_rshift_u32(a: (cd0 ~mod+ ck1) ~mod+ (1 << 10), n: 11) |
| intermediate[0x3C] = this.util.sign_extend_rshift_u32(a: (cd0 ~mod- ck1) ~mod+ (1 << 10), n: 11) |
| intermediate[0x0C] = this.util.sign_extend_rshift_u32(a: (cd1 ~mod+ ck3) ~mod+ (1 << 10), n: 11) |
| intermediate[0x34] = this.util.sign_extend_rshift_u32(a: (cd1 ~mod- ck3) ~mod+ (1 << 10), n: 11) |
| intermediate[0x14] = this.util.sign_extend_rshift_u32(a: (cd2 ~mod+ ck5) ~mod+ (1 << 10), n: 11) |
| intermediate[0x2C] = this.util.sign_extend_rshift_u32(a: (cd2 ~mod- ck5) ~mod+ (1 << 10), n: 11) |
| intermediate[0x1C] = this.util.sign_extend_rshift_u32(a: (cd3 ~mod+ ck7) ~mod+ (1 << 10), n: 11) |
| intermediate[0x24] = this.util.sign_extend_rshift_u32(a: (cd3 ~mod- ck7) ~mod+ (1 << 10), n: 11) |
| |
| // ==== First pass, column 5. |
| |
| // Even rows. |
| |
| bq2 = this.util.sign_extend_convert_u16_u32(a: this.mcu_blocks[args.b][0x15]) ~mod* (this.quant_tables[args.q][0x15] as base.u32) |
| bq6 = this.util.sign_extend_convert_u16_u32(a: this.mcu_blocks[args.b][0x35]) ~mod* (this.quant_tables[args.q][0x35] as base.u32) |
| |
| ca = (bq2 ~mod+ bq6) ~mod* 0x1151 |
| |
| cb2 = ca ~mod+ (bq2 ~mod* 0x187E) |
| cb6 = ca ~mod- (bq6 ~mod* 0x3B21) |
| |
| bq0 = this.util.sign_extend_convert_u16_u32(a: this.mcu_blocks[args.b][0x05]) ~mod* (this.quant_tables[args.q][0x05] as base.u32) |
| bq4 = this.util.sign_extend_convert_u16_u32(a: this.mcu_blocks[args.b][0x25]) ~mod* (this.quant_tables[args.q][0x25] as base.u32) |
| |
| ccp = (bq0 ~mod+ bq4) ~mod<< 13 |
| ccm = (bq0 ~mod- bq4) ~mod<< 13 |
| |
| cd0 = ccp ~mod+ cb2 |
| cd1 = ccm ~mod+ cb6 |
| cd2 = ccm ~mod- cb6 |
| cd3 = ccp ~mod- cb2 |
| |
| // Odd rows. |
| |
| bq1 = this.util.sign_extend_convert_u16_u32(a: this.mcu_blocks[args.b][0x0D]) ~mod* (this.quant_tables[args.q][0x0D] as base.u32) |
| bq3 = this.util.sign_extend_convert_u16_u32(a: this.mcu_blocks[args.b][0x1D]) ~mod* (this.quant_tables[args.q][0x1D] as base.u32) |
| bq5 = this.util.sign_extend_convert_u16_u32(a: this.mcu_blocks[args.b][0x2D]) ~mod* (this.quant_tables[args.q][0x2D] as base.u32) |
| bq7 = this.util.sign_extend_convert_u16_u32(a: this.mcu_blocks[args.b][0x3D]) ~mod* (this.quant_tables[args.q][0x3D] as base.u32) |
| |
| ci51 = bq5 ~mod+ bq1 |
| ci53 = bq5 ~mod+ bq3 |
| ci71 = bq7 ~mod+ bq1 |
| ci73 = bq7 ~mod+ bq3 |
| |
| cj = (ci73 ~mod+ ci51) ~mod* 0x25A1 |
| |
| ck1 = bq1 ~mod* 0x300B |
| ck3 = bq3 ~mod* 0x6254 |
| ck5 = bq5 ~mod* 0x41B3 |
| ck7 = bq7 ~mod* 0x098E |
| |
| ci51 ~mod*= 0xFFFF_F384 |
| ci53 ~mod*= 0xFFFF_ADFD |
| ci71 ~mod*= 0xFFFF_E333 |
| ci73 ~mod*= 0xFFFF_C13B |
| |
| cl51 = ci51 ~mod+ cj |
| cl73 = ci73 ~mod+ cj |
| |
| ck1 ~mod+= ci71 ~mod+ cl51 |
| ck3 ~mod+= ci53 ~mod+ cl73 |
| ck5 ~mod+= ci53 ~mod+ cl51 |
| ck7 ~mod+= ci71 ~mod+ cl73 |
| |
| // Combine rows. |
| |
| intermediate[0x05] = this.util.sign_extend_rshift_u32(a: (cd0 ~mod+ ck1) ~mod+ (1 << 10), n: 11) |
| intermediate[0x3D] = this.util.sign_extend_rshift_u32(a: (cd0 ~mod- ck1) ~mod+ (1 << 10), n: 11) |
| intermediate[0x0D] = this.util.sign_extend_rshift_u32(a: (cd1 ~mod+ ck3) ~mod+ (1 << 10), n: 11) |
| intermediate[0x35] = this.util.sign_extend_rshift_u32(a: (cd1 ~mod- ck3) ~mod+ (1 << 10), n: 11) |
| intermediate[0x15] = this.util.sign_extend_rshift_u32(a: (cd2 ~mod+ ck5) ~mod+ (1 << 10), n: 11) |
| intermediate[0x2D] = this.util.sign_extend_rshift_u32(a: (cd2 ~mod- ck5) ~mod+ (1 << 10), n: 11) |
| intermediate[0x1D] = this.util.sign_extend_rshift_u32(a: (cd3 ~mod+ ck7) ~mod+ (1 << 10), n: 11) |
| intermediate[0x25] = this.util.sign_extend_rshift_u32(a: (cd3 ~mod- ck7) ~mod+ (1 << 10), n: 11) |
| |
| // ==== First pass, column 6. |
| |
| // Even rows. |
| |
| bq2 = this.util.sign_extend_convert_u16_u32(a: this.mcu_blocks[args.b][0x16]) ~mod* (this.quant_tables[args.q][0x16] as base.u32) |
| bq6 = this.util.sign_extend_convert_u16_u32(a: this.mcu_blocks[args.b][0x36]) ~mod* (this.quant_tables[args.q][0x36] as base.u32) |
| |
| ca = (bq2 ~mod+ bq6) ~mod* 0x1151 |
| |
| cb2 = ca ~mod+ (bq2 ~mod* 0x187E) |
| cb6 = ca ~mod- (bq6 ~mod* 0x3B21) |
| |
| bq0 = this.util.sign_extend_convert_u16_u32(a: this.mcu_blocks[args.b][0x06]) ~mod* (this.quant_tables[args.q][0x06] as base.u32) |
| bq4 = this.util.sign_extend_convert_u16_u32(a: this.mcu_blocks[args.b][0x26]) ~mod* (this.quant_tables[args.q][0x26] as base.u32) |
| |
| ccp = (bq0 ~mod+ bq4) ~mod<< 13 |
| ccm = (bq0 ~mod- bq4) ~mod<< 13 |
| |
| cd0 = ccp ~mod+ cb2 |
| cd1 = ccm ~mod+ cb6 |
| cd2 = ccm ~mod- cb6 |
| cd3 = ccp ~mod- cb2 |
| |
| // Odd rows. |
| |
| bq1 = this.util.sign_extend_convert_u16_u32(a: this.mcu_blocks[args.b][0x0E]) ~mod* (this.quant_tables[args.q][0x0E] as base.u32) |
| bq3 = this.util.sign_extend_convert_u16_u32(a: this.mcu_blocks[args.b][0x1E]) ~mod* (this.quant_tables[args.q][0x1E] as base.u32) |
| bq5 = this.util.sign_extend_convert_u16_u32(a: this.mcu_blocks[args.b][0x2E]) ~mod* (this.quant_tables[args.q][0x2E] as base.u32) |
| bq7 = this.util.sign_extend_convert_u16_u32(a: this.mcu_blocks[args.b][0x3E]) ~mod* (this.quant_tables[args.q][0x3E] as base.u32) |
| |
| ci51 = bq5 ~mod+ bq1 |
| ci53 = bq5 ~mod+ bq3 |
| ci71 = bq7 ~mod+ bq1 |
| ci73 = bq7 ~mod+ bq3 |
| |
| cj = (ci73 ~mod+ ci51) ~mod* 0x25A1 |
| |
| ck1 = bq1 ~mod* 0x300B |
| ck3 = bq3 ~mod* 0x6254 |
| ck5 = bq5 ~mod* 0x41B3 |
| ck7 = bq7 ~mod* 0x098E |
| |
| ci51 ~mod*= 0xFFFF_F384 |
| ci53 ~mod*= 0xFFFF_ADFD |
| ci71 ~mod*= 0xFFFF_E333 |
| ci73 ~mod*= 0xFFFF_C13B |
| |
| cl51 = ci51 ~mod+ cj |
| cl73 = ci73 ~mod+ cj |
| |
| ck1 ~mod+= ci71 ~mod+ cl51 |
| ck3 ~mod+= ci53 ~mod+ cl73 |
| ck5 ~mod+= ci53 ~mod+ cl51 |
| ck7 ~mod+= ci71 ~mod+ cl73 |
| |
| // Combine rows. |
| |
| intermediate[0x06] = this.util.sign_extend_rshift_u32(a: (cd0 ~mod+ ck1) ~mod+ (1 << 10), n: 11) |
| intermediate[0x3E] = this.util.sign_extend_rshift_u32(a: (cd0 ~mod- ck1) ~mod+ (1 << 10), n: 11) |
| intermediate[0x0E] = this.util.sign_extend_rshift_u32(a: (cd1 ~mod+ ck3) ~mod+ (1 << 10), n: 11) |
| intermediate[0x36] = this.util.sign_extend_rshift_u32(a: (cd1 ~mod- ck3) ~mod+ (1 << 10), n: 11) |
| intermediate[0x16] = this.util.sign_extend_rshift_u32(a: (cd2 ~mod+ ck5) ~mod+ (1 << 10), n: 11) |
| intermediate[0x2E] = this.util.sign_extend_rshift_u32(a: (cd2 ~mod- ck5) ~mod+ (1 << 10), n: 11) |
| intermediate[0x1E] = this.util.sign_extend_rshift_u32(a: (cd3 ~mod+ ck7) ~mod+ (1 << 10), n: 11) |
| intermediate[0x26] = this.util.sign_extend_rshift_u32(a: (cd3 ~mod- ck7) ~mod+ (1 << 10), n: 11) |
| |
| // ==== First pass, column 7. |
| |
| // Even rows. |
| |
| bq2 = this.util.sign_extend_convert_u16_u32(a: this.mcu_blocks[args.b][0x17]) ~mod* (this.quant_tables[args.q][0x17] as base.u32) |
| bq6 = this.util.sign_extend_convert_u16_u32(a: this.mcu_blocks[args.b][0x37]) ~mod* (this.quant_tables[args.q][0x37] as base.u32) |
| |
| ca = (bq2 ~mod+ bq6) ~mod* 0x1151 |
| |
| cb2 = ca ~mod+ (bq2 ~mod* 0x187E) |
| cb6 = ca ~mod- (bq6 ~mod* 0x3B21) |
| |
| bq0 = this.util.sign_extend_convert_u16_u32(a: this.mcu_blocks[args.b][0x07]) ~mod* (this.quant_tables[args.q][0x07] as base.u32) |
| bq4 = this.util.sign_extend_convert_u16_u32(a: this.mcu_blocks[args.b][0x27]) ~mod* (this.quant_tables[args.q][0x27] as base.u32) |
| |
| ccp = (bq0 ~mod+ bq4) ~mod<< 13 |
| ccm = (bq0 ~mod- bq4) ~mod<< 13 |
| |
| cd0 = ccp ~mod+ cb2 |
| cd1 = ccm ~mod+ cb6 |
| cd2 = ccm ~mod- cb6 |
| cd3 = ccp ~mod- cb2 |
| |
| // Odd rows. |
| |
| bq1 = this.util.sign_extend_convert_u16_u32(a: this.mcu_blocks[args.b][0x0F]) ~mod* (this.quant_tables[args.q][0x0F] as base.u32) |
| bq3 = this.util.sign_extend_convert_u16_u32(a: this.mcu_blocks[args.b][0x1F]) ~mod* (this.quant_tables[args.q][0x1F] as base.u32) |
| bq5 = this.util.sign_extend_convert_u16_u32(a: this.mcu_blocks[args.b][0x2F]) ~mod* (this.quant_tables[args.q][0x2F] as base.u32) |
| bq7 = this.util.sign_extend_convert_u16_u32(a: this.mcu_blocks[args.b][0x3F]) ~mod* (this.quant_tables[args.q][0x3F] as base.u32) |
| |
| ci51 = bq5 ~mod+ bq1 |
| ci53 = bq5 ~mod+ bq3 |
| ci71 = bq7 ~mod+ bq1 |
| ci73 = bq7 ~mod+ bq3 |
| |
| cj = (ci73 ~mod+ ci51) ~mod* 0x25A1 |
| |
| ck1 = bq1 ~mod* 0x300B |
| ck3 = bq3 ~mod* 0x6254 |
| ck5 = bq5 ~mod* 0x41B3 |
| ck7 = bq7 ~mod* 0x098E |
| |
| ci51 ~mod*= 0xFFFF_F384 |
| ci53 ~mod*= 0xFFFF_ADFD |
| ci71 ~mod*= 0xFFFF_E333 |
| ci73 ~mod*= 0xFFFF_C13B |
| |
| cl51 = ci51 ~mod+ cj |
| cl73 = ci73 ~mod+ cj |
| |
| ck1 ~mod+= ci71 ~mod+ cl51 |
| ck3 ~mod+= ci53 ~mod+ cl73 |
| ck5 ~mod+= ci53 ~mod+ cl51 |
| ck7 ~mod+= ci71 ~mod+ cl73 |
| |
| // Combine rows. |
| |
| intermediate[0x07] = this.util.sign_extend_rshift_u32(a: (cd0 ~mod+ ck1) ~mod+ (1 << 10), n: 11) |
| intermediate[0x3F] = this.util.sign_extend_rshift_u32(a: (cd0 ~mod- ck1) ~mod+ (1 << 10), n: 11) |
| intermediate[0x0F] = this.util.sign_extend_rshift_u32(a: (cd1 ~mod+ ck3) ~mod+ (1 << 10), n: 11) |
| intermediate[0x37] = this.util.sign_extend_rshift_u32(a: (cd1 ~mod- ck3) ~mod+ (1 << 10), n: 11) |
| intermediate[0x17] = this.util.sign_extend_rshift_u32(a: (cd2 ~mod+ ck5) ~mod+ (1 << 10), n: 11) |
| intermediate[0x2F] = this.util.sign_extend_rshift_u32(a: (cd2 ~mod- ck5) ~mod+ (1 << 10), n: 11) |
| intermediate[0x1F] = this.util.sign_extend_rshift_u32(a: (cd3 ~mod+ ck7) ~mod+ (1 << 10), n: 11) |
| intermediate[0x27] = this.util.sign_extend_rshift_u32(a: (cd3 ~mod- ck7) ~mod+ (1 << 10), n: 11) |
| |
| // ==== Second pass, row 0. |
| |
| // Even columns. |
| |
| in2 = intermediate[0x02] |
| in6 = intermediate[0x06] |
| |
| ra = (in2 ~mod+ in6) ~mod* 0x1151 |
| |
| rb2 = ra ~mod+ (in2 ~mod* 0x187E) |
| rb6 = ra ~mod- (in6 ~mod* 0x3B21) |
| |
| in0 = intermediate[0x00] |
| in4 = intermediate[0x04] |
| |
| rcp = (in0 ~mod+ in4) ~mod<< 13 |
| rcm = (in0 ~mod- in4) ~mod<< 13 |
| |
| rd0 = rcp ~mod+ rb2 |
| rd1 = rcm ~mod+ rb6 |
| rd2 = rcm ~mod- rb6 |
| rd3 = rcp ~mod- rb2 |
| |
| // Odd columns. |
| |
| in1 = intermediate[0x01] |
| in3 = intermediate[0x03] |
| in5 = intermediate[0x05] |
| in7 = intermediate[0x07] |
| |
| ri51 = in5 ~mod+ in1 |
| ri53 = in5 ~mod+ in3 |
| ri71 = in7 ~mod+ in1 |
| ri73 = in7 ~mod+ in3 |
| |
| rj = (ri73 ~mod+ ri51) ~mod* 0x25A1 |
| |
| rk1 = in1 ~mod* 0x300B |
| rk3 = in3 ~mod* 0x6254 |
| rk5 = in5 ~mod* 0x41B3 |
| rk7 = in7 ~mod* 0x098E |
| |
| ri51 ~mod*= 0xFFFF_F384 |
| ri53 ~mod*= 0xFFFF_ADFD |
| ri71 ~mod*= 0xFFFF_E333 |
| ri73 ~mod*= 0xFFFF_C13B |
| |
| rl51 = ri51 ~mod+ rj |
| rl73 = ri73 ~mod+ rj |
| |
| rk1 ~mod+= ri71 ~mod+ rl51 |
| rk3 ~mod+= ri53 ~mod+ rl73 |
| rk5 ~mod+= ri53 ~mod+ rl51 |
| rk7 ~mod+= ri71 ~mod+ rl73 |
| |
| // Combine columns. |
| |
| if args.dst_stride > args.dst_buffer.length() { |
| return nothing |
| } |
| assert 8 <= args.dst_buffer.length() via "a <= b: a <= c; c <= b"(c: args.dst_stride) |
| |
| args.dst_buffer[0] = BIAS_AND_CLAMP[(((rd0 ~mod+ rk1) ~mod+ (1 << 17)) >> 18) & 1023] |
| args.dst_buffer[7] = BIAS_AND_CLAMP[(((rd0 ~mod- rk1) ~mod+ (1 << 17)) >> 18) & 1023] |
| args.dst_buffer[1] = BIAS_AND_CLAMP[(((rd1 ~mod+ rk3) ~mod+ (1 << 17)) >> 18) & 1023] |
| args.dst_buffer[6] = BIAS_AND_CLAMP[(((rd1 ~mod- rk3) ~mod+ (1 << 17)) >> 18) & 1023] |
| args.dst_buffer[2] = BIAS_AND_CLAMP[(((rd2 ~mod+ rk5) ~mod+ (1 << 17)) >> 18) & 1023] |
| args.dst_buffer[5] = BIAS_AND_CLAMP[(((rd2 ~mod- rk5) ~mod+ (1 << 17)) >> 18) & 1023] |
| args.dst_buffer[3] = BIAS_AND_CLAMP[(((rd3 ~mod+ rk7) ~mod+ (1 << 17)) >> 18) & 1023] |
| args.dst_buffer[4] = BIAS_AND_CLAMP[(((rd3 ~mod- rk7) ~mod+ (1 << 17)) >> 18) & 1023] |
| |
| args.dst_buffer = args.dst_buffer[args.dst_stride ..] |
| |
| // ==== Second pass, row 1. |
| |
| // Even columns. |
| |
| in2 = intermediate[0x0A] |
| in6 = intermediate[0x0E] |
| |
| ra = (in2 ~mod+ in6) ~mod* 0x1151 |
| |
| rb2 = ra ~mod+ (in2 ~mod* 0x187E) |
| rb6 = ra ~mod- (in6 ~mod* 0x3B21) |
| |
| in0 = intermediate[0x08] |
| in4 = intermediate[0x0C] |
| |
| rcp = (in0 ~mod+ in4) ~mod<< 13 |
| rcm = (in0 ~mod- in4) ~mod<< 13 |
| |
| rd0 = rcp ~mod+ rb2 |
| rd1 = rcm ~mod+ rb6 |
| rd2 = rcm ~mod- rb6 |
| rd3 = rcp ~mod- rb2 |
| |
| // Odd columns. |
| |
| in1 = intermediate[0x09] |
| in3 = intermediate[0x0B] |
| in5 = intermediate[0x0D] |
| in7 = intermediate[0x0F] |
| |
| ri51 = in5 ~mod+ in1 |
| ri53 = in5 ~mod+ in3 |
| ri71 = in7 ~mod+ in1 |
| ri73 = in7 ~mod+ in3 |
| |
| rj = (ri73 ~mod+ ri51) ~mod* 0x25A1 |
| |
| rk1 = in1 ~mod* 0x300B |
| rk3 = in3 ~mod* 0x6254 |
| rk5 = in5 ~mod* 0x41B3 |
| rk7 = in7 ~mod* 0x098E |
| |
| ri51 ~mod*= 0xFFFF_F384 |
| ri53 ~mod*= 0xFFFF_ADFD |
| ri71 ~mod*= 0xFFFF_E333 |
| ri73 ~mod*= 0xFFFF_C13B |
| |
| rl51 = ri51 ~mod+ rj |
| rl73 = ri73 ~mod+ rj |
| |
| rk1 ~mod+= ri71 ~mod+ rl51 |
| rk3 ~mod+= ri53 ~mod+ rl73 |
| rk5 ~mod+= ri53 ~mod+ rl51 |
| rk7 ~mod+= ri71 ~mod+ rl73 |
| |
| // Combine columns. |
| |
| if args.dst_stride > args.dst_buffer.length() { |
| return nothing |
| } |
| assert 8 <= args.dst_buffer.length() via "a <= b: a <= c; c <= b"(c: args.dst_stride) |
| |
| args.dst_buffer[0] = BIAS_AND_CLAMP[(((rd0 ~mod+ rk1) ~mod+ (1 << 17)) >> 18) & 1023] |
| args.dst_buffer[7] = BIAS_AND_CLAMP[(((rd0 ~mod- rk1) ~mod+ (1 << 17)) >> 18) & 1023] |
| args.dst_buffer[1] = BIAS_AND_CLAMP[(((rd1 ~mod+ rk3) ~mod+ (1 << 17)) >> 18) & 1023] |
| args.dst_buffer[6] = BIAS_AND_CLAMP[(((rd1 ~mod- rk3) ~mod+ (1 << 17)) >> 18) & 1023] |
| args.dst_buffer[2] = BIAS_AND_CLAMP[(((rd2 ~mod+ rk5) ~mod+ (1 << 17)) >> 18) & 1023] |
| args.dst_buffer[5] = BIAS_AND_CLAMP[(((rd2 ~mod- rk5) ~mod+ (1 << 17)) >> 18) & 1023] |
| args.dst_buffer[3] = BIAS_AND_CLAMP[(((rd3 ~mod+ rk7) ~mod+ (1 << 17)) >> 18) & 1023] |
| args.dst_buffer[4] = BIAS_AND_CLAMP[(((rd3 ~mod- rk7) ~mod+ (1 << 17)) >> 18) & 1023] |
| |
| args.dst_buffer = args.dst_buffer[args.dst_stride ..] |
| |
| // ==== Second pass, row 2. |
| |
| // Even columns. |
| |
| in2 = intermediate[0x12] |
| in6 = intermediate[0x16] |
| |
| ra = (in2 ~mod+ in6) ~mod* 0x1151 |
| |
| rb2 = ra ~mod+ (in2 ~mod* 0x187E) |
| rb6 = ra ~mod- (in6 ~mod* 0x3B21) |
| |
| in0 = intermediate[0x10] |
| in4 = intermediate[0x14] |
| |
| rcp = (in0 ~mod+ in4) ~mod<< 13 |
| rcm = (in0 ~mod- in4) ~mod<< 13 |
| |
| rd0 = rcp ~mod+ rb2 |
| rd1 = rcm ~mod+ rb6 |
| rd2 = rcm ~mod- rb6 |
| rd3 = rcp ~mod- rb2 |
| |
| // Odd columns. |
| |
| in1 = intermediate[0x11] |
| in3 = intermediate[0x13] |
| in5 = intermediate[0x15] |
| in7 = intermediate[0x17] |
| |
| ri51 = in5 ~mod+ in1 |
| ri53 = in5 ~mod+ in3 |
| ri71 = in7 ~mod+ in1 |
| ri73 = in7 ~mod+ in3 |
| |
| rj = (ri73 ~mod+ ri51) ~mod* 0x25A1 |
| |
| rk1 = in1 ~mod* 0x300B |
| rk3 = in3 ~mod* 0x6254 |
| rk5 = in5 ~mod* 0x41B3 |
| rk7 = in7 ~mod* 0x098E |
| |
| ri51 ~mod*= 0xFFFF_F384 |
| ri53 ~mod*= 0xFFFF_ADFD |
| ri71 ~mod*= 0xFFFF_E333 |
| ri73 ~mod*= 0xFFFF_C13B |
| |
| rl51 = ri51 ~mod+ rj |
| rl73 = ri73 ~mod+ rj |
| |
| rk1 ~mod+= ri71 ~mod+ rl51 |
| rk3 ~mod+= ri53 ~mod+ rl73 |
| rk5 ~mod+= ri53 ~mod+ rl51 |
| rk7 ~mod+= ri71 ~mod+ rl73 |
| |
| // Combine columns. |
| |
| if args.dst_stride > args.dst_buffer.length() { |
| return nothing |
| } |
| assert 8 <= args.dst_buffer.length() via "a <= b: a <= c; c <= b"(c: args.dst_stride) |
| |
| args.dst_buffer[0] = BIAS_AND_CLAMP[(((rd0 ~mod+ rk1) ~mod+ (1 << 17)) >> 18) & 1023] |
| args.dst_buffer[7] = BIAS_AND_CLAMP[(((rd0 ~mod- rk1) ~mod+ (1 << 17)) >> 18) & 1023] |
| args.dst_buffer[1] = BIAS_AND_CLAMP[(((rd1 ~mod+ rk3) ~mod+ (1 << 17)) >> 18) & 1023] |
| args.dst_buffer[6] = BIAS_AND_CLAMP[(((rd1 ~mod- rk3) ~mod+ (1 << 17)) >> 18) & 1023] |
| args.dst_buffer[2] = BIAS_AND_CLAMP[(((rd2 ~mod+ rk5) ~mod+ (1 << 17)) >> 18) & 1023] |
| args.dst_buffer[5] = BIAS_AND_CLAMP[(((rd2 ~mod- rk5) ~mod+ (1 << 17)) >> 18) & 1023] |
| args.dst_buffer[3] = BIAS_AND_CLAMP[(((rd3 ~mod+ rk7) ~mod+ (1 << 17)) >> 18) & 1023] |
| args.dst_buffer[4] = BIAS_AND_CLAMP[(((rd3 ~mod- rk7) ~mod+ (1 << 17)) >> 18) & 1023] |
| |
| args.dst_buffer = args.dst_buffer[args.dst_stride ..] |
| |
| // ==== Second pass, row 3. |
| |
| // Even columns. |
| |
| in2 = intermediate[0x1A] |
| in6 = intermediate[0x1E] |
| |
| ra = (in2 ~mod+ in6) ~mod* 0x1151 |
| |
| rb2 = ra ~mod+ (in2 ~mod* 0x187E) |
| rb6 = ra ~mod- (in6 ~mod* 0x3B21) |
| |
| in0 = intermediate[0x18] |
| in4 = intermediate[0x1C] |
| |
| rcp = (in0 ~mod+ in4) ~mod<< 13 |
| rcm = (in0 ~mod- in4) ~mod<< 13 |
| |
| rd0 = rcp ~mod+ rb2 |
| rd1 = rcm ~mod+ rb6 |
| rd2 = rcm ~mod- rb6 |
| rd3 = rcp ~mod- rb2 |
| |
| // Odd columns. |
| |
| in1 = intermediate[0x19] |
| in3 = intermediate[0x1B] |
| in5 = intermediate[0x1D] |
| in7 = intermediate[0x1F] |
| |
| ri51 = in5 ~mod+ in1 |
| ri53 = in5 ~mod+ in3 |
| ri71 = in7 ~mod+ in1 |
| ri73 = in7 ~mod+ in3 |
| |
| rj = (ri73 ~mod+ ri51) ~mod* 0x25A1 |
| |
| rk1 = in1 ~mod* 0x300B |
| rk3 = in3 ~mod* 0x6254 |
| rk5 = in5 ~mod* 0x41B3 |
| rk7 = in7 ~mod* 0x098E |
| |
| ri51 ~mod*= 0xFFFF_F384 |
| ri53 ~mod*= 0xFFFF_ADFD |
| ri71 ~mod*= 0xFFFF_E333 |
| ri73 ~mod*= 0xFFFF_C13B |
| |
| rl51 = ri51 ~mod+ rj |
| rl73 = ri73 ~mod+ rj |
| |
| rk1 ~mod+= ri71 ~mod+ rl51 |
| rk3 ~mod+= ri53 ~mod+ rl73 |
| rk5 ~mod+= ri53 ~mod+ rl51 |
| rk7 ~mod+= ri71 ~mod+ rl73 |
| |
| // Combine columns. |
| |
| if args.dst_stride > args.dst_buffer.length() { |
| return nothing |
| } |
| assert 8 <= args.dst_buffer.length() via "a <= b: a <= c; c <= b"(c: args.dst_stride) |
| |
| args.dst_buffer[0] = BIAS_AND_CLAMP[(((rd0 ~mod+ rk1) ~mod+ (1 << 17)) >> 18) & 1023] |
| args.dst_buffer[7] = BIAS_AND_CLAMP[(((rd0 ~mod- rk1) ~mod+ (1 << 17)) >> 18) & 1023] |
| args.dst_buffer[1] = BIAS_AND_CLAMP[(((rd1 ~mod+ rk3) ~mod+ (1 << 17)) >> 18) & 1023] |
| args.dst_buffer[6] = BIAS_AND_CLAMP[(((rd1 ~mod- rk3) ~mod+ (1 << 17)) >> 18) & 1023] |
| args.dst_buffer[2] = BIAS_AND_CLAMP[(((rd2 ~mod+ rk5) ~mod+ (1 << 17)) >> 18) & 1023] |
| args.dst_buffer[5] = BIAS_AND_CLAMP[(((rd2 ~mod- rk5) ~mod+ (1 << 17)) >> 18) & 1023] |
| args.dst_buffer[3] = BIAS_AND_CLAMP[(((rd3 ~mod+ rk7) ~mod+ (1 << 17)) >> 18) & 1023] |
| args.dst_buffer[4] = BIAS_AND_CLAMP[(((rd3 ~mod- rk7) ~mod+ (1 << 17)) >> 18) & 1023] |
| |
| args.dst_buffer = args.dst_buffer[args.dst_stride ..] |
| |
| // ==== Second pass, row 4. |
| |
| // Even columns. |
| |
| in2 = intermediate[0x22] |
| in6 = intermediate[0x26] |
| |
| ra = (in2 ~mod+ in6) ~mod* 0x1151 |
| |
| rb2 = ra ~mod+ (in2 ~mod* 0x187E) |
| rb6 = ra ~mod- (in6 ~mod* 0x3B21) |
| |
| in0 = intermediate[0x20] |
| in4 = intermediate[0x24] |
| |
| rcp = (in0 ~mod+ in4) ~mod<< 13 |
| rcm = (in0 ~mod- in4) ~mod<< 13 |
| |
| rd0 = rcp ~mod+ rb2 |
| rd1 = rcm ~mod+ rb6 |
| rd2 = rcm ~mod- rb6 |
| rd3 = rcp ~mod- rb2 |
| |
| // Odd columns. |
| |
| in1 = intermediate[0x21] |
| in3 = intermediate[0x23] |
| in5 = intermediate[0x25] |
| in7 = intermediate[0x27] |
| |
| ri51 = in5 ~mod+ in1 |
| ri53 = in5 ~mod+ in3 |
| ri71 = in7 ~mod+ in1 |
| ri73 = in7 ~mod+ in3 |
| |
| rj = (ri73 ~mod+ ri51) ~mod* 0x25A1 |
| |
| rk1 = in1 ~mod* 0x300B |
| rk3 = in3 ~mod* 0x6254 |
| rk5 = in5 ~mod* 0x41B3 |
| rk7 = in7 ~mod* 0x098E |
| |
| ri51 ~mod*= 0xFFFF_F384 |
| ri53 ~mod*= 0xFFFF_ADFD |
| ri71 ~mod*= 0xFFFF_E333 |
| ri73 ~mod*= 0xFFFF_C13B |
| |
| rl51 = ri51 ~mod+ rj |
| rl73 = ri73 ~mod+ rj |
| |
| rk1 ~mod+= ri71 ~mod+ rl51 |
| rk3 ~mod+= ri53 ~mod+ rl73 |
| rk5 ~mod+= ri53 ~mod+ rl51 |
| rk7 ~mod+= ri71 ~mod+ rl73 |
| |
| // Combine columns. |
| |
| if args.dst_stride > args.dst_buffer.length() { |
| return nothing |
| } |
| assert 8 <= args.dst_buffer.length() via "a <= b: a <= c; c <= b"(c: args.dst_stride) |
| |
| args.dst_buffer[0] = BIAS_AND_CLAMP[(((rd0 ~mod+ rk1) ~mod+ (1 << 17)) >> 18) & 1023] |
| args.dst_buffer[7] = BIAS_AND_CLAMP[(((rd0 ~mod- rk1) ~mod+ (1 << 17)) >> 18) & 1023] |
| args.dst_buffer[1] = BIAS_AND_CLAMP[(((rd1 ~mod+ rk3) ~mod+ (1 << 17)) >> 18) & 1023] |
| args.dst_buffer[6] = BIAS_AND_CLAMP[(((rd1 ~mod- rk3) ~mod+ (1 << 17)) >> 18) & 1023] |
| args.dst_buffer[2] = BIAS_AND_CLAMP[(((rd2 ~mod+ rk5) ~mod+ (1 << 17)) >> 18) & 1023] |
| args.dst_buffer[5] = BIAS_AND_CLAMP[(((rd2 ~mod- rk5) ~mod+ (1 << 17)) >> 18) & 1023] |
| args.dst_buffer[3] = BIAS_AND_CLAMP[(((rd3 ~mod+ rk7) ~mod+ (1 << 17)) >> 18) & 1023] |
| args.dst_buffer[4] = BIAS_AND_CLAMP[(((rd3 ~mod- rk7) ~mod+ (1 << 17)) >> 18) & 1023] |
| |
| args.dst_buffer = args.dst_buffer[args.dst_stride ..] |
| |
| // ==== Second pass, row 5. |
| |
| // Even columns. |
| |
| in2 = intermediate[0x2A] |
| in6 = intermediate[0x2E] |
| |
| ra = (in2 ~mod+ in6) ~mod* 0x1151 |
| |
| rb2 = ra ~mod+ (in2 ~mod* 0x187E) |
| rb6 = ra ~mod- (in6 ~mod* 0x3B21) |
| |
| in0 = intermediate[0x28] |
| in4 = intermediate[0x2C] |
| |
| rcp = (in0 ~mod+ in4) ~mod<< 13 |
| rcm = (in0 ~mod- in4) ~mod<< 13 |
| |
| rd0 = rcp ~mod+ rb2 |
| rd1 = rcm ~mod+ rb6 |
| rd2 = rcm ~mod- rb6 |
| rd3 = rcp ~mod- rb2 |
| |
| // Odd columns. |
| |
| in1 = intermediate[0x29] |
| in3 = intermediate[0x2B] |
| in5 = intermediate[0x2D] |
| in7 = intermediate[0x2F] |
| |
| ri51 = in5 ~mod+ in1 |
| ri53 = in5 ~mod+ in3 |
| ri71 = in7 ~mod+ in1 |
| ri73 = in7 ~mod+ in3 |
| |
| rj = (ri73 ~mod+ ri51) ~mod* 0x25A1 |
| |
| rk1 = in1 ~mod* 0x300B |
| rk3 = in3 ~mod* 0x6254 |
| rk5 = in5 ~mod* 0x41B3 |
| rk7 = in7 ~mod* 0x098E |
| |
| ri51 ~mod*= 0xFFFF_F384 |
| ri53 ~mod*= 0xFFFF_ADFD |
| ri71 ~mod*= 0xFFFF_E333 |
| ri73 ~mod*= 0xFFFF_C13B |
| |
| rl51 = ri51 ~mod+ rj |
| rl73 = ri73 ~mod+ rj |
| |
| rk1 ~mod+= ri71 ~mod+ rl51 |
| rk3 ~mod+= ri53 ~mod+ rl73 |
| rk5 ~mod+= ri53 ~mod+ rl51 |
| rk7 ~mod+= ri71 ~mod+ rl73 |
| |
| // Combine columns. |
| |
| if args.dst_stride > args.dst_buffer.length() { |
| return nothing |
| } |
| assert 8 <= args.dst_buffer.length() via "a <= b: a <= c; c <= b"(c: args.dst_stride) |
| |
| args.dst_buffer[0] = BIAS_AND_CLAMP[(((rd0 ~mod+ rk1) ~mod+ (1 << 17)) >> 18) & 1023] |
| args.dst_buffer[7] = BIAS_AND_CLAMP[(((rd0 ~mod- rk1) ~mod+ (1 << 17)) >> 18) & 1023] |
| args.dst_buffer[1] = BIAS_AND_CLAMP[(((rd1 ~mod+ rk3) ~mod+ (1 << 17)) >> 18) & 1023] |
| args.dst_buffer[6] = BIAS_AND_CLAMP[(((rd1 ~mod- rk3) ~mod+ (1 << 17)) >> 18) & 1023] |
| args.dst_buffer[2] = BIAS_AND_CLAMP[(((rd2 ~mod+ rk5) ~mod+ (1 << 17)) >> 18) & 1023] |
| args.dst_buffer[5] = BIAS_AND_CLAMP[(((rd2 ~mod- rk5) ~mod+ (1 << 17)) >> 18) & 1023] |
| args.dst_buffer[3] = BIAS_AND_CLAMP[(((rd3 ~mod+ rk7) ~mod+ (1 << 17)) >> 18) & 1023] |
| args.dst_buffer[4] = BIAS_AND_CLAMP[(((rd3 ~mod- rk7) ~mod+ (1 << 17)) >> 18) & 1023] |
| |
| args.dst_buffer = args.dst_buffer[args.dst_stride ..] |
| |
| // ==== Second pass, row 6. |
| |
| // Even columns. |
| |
| in2 = intermediate[0x32] |
| in6 = intermediate[0x36] |
| |
| ra = (in2 ~mod+ in6) ~mod* 0x1151 |
| |
| rb2 = ra ~mod+ (in2 ~mod* 0x187E) |
| rb6 = ra ~mod- (in6 ~mod* 0x3B21) |
| |
| in0 = intermediate[0x30] |
| in4 = intermediate[0x34] |
| |
| rcp = (in0 ~mod+ in4) ~mod<< 13 |
| rcm = (in0 ~mod- in4) ~mod<< 13 |
| |
| rd0 = rcp ~mod+ rb2 |
| rd1 = rcm ~mod+ rb6 |
| rd2 = rcm ~mod- rb6 |
| rd3 = rcp ~mod- rb2 |
| |
| // Odd columns. |
| |
| in1 = intermediate[0x31] |
| in3 = intermediate[0x33] |
| in5 = intermediate[0x35] |
| in7 = intermediate[0x37] |
| |
| ri51 = in5 ~mod+ in1 |
| ri53 = in5 ~mod+ in3 |
| ri71 = in7 ~mod+ in1 |
| ri73 = in7 ~mod+ in3 |
| |
| rj = (ri73 ~mod+ ri51) ~mod* 0x25A1 |
| |
| rk1 = in1 ~mod* 0x300B |
| rk3 = in3 ~mod* 0x6254 |
| rk5 = in5 ~mod* 0x41B3 |
| rk7 = in7 ~mod* 0x098E |
| |
| ri51 ~mod*= 0xFFFF_F384 |
| ri53 ~mod*= 0xFFFF_ADFD |
| ri71 ~mod*= 0xFFFF_E333 |
| ri73 ~mod*= 0xFFFF_C13B |
| |
| rl51 = ri51 ~mod+ rj |
| rl73 = ri73 ~mod+ rj |
| |
| rk1 ~mod+= ri71 ~mod+ rl51 |
| rk3 ~mod+= ri53 ~mod+ rl73 |
| rk5 ~mod+= ri53 ~mod+ rl51 |
| rk7 ~mod+= ri71 ~mod+ rl73 |
| |
| // Combine columns. |
| |
| if args.dst_stride > args.dst_buffer.length() { |
| return nothing |
| } |
| assert 8 <= args.dst_buffer.length() via "a <= b: a <= c; c <= b"(c: args.dst_stride) |
| |
| args.dst_buffer[0] = BIAS_AND_CLAMP[(((rd0 ~mod+ rk1) ~mod+ (1 << 17)) >> 18) & 1023] |
| args.dst_buffer[7] = BIAS_AND_CLAMP[(((rd0 ~mod- rk1) ~mod+ (1 << 17)) >> 18) & 1023] |
| args.dst_buffer[1] = BIAS_AND_CLAMP[(((rd1 ~mod+ rk3) ~mod+ (1 << 17)) >> 18) & 1023] |
| args.dst_buffer[6] = BIAS_AND_CLAMP[(((rd1 ~mod- rk3) ~mod+ (1 << 17)) >> 18) & 1023] |
| args.dst_buffer[2] = BIAS_AND_CLAMP[(((rd2 ~mod+ rk5) ~mod+ (1 << 17)) >> 18) & 1023] |
| args.dst_buffer[5] = BIAS_AND_CLAMP[(((rd2 ~mod- rk5) ~mod+ (1 << 17)) >> 18) & 1023] |
| args.dst_buffer[3] = BIAS_AND_CLAMP[(((rd3 ~mod+ rk7) ~mod+ (1 << 17)) >> 18) & 1023] |
| args.dst_buffer[4] = BIAS_AND_CLAMP[(((rd3 ~mod- rk7) ~mod+ (1 << 17)) >> 18) & 1023] |
| |
| args.dst_buffer = args.dst_buffer[args.dst_stride ..] |
| |
| // ==== Second pass, row 7. |
| |
| // Even columns. |
| |
| in2 = intermediate[0x3A] |
| in6 = intermediate[0x3E] |
| |
| ra = (in2 ~mod+ in6) ~mod* 0x1151 |
| |
| rb2 = ra ~mod+ (in2 ~mod* 0x187E) |
| rb6 = ra ~mod- (in6 ~mod* 0x3B21) |
| |
| in0 = intermediate[0x38] |
| in4 = intermediate[0x3C] |
| |
| rcp = (in0 ~mod+ in4) ~mod<< 13 |
| rcm = (in0 ~mod- in4) ~mod<< 13 |
| |
| rd0 = rcp ~mod+ rb2 |
| rd1 = rcm ~mod+ rb6 |
| rd2 = rcm ~mod- rb6 |
| rd3 = rcp ~mod- rb2 |
| |
| // Odd columns. |
| |
| in1 = intermediate[0x39] |
| in3 = intermediate[0x3B] |
| in5 = intermediate[0x3D] |
| in7 = intermediate[0x3F] |
| |
| ri51 = in5 ~mod+ in1 |
| ri53 = in5 ~mod+ in3 |
| ri71 = in7 ~mod+ in1 |
| ri73 = in7 ~mod+ in3 |
| |
| rj = (ri73 ~mod+ ri51) ~mod* 0x25A1 |
| |
| rk1 = in1 ~mod* 0x300B |
| rk3 = in3 ~mod* 0x6254 |
| rk5 = in5 ~mod* 0x41B3 |
| rk7 = in7 ~mod* 0x098E |
| |
| ri51 ~mod*= 0xFFFF_F384 |
| ri53 ~mod*= 0xFFFF_ADFD |
| ri71 ~mod*= 0xFFFF_E333 |
| ri73 ~mod*= 0xFFFF_C13B |
| |
| rl51 = ri51 ~mod+ rj |
| rl73 = ri73 ~mod+ rj |
| |
| rk1 ~mod+= ri71 ~mod+ rl51 |
| rk3 ~mod+= ri53 ~mod+ rl73 |
| rk5 ~mod+= ri53 ~mod+ rl51 |
| rk7 ~mod+= ri71 ~mod+ rl73 |
| |
| // Combine columns. |
| |
| if 8 > args.dst_buffer.length() { |
| return nothing |
| } |
| |
| args.dst_buffer[0] = BIAS_AND_CLAMP[(((rd0 ~mod+ rk1) ~mod+ (1 << 17)) >> 18) & 1023] |
| args.dst_buffer[7] = BIAS_AND_CLAMP[(((rd0 ~mod- rk1) ~mod+ (1 << 17)) >> 18) & 1023] |
| args.dst_buffer[1] = BIAS_AND_CLAMP[(((rd1 ~mod+ rk3) ~mod+ (1 << 17)) >> 18) & 1023] |
| args.dst_buffer[6] = BIAS_AND_CLAMP[(((rd1 ~mod- rk3) ~mod+ (1 << 17)) >> 18) & 1023] |
| args.dst_buffer[2] = BIAS_AND_CLAMP[(((rd2 ~mod+ rk5) ~mod+ (1 << 17)) >> 18) & 1023] |
| args.dst_buffer[5] = BIAS_AND_CLAMP[(((rd2 ~mod- rk5) ~mod+ (1 << 17)) >> 18) & 1023] |
| args.dst_buffer[3] = BIAS_AND_CLAMP[(((rd3 ~mod+ rk7) ~mod+ (1 << 17)) >> 18) & 1023] |
| args.dst_buffer[4] = BIAS_AND_CLAMP[(((rd3 ~mod- rk7) ~mod+ (1 << 17)) >> 18) & 1023] |
| |
| // -------- END generated by script/print-jpeg-idct-code.go |
| } |