| // Uncomment to enable the printf-based assert() defined further down in this file. |
| //#define _DEBUG |
| |
| // Fallback NULL for compilers that do not already define it. |
| #ifndef NULL |
| #define NULL 0L |
| #endif |
| |
| // stdint-style names for the built-in OpenCL C integer types used throughout this file. |
| typedef char int8_t; |
| typedef uchar uint8_t; |
| |
| typedef short int16_t; |
| typedef ushort uint16_t; |
| |
| typedef int int32_t; |
| typedef uint uint32_t; |
| |
| typedef long int64_t; |
| typedef ulong uint64_t; |
| |
| // One RGBA pixel: .x = red, .y = green, .z = blue, .w = alpha (see color_distance()). |
| typedef uchar4 color_rgba; |
| |
| // Integer limits matching the typedefs above. |
| // NOTE(review): the UL suffix makes UINT32_MAX a `ulong` constant rather than a `uint` -- confirm this is intended. |
| #define UINT32_MAX 0xFFFFFFFFUL |
| #define INT64_MAX LONG_MAX |
| #define UINT64_MAX ULONG_MAX |
| |
| // Returns a squared (a * a). |
| int squarei(int a) |
| { |
| const int squared = a * a; |
| return squared; |
| } |
| |
| #ifdef _DEBUG |
| // Debug builds: report a failed assertion with printf (expression text and line number). |
| // Execution continues afterwards; nothing is aborted. |
| inline void internal_assert(bool x, constant char *pMsg, int line) |
| { |
| if (x) |
| return; |
| |
| printf("assert() failed on line %i: %s\n", line, pMsg); |
| } |
| #define assert(x) internal_assert(x, #x, __LINE__) |
| #else |
| // Non-debug builds: assert() expands to nothing, so its argument is never evaluated. |
| #define assert(x) |
| #endif |
| |
| // Clamps x to the range [0, 255] and returns it as a byte. |
| inline uint8_t clamp255(int x) |
| { |
| const int clamped = clamp(x, 0, 255); |
| return (uint8_t)clamped; |
| } |
| |
| // Clamps x to [0, 255] and returns it as a byte. |
| // *pDid_clamp is set to true when x was out of range; it is left untouched otherwise, |
| // so one flag can accumulate across several calls. |
| inline uint8_t clamp255_flag(int x, bool *pDid_clamp) |
| { |
| const bool in_range = (x >= 0) && (x <= 255); |
| if (in_range) |
| return (uint8_t)(x); |
| |
| *pDid_clamp = true; |
| return (x < 0) ? 0 : 255; |
| } |
| |
| // Parameter block for the ETC1S encoder; the optimizer functions in this file read it through a `global` pointer. |
| // Declared packed (no padding); presumably the layout must match a host-side definition -- TODO confirm. |
| typedef struct __attribute__ ((packed)) encode_etc1s_param_struct_tag |
| { |
| // Not read by the code visible in this part of the file; presumably the number of blocks to encode -- TODO confirm. |
| uint32_t m_total_blocks; |
| // Passed as the `perceptual` flag of color_distance(): nonzero selects the perceptual metric. |
| int m_perceptual; |
| // Not read by the code visible in this part of the file; meaning to be confirmed. |
| int m_total_perms; |
| } encode_etc1s_param_struct; |
| |
| // A block of 16 RGBA pixels stored row by row (index = y*4+x), i.e. 4 pixels per row. Declared packed. |
| typedef struct __attribute__ ((packed)) pixel_block_tag |
| { |
| color_rgba m_pixels[16]; // [y*4+x] |
| } pixel_block; |
| |
| uint color_distance(bool perceptual, color_rgba e1, color_rgba e2, bool alpha) |
| { |
| if (perceptual) |
| { |
| #if 0 |
| float3 delta_rgb = (float3)(e1.x - e2.x, e1.y - e2.y, e1.z - e2.z); |
| |
| float3 delta_ycbcr; |
| delta_ycbcr.x = dot(delta_rgb, (float3)(.2126f, .7152f, .0722f)); // y |
| delta_ycbcr.y = delta_rgb.x - delta_ycbcr.x; // cr |
| delta_ycbcr.z = delta_rgb.z - delta_ycbcr.x; // cb |
| |
| delta_ycbcr *= delta_ycbcr; |
| |
| float d = dot(delta_ycbcr, (float3)(1.0f, 0.203125f, 0.0234375f)); |
| |
| if (alpha) |
| { |
| int delta_a = e1.w - e2.w; |
| d += delta_a * delta_a; |
| } |
| |
| d = clamp(d * 256.0f + .5f, 0.0f, (float)UINT32_MAX); |
| |
| return (uint)(d); |
| #else |
| // This matches the CPU code, which is useful for testing. |
| int dr = e1.x - e2.x; |
| int dg = e1.y - e2.y; |
| int db = e1.z - e2.z; |
| |
| int delta_l = dr * 27 + dg * 92 + db * 9; |
| int delta_cr = dr * 128 - delta_l; |
| int delta_cb = db * 128 - delta_l; |
| |
| uint id = ((uint)(delta_l * delta_l) >> 7U) + |
| ((((uint)(delta_cr * delta_cr) >> 7U) * 26U) >> 7U) + |
| ((((uint)(delta_cb * delta_cb) >> 7U) * 3U) >> 7U); |
| |
| if (alpha) |
| { |
| int da = (e1.w - e2.w) << 7; |
| id += ((uint)(da * da) >> 7U); |
| } |
| |
| return id; |
| #endif |
| } |
| else if (alpha) |
| { |
| int dr = e1.x - e2.x; |
| int dg = e1.y - e2.y; |
| int db = e1.z - e2.z; |
| int da = e1.w - e2.w; |
| return dr * dr + dg * dg + db * db + da * da; |
| } |
| else |
| { |
| int dr = e1.x - e2.x; |
| int dg = e1.y - e2.y; |
| int db = e1.z - e2.z; |
| return dr * dr + dg * dg + db * db; |
| } |
| } |
| |
| // One 8-byte ETC1 block, accessible either as a single 64-bit value or as raw bytes. |
| // The accessor functions in this file address it as a big-endian 64-bit word: |
| // bit offset `ofs` lives in m_bytes[7 - (ofs >> 3)]. |
| typedef struct __attribute__ ((packed)) etc_block_tag |
| { |
| // big endian uint64: |
| // bit ofs: 56 48 40 32 24 16 8 0 |
| // byte ofs: b0, b1, b2, b3, b4, b5, b6, b7 |
| union |
| { |
| uint64_t m_uint64; |
| uint8_t m_bytes[8]; |
| }; |
| |
| } etc_block; |
| |
| // Layout constants for the 64-bit ETC1 block. |
| // All *BitOffset values are bit positions within the block viewed as a big-endian 64-bit word (see etc_block). |
| enum etc_constants |
| { |
| cETC1BytesPerBlock = 8U, |
| |
| // Selectors: 2 bits each, 4 possible values. |
| cETC1SelectorBits = 2U, |
| cETC1SelectorValues = 1U << cETC1SelectorBits, |
| cETC1SelectorMask = cETC1SelectorValues - 1U, |
| |
| // Block dimension: 1 << 2 = 4. |
| cETC1BlockShift = 2U, |
| cETC1BlockSize = 1U << cETC1BlockShift, |
| |
| // The selector LSBs and MSBs are stored in two separate 16-bit planes. |
| cETC1LSBSelectorIndicesBitOffset = 0, |
| cETC1MSBSelectorIndicesBitOffset = 16, |
| |
| cETC1FlipBitOffset = 32, |
| cETC1DiffBitOffset = 33, |
| |
| // Intensity modifier table index: 3 bits per subblock, 8 tables. |
| cETC1IntenModifierNumBits = 3, |
| cETC1IntenModifierValues = 1 << cETC1IntenModifierNumBits, |
| cETC1RightIntenModifierTableBitOffset = 34, |
| cETC1LeftIntenModifierTableBitOffset = 37, |
| |
| // Base+Delta encoding (5 bit bases, 3 bit delta) |
| cETC1BaseColorCompNumBits = 5, |
| cETC1BaseColorCompMax = 1 << cETC1BaseColorCompNumBits, |
| |
| // Note: cETC1DeltaColorComp and cETC1DeltaColorCompMax have the same value. |
| cETC1DeltaColorCompNumBits = 3, |
| cETC1DeltaColorComp = 1 << cETC1DeltaColorCompNumBits, |
| cETC1DeltaColorCompMax = 1 << cETC1DeltaColorCompNumBits, |
| |
| cETC1BaseColor5RBitOffset = 59, |
| cETC1BaseColor5GBitOffset = 51, |
| cETC1BaseColor5BBitOffset = 43, |
| |
| cETC1DeltaColor3RBitOffset = 56, |
| cETC1DeltaColor3GBitOffset = 48, |
| cETC1DeltaColor3BBitOffset = 40, |
| |
| // Absolute (non-delta) encoding (two 4-bit per component bases) |
| cETC1AbsColorCompNumBits = 4, |
| cETC1AbsColorCompMax = 1 << cETC1AbsColorCompNumBits, |
| |
| cETC1AbsColor4R1BitOffset = 60, |
| cETC1AbsColor4G1BitOffset = 52, |
| cETC1AbsColor4B1BitOffset = 44, |
| |
| cETC1AbsColor4R2BitOffset = 56, |
| cETC1AbsColor4G2BitOffset = 48, |
| cETC1AbsColor4B2BitOffset = 40, |
| |
| // Inclusive range of a signed 3-bit color delta. |
| cETC1ColorDeltaMin = -4, |
| cETC1ColorDeltaMax = 3, |
| |
| // Delta3: |
| // 0 1 2 3 4 5 6 7 |
| // 000 001 010 011 100 101 110 111 |
| // 0 1 2 3 -4 -3 -2 -1 |
| }; |
| |
| // Constant table of 165 four-byte entries. It is not referenced by the code visible in this part of the file. |
| // NOTE(review): the entries look like the ways of distributing 8 items over the 4 selector values |
| // (each row appears to sum to 8), in a tuned search order -- confirm against the code that consumes it. |
| #define BASISU_ETC1_CLUSTER_FIT_ORDER_TABLE_SIZE (165) |
| constant struct { uint8_t m_v[4]; } g_cluster_fit_order_tab[BASISU_ETC1_CLUSTER_FIT_ORDER_TABLE_SIZE] = |
| { |
| { { 0, 0, 0, 8 } },{ { 0, 5, 2, 1 } },{ { 0, 6, 1, 1 } },{ { 0, 7, 0, 1 } },{ { 0, 7, 1, 0 } }, |
| { { 0, 0, 8, 0 } },{ { 0, 0, 3, 5 } },{ { 0, 1, 7, 0 } },{ { 0, 0, 4, 4 } },{ { 0, 0, 2, 6 } }, |
| { { 0, 0, 7, 1 } },{ { 0, 0, 1, 7 } },{ { 0, 0, 5, 3 } },{ { 1, 6, 0, 1 } },{ { 0, 0, 6, 2 } }, |
| { { 0, 2, 6, 0 } },{ { 2, 4, 2, 0 } },{ { 0, 3, 5, 0 } },{ { 3, 3, 1, 1 } },{ { 4, 2, 0, 2 } }, |
| { { 1, 5, 2, 0 } },{ { 0, 5, 3, 0 } },{ { 0, 6, 2, 0 } },{ { 2, 4, 1, 1 } },{ { 5, 1, 0, 2 } }, |
| { { 6, 1, 1, 0 } },{ { 3, 3, 0, 2 } },{ { 6, 0, 0, 2 } },{ { 0, 8, 0, 0 } },{ { 6, 1, 0, 1 } }, |
| { { 0, 1, 6, 1 } },{ { 1, 6, 1, 0 } },{ { 4, 1, 3, 0 } },{ { 0, 2, 5, 1 } },{ { 5, 0, 3, 0 } }, |
| { { 5, 3, 0, 0 } },{ { 0, 1, 5, 2 } },{ { 0, 3, 4, 1 } },{ { 2, 5, 1, 0 } },{ { 1, 7, 0, 0 } }, |
| { { 0, 1, 4, 3 } },{ { 6, 0, 2, 0 } },{ { 0, 4, 4, 0 } },{ { 2, 6, 0, 0 } },{ { 0, 2, 4, 2 } }, |
| { { 0, 5, 1, 2 } },{ { 0, 6, 0, 2 } },{ { 3, 5, 0, 0 } },{ { 0, 4, 3, 1 } },{ { 3, 4, 1, 0 } }, |
| { { 4, 3, 1, 0 } },{ { 1, 5, 0, 2 } },{ { 0, 3, 3, 2 } },{ { 1, 4, 1, 2 } },{ { 0, 4, 2, 2 } }, |
| { { 2, 3, 3, 0 } },{ { 4, 4, 0, 0 } },{ { 1, 2, 4, 1 } },{ { 0, 5, 0, 3 } },{ { 0, 1, 3, 4 } }, |
| { { 1, 5, 1, 1 } },{ { 1, 4, 2, 1 } },{ { 1, 3, 2, 2 } },{ { 5, 2, 1, 0 } },{ { 1, 3, 3, 1 } }, |
| { { 0, 1, 2, 5 } },{ { 1, 1, 5, 1 } },{ { 0, 3, 2, 3 } },{ { 2, 5, 0, 1 } },{ { 3, 2, 2, 1 } }, |
| { { 2, 3, 0, 3 } },{ { 1, 4, 3, 0 } },{ { 2, 2, 1, 3 } },{ { 6, 2, 0, 0 } },{ { 1, 0, 6, 1 } }, |
| { { 3, 3, 2, 0 } },{ { 7, 1, 0, 0 } },{ { 3, 1, 4, 0 } },{ { 0, 2, 3, 3 } },{ { 0, 4, 1, 3 } }, |
| { { 0, 4, 0, 4 } },{ { 0, 1, 0, 7 } },{ { 2, 0, 5, 1 } },{ { 2, 0, 4, 2 } },{ { 3, 0, 2, 3 } }, |
| { { 2, 2, 4, 0 } },{ { 2, 2, 3, 1 } },{ { 4, 0, 3, 1 } },{ { 3, 2, 3, 0 } },{ { 2, 3, 2, 1 } }, |
| { { 1, 3, 4, 0 } },{ { 7, 0, 1, 0 } },{ { 3, 0, 4, 1 } },{ { 1, 0, 5, 2 } },{ { 8, 0, 0, 0 } }, |
| { { 3, 0, 1, 4 } },{ { 4, 1, 1, 2 } },{ { 4, 0, 2, 2 } },{ { 1, 2, 5, 0 } },{ { 4, 2, 1, 1 } }, |
| { { 3, 4, 0, 1 } },{ { 2, 0, 3, 3 } },{ { 5, 0, 1, 2 } },{ { 5, 0, 0, 3 } },{ { 2, 4, 0, 2 } }, |
| { { 2, 1, 4, 1 } },{ { 4, 0, 1, 3 } },{ { 2, 1, 5, 0 } },{ { 4, 2, 2, 0 } },{ { 4, 0, 4, 0 } }, |
| { { 1, 0, 4, 3 } },{ { 1, 4, 0, 3 } },{ { 3, 0, 3, 2 } },{ { 4, 3, 0, 1 } },{ { 0, 1, 1, 6 } }, |
| { { 1, 3, 1, 3 } },{ { 0, 2, 2, 4 } },{ { 2, 0, 2, 4 } },{ { 5, 1, 1, 1 } },{ { 3, 0, 5, 0 } }, |
| { { 2, 3, 1, 2 } },{ { 3, 0, 0, 5 } },{ { 0, 3, 1, 4 } },{ { 5, 0, 2, 1 } },{ { 2, 1, 3, 2 } }, |
| { { 2, 0, 6, 0 } },{ { 3, 1, 3, 1 } },{ { 5, 1, 2, 0 } },{ { 1, 0, 3, 4 } },{ { 1, 1, 6, 0 } }, |
| { { 4, 0, 0, 4 } },{ { 2, 0, 1, 5 } },{ { 0, 3, 0, 5 } },{ { 1, 3, 0, 4 } },{ { 4, 1, 2, 1 } }, |
| { { 1, 2, 3, 2 } },{ { 3, 1, 0, 4 } },{ { 5, 2, 0, 1 } },{ { 1, 2, 2, 3 } },{ { 3, 2, 1, 2 } }, |
| { { 2, 2, 2, 2 } },{ { 6, 0, 1, 1 } },{ { 1, 2, 1, 4 } },{ { 1, 1, 4, 2 } },{ { 3, 2, 0, 3 } }, |
| { { 1, 2, 0, 5 } },{ { 1, 0, 7, 0 } },{ { 3, 1, 2, 2 } },{ { 1, 0, 2, 5 } },{ { 2, 0, 0, 6 } }, |
| { { 2, 1, 1, 4 } },{ { 2, 2, 0, 4 } },{ { 1, 1, 3, 3 } },{ { 7, 0, 0, 1 } },{ { 1, 0, 0, 7 } }, |
| { { 2, 1, 2, 3 } },{ { 4, 1, 0, 3 } },{ { 3, 1, 1, 3 } },{ { 1, 1, 2, 4 } },{ { 2, 1, 0, 5 } }, |
| { { 1, 0, 1, 6 } },{ { 0, 2, 1, 5 } },{ { 0, 2, 0, 6 } },{ { 1, 1, 1, 5 } },{ { 1, 1, 0, 6 } } |
| }; |
| |
| // Intensity modifier tables, indexed [table 0-7][selector 0-3]. |
| // The selected value is added to each of the R, G and B channels of a subblock's base color and the result is clamped to 0-255 |
| // (see etc_block_get_block_colors()). |
| constant int g_etc1_inten_tables[cETC1IntenModifierValues][cETC1SelectorValues] = |
| { |
| { -8, -2, 2, 8 }, { -17, -5, 5, 17 }, { -29, -9, 9, 29 }, { -42, -13, 13, 42 }, |
| { -60, -18, 18, 60 }, { -80, -24, 24, 80 }, { -106, -33, 33, 106 }, { -183, -47, 47, 183 } |
| }; |
| |
| // Maps a raw 2-bit selector as stored in the block to an index into a row of g_etc1_inten_tables (used by etc_block_get_selector()). |
| constant uint8_t g_etc1_to_selector_index[cETC1SelectorValues] = { 2, 3, 1, 0 }; |
| // Inverse mapping: row index of g_etc1_inten_tables -> raw 2-bit selector as stored in the block (used by etc_block_set_selector()). |
| constant uint8_t g_selector_index_to_etc1[cETC1SelectorValues] = { 3, 2, 0, 1 }; |
| |
| // Reads a `num`-bit field (1-8 bits) that starts at bit offset `ofs` of the block. |
| // The field must lie entirely within one byte (checked by the asserts in debug builds). |
| uint32_t etc_block_get_byte_bits(const etc_block *p, uint32_t ofs, uint32_t num) |
| { |
| assert((ofs + num) <= 64U); |
| assert(num && (num <= 8U)); |
| assert((ofs >> 3) == ((ofs + num - 1) >> 3)); |
| |
| // Bit 0 of the block is in the last byte, so byte indices run backwards. |
| const uint32_t byte_val = p->m_bytes[7 - (ofs >> 3)]; |
| const uint32_t shift = ofs & 7; |
| const uint32_t field_mask = (1 << num) - 1; |
| return (byte_val >> shift) & field_mask; |
| } |
| |
| // Overwrites the `num`-bit field at bit offset `ofs` of the block with `bits`. |
| // The field must lie entirely within one byte; the other bits of that byte are preserved. |
| void etc_block_set_byte_bits(etc_block *p, uint32_t ofs, uint32_t num, uint32_t bits) |
| { |
| assert((ofs + num) <= 64U); |
| assert(num && (num < 32U)); |
| assert((ofs >> 3) == ((ofs + num - 1) >> 3)); |
| assert(bits < (1U << num)); |
| |
| // Bit 0 of the block is in the last byte, so byte indices run backwards. |
| const uint32_t index = 7 - (ofs >> 3); |
| const uint32_t shift = ofs & 7; |
| const uint32_t field_mask = (1 << num) - 1; |
| const uint32_t kept = p->m_bytes[index] & ~(field_mask << shift); |
| p->m_bytes[index] = (uint8_t)(kept | (bits << shift)); |
| } |
| |
| // Returns the block's flip bit (bit 0 of byte 3). |
| bool etc_block_get_flip_bit(const etc_block *p) |
| { |
| const uint32_t flags = p->m_bytes[3]; |
| return (flags & 1U) == 1U; |
| } |
| |
| // Sets or clears the block's flip bit (bit 0 of byte 3); the other bits of that byte are preserved. |
| void etc_block_set_flip_bit(etc_block *p, bool flip) |
| { |
| const uint32_t others = p->m_bytes[3] & 0xFEU; |
| p->m_bytes[3] = (uint8_t)(others | (flip ? 1U : 0U)); |
| } |
| |
| // Returns the block's diff bit (bit 1 of byte 3). |
| bool etc_block_get_diff_bit(const etc_block *p) |
| { |
| const uint32_t flags = p->m_bytes[3]; |
| return (flags & 2U) == 2U; |
| } |
| |
| // Sets or clears the block's diff bit (bit 1 of byte 3); the other bits of that byte are preserved. |
| void etc_block_set_diff_bit(etc_block *p, bool diff) |
| { |
| const uint32_t others = p->m_bytes[3] & 0xFDU; |
| p->m_bytes[3] = (uint8_t)(others | (diff ? 2U : 0U)); |
| } |
| |
| // Returns the intensity modifier table index (0-7) of the given subblock. |
| // subblock_id 0 = left/top (CW 1, bits 5-7 of byte 3), 1 = right/bottom (CW 2, bits 2-4 of byte 3). |
| uint32_t etc_block_get_inten_table(const etc_block *p, uint32_t subblock_id) |
| { |
| assert(subblock_id < 2); |
| |
| uint32_t shift = 5; |
| if (subblock_id) |
| shift = 2; |
| |
| return (p->m_bytes[3] >> shift) & 7; |
| } |
| |
| // Stores intensity modifier table index t (0-7) for subblock subblock_id (0 or 1). |
| // Only the 3-bit field of that subblock in byte 3 is replaced. |
| void etc_block_set_inten_table(etc_block *p, uint32_t subblock_id, uint32_t t) |
| { |
| assert(subblock_id < 2); |
| assert(t < 8); |
| |
| uint32_t shift = 5; |
| if (subblock_id) |
| shift = 2; |
| |
| const uint32_t kept = p->m_bytes[3] & ~(7U << shift); |
| p->m_bytes[3] = (uint8_t)(kept | (t << shift)); |
| } |
| |
| // Assigns the same intensity modifier table index t to both subblocks. |
| void etc_block_set_inten_tables_etc1s(etc_block *p, uint32_t t) |
| { |
| for (uint32_t subblock_id = 0; subblock_id < 2; subblock_id++) |
| etc_block_set_inten_table(p, subblock_id, t); |
| } |
| |
| // Returns the raw 2-bit selector stored for pixel (x, y), without remapping it. |
| // Selector bits are indexed by x*4+y; the LSB plane is in bytes 6-7 and the MSB plane two bytes earlier (bytes 4-5). |
| uint32_t etc_block_get_raw_selector(const etc_block *pBlock, uint32_t x, uint32_t y) |
| { |
| assert((x | y) < 4); |
| |
| const uint32_t bit_index = x * 4 + y; |
| const uint32_t shift = bit_index & 7; |
| const uint32_t lsb_byte = 7 - (bit_index >> 3); |
| |
| const uint32_t lsb = (pBlock->m_bytes[lsb_byte] >> shift) & 1; |
| const uint32_t msb = (pBlock->m_bytes[lsb_byte - 2] >> shift) & 1; |
| |
| return (msb << 1) | lsb; |
| } |
| |
| // Returns the selector of pixel (x, y) as a value 0-3 that directly indexes a row of g_etc1_inten_tables. |
| uint32_t etc_block_get_selector(const etc_block *pBlock, uint32_t x, uint32_t y) |
| { |
| const uint32_t raw = etc_block_get_raw_selector(pBlock, x, y); |
| return g_etc1_to_selector_index[raw]; |
| } |
| |
| // Stores selector `val` for pixel (x, y). `val` is 0-3 and directly indexes a row of g_etc1_inten_tables; |
| // it is converted to the raw ETC1 encoding before being written to the LSB and MSB bit planes. |
| void etc_block_set_selector(etc_block *pBlock, uint32_t x, uint32_t y, uint32_t val) |
| { |
| assert((x | y | val) < 4); |
| |
| const uint32_t bit_index = x * 4 + y; |
| const uint32_t shift = bit_index & 7; |
| const uint32_t lsb_byte = 7 - (bit_index >> 3); |
| const uint32_t msb_byte = lsb_byte - 2; |
| |
| const uint32_t raw = g_selector_index_to_etc1[val]; |
| const uint32_t keep = ~(1U << shift); |
| |
| pBlock->m_bytes[lsb_byte] = (uint8_t)((pBlock->m_bytes[lsb_byte] & keep) | ((raw & 1) << shift)); |
| pBlock->m_bytes[msb_byte] = (uint8_t)((pBlock->m_bytes[msb_byte] & keep) | ((raw >> 1) << shift)); |
| } |
| |
| // Writes the 4:4:4 base color of subblock idx (0 = first, nonzero = second). |
| // c is packed as (r << 8) | (g << 4) | b. |
| void etc_block_set_base4_color(etc_block *pBlock, uint32_t idx, uint16_t c) |
| { |
| const uint32_t r_ofs = idx ? cETC1AbsColor4R2BitOffset : cETC1AbsColor4R1BitOffset; |
| const uint32_t g_ofs = idx ? cETC1AbsColor4G2BitOffset : cETC1AbsColor4G1BitOffset; |
| const uint32_t b_ofs = idx ? cETC1AbsColor4B2BitOffset : cETC1AbsColor4B1BitOffset; |
| |
| etc_block_set_byte_bits(pBlock, r_ofs, 4, (c >> 8) & 15); |
| etc_block_set_byte_bits(pBlock, g_ofs, 4, (c >> 4) & 15); |
| etc_block_set_byte_bits(pBlock, b_ofs, 4, c & 15); |
| } |
| |
| // Reads the 4:4:4 base color of subblock idx (0 = first, nonzero = second). |
| // The result is packed as (r << 8) | (g << 4) | b. |
| uint16_t etc_block_get_base4_color(const etc_block *pBlock, uint32_t idx) |
| { |
| const uint32_t r_ofs = idx ? cETC1AbsColor4R2BitOffset : cETC1AbsColor4R1BitOffset; |
| const uint32_t g_ofs = idx ? cETC1AbsColor4G2BitOffset : cETC1AbsColor4G1BitOffset; |
| const uint32_t b_ofs = idx ? cETC1AbsColor4B2BitOffset : cETC1AbsColor4B1BitOffset; |
| |
| const uint32_t r = etc_block_get_byte_bits(pBlock, r_ofs, 4); |
| const uint32_t g = etc_block_get_byte_bits(pBlock, g_ofs, 4); |
| const uint32_t b = etc_block_get_byte_bits(pBlock, b_ofs, 4); |
| |
| return (uint16_t)((r << 8U) | (g << 4U) | b); |
| } |
| |
| // Writes the 5:5:5 base color. c is packed as (r << 10) | (g << 5) | b. |
| void etc_block_set_base5_color(etc_block *pBlock, uint16_t c) |
| { |
| const uint32_t r = (c >> 10) & 31; |
| const uint32_t g = (c >> 5) & 31; |
| const uint32_t b = c & 31; |
| |
| etc_block_set_byte_bits(pBlock, cETC1BaseColor5RBitOffset, 5, r); |
| etc_block_set_byte_bits(pBlock, cETC1BaseColor5GBitOffset, 5, g); |
| etc_block_set_byte_bits(pBlock, cETC1BaseColor5BBitOffset, 5, b); |
| } |
| |
| // Reads the 5:5:5 base color, packed as (r << 10) | (g << 5) | b. |
| uint16_t etc_block_get_base5_color(const etc_block *pBlock) |
| { |
| uint32_t packed = etc_block_get_byte_bits(pBlock, cETC1BaseColor5RBitOffset, 5) << 10U; |
| packed |= etc_block_get_byte_bits(pBlock, cETC1BaseColor5GBitOffset, 5) << 5U; |
| packed |= etc_block_get_byte_bits(pBlock, cETC1BaseColor5BBitOffset, 5); |
| return (uint16_t)packed; |
| } |
| |
| // Writes the 3:3:3 color delta. c is packed as (r << 6) | (g << 3) | b, each field being a raw 3-bit value. |
| void etc_block_set_delta3_color(etc_block *pBlock, uint16_t c) |
| { |
| const uint32_t r = (c >> 6) & 7; |
| const uint32_t g = (c >> 3) & 7; |
| const uint32_t b = c & 7; |
| |
| etc_block_set_byte_bits(pBlock, cETC1DeltaColor3RBitOffset, 3, r); |
| etc_block_set_byte_bits(pBlock, cETC1DeltaColor3GBitOffset, 3, g); |
| etc_block_set_byte_bits(pBlock, cETC1DeltaColor3BBitOffset, 3, b); |
| } |
| |
| // Reads the 3:3:3 color delta, packed as (r << 6) | (g << 3) | b (raw 3-bit fields, not sign-extended). |
| uint16_t etc_block_get_delta3_color(const etc_block *pBlock) |
| { |
| uint32_t packed = etc_block_get_byte_bits(pBlock, cETC1DeltaColor3RBitOffset, 3) << 6U; |
| packed |= etc_block_get_byte_bits(pBlock, cETC1DeltaColor3GBitOffset, 3) << 3U; |
| packed |= etc_block_get_byte_bits(pBlock, cETC1DeltaColor3BBitOffset, 3); |
| return (uint16_t)packed; |
| } |
| |
| // Splits a packed 3:3:3 delta ((r << 6) | (g << 3) | b) into signed components. |
| // Each 3-bit field is sign-extended: raw 0..3 stay as they are, raw 4..7 become -4..-1. |
| void etc_block_unpack_delta3(int *pR, int *pG, int *pB, uint16_t packed_delta3) |
| { |
| const int raw_r = (packed_delta3 >> 6) & 7; |
| const int raw_g = (packed_delta3 >> 3) & 7; |
| const int raw_b = packed_delta3 & 7; |
| |
| *pR = (raw_r >= 4) ? (raw_r - 8) : raw_r; |
| *pG = (raw_g >= 4) ? (raw_g - 8) : raw_g; |
| *pB = (raw_b >= 4) ? (raw_b - 8) : raw_b; |
| } |
| |
| // Computes base5 + delta3 per channel and stores the resulting color in *pResult. |
| // Returns true when every channel stayed within 0-31; otherwise the channels are clamped to 0-31 and false is returned. |
| // When `scaled` is set the 5-bit channels are expanded to 8 bits. Alpha is min(alpha, 255). |
| bool etc_block_unpack_color5_delta3(color_rgba *pResult, uint16_t packed_color5, uint16_t packed_delta3, bool scaled, uint32_t alpha) |
| { |
| int delta_r, delta_g, delta_b; |
| etc_block_unpack_delta3(&delta_r, &delta_g, &delta_b, packed_delta3); |
| |
| int r = (int)((packed_color5 >> 10U) & 31U) + delta_r; |
| int g = (int)((packed_color5 >> 5U) & 31U) + delta_g; |
| int b = (int)(packed_color5 & 31U) + delta_b; |
| |
| // A negative or too-large channel sets bits above bit 4 in the OR of all three. |
| const bool success = ((uint32_t)(r | g | b) <= 31U); |
| if (!success) |
| { |
| r = clamp(r, 0, 31); |
| g = clamp(g, 0, 31); |
| b = clamp(b, 0, 31); |
| } |
| |
| if (scaled) |
| { |
| // 5 -> 8 bit expansion by replicating the top bits. |
| r = (r << 3U) | (r >> 2U); |
| g = (g << 3U) | (g >> 2U); |
| b = (b << 3U) | (b >> 2U); |
| } |
| |
| *pResult = (color_rgba)(r, g, b, min(alpha, 255U)); |
| return success; |
| } |
| |
| // Unpacks a 5:5:5 color ((r << 10) | (g << 5) | b). When `scaled` is set each channel is expanded from 5 to 8 bits |
| // by replicating its top bits. Alpha is min(alpha, 255). |
| color_rgba etc_block_unpack_color5(uint16_t packed_color5, bool scaled, uint32_t alpha) |
| { |
| const uint32_t r5 = (packed_color5 >> 10U) & 31U; |
| const uint32_t g5 = (packed_color5 >> 5U) & 31U; |
| const uint32_t b5 = packed_color5 & 31U; |
| |
| const uint32_t r = scaled ? ((r5 << 3U) | (r5 >> 2U)) : r5; |
| const uint32_t g = scaled ? ((g5 << 3U) | (g5 >> 2U)) : g5; |
| const uint32_t b = scaled ? ((b5 << 3U) | (b5 >> 2U)) : b5; |
| |
| return (color_rgba)(r, g, b, min(alpha, 255U)); |
| } |
| |
| // Unpacks a 4:4:4 color ((r << 8) | (g << 4) | b). When `scaled` is set each channel is expanded from 4 to 8 bits |
| // by duplicating the nibble. Alpha is min(alpha, 255). |
| color_rgba etc_block_unpack_color4(uint16_t packed_color4, bool scaled, uint32_t alpha) |
| { |
| const uint32_t r4 = (packed_color4 >> 8U) & 15U; |
| const uint32_t g4 = (packed_color4 >> 4U) & 15U; |
| const uint32_t b4 = packed_color4 & 15U; |
| |
| const uint32_t r = scaled ? ((r4 << 4U) | r4) : r4; |
| const uint32_t g = scaled ? ((g4 << 4U) | g4) : g4; |
| const uint32_t b = scaled ? ((b4 << 4U) | b4) : b4; |
| |
| return (color_rgba)(r, g, b, min(alpha, 255U)); |
| } |
| |
| // Computes the four colors a subblock can take: its base color plus each entry of its intensity table, per channel, clamped to 0-255. |
| // pBlock_colors receives 4 entries with alpha 255. |
| // Returns true if any channel had to be clamped, false otherwise. |
| bool etc_block_get_block_colors(const etc_block *pBlock, color_rgba* pBlock_colors, uint32_t subblock_index) |
| { |
| color_rgba base; |
| |
| if (!etc_block_get_diff_bit(pBlock)) |
| { |
| // Absolute mode: each subblock has its own 4:4:4 base color. |
| base = etc_block_unpack_color4(etc_block_get_base4_color(pBlock, subblock_index), true, 255); |
| } |
| else if (subblock_index) |
| { |
| // Differential mode, second subblock: base5 + delta3. |
| etc_block_unpack_color5_delta3(&base, etc_block_get_base5_color(pBlock), etc_block_get_delta3_color(pBlock), true, 255); |
| } |
| else |
| { |
| // Differential mode, first subblock: base5 only. |
| base = etc_block_unpack_color5(etc_block_get_base5_color(pBlock), true, 255); |
| } |
| |
| constant int* pInten_table = g_etc1_inten_tables[etc_block_get_inten_table(pBlock, subblock_index)]; |
| |
| bool did_clamp = false; |
| for (uint32_t s = 0; s < 4; s++) |
| { |
| const int delta = pInten_table[s]; |
| const uint8_t r = clamp255_flag(base.x + delta, &did_clamp); |
| const uint8_t g = clamp255_flag(base.y + delta, &did_clamp); |
| const uint8_t b = clamp255_flag(base.z + delta, &did_clamp); |
| pBlock_colors[s] = (color_rgba)(r, g, b, 255); |
| } |
| |
| return did_clamp; |
| } |
| |
| // Computes the four block colors for a base color and intensity table index. |
| // When `scaled` is false, *pBase_color5 is treated as 5-bit channels and expanded to 8 bits first; |
| // when true it is used as is. Each output channel is clamped to 0-255 and alpha is set to 255. |
| void get_block_colors5(color_rgba *pBlock_colors, const color_rgba *pBase_color5, uint32_t inten_table, bool scaled /* false */) |
| { |
| color_rgba base = *pBase_color5; |
| |
| if (!scaled) |
| { |
| base.x = (base.x << 3) | (base.x >> 2); |
| base.y = (base.y << 3) | (base.y >> 2); |
| base.z = (base.z << 3) | (base.z >> 2); |
| } |
| |
| constant int* pInten_table = g_etc1_inten_tables[inten_table]; |
| |
| for (uint32_t s = 0; s < 4; s++) |
| { |
| const int delta = pInten_table[s]; |
| pBlock_colors[s] = (color_rgba)(clamp255(base.x + delta), clamp255(base.y + delta), clamp255(base.z + delta), 255); |
| } |
| } |
| |
| // For every pixel of subblocks [begin_subblock, end_subblock), picks the block color closest to the source pixel, |
| // stores the corresponding selector in the block and returns the summed error of the chosen colors. |
| // pSource_pixels holds the 4x4 source block in [y*4+x] order. Ties keep the lowest selector index. |
| uint64_t etc_block_determine_selectors(etc_block *pBlock, const color_rgba* pSource_pixels, bool perceptual, uint32_t begin_subblock /*= 0*/, uint32_t end_subblock /*= 2*/) |
| { |
| uint64_t total_error = 0; |
| |
| // Writing selectors never touches the flip bit, so it can be read once up front. |
| const bool flip = etc_block_get_flip_bit(pBlock); |
| |
| for (uint32_t subblock = begin_subblock; subblock < end_subblock; subblock++) |
| { |
| color_rgba block_colors[4]; |
| etc_block_get_block_colors(pBlock, block_colors, subblock); |
| |
| // Flipped: the subblock is a 4x2 strip (rows subblock*2 and subblock*2+1). |
| // Not flipped: it is a 2x4 strip (columns subblock*2 and subblock*2+1). |
| const uint32_t x0 = flip ? 0 : (subblock * 2); |
| const uint32_t y0 = flip ? (subblock * 2) : 0; |
| const uint32_t width = flip ? 4 : 2; |
| const uint32_t height = flip ? 2 : 4; |
| |
| for (uint32_t dy = 0; dy < height; dy++) |
| { |
| for (uint32_t dx = 0; dx < width; dx++) |
| { |
| const uint32_t px = x0 + dx; |
| const uint32_t py = y0 + dy; |
| const color_rgba src = pSource_pixels[px + py * 4]; |
| |
| uint32_t best_selector = 0; |
| uint64_t best_error = UINT64_MAX; |
| |
| for (uint32_t s = 0; s < 4; s++) |
| { |
| const uint64_t err = color_distance(perceptual, block_colors[s], src, false); |
| if (err < best_error) |
| { |
| best_error = err; |
| best_selector = s; |
| } |
| } |
| |
| etc_block_set_selector(pBlock, px, py, best_selector); |
| |
| total_error += best_error; |
| } |
| } |
| } |
| |
| return total_error; |
| } |
| |
| // Packs r, g, b into a 4:4:4 value ((r << 8) | (g << 4) | b). |
| // When `scaled` is set the inputs are 8-bit values and are first converted to 4 bits with rounding; |
| // in both cases each channel is then limited to at most 15. |
| uint16_t etc_block_pack_color4_rgb(uint32_t r, uint32_t g, uint32_t b, bool scaled) |
| { |
| const uint32_t bias = 127; |
| |
| uint32_t r4 = scaled ? ((r * 15U + bias) / 255U) : r; |
| uint32_t g4 = scaled ? ((g * 15U + bias) / 255U) : g; |
| uint32_t b4 = scaled ? ((b * 15U + bias) / 255U) : b; |
| |
| r4 = min(r4, 15U); |
| g4 = min(g4, 15U); |
| b4 = min(b4, 15U); |
| |
| return (uint16_t)((r4 << 8U) | (g4 << 4U) | b4); |
| } |
| |
| // Packs the R, G and B channels of `color` into a 4:4:4 value; alpha is ignored. |
| // `scaled` is forwarded to etc_block_pack_color4_rgb(), which does the conversion. |
| uint16_t etc_block_pack_color4(color_rgba color, bool scaled) |
| { |
| return etc_block_pack_color4_rgb(color.x, color.y, color.z, scaled); |
| } |
| |
| // Packs three signed deltas, each expected in [cETC1ColorDeltaMin, cETC1ColorDeltaMax], into a 3:3:3 value |
| // ((r << 6) | (g << 3) | b). Negative deltas are stored as value + 8. |
| uint16_t etc_block_pack_delta3(int r, int g, int b) |
| { |
| assert((r >= cETC1ColorDeltaMin) && (r <= cETC1ColorDeltaMax)); |
| assert((g >= cETC1ColorDeltaMin) && (g <= cETC1ColorDeltaMax)); |
| assert((b >= cETC1ColorDeltaMin) && (b <= cETC1ColorDeltaMax)); |
| |
| const int raw_r = (r < 0) ? (r + 8) : r; |
| const int raw_g = (g < 0) ? (g + 8) : g; |
| const int raw_b = (b < 0) ? (b + 8) : b; |
| |
| return (uint16_t)((raw_r << 6) | (raw_g << 3) | raw_b); |
| } |
| |
| // Switches the block to absolute mode (diff bit cleared) and stores two 4:4:4 base colors. |
| // Both inputs are packed without scaling, i.e. their channels are taken as already being 4-bit values. |
| void etc_block_set_block_color4(etc_block *pBlock, color_rgba c0_unscaled, color_rgba c1_unscaled) |
| { |
| const uint16_t packed0 = etc_block_pack_color4(c0_unscaled, false); |
| const uint16_t packed1 = etc_block_pack_color4(c1_unscaled, false); |
| |
| etc_block_set_diff_bit(pBlock, false); |
| etc_block_set_base4_color(pBlock, 0, packed0); |
| etc_block_set_base4_color(pBlock, 1, packed1); |
| } |
| |
| // Packs r, g, b into a 5:5:5 value ((r << 10) | (g << 5) | b). |
| // When `scaled` is set the inputs are 8-bit values and are first converted to 5 bits with rounding; |
| // in both cases each channel is then limited to at most 31. |
| uint16_t etc_block_pack_color5_rgb(uint32_t r, uint32_t g, uint32_t b, bool scaled) |
| { |
| const uint32_t bias = 127; |
| |
| uint32_t r5 = scaled ? ((r * 31U + bias) / 255U) : r; |
| uint32_t g5 = scaled ? ((g * 31U + bias) / 255U) : g; |
| uint32_t b5 = scaled ? ((b * 31U + bias) / 255U) : b; |
| |
| r5 = min(r5, 31U); |
| g5 = min(g5, 31U); |
| b5 = min(b5, 31U); |
| |
| return (uint16_t)((r5 << 10U) | (g5 << 5U) | b5); |
| } |
| |
| // Packs the R, G and B channels of `c` into a 5:5:5 value; alpha is ignored. |
| uint16_t etc_block_pack_color5(color_rgba c, bool scaled) |
| { |
| const uint16_t packed = etc_block_pack_color5_rgb(c.x, c.y, c.z, scaled); |
| return packed; |
| } |
| |
| // Switches the block to differential mode (diff bit set), stores c0 as the 5:5:5 base color |
| // and c1 - c0 as the 3:3:3 delta. The per-channel differences are expected to fit the delta range |
| // (only checked by the asserts in etc_block_pack_delta3()). |
| void etc_block_set_block_color5(etc_block *pBlock, color_rgba c0_unscaled, color_rgba c1_unscaled) |
| { |
| const int delta_r = c1_unscaled.x - c0_unscaled.x; |
| const int delta_g = c1_unscaled.y - c0_unscaled.y; |
| const int delta_b = c1_unscaled.z - c0_unscaled.z; |
| |
| etc_block_set_diff_bit(pBlock, true); |
| etc_block_set_base5_color(pBlock, etc_block_pack_color5(c0_unscaled, false)); |
| etc_block_set_delta3_color(pBlock, etc_block_pack_delta3(delta_r, delta_g, delta_b)); |
| } |
| |
| // ETC1S variant: differential mode with a zero delta, so both subblocks share the base color c_unscaled. |
| void etc_block_set_block_color5_etc1s(etc_block *pBlock, color_rgba c_unscaled) |
| { |
| const uint16_t base5 = etc_block_pack_color5(c_unscaled, false); |
| const uint16_t zero_delta = etc_block_pack_delta3(0, 0, 0); |
| |
| etc_block_set_diff_bit(pBlock, true); |
| etc_block_set_base5_color(pBlock, base5); |
| etc_block_set_delta3_color(pBlock, zero_delta); |
| } |
| |
| // Like etc_block_set_block_color5(), but verifies that c1 - c0 fits the 3-bit delta range first. |
| // Returns false without writing the delta when any channel difference is out of range; |
| // note that the diff bit and the base color have already been written at that point. |
| bool etc_block_set_block_color5_check(etc_block *pBlock, color_rgba c0_unscaled, color_rgba c1_unscaled) |
| { |
| etc_block_set_diff_bit(pBlock, true); |
| etc_block_set_base5_color(pBlock, etc_block_pack_color5(c0_unscaled, false)); |
| |
| const int delta_r = c1_unscaled.x - c0_unscaled.x; |
| const int delta_g = c1_unscaled.y - c0_unscaled.y; |
| const int delta_b = c1_unscaled.z - c0_unscaled.z; |
| |
| const bool r_ok = (delta_r >= cETC1ColorDeltaMin) && (delta_r <= cETC1ColorDeltaMax); |
| const bool g_ok = (delta_g >= cETC1ColorDeltaMin) && (delta_g <= cETC1ColorDeltaMax); |
| const bool b_ok = (delta_b >= cETC1ColorDeltaMin) && (delta_b <= cETC1ColorDeltaMax); |
| |
| if (!(r_ok && g_ok && b_ok)) |
| return false; |
| |
| etc_block_set_delta3_color(pBlock, etc_block_pack_delta3(delta_r, delta_g, delta_b)); |
| return true; |
| } |
| |
| // Writes all 16 selectors of the block at once. |
| // pSelectors holds raw 2-bit selector values in [y*4+x] order; they are stored as is (no remapping). |
| // The selector of pixel (x, y) goes to bit x*4+y of the LSB plane (bytes 6-7) and of the MSB plane (bytes 4-5). |
| void etc_block_pack_raw_selectors(etc_block *pBlock, const uint8_t *pSelectors) |
| { |
| uint32_t lsb_plane = 0; |
| uint32_t msb_plane = 0; |
| |
| for (uint32_t i = 0; i < 16; i++) |
| { |
| const uint32_t x = i & 3; |
| const uint32_t y = i >> 2; |
| const uint32_t bit_index = x * 4 + y; |
| const uint32_t s = pSelectors[i]; |
| |
| lsb_plane |= ((s & 1) << bit_index); |
| msb_plane |= ((s >> 1) << bit_index); |
| } |
| |
| pBlock->m_bytes[4] = (uint8_t)(msb_plane >> 8); |
| pBlock->m_bytes[5] = (uint8_t)(msb_plane); |
| pBlock->m_bytes[6] = (uint8_t)(lsb_plane >> 8); |
| pBlock->m_bytes[7] = (uint8_t)(lsb_plane); |
| } |
| |
| // ---- ETC1S block encoding/endpoint optimization |
| |
| constant uint8_t g_eval_dist_tables[8][256] = |
| { |
| // 99% threshold |
| { 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,}, |
| { 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,}, |
| { 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,0,0,1,0,0,0,0,0,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,0,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,}, |
| { 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,0,0,0,0,0,0,0,0,1,1,0,1,0,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,0,1,0,1,1,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,}, |
| { 1,1,1,1,1,1,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,1,0,0,0,0,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,0,1,0,1,0,0,0,0,0,0,0,0,0,1,0,0,1,}, |
| { 1,1,1,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,1,0,0,1,0,0,0,0,1,0,1,1,0,1,1,1,1,1,0,1,1,1,0,1,1,0,0,0,1,1,0,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,}, |
| { 1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,1,1,1,0,1,1,0,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,0,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,}, |
| { 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,1,1,1,0,0,0,0,0,1,1,0,0,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,} |
| }; |
| |
| // Identifies one candidate ETC1S solution: a base color plus an intensity table index. |
| typedef struct etc1s_optimizer_solution_coordinates_tag |
| { |
| // Base color with 5-bit channels; get_scaled_color() expands it to 8 bits before evaluation. |
| color_rgba m_unscaled_color; |
| // Row index into g_etc1_inten_tables. |
| uint32_t m_inten_table; |
| } etc1s_optimizer_solution_coordinates; |
| |
| // Expands the R, G and B channels of unscaled_color with (v << 3) | (v >> 2), truncated to 8 bits |
| // (the usual 5 -> 8 bit expansion for channels in 0-31). The returned alpha is always 255. |
| color_rgba get_scaled_color(color_rgba unscaled_color) |
| { |
| const int r = (unscaled_color.x << 3) | (unscaled_color.x >> 2); |
| const int g = (unscaled_color.y << 3) | (unscaled_color.y >> 2); |
| const int b = (unscaled_color.z << 3) | (unscaled_color.z >> 2); |
| |
| return (color_rgba)((uint8_t)r, (uint8_t)g, (uint8_t)b, 255); |
| } |
| |
| // Result of evaluating one candidate solution (see etc1s_optimizer_evaluate_solution()). |
| typedef struct etc1s_optimizer_potential_solution_tag |
| { |
| // Total (optionally weighted) error of the solution; lower is better. |
| uint64_t m_error; |
| // Base color and intensity table that produced m_error. |
| etc1s_optimizer_solution_coordinates m_coords; |
| |
| // Per-pixel selectors; only filled in by the evaluator when at most 16 pixels are evaluated. |
| uint8_t m_selectors[16]; |
| // True once at least one intensity table has been accepted for this solution. |
| bool m_valid; |
| } etc1s_optimizer_potential_solution; |
| |
| // Working state of the ETC1S endpoint optimizer. |
| typedef struct etc1s_optimizer_state_tag |
| { |
| // NOTE(review): presumably the current base color components -- not used by the code visible in this part of the file. |
| int m_br, m_bg, m_bb; |
| // NOTE(review): presumably the (weighted) average color of the input pixels -- TODO confirm. |
| float3 m_avg_color; |
| // Second index into g_eval_dist_tables, used to skip intensity tables during evaluation. |
| int m_max_comp_spread; |
| // NOTE(review): presumably the best solution found so far -- TODO confirm. |
| etc1s_optimizer_potential_solution m_best_solution; |
| } etc1s_optimizer_state; |
| |
| // Evaluates the base color in `coords` against every intensity table allowed by g_eval_dist_tables |
| // and records the best one in *pTrial_solution (error, intensity table, selectors, base color). |
| //   pPixels/num_pixels: source pixels to match. |
| //   pWeights:           optional per-pixel weights (may be null); each pixel's error is multiplied by its weight. |
| //   pBest_solution:     optional; overwritten with the trial solution when the trial has a strictly lower error. |
| // Selectors are only recorded when num_pixels <= 16. |
| // Returns true only when *pBest_solution was replaced. |
| bool etc1s_optimizer_evaluate_solution( |
| etc1s_optimizer_state *pState, |
| const global encode_etc1s_param_struct *pParams, |
| uint64_t num_pixels, const global color_rgba *pPixels, |
| const global uint32_t *pWeights, |
| etc1s_optimizer_solution_coordinates coords, |
| etc1s_optimizer_potential_solution* pTrial_solution, |
| etc1s_optimizer_potential_solution* pBest_solution) |
| { |
| // Selector value recorded for block color 0, 1, 2, 3 respectively. |
| const uint8_t selector_for_color[4] = { 3, 2, 0, 1 }; |
| const bool keep_selectors = (num_pixels <= 16); |
| |
| uint8_t temp_selectors[16]; |
| |
| const color_rgba base_color = get_scaled_color(coords.m_unscaled_color); |
| |
| pTrial_solution->m_valid = false; |
| pTrial_solution->m_error = INT64_MAX; |
| |
| for (uint32_t inten_table = 0; inten_table < cETC1IntenModifierValues; inten_table++) |
| { |
| // TODO: This check is equivalent to medium quality in the C++ version. |
| if (!g_eval_dist_tables[inten_table][pState->m_max_comp_spread]) |
| continue; |
| |
| constant int* pInten_table = g_etc1_inten_tables[inten_table]; |
| |
| color_rgba block_colors[4]; |
| for (uint32_t s = 0; s < 4; s++) |
| { |
| const int yd = pInten_table[s]; |
| block_colors[s] = (color_rgba)(clamp255(base_color.x + yd), clamp255(base_color.y + yd), clamp255(base_color.z + yd), 255); |
| } |
| |
| uint64_t total_error = 0; |
| |
| for (uint64_t c = 0; c < num_pixels; c++) |
| { |
| const color_rgba src_pixel = pPixels[c]; |
| |
| // Find the closest of the four block colors; ties keep the earlier color. |
| uint32_t best_color = 0; |
| uint32_t best_error = color_distance(pParams->m_perceptual, src_pixel, block_colors[0], false); |
| |
| for (uint32_t s = 1; s < 4; s++) |
| { |
| const uint32_t trial_error = color_distance(pParams->m_perceptual, src_pixel, block_colors[s], false); |
| if (trial_error < best_error) |
| { |
| best_error = trial_error; |
| best_color = s; |
| } |
| } |
| |
| if (keep_selectors) |
| temp_selectors[c] = selector_for_color[best_color]; |
| |
| if (pWeights) |
| total_error += best_error * (uint64_t)pWeights[c]; |
| else |
| total_error += best_error; |
| |
| // This table can no longer beat the best one found so far. |
| if (total_error >= pTrial_solution->m_error) |
| break; |
| } |
| |
| if (total_error < pTrial_solution->m_error) |
| { |
| pTrial_solution->m_error = total_error; |
| pTrial_solution->m_coords.m_inten_table = inten_table; |
| |
| if (keep_selectors) |
| { |
| for (uint32_t i = 0; i < num_pixels; i++) |
| pTrial_solution->m_selectors[i] = temp_selectors[i]; |
| } |
| |
| pTrial_solution->m_valid = true; |
| } |
| } |
| |
| pTrial_solution->m_coords.m_unscaled_color = coords.m_unscaled_color; |
| |
| if (!pBest_solution) |
| return false; |
| |
| if (pTrial_solution->m_error >= pBest_solution->m_error) |
| return false; |
| |
| *pBest_solution = *pTrial_solution; |
| return true; |
| } |
| |
| // Initializes the optimizer state from a run of source pixels. |
| // |
| // Computes the (optionally weighted) average RGB color, the largest per-channel spread between |
| // the minimum and maximum pixel values, and the average color quantized to the 0..31 range. |
| // The best solution is reset to "invalid" with the maximum possible error. |
| // |
| // pWeights may be NULL, in which case every pixel has weight 1. pParams is not read here. |
| // |
| // Degenerate input (num_pixels == 0, or weights that sum to zero) yields an average of zero |
| // instead of dividing by zero, and the spread is never negative. |
| void etc1s_optimizer_init( |
|     etc1s_optimizer_state *pState, |
|     const global encode_etc1s_param_struct *pParams, |
|     uint64_t num_pixels, const global color_rgba *pPixels, |
|     const global uint32_t *pWeights) |
| { |
|     const int LIMIT = 31; |
| |
|     color_rgba min_color = 255; |
|     color_rgba max_color = 0; |
|     uint64_t total_weight = 0; |
|     uint64_t sum_r = 0, sum_g = 0, sum_b = 0; |
| |
|     for (uint64_t i = 0; i < num_pixels; i++) |
|     { |
|         const color_rgba c = pPixels[i]; |
| |
|         min_color = min(min_color, c); |
|         max_color = max(max_color, c); |
| |
|         if (pWeights) |
|         { |
|             uint64_t weight = pWeights[i]; |
| |
|             sum_r += weight * c.x; |
|             sum_g += weight * c.y; |
|             sum_b += weight * c.z; |
| |
|             total_weight += weight; |
|         } |
|         else |
|         { |
|             sum_r += c.x; |
|             sum_g += c.y; |
|             sum_b += c.z; |
| |
|             total_weight++; |
|         } |
|     } |
| |
|     // With no pixels (or all-zero weights) the division below would be 0/0; fall back to zero. |
|     float3 avg_color = (float3)(0.0f, 0.0f, 0.0f); |
|     if (total_weight != 0) |
|     { |
|         avg_color.x = (float)sum_r / total_weight; |
|         avg_color.y = (float)sum_g / total_weight; |
|         avg_color.z = (float)sum_b / total_weight; |
|     } |
| |
|     pState->m_avg_color = avg_color; |
| |
|     const int spread_r = (int)max_color.x - (int)min_color.x; |
|     const int spread_g = (int)max_color.y - (int)min_color.y; |
|     const int spread_b = (int)max_color.z - (int)min_color.z; |
| |
|     // The spread is later used as an array index by etc1s_optimizer_evaluate_solution(), so it |
|     // must not go negative. That can only happen when no pixel was visited (min stays at 255, |
|     // max stays at 0). |
|     pState->m_max_comp_spread = max(max(max(spread_r, spread_g), spread_b), 0); |
| |
|     // TODO: The rounding here could be improved, like with DXT1/BC1. |
|     pState->m_br = clamp((int)(avg_color.x * (LIMIT / 255.0f) + .5f), 0, LIMIT); |
|     pState->m_bg = clamp((int)(avg_color.y * (LIMIT / 255.0f) + .5f), 0, LIMIT); |
|     pState->m_bb = clamp((int)(avg_color.z * (LIMIT / 255.0f) + .5f), 0, LIMIT); |
| |
|     pState->m_best_solution.m_valid = false; |
|     pState->m_best_solution.m_error = UINT64_MAX; |
| } |
| |
| // Searches for a better base color by trying entries of g_cluster_fit_order_tab. |
| // |
| // The quantized average color stored in pState is evaluated first. Then, for each of the first |
| // total_perms_to_try table entries, the entry's four per-selector counts are combined with the |
| // current best intensity table to estimate how far the block colors sit from the base color on |
| // average; the average source color is shifted by that amount, quantized to 0..31 and evaluated. |
| // Every evaluation may replace pState->m_best_solution. The search stops as soon as the best |
| // error reaches zero. |
| void etc1s_optimizer_internal_cluster_fit( |
|     uint32_t total_perms_to_try, |
|     etc1s_optimizer_state *pState, |
|     const global encode_etc1s_param_struct *pParams, |
|     uint64_t num_pixels, const global color_rgba *pPixels, |
|     const global uint32_t *pWeights) |
| { |
|     const int LIMIT = 31; |
| |
|     etc1s_optimizer_potential_solution scratch; |
|     etc1s_optimizer_solution_coordinates candidate; |
| |
|     // Starting point: the quantized average color. |
|     candidate.m_unscaled_color = (color_rgba)(pState->m_br, pState->m_bg, pState->m_bb, 255); |
|     etc1s_optimizer_evaluate_solution(pState, pParams, num_pixels, pPixels, pWeights, candidate, &scratch, &pState->m_best_solution); |
| |
|     for (uint32_t perm = 0; (perm < total_perms_to_try) && (pState->m_best_solution.m_error != 0); perm++) |
|     { |
|         // Always work from whatever the best solution currently is. |
|         constant int *pModifiers = g_etc1_inten_tables[pState->m_best_solution.m_coords.m_inten_table]; |
|         const color_rgba best_base = get_scaled_color(pState->m_best_solution.m_coords.m_unscaled_color); |
|         constant uint8_t *pCounts = g_cluster_fit_order_tab[perm].m_v; |
| |
|         int shift_r = 0; |
|         int shift_g = 0; |
|         int shift_b = 0; |
| |
|         for (uint32_t s = 0; s < 4; s++) |
|         { |
|             const int modifier = pModifiers[s]; |
|             const int count = pCounts[s]; |
| |
|             // Clamping mirrors how the block colors themselves saturate at 0 and 255. |
|             shift_r += count * (clamp(best_base.x + modifier, 0, 255) - best_base.x); |
|             shift_g += count * (clamp(best_base.y + modifier, 0, 255) - best_base.y); |
|             shift_b += count * (clamp(best_base.z + modifier, 0, 255) - best_base.z); |
|         } |
| |
|         // A zero shift would not move the base color, so skip this entry. |
|         if ((shift_r | shift_g | shift_b) == 0) |
|             continue; |
| |
|         // NOTE(review): the divisor 8 presumably equals the sum of the four counts in every |
|         // g_cluster_fit_order_tab entry - confirm against the table definition. |
|         const float mean_r = (float)shift_r / 8; |
|         const float mean_g = (float)shift_g / 8; |
|         const float mean_b = (float)shift_b / 8; |
| |
|         const float3 avg = pState->m_avg_color; |
| |
|         const int r5 = clamp((int)((avg.x - mean_r) * (LIMIT / 255.0f) + .5f), 0, LIMIT); |
|         const int g5 = clamp((int)((avg.y - mean_g) * (LIMIT / 255.0f) + .5f), 0, LIMIT); |
|         const int b5 = clamp((int)((avg.z - mean_b) * (LIMIT / 255.0f) + .5f), 0, LIMIT); |
| |
|         candidate.m_unscaled_color = (color_rgba)(r5, g5, b5, 255); |
| |
|         etc1s_optimizer_evaluate_solution(pState, pParams, num_pixels, pPixels, pWeights, candidate, &scratch, &pState->m_best_solution); |
|     } |
| } |
| |
| // Kernel: each work-item encodes one 4x4 pixel block into one ETC1S block. |
| // |
| // pParams->m_total_perms sets how many cluster-fit table entries the optimizer tries. |
| // The work-item's global id selects both the input block and the output slot. |
| kernel void encode_etc1s_blocks( |
|     const global encode_etc1s_param_struct *pParams, |
|     const global pixel_block *pInput_blocks, |
|     global etc_block *pOutput_blocks) |
| { |
|     const uint32_t gid = get_global_id(0); |
|     const global color_rgba *pSrc_pixels = pInput_blocks[gid].m_pixels; |
|     const uint32_t perms_to_try = pParams->m_total_perms; |
| |
|     // Unweighted optimization over the block's 16 pixels. |
|     etc1s_optimizer_state opt; |
|     etc1s_optimizer_init(&opt, pParams, 16, pSrc_pixels, NULL); |
|     etc1s_optimizer_internal_cluster_fit(perms_to_try, &opt, pParams, 16, pSrc_pixels, NULL); |
| |
|     etc1s_optimizer_potential_solution *pBest = &opt.m_best_solution; |
| |
|     // Pack the winning color, intensity table and selectors. |
|     etc_block packed; |
|     etc_block_set_flip_bit(&packed, true); |
|     etc_block_set_block_color5_etc1s(&packed, pBest->m_coords.m_unscaled_color); |
|     etc_block_set_inten_tables_etc1s(&packed, pBest->m_coords.m_inten_table); |
|     etc_block_pack_raw_selectors(&packed, pBest->m_selectors); |
| |
|     pOutput_blocks[gid] = packed; |
| } |
| |
| // Describes one cluster as a contiguous run inside the shared pixel (and weight) arrays |
| // passed to encode_etc1s_from_pixel_cluster(). |
| typedef struct __attribute__((packed)) pixel_cluster_tag |
| { |
|     // Number of pixels (and weights) that belong to this cluster. |
|     uint64_t m_total_pixels; |
| |
|     // Index of the cluster's first pixel; the same index is applied to the weight array. |
|     uint64_t m_first_pixel_index; |
| } pixel_cluster; |
| |
| // Kernel: determine the optimal ETC1S color5/intensity for a cluster of weighted pixels. |
| // |
| // Each work-item handles one pixel_cluster. The cluster names a contiguous run of |
| // m_total_pixels entries, starting at m_first_pixel_index, in both pInput_pixels and |
| // pInput_weights. The run may be arbitrarily long. |
| // |
| // Only the flip bit, the color5 and the intensity tables of the output block are meaningful; |
| // no selector setter is called here, so the remaining bits of the block are written as zero. |
| kernel void encode_etc1s_from_pixel_cluster( |
|     const global encode_etc1s_param_struct *pParams, |
|     const global pixel_cluster *pInput_pixel_clusters, |
|     const global color_rgba *pInput_pixels, |
|     const global uint32_t *pInput_weights, |
|     global etc_block *pOutput_blocks) |
| { |
|     const uint32_t cluster_index = get_global_id(0); |
| |
|     const global pixel_cluster *pInput_cluster = &pInput_pixel_clusters[cluster_index]; |
| |
|     uint64_t total_pixels = pInput_cluster->m_total_pixels; |
|     const global color_rgba *pPixels = pInput_pixels + pInput_cluster->m_first_pixel_index; |
|     const global uint32_t *pWeights = pInput_weights + pInput_cluster->m_first_pixel_index; |
| |
|     etc1s_optimizer_state state; |
|     etc1s_optimizer_init(&state, pParams, total_pixels, pPixels, pWeights); |
|     etc1s_optimizer_internal_cluster_fit(pParams->m_total_perms, &state, pParams, total_pixels, pPixels, pWeights); |
| |
|     // Zero the block first: the setters below never touch the selector bits, and without this |
|     // the output buffer would receive indeterminate private memory in those bits. |
|     etc_block blk = { 0 }; |
|     etc_block_set_flip_bit(&blk, true); |
|     etc_block_set_block_color5_etc1s(&blk, state.m_best_solution.m_coords.m_unscaled_color); |
|     etc_block_set_inten_tables_etc1s(&blk, state.m_best_solution.m_coords.m_inten_table); |
| |
|     pOutput_blocks[cluster_index] = blk; |
| } |
| |
| // ---- refine_endpoint_clusterization |
| // Per-block input to refine_endpoint_clusterization(). |
| typedef struct __attribute__((packed)) rec_block_struct_tag |
| { |
|     // Offset of the block's first candidate in the endpoint cluster array. |
|     uint16_t m_first_cluster_ofs; |
| |
|     // Number of consecutive candidates to examine, starting at m_first_cluster_ofs. |
|     uint16_t m_num_clusters; |
| |
|     // Cluster the block currently belongs to; compared against a candidate's m_cluster_index. |
|     uint16_t m_cur_cluster_index; |
| |
|     // Intensity table of the current cluster; candidates with a higher intensity are skipped. |
|     uint8_t m_cur_cluster_etc_inten; |
| } rec_block_struct; |
| |
| // One candidate endpoint cluster examined by refine_endpoint_clusterization(). |
| typedef struct __attribute__((packed)) rec_endpoint_cluster_struct_tag |
| { |
|     // Unscaled block color handed to get_block_colors5(). |
|     color_rgba m_unscaled_color; |
| |
|     // Intensity table index handed to get_block_colors5(). |
|     uint8_t m_etc_inten; |
| |
|     // Cluster index written to the output when this candidate wins. |
|     uint16_t m_cluster_index; |
| } rec_endpoint_cluster_struct; |
| |
| // By-value parameters of refine_endpoint_clusterization(). |
| typedef struct __attribute__((packed)) rec_param_struct_tag |
| { |
|     // Not read by the kernel itself. |
|     uint32_t m_total_blocks; |
| |
|     // Non-zero selects the perceptual metric in color_distance(). |
|     int m_perceptual; |
| } rec_param_struct; |
| |
| // Kernel: for each input block, find the candidate endpoint cluster that encodes it best. |
| // |
| // The work-item's global id indexes pSorted_block_indices, which yields the block to process. |
| // The block's candidates are the m_num_clusters entries of pInput_clusters starting at |
| // m_first_cluster_ofs. Candidates whose intensity table index is higher than the block's |
| // current one are skipped. For each remaining candidate the error is the sum, over the 16 |
| // pixels, of the distance to the nearest of the candidate's four block colors. |
| // |
| // The candidate with the lowest error wins; on an exact tie the block's current cluster is |
| // preferred. The search stops early once an error of zero is found. If no candidate is |
| // eligible at all, the block keeps its current cluster index. |
| kernel void refine_endpoint_clusterization( |
|     const rec_param_struct params, |
|     const global pixel_block *pInput_blocks, |
|     const global rec_block_struct *pInput_block_info, |
|     const global rec_endpoint_cluster_struct *pInput_clusters, |
|     const global uint32_t *pSorted_block_indices, |
|     global uint32_t *pOutput_indices) |
| { |
|     const uint32_t sorted_block_index = get_global_id(0); |
|     const uint32_t block_index = pSorted_block_indices[sorted_block_index]; |
|     const int perceptual = params.m_perceptual; |
| |
|     // Copy the pixels to private memory; they are re-read for every candidate. |
|     pixel_block priv_pixel_block; |
|     priv_pixel_block = pInput_blocks[block_index]; |
| |
|     // Read the block's info record once. |
|     const rec_block_struct block_info = pInput_block_info[block_index]; |
| |
|     const uint32_t first_cluster_ofs = block_info.m_first_cluster_ofs; |
|     const uint32_t num_clusters = block_info.m_num_clusters; |
|     const uint32_t cur_block_cluster_index = block_info.m_cur_cluster_index; |
|     const uint32_t cur_block_cluster_etc_inten = block_info.m_cur_cluster_etc_inten; |
| |
|     uint64_t overall_best_err = UINT64_MAX; |
| |
|     // Default to the current cluster so that a block with no eligible candidate is left where |
|     // it is instead of being silently moved to cluster 0. |
|     uint32_t best_cluster_index = cur_block_cluster_index; |
| |
|     for (uint32_t i = 0; i < num_clusters; i++) |
|     { |
|         const uint32_t cluster_index = first_cluster_ofs + i; |
|         color_rgba unscaled_color = pInput_clusters[cluster_index].m_unscaled_color; |
|         const uint8_t etc_inten = pInput_clusters[cluster_index].m_etc_inten; |
|         const uint16_t orig_cluster_index = pInput_clusters[cluster_index].m_cluster_index; |
| |
|         if (etc_inten > cur_block_cluster_etc_inten) |
|             continue; |
| |
|         color_rgba block_colors[4]; |
|         get_block_colors5(block_colors, &unscaled_color, etc_inten, false); |
| |
|         uint64_t total_error = 0; |
| |
|         for (uint32_t c = 0; c < 16; c++) |
|         { |
|             const color_rgba src_pixel = priv_pixel_block.m_pixels[c]; |
| |
|             uint32_t best_error = color_distance(perceptual, src_pixel, block_colors[0], false); |
| |
|             for (uint32_t s = 1; s < 4; s++) |
|             { |
|                 const uint32_t trial_error = color_distance(perceptual, src_pixel, block_colors[s], false); |
|                 if (trial_error < best_error) |
|                     best_error = trial_error; |
|             } |
| |
|             total_error += best_error; |
|         } |
| |
|         // Strictly better always wins; an exact tie only wins for the block's current cluster. |
|         const bool is_better = (total_error < overall_best_err); |
|         const bool is_tie_with_current = (orig_cluster_index == cur_block_cluster_index) && (total_error == overall_best_err); |
| |
|         if (is_better || is_tie_with_current) |
|         { |
|             overall_best_err = total_error; |
|             best_cluster_index = orig_cluster_index; |
|             if (!overall_best_err) |
|                 break; |
|         } |
|     } |
| |
|     pOutput_indices[block_index] = best_cluster_index; |
| } |
| |
| // ---- find_optimal_selector_clusters_for_each_block |
| |
| // One candidate selector pattern for find_optimal_selector_clusters_for_each_block(). |
| typedef struct __attribute__((packed)) fosc_selector_struct_tag |
| { |
|     // 4x4 grid of 2-bit selectors; pixel 0 occupies the two lowest bits, pixel 15 the two highest. |
|     uint32_t m_packed_selectors; |
| } fosc_selector_struct; |
| |
| // Per-block input to find_optimal_selector_clusters_for_each_block(). |
| typedef struct __attribute__((packed)) fosc_block_struct_tag |
| { |
|     // Unscaled 5-bit block color in RGB; the alpha channel carries the block's intensity index. |
|     color_rgba m_etc_color5_inten; |
| |
|     // Offset of the block's first candidate in the selector table. The same offset is applied |
|     // to the selector cluster index array. |
|     uint32_t m_first_selector; |
| |
|     // Number of consecutive candidates to check. |
|     uint32_t m_num_selectors; |
| } fosc_block_struct; |
| |
| // By-value parameters of find_optimal_selector_clusters_for_each_block(). |
| typedef struct __attribute__((packed)) fosc_param_struct_tag |
| { |
|     // Not read by the kernel itself. |
|     uint32_t m_total_blocks; |
| |
|     // Non-zero selects the perceptual metric in color_distance(). |
|     int m_perceptual; |
| } fosc_param_struct; |
| |
| // Kernel: for each input block, pick the candidate selector pattern with the lowest error. |
| // |
| // The block's four colors come from its color5/intensity. The distance from every pixel to |
| // every block color is computed once up front; each candidate pattern is then scored by |
| // summing the 16 distances its 2-bit selectors point at. The first candidate with the lowest |
| // total wins, and the search stops early on a total of zero. The value written out is the |
| // entry of pSelector_cluster_indices that corresponds to the winning candidate. |
| kernel void find_optimal_selector_clusters_for_each_block( |
|     const fosc_param_struct params, |
|     const global pixel_block *pInput_blocks, |
|     const global fosc_block_struct *pInput_block_info, |
|     const global fosc_selector_struct *pInput_selectors, |
|     const global uint32_t *pSelector_cluster_indices, |
|     global uint32_t *pOutput_selector_cluster_indices) |
| { |
|     const uint32_t gid = get_global_id(0); |
| |
|     const global color_rgba *pPixels = pInput_blocks[gid].m_pixels; |
|     const global fosc_block_struct *pInfo = &pInput_block_info[gid]; |
| |
|     const uint32_t first_candidate = pInfo->m_first_selector; |
|     const uint32_t candidate_count = pInfo->m_num_selectors; |
|     const global fosc_selector_struct *pCandidates = pInput_selectors + first_candidate; |
| |
|     color_rgba color5_inten = pInfo->m_etc_color5_inten; |
|     color_rgba palette[4]; |
|     get_block_colors5(palette, &color5_inten, color5_inten.w, false); |
| |
|     // pixel_errors[s][p] = distance from pixel p to block color s. |
|     uint32_t pixel_errors[4][16]; |
| |
|     // The metric flag is passed as a literal in each branch. |
|     if (params.m_perceptual) |
|     { |
|         for (uint32_t p = 0; p < 16; ++p) |
|         { |
|             const color_rgba src = pPixels[p]; |
|             for (uint32_t s = 0; s < 4; ++s) |
|                 pixel_errors[s][p] = color_distance(true, src, palette[s], false); |
|         } |
|     } |
|     else |
|     { |
|         for (uint32_t p = 0; p < 16; ++p) |
|         { |
|             const color_rgba src = pPixels[p]; |
|             for (uint32_t s = 0; s < 4; ++s) |
|                 pixel_errors[s][p] = color_distance(false, src, palette[s], false); |
|         } |
|     } |
| |
|     uint64_t lowest_total = UINT64_MAX; |
|     uint32_t winner = 0; |
| |
|     for (uint32_t cand = 0; cand < candidate_count; cand++) |
|     { |
|         uint32_t bits = pCandidates[cand].m_packed_selectors; |
| |
|         // Consume two selector bits per pixel, lowest bits first. |
|         uint64_t total = 0; |
|         for (uint32_t p = 0; p < 16; p++) |
|         { |
|             total += pixel_errors[bits & 3][p]; |
|             bits >>= 2; |
|         } |
| |
|         if (total >= lowest_total) |
|             continue; |
| |
|         lowest_total = total; |
|         winner = cand; |
| |
|         if (lowest_total == 0) |
|             break; |
|     } |
| |
|     pOutput_selector_cluster_indices[gid] = pSelector_cluster_indices[first_candidate + winner]; |
| } |
| |
| // ---- determine_selectors |
| |
| // By-value parameters of determine_selectors(). |
| typedef struct __attribute__((packed)) ds_param_struct_tag |
| { |
|     // Not read by the kernel itself. |
|     uint32_t m_total_blocks; |
| |
|     // Non-zero selects the perceptual metric in color_distance(). |
|     int m_perceptual; |
| } ds_param_struct; |
| |
| // Kernel: for each input block, choose the ETC1S selectors with the lowest error, given the |
| // block's predetermined ETC1S color5/intensity. |
| // |
| // pInput_etc_color5_and_inten holds one entry per block: the unscaled color5 in RGB and the |
| // intensity table index in alpha. Every pixel gets the selector of its nearest block color; |
| // when several block colors are equally near, the lowest selector index is used. |
| kernel void determine_selectors( |
|     const ds_param_struct params, |
|     const global pixel_block *pInput_blocks, |
|     const global color_rgba *pInput_etc_color5_and_inten, |
|     global etc_block *pOutput_blocks) |
| { |
|     const uint32_t gid = get_global_id(0); |
|     const global color_rgba *pPixels = pInput_blocks[gid].m_pixels; |
|     const bool perceptual = params.m_perceptual; |
| |
|     color_rgba color5_inten = pInput_etc_color5_and_inten[gid]; |
| |
|     color_rgba palette[4]; |
|     get_block_colors5(palette, &color5_inten, color5_inten.w, false); |
| |
|     etc_block result; |
|     etc_block_set_flip_bit(&result, true); |
|     etc_block_set_block_color5_etc1s(&result, color5_inten); |
|     etc_block_set_inten_tables_etc1s(&result, color5_inten.w); |
| |
|     // Pixels are stored row by row: index = y * 4 + x. |
|     for (uint32_t y = 0; y < 4; y++) |
|     { |
|         for (uint32_t x = 0; x < 4; x++) |
|         { |
|             const color_rgba src = pPixels[y * 4 + x]; |
| |
|             uint lowest = color_distance(perceptual, src, palette[0], false); |
|             uint32_t chosen = 0; |
| |
|             // The strict comparison keeps the lowest index on ties. |
|             for (uint32_t s = 1; s < 4; s++) |
|             { |
|                 const uint dist = color_distance(perceptual, src, palette[s], false); |
|                 if (dist < lowest) |
|                 { |
|                     lowest = dist; |
|                     chosen = s; |
|                 } |
|             } |
| |
|             etc_block_set_selector(&result, x, y, chosen); |
|         } |
|     } |
| |
|     pOutput_blocks[gid] = result; |
| } |
| |