blob: 6eda24f1b847034dd3f0a38c7cfb550e867ce869 [file] [log] [blame]
//#define _DEBUG
// Provide NULL when the compiler has not already defined it.
#ifndef NULL
#define NULL 0L
// Fixed-width integer names used throughout this file, mapped onto the built-in scalar types.
typedef char int8_t;
typedef uchar uint8_t;
typedef short int16_t;
typedef ushort uint16_t;
typedef int int32_t;
typedef uint uint32_t;
typedef long int64_t;
typedef ulong uint64_t;
// One RGBA pixel as four unsigned bytes; the code below reads .x/.y/.z/.w as R/G/B/A.
typedef uchar4 color_rgba;
// Largest signed 64-bit value, taken from the limit of `long`.
#define INT64_MAX LONG_MAX
// Returns a multiplied by itself.
int squarei(int a)
{
	const int squared = a * a;
	return squared;
}
// Assertion support. With _DEBUG defined, assert(x) forwards the condition, its source text and
// the current line number to internal_assert().
#ifdef _DEBUG
// Prints "assert() failed on line <line>: <pMsg>" via printf when x is false.
inline void internal_assert(bool x, constant char *pMsg, int line)
if (!x)
printf("assert() failed on line %i: %s\n", line, pMsg);
#define assert(x) internal_assert(x, #x, __LINE__)
// Variant of assert(x) that expands to nothing.
#define assert(x)
// Limits x to the range [0, 255] and returns it as an 8-bit value.
inline uint8_t clamp255(int x)
{
	const int bounded = clamp(x, 0, 255);
	return bounded;
}
inline uint8_t clamp255_flag(int x, bool *pDid_clamp)
if (x < 0)
*pDid_clamp = true;
return 0;
else if (x > 255)
*pDid_clamp = true;
return 255;
return (uint8_t)(x);
// Packed parameter record read by the kernels in this file through a `global` pointer.
typedef struct __attribute__ ((packed)) encode_etc1s_param_struct_tag
uint32_t m_total_blocks;
// Passed as the `perceptual` argument of color_distance().
int m_perceptual;
// Passed as the permutation count to etc1s_optimizer_internal_cluster_fit().
int m_total_perms;
} encode_etc1s_param_struct;
// A 4x4 block of RGBA pixels stored row by row (16 entries).
typedef struct __attribute__ ((packed)) pixel_block_tag
color_rgba m_pixels[16]; // [y*4+x]
} pixel_block;
// Returns an integer error value between colors e1 and e2.
//   perceptual - when true, the error is built from weighted luma/chroma deltas;
//                otherwise it is the plain sum of squared channel differences.
//   alpha      - when true, the .w (alpha) difference contributes as well.
uint color_distance(bool perceptual, color_rgba e1, color_rgba e2, bool alpha)
if (perceptual)
// Floating-point formulation of the perceptual metric, disabled by the preprocessor.
#if 0
float3 delta_rgb = (float3)(e1.x - e2.x, e1.y - e2.y, e1.z - e2.z);
float3 delta_ycbcr;
delta_ycbcr.x = dot(delta_rgb, (float3)(.2126f, .7152f, .0722f)); // y
delta_ycbcr.y = delta_rgb.x - delta_ycbcr.x; // cr
delta_ycbcr.z = delta_rgb.z - delta_ycbcr.x; // cb
delta_ycbcr *= delta_ycbcr;
float d = dot(delta_ycbcr, (float3)(1.0f, 0.203125f, 0.0234375f));
if (alpha)
int delta_a = e1.w - e2.w;
d += delta_a * delta_a;
d = clamp(d * 256.0f + .5f, 0.0f, (float)UINT32_MAX);
return (uint)(d);
// This matches the CPU code, which is useful for testing.
int dr = e1.x - e2.x;
int dg = e1.y - e2.y;
int db = e1.z - e2.z;
// Luma delta using integer weights 27/92/9 (they sum to 128).
int delta_l = dr * 27 + dg * 92 + db * 9;
// Chroma deltas: red and blue, each scaled by 128, minus the luma delta.
int delta_cr = dr * 128 - delta_l;
int delta_cb = db * 128 - delta_l;
// Squared terms are scaled down by 7 bits; the chroma terms get extra weights 26/128 and 3/128.
uint id = ((uint)(delta_l * delta_l) >> 7U) +
((((uint)(delta_cr * delta_cr) >> 7U) * 26U) >> 7U) +
((((uint)(delta_cb * delta_cb) >> 7U) * 3U) >> 7U);
if (alpha)
// Alpha delta is scaled by 128 before squaring, then brought back down by 7 bits.
int da = (e1.w - e2.w) << 7;
id += ((uint)(da * da) >> 7U);
return id;
// Non-perceptual, with alpha: sum of squared R, G, B and A differences.
else if (alpha)
int dr = e1.x - e2.x;
int dg = e1.y - e2.y;
int db = e1.z - e2.z;
int da = e1.w - e2.w;
return dr * dr + dg * dg + db * db + da * da;
// Non-perceptual, without alpha: sum of squared R, G and B differences.
int dr = e1.x - e2.x;
int dg = e1.y - e2.y;
int db = e1.z - e2.z;
return dr * dr + dg * dg + db * db;
// One encoded ETC1 block. The accessor functions below address it through m_bytes,
// where byte 0 carries the most significant bits (see the offset table in the comment).
typedef struct __attribute__ ((packed)) etc_block_tag
// big endian uint64:
// bit ofs: 56 48 40 32 24 16 8 0
// byte ofs: b0, b1, b2, b3, b4, b5, b6, b7
uint64_t m_uint64;
uint8_t m_bytes[8];
} etc_block;
// Sizes, value counts and bit offsets describing the ETC1 block layout.
// Bit offsets are positions within the 64-bit block, as consumed by
// etc_block_get_byte_bits()/etc_block_set_byte_bits().
enum etc_constants
cETC1BytesPerBlock = 8U,
// Each pixel selector is 2 bits wide (4 possible values).
cETC1SelectorBits = 2U,
cETC1SelectorValues = 1U << cETC1SelectorBits,
cETC1SelectorMask = cETC1SelectorValues - 1U,
// Blocks are 4 pixels on a side.
cETC1BlockShift = 2U,
cETC1BlockSize = 1U << cETC1BlockShift,
cETC1LSBSelectorIndicesBitOffset = 0,
cETC1MSBSelectorIndicesBitOffset = 16,
cETC1FlipBitOffset = 32,
cETC1DiffBitOffset = 33,
// Intensity modifier table index: 3 bits (8 tables) per subblock.
cETC1IntenModifierNumBits = 3,
cETC1IntenModifierValues = 1 << cETC1IntenModifierNumBits,
cETC1RightIntenModifierTableBitOffset = 34,
cETC1LeftIntenModifierTableBitOffset = 37,
// Base+Delta encoding (5 bit bases, 3 bit delta)
cETC1BaseColorCompNumBits = 5,
cETC1BaseColorCompMax = 1 << cETC1BaseColorCompNumBits,
cETC1DeltaColorCompNumBits = 3,
// Note: the next two constants have the same value.
cETC1DeltaColorComp = 1 << cETC1DeltaColorCompNumBits,
cETC1DeltaColorCompMax = 1 << cETC1DeltaColorCompNumBits,
cETC1BaseColor5RBitOffset = 59,
cETC1BaseColor5GBitOffset = 51,
cETC1BaseColor5BBitOffset = 43,
cETC1DeltaColor3RBitOffset = 56,
cETC1DeltaColor3GBitOffset = 48,
cETC1DeltaColor3BBitOffset = 40,
// Absolute (non-delta) encoding (two 4-bit per component bases)
cETC1AbsColorCompNumBits = 4,
cETC1AbsColorCompMax = 1 << cETC1AbsColorCompNumBits,
cETC1AbsColor4R1BitOffset = 60,
cETC1AbsColor4G1BitOffset = 52,
cETC1AbsColor4B1BitOffset = 44,
cETC1AbsColor4R2BitOffset = 56,
cETC1AbsColor4G2BitOffset = 48,
cETC1AbsColor4B2BitOffset = 40,
// Inclusive range of a signed 3-bit color delta.
cETC1ColorDeltaMin = -4,
cETC1ColorDeltaMax = 3,
// Delta3:
// 0 1 2 3 4 5 6 7
// 000 001 010 011 100 101 110 111
// 0 1 2 3 -4 -3 -2 -1
// Selector-count combinations tried, in table order, by etc1s_optimizer_internal_cluster_fit().
// Each entry holds four counts, one per intensity-table entry; the consumer multiplies the
// per-entry color offset by the count and divides the summed result by 8.
constant struct { uint8_t m_v[4]; } g_cluster_fit_order_tab[BASISU_ETC1_CLUSTER_FIT_ORDER_TABLE_SIZE] =
{ { 0, 0, 0, 8 } },{ { 0, 5, 2, 1 } },{ { 0, 6, 1, 1 } },{ { 0, 7, 0, 1 } },{ { 0, 7, 1, 0 } },
{ { 0, 0, 8, 0 } },{ { 0, 0, 3, 5 } },{ { 0, 1, 7, 0 } },{ { 0, 0, 4, 4 } },{ { 0, 0, 2, 6 } },
{ { 0, 0, 7, 1 } },{ { 0, 0, 1, 7 } },{ { 0, 0, 5, 3 } },{ { 1, 6, 0, 1 } },{ { 0, 0, 6, 2 } },
{ { 0, 2, 6, 0 } },{ { 2, 4, 2, 0 } },{ { 0, 3, 5, 0 } },{ { 3, 3, 1, 1 } },{ { 4, 2, 0, 2 } },
{ { 1, 5, 2, 0 } },{ { 0, 5, 3, 0 } },{ { 0, 6, 2, 0 } },{ { 2, 4, 1, 1 } },{ { 5, 1, 0, 2 } },
{ { 6, 1, 1, 0 } },{ { 3, 3, 0, 2 } },{ { 6, 0, 0, 2 } },{ { 0, 8, 0, 0 } },{ { 6, 1, 0, 1 } },
{ { 0, 1, 6, 1 } },{ { 1, 6, 1, 0 } },{ { 4, 1, 3, 0 } },{ { 0, 2, 5, 1 } },{ { 5, 0, 3, 0 } },
{ { 5, 3, 0, 0 } },{ { 0, 1, 5, 2 } },{ { 0, 3, 4, 1 } },{ { 2, 5, 1, 0 } },{ { 1, 7, 0, 0 } },
{ { 0, 1, 4, 3 } },{ { 6, 0, 2, 0 } },{ { 0, 4, 4, 0 } },{ { 2, 6, 0, 0 } },{ { 0, 2, 4, 2 } },
{ { 0, 5, 1, 2 } },{ { 0, 6, 0, 2 } },{ { 3, 5, 0, 0 } },{ { 0, 4, 3, 1 } },{ { 3, 4, 1, 0 } },
{ { 4, 3, 1, 0 } },{ { 1, 5, 0, 2 } },{ { 0, 3, 3, 2 } },{ { 1, 4, 1, 2 } },{ { 0, 4, 2, 2 } },
{ { 2, 3, 3, 0 } },{ { 4, 4, 0, 0 } },{ { 1, 2, 4, 1 } },{ { 0, 5, 0, 3 } },{ { 0, 1, 3, 4 } },
{ { 1, 5, 1, 1 } },{ { 1, 4, 2, 1 } },{ { 1, 3, 2, 2 } },{ { 5, 2, 1, 0 } },{ { 1, 3, 3, 1 } },
{ { 0, 1, 2, 5 } },{ { 1, 1, 5, 1 } },{ { 0, 3, 2, 3 } },{ { 2, 5, 0, 1 } },{ { 3, 2, 2, 1 } },
{ { 2, 3, 0, 3 } },{ { 1, 4, 3, 0 } },{ { 2, 2, 1, 3 } },{ { 6, 2, 0, 0 } },{ { 1, 0, 6, 1 } },
{ { 3, 3, 2, 0 } },{ { 7, 1, 0, 0 } },{ { 3, 1, 4, 0 } },{ { 0, 2, 3, 3 } },{ { 0, 4, 1, 3 } },
{ { 0, 4, 0, 4 } },{ { 0, 1, 0, 7 } },{ { 2, 0, 5, 1 } },{ { 2, 0, 4, 2 } },{ { 3, 0, 2, 3 } },
{ { 2, 2, 4, 0 } },{ { 2, 2, 3, 1 } },{ { 4, 0, 3, 1 } },{ { 3, 2, 3, 0 } },{ { 2, 3, 2, 1 } },
{ { 1, 3, 4, 0 } },{ { 7, 0, 1, 0 } },{ { 3, 0, 4, 1 } },{ { 1, 0, 5, 2 } },{ { 8, 0, 0, 0 } },
{ { 3, 0, 1, 4 } },{ { 4, 1, 1, 2 } },{ { 4, 0, 2, 2 } },{ { 1, 2, 5, 0 } },{ { 4, 2, 1, 1 } },
{ { 3, 4, 0, 1 } },{ { 2, 0, 3, 3 } },{ { 5, 0, 1, 2 } },{ { 5, 0, 0, 3 } },{ { 2, 4, 0, 2 } },
{ { 2, 1, 4, 1 } },{ { 4, 0, 1, 3 } },{ { 2, 1, 5, 0 } },{ { 4, 2, 2, 0 } },{ { 4, 0, 4, 0 } },
{ { 1, 0, 4, 3 } },{ { 1, 4, 0, 3 } },{ { 3, 0, 3, 2 } },{ { 4, 3, 0, 1 } },{ { 0, 1, 1, 6 } },
{ { 1, 3, 1, 3 } },{ { 0, 2, 2, 4 } },{ { 2, 0, 2, 4 } },{ { 5, 1, 1, 1 } },{ { 3, 0, 5, 0 } },
{ { 2, 3, 1, 2 } },{ { 3, 0, 0, 5 } },{ { 0, 3, 1, 4 } },{ { 5, 0, 2, 1 } },{ { 2, 1, 3, 2 } },
{ { 2, 0, 6, 0 } },{ { 3, 1, 3, 1 } },{ { 5, 1, 2, 0 } },{ { 1, 0, 3, 4 } },{ { 1, 1, 6, 0 } },
{ { 4, 0, 0, 4 } },{ { 2, 0, 1, 5 } },{ { 0, 3, 0, 5 } },{ { 1, 3, 0, 4 } },{ { 4, 1, 2, 1 } },
{ { 1, 2, 3, 2 } },{ { 3, 1, 0, 4 } },{ { 5, 2, 0, 1 } },{ { 1, 2, 2, 3 } },{ { 3, 2, 1, 2 } },
{ { 2, 2, 2, 2 } },{ { 6, 0, 1, 1 } },{ { 1, 2, 1, 4 } },{ { 1, 1, 4, 2 } },{ { 3, 2, 0, 3 } },
{ { 1, 2, 0, 5 } },{ { 1, 0, 7, 0 } },{ { 3, 1, 2, 2 } },{ { 1, 0, 2, 5 } },{ { 2, 0, 0, 6 } },
{ { 2, 1, 1, 4 } },{ { 2, 2, 0, 4 } },{ { 1, 1, 3, 3 } },{ { 7, 0, 0, 1 } },{ { 1, 0, 0, 7 } },
{ { 2, 1, 2, 3 } },{ { 4, 1, 0, 3 } },{ { 3, 1, 1, 3 } },{ { 1, 1, 2, 4 } },{ { 2, 1, 0, 5 } },
{ { 1, 0, 1, 6 } },{ { 0, 2, 1, 5 } },{ { 0, 2, 0, 6 } },{ { 1, 1, 1, 5 } },{ { 1, 1, 0, 6 } }
// Intensity modifier tables, indexed [table][selector]. Each row lists the four signed
// offsets that are added to every component of a base color to form the block colors.
constant int g_etc1_inten_tables[cETC1IntenModifierValues][cETC1SelectorValues] =
{ -8, -2, 2, 8 }, { -17, -5, 5, 17 }, { -29, -9, 9, 29 }, { -42, -13, 13, 42 },
{ -60, -18, 18, 60 }, { -80, -24, 24, 80 }, { -106, -33, 33, 106 }, { -183, -47, 47, 183 }
// Maps a raw 2-bit selector as stored in the block to an index into a g_etc1_inten_tables row.
constant uint8_t g_etc1_to_selector_index[cETC1SelectorValues] = { 2, 3, 1, 0 };
// Inverse mapping: g_etc1_inten_tables row index to the raw 2-bit selector stored in the block.
constant uint8_t g_selector_index_to_etc1[cETC1SelectorValues] = { 3, 2, 0, 1 };
// Extracts a bit field from the block.
//   ofs - bit offset of the field's least significant bit (bit 0 lives in m_bytes[7])
//   num - field width in bits; the field must not straddle a byte boundary
// Returns the field value right-aligned.
uint32_t etc_block_get_byte_bits(const etc_block *p, uint32_t ofs, uint32_t num)
{
	assert((ofs + num) <= 64U);
	assert(num && (num <= 8U));
	assert((ofs >> 3) == ((ofs + num - 1) >> 3));

	const uint32_t shift = ofs & 7;
	const uint32_t field_mask = (1 << num) - 1;
	const uint32_t source_byte = p->m_bytes[7 - (ofs >> 3)];

	return (source_byte >> shift) & field_mask;
}
// Stores a bit field into the block.
//   ofs  - bit offset of the field's least significant bit (bit 0 lives in m_bytes[7])
//   num  - field width in bits; the field is written into a single byte, so it must be
//          1..8 bits wide and must not straddle a byte boundary
//   bits - value to store; must fit in num bits
void etc_block_set_byte_bits(etc_block *p, uint32_t ofs, uint32_t num, uint32_t bits)
{
	assert((ofs + num) <= 64U);
	// Same width limit as etc_block_get_byte_bits(): only one byte is touched.
	assert(num && (num <= 8U));
	assert((ofs >> 3) == ((ofs + num - 1) >> 3));
	assert(bits < (1U << num));

	const uint32_t byte_ofs = 7 - (ofs >> 3);
	const uint32_t byte_bit_ofs = ofs & 7;
	const uint32_t mask = (1 << num) - 1;

	// Clear the destination field, then merge the new value in.
	p->m_bytes[byte_ofs] &= ~(mask << byte_bit_ofs);
	p->m_bytes[byte_ofs] |= (bits << byte_bit_ofs);
}
bool etc_block_get_flip_bit(const etc_block *p)
return (p->m_bytes[3] & 1) != 0;
// Writes the flip bit (bit 0 of m_bytes[3]); the other bits of that byte are preserved.
void etc_block_set_flip_bit(etc_block *p, bool flip)
{
	const uint8_t others = p->m_bytes[3] & ~1;
	p->m_bytes[3] = others | (uint8_t)(flip);
}
bool etc_block_get_diff_bit(const etc_block *p)
return (p->m_bytes[3] & 2) != 0;
// Writes the diff bit (bit 1 of m_bytes[3]); the other bits of that byte are preserved.
void etc_block_set_diff_bit(etc_block *p, bool diff)
{
	const uint8_t others = p->m_bytes[3] & ~2;
	p->m_bytes[3] = others | ((uint32_t)(diff) << 1);
}
// Reads the 3-bit intensity modifier table index (0-7) of one subblock.
// subblock_id 0 is the left/top subblock (bits 5-7 of m_bytes[3]),
// subblock_id 1 is the right/bottom subblock (bits 2-4 of m_bytes[3]).
uint32_t etc_block_get_inten_table(const etc_block *p, uint32_t subblock_id)
{
	assert(subblock_id < 2);

	uint32_t shift = 5;
	if (subblock_id)
		shift = 2;

	return (p->m_bytes[3] >> shift) & 7;
}
// Stores intensity modifier table index t (0-7) for subblock subblock_id (0 or 1),
// leaving the other bits of m_bytes[3] unchanged.
void etc_block_set_inten_table(etc_block *p, uint32_t subblock_id, uint32_t t)
{
	assert(subblock_id < 2);
	assert(t < 8);

	uint32_t shift = 5;
	if (subblock_id)
		shift = 2;

	const uint32_t kept = p->m_bytes[3] & ~(7 << shift);
	p->m_bytes[3] = (uint8_t)(kept | (t << shift));
}
// Assigns the same intensity modifier table index t to both subblocks.
void etc_block_set_inten_tables_etc1s(etc_block *p, uint32_t t)
{
	for (uint32_t subblock_id = 0; subblock_id < 2; subblock_id++)
		etc_block_set_inten_table(p, subblock_id, t);
}
// Returns the raw 2-bit selector stored for pixel (x, y).
// The low bit comes from m_bytes[6..7] and the high bit from the byte two positions
// earlier (m_bytes[4..5]); pixels are numbered column-major (x * 4 + y).
uint32_t etc_block_get_raw_selector(const etc_block *pBlock, uint32_t x, uint32_t y)
{
	assert((x | y) < 4);

	const uint32_t sel_bit = x * 4 + y;
	const uint32_t shift = sel_bit & 7;
	const uint32_t low_byte = 7 - (sel_bit >> 3);
	const uint32_t high_byte = low_byte - 2;

	const uint32_t low = (pBlock->m_bytes[low_byte] >> shift) & 1;
	const uint32_t high = (pBlock->m_bytes[high_byte] >> shift) & 1;

	return low | (high << 1);
}
// Returns the selector of pixel (x, y) translated to a 0-3 index that can be used
// directly on a row of g_etc1_inten_tables.
uint32_t etc_block_get_selector(const etc_block *pBlock, uint32_t x, uint32_t y)
{
	const uint32_t raw = etc_block_get_raw_selector(pBlock, x, y);
	return g_etc1_to_selector_index[raw];
}
// Stores the selector for pixel (x, y). val is a 0-3 index into a row of
// g_etc1_inten_tables; it is translated to the raw ETC1 code, whose low bit goes to
// m_bytes[6..7] and whose high bit goes to the byte two positions earlier.
void etc_block_set_selector(etc_block *pBlock, uint32_t x, uint32_t y, uint32_t val)
{
	assert((x | y | val) < 4);

	const uint32_t sel_bit = x * 4 + y;
	const uint32_t shift = sel_bit & 7;
	const uint32_t low_byte = 7 - (sel_bit >> 3);
	const uint32_t high_byte = low_byte - 2;
	const uint32_t keep = ~((uint32_t)1 << shift);

	const uint32_t raw = g_selector_index_to_etc1[val];

	pBlock->m_bytes[low_byte] = (uint8_t)((pBlock->m_bytes[low_byte] & keep) | ((raw & 1) << shift));
	pBlock->m_bytes[high_byte] = (uint8_t)((pBlock->m_bytes[high_byte] & keep) | ((raw >> 1) << shift));
}
// Stores a packed 4:4:4 color (R in bits 8-11, G in bits 4-7, B in bits 0-3) as the
// absolute base color of subblock 0 (idx == 0) or subblock 1 (idx != 0).
void etc_block_set_base4_color(etc_block *pBlock, uint32_t idx, uint16_t c)
{
	const uint32_t r_ofs = idx ? cETC1AbsColor4R2BitOffset : cETC1AbsColor4R1BitOffset;
	const uint32_t g_ofs = idx ? cETC1AbsColor4G2BitOffset : cETC1AbsColor4G1BitOffset;
	const uint32_t b_ofs = idx ? cETC1AbsColor4B2BitOffset : cETC1AbsColor4B1BitOffset;

	etc_block_set_byte_bits(pBlock, r_ofs, 4, (c >> 8) & 15);
	etc_block_set_byte_bits(pBlock, g_ofs, 4, (c >> 4) & 15);
	etc_block_set_byte_bits(pBlock, b_ofs, 4, c & 15);
}
// Reads the absolute base color of subblock 0 (idx == 0) or subblock 1 (idx != 0) and
// returns it packed as 4:4:4 (R in bits 8-11, G in bits 4-7, B in bits 0-3).
uint16_t etc_block_get_base4_color(const etc_block *pBlock, uint32_t idx)
{
	const uint32_t r_ofs = idx ? cETC1AbsColor4R2BitOffset : cETC1AbsColor4R1BitOffset;
	const uint32_t g_ofs = idx ? cETC1AbsColor4G2BitOffset : cETC1AbsColor4G1BitOffset;
	const uint32_t b_ofs = idx ? cETC1AbsColor4B2BitOffset : cETC1AbsColor4B1BitOffset;

	const uint32_t red = etc_block_get_byte_bits(pBlock, r_ofs, 4);
	const uint32_t green = etc_block_get_byte_bits(pBlock, g_ofs, 4);
	const uint32_t blue = etc_block_get_byte_bits(pBlock, b_ofs, 4);

	return (uint16_t)(blue | (green << 4U) | (red << 8U));
}
// Stores a packed 5:5:5 base color (R in bits 10-14, G in bits 5-9, B in bits 0-4).
void etc_block_set_base5_color(etc_block *pBlock, uint16_t c)
{
	const uint32_t red = (c >> 10) & 31;
	const uint32_t green = (c >> 5) & 31;
	const uint32_t blue = c & 31;

	etc_block_set_byte_bits(pBlock, cETC1BaseColor5RBitOffset, 5, red);
	etc_block_set_byte_bits(pBlock, cETC1BaseColor5GBitOffset, 5, green);
	etc_block_set_byte_bits(pBlock, cETC1BaseColor5BBitOffset, 5, blue);
}
uint16_t etc_block_get_base5_color(const etc_block *pBlock)
const uint32_t r = etc_block_get_byte_bits(pBlock, cETC1BaseColor5RBitOffset, 5);
const uint32_t g = etc_block_get_byte_bits(pBlock, cETC1BaseColor5GBitOffset, 5);
const uint32_t b = etc_block_get_byte_bits(pBlock, cETC1BaseColor5BBitOffset, 5);
return (uint16_t)(b | (g << 5U) | (r << 10U));
// Stores a packed 3:3:3 color delta (R in bits 6-8, G in bits 3-5, B in bits 0-2).
void etc_block_set_delta3_color(etc_block *pBlock, uint16_t c)
{
	const uint32_t red = (c >> 6) & 7;
	const uint32_t green = (c >> 3) & 7;
	const uint32_t blue = c & 7;

	etc_block_set_byte_bits(pBlock, cETC1DeltaColor3RBitOffset, 3, red);
	etc_block_set_byte_bits(pBlock, cETC1DeltaColor3GBitOffset, 3, green);
	etc_block_set_byte_bits(pBlock, cETC1DeltaColor3BBitOffset, 3, blue);
}
uint16_t etc_block_get_delta3_color(const etc_block *pBlock)
const uint32_t r = etc_block_get_byte_bits(pBlock, cETC1DeltaColor3RBitOffset, 3);
const uint32_t g = etc_block_get_byte_bits(pBlock, cETC1DeltaColor3GBitOffset, 3);
const uint32_t b = etc_block_get_byte_bits(pBlock, cETC1DeltaColor3BBitOffset, 3);
return (uint16_t)(b | (g << 3U) | (r << 6U));
// Splits a packed 3:3:3 delta into signed components. Each 3-bit field is interpreted
// as two's complement: 0..3 stay as they are, 4..7 become -4..-1.
void etc_block_unpack_delta3(int *pR, int *pG, int *pB, uint16_t packed_delta3)
{
	const int raw_r = (packed_delta3 >> 6) & 7;
	const int raw_g = (packed_delta3 >> 3) & 7;
	const int raw_b = packed_delta3 & 7;

	const int red = (raw_r >= 4) ? (raw_r - 8) : raw_r;
	const int green = (raw_g >= 4) ? (raw_g - 8) : raw_g;
	const int blue = (raw_b >= 4) ? (raw_b - 8) : raw_b;

	*pR = red;
	*pG = green;
	*pB = blue;
}
// Adds a packed 3:3:3 signed delta to a packed 5:5:5 color and writes the result to *pResult.
//   scaled - when true, each 5-bit component is expanded to 8 bits by bit replication
//   alpha  - alpha value of the result, limited to 255
// Returns false when any component left the 0..31 range (it is then clamped), true otherwise.
bool etc_block_unpack_color5_delta3(color_rgba *pResult, uint16_t packed_color5, uint16_t packed_delta3, bool scaled, uint32_t alpha)
{
	int delta_r, delta_g, delta_b;
	etc_block_unpack_delta3(&delta_r, &delta_g, &delta_b, packed_delta3);

	int r = ((packed_color5 >> 10U) & 31U) + delta_r;
	int g = ((packed_color5 >> 5U) & 31U) + delta_g;
	int b = (packed_color5 & 31U) + delta_b;

	// A negative component has its high bits set, so one unsigned compare covers both ends.
	const bool in_range = !((uint32_t)(r | g | b) > 31U);
	if (!in_range)
	{
		r = clamp(r, 0, 31);
		g = clamp(g, 0, 31);
		b = clamp(b, 0, 31);
	}

	if (scaled)
	{
		r = (r << 3U) | (r >> 2U);
		g = (g << 3U) | (g >> 2U);
		b = (b << 3U) | (b >> 2U);
	}

	*pResult = (color_rgba)(r, g, b, min(alpha, 255U));
	return in_range;
}
// Unpacks a 5:5:5 color. When scaled is true each component is expanded to 8 bits by
// bit replication; alpha is limited to 255 and stored in .w.
color_rgba etc_block_unpack_color5(uint16_t packed_color5, bool scaled, uint32_t alpha)
{
	uint32_t red = (packed_color5 >> 10U) & 31U;
	uint32_t green = (packed_color5 >> 5U) & 31U;
	uint32_t blue = packed_color5 & 31U;

	if (scaled)
	{
		red = (red << 3U) | (red >> 2U);
		green = (green << 3U) | (green >> 2U);
		blue = (blue << 3U) | (blue >> 2U);
	}

	const uint32_t a = min(alpha, 255U);
	return (color_rgba)(red, green, blue, a);
}
// Unpacks a 4:4:4 color. When scaled is true each component is expanded to 8 bits by
// duplicating its nibble; alpha is limited to 255 and stored in .w.
color_rgba etc_block_unpack_color4(uint16_t packed_color4, bool scaled, uint32_t alpha)
{
	uint32_t red = (packed_color4 >> 8U) & 15U;
	uint32_t green = (packed_color4 >> 4U) & 15U;
	uint32_t blue = packed_color4 & 15U;

	if (scaled)
	{
		red = (red << 4U) | red;
		green = (green << 4U) | green;
		blue = (blue << 4U) | blue;
	}

	const uint32_t a = min(alpha, 255U);
	return (color_rgba)(red, green, blue, a);
}
// Computes the four colors selectable within one subblock of an encoded block and
// writes them to pBlock_colors[0..3] (alpha is always 255).
// Returns true when any component had to be clamped to 0..255, false otherwise.
bool etc_block_get_block_colors(const etc_block *pBlock, color_rgba* pBlock_colors, uint32_t subblock_index)
{
	color_rgba base;

	if (!etc_block_get_diff_bit(pBlock))
	{
		// Absolute mode: each subblock has its own 4:4:4 base color.
		base = etc_block_unpack_color4(etc_block_get_base4_color(pBlock, subblock_index), true, 255);
	}
	else if (subblock_index)
	{
		// Differential mode, second subblock: base color plus signed delta.
		etc_block_unpack_color5_delta3(&base, etc_block_get_base5_color(pBlock), etc_block_get_delta3_color(pBlock), true, 255);
	}
	else
	{
		// Differential mode, first subblock: the 5:5:5 base color itself.
		base = etc_block_unpack_color5(etc_block_get_base5_color(pBlock), true, 255);
	}

	constant int* pModifiers = g_etc1_inten_tables[etc_block_get_inten_table(pBlock, subblock_index)];

	bool clamped = false;
	for (uint32_t s = 0; s < 4; s++)
	{
		const int offset = pModifiers[s];
		const uint8_t red = clamp255_flag(base.x + offset, &clamped);
		const uint8_t green = clamp255_flag(base.y + offset, &clamped);
		const uint8_t blue = clamp255_flag(base.z + offset, &clamped);
		pBlock_colors[s] = (color_rgba)(red, green, blue, 255);
	}

	return clamped;
}
// Computes the four colors produced by applying intensity table inten_table to a base color.
// When scaled is false, the base components are first expanded from 5 to 8 bits by bit
// replication; when true they are used as given. Results are clamped to 0..255, alpha is 255.
void get_block_colors5(color_rgba *pBlock_colors, const color_rgba *pBase_color5, uint32_t inten_table, bool scaled /* false */)
{
	color_rgba base = *pBase_color5;

	if (!scaled)
	{
		base.x = (base.x << 3) | (base.x >> 2);
		base.y = (base.y << 3) | (base.y >> 2);
		base.z = (base.z << 3) | (base.z >> 2);
	}

	constant int* pModifiers = g_etc1_inten_tables[inten_table];

	for (uint32_t s = 0; s < 4; s++)
	{
		const int offset = pModifiers[s];
		pBlock_colors[s] = (color_rgba)(clamp255(base.x + offset), clamp255(base.y + offset), clamp255(base.z + offset), 255);
	}
}
// For every pixel of subblocks [begin_subblock, end_subblock), picks the block color with the
// smallest color_distance() to the matching source pixel (ties keep the lowest selector),
// stores that selector in the block, and returns the summed error of the chosen colors.
// pSource_pixels holds the 4x4 source block row by row ([y*4+x]).
uint64_t etc_block_determine_selectors(etc_block *pBlock, const color_rgba* pSource_pixels, bool perceptual, uint32_t begin_subblock /*= 0*/, uint32_t end_subblock /*= 2*/)
{
	uint64_t error_sum = 0;

	for (uint32_t sb = begin_subblock; sb < end_subblock; sb++)
	{
		color_rgba candidates[4];
		etc_block_get_block_colors(pBlock, candidates, sb);

		// With the flip bit set a subblock is 4 wide and 2 tall (stacked vertically);
		// otherwise it is 2 wide and 4 tall (side by side).
		const bool flipped = etc_block_get_flip_bit(pBlock);
		const uint32_t x_origin = flipped ? 0 : (sb * 2);
		const uint32_t y_origin = flipped ? (sb * 2) : 0;
		const uint32_t width = flipped ? 4 : 2;
		const uint32_t height = flipped ? 2 : 4;

		for (uint32_t dy = 0; dy < height; dy++)
		{
			for (uint32_t dx = 0; dx < width; dx++)
			{
				const uint32_t px = x_origin + dx;
				const uint32_t py = y_origin + dy;

				uint32_t chosen = 0;
				uint64_t lowest = UINT64_MAX;

				for (uint32_t s = 0; s < 4; s++)
				{
					const uint64_t err = color_distance(perceptual, candidates[s], pSource_pixels[px + py * 4], false);
					if (err < lowest)
					{
						lowest = err;
						chosen = s;
					}
				}

				etc_block_set_selector(pBlock, px, py, chosen);
				error_sum += lowest;
			}
		}
	}

	return error_sum;
}
// Packs three components into 4:4:4 (R in bits 8-11, G in bits 4-7, B in bits 0-3).
// When scaled is true the inputs are treated as 0..255 values and reduced to 0..15
// first; in every case each component is limited to 15 before packing.
uint16_t etc_block_pack_color4_rgb(uint32_t r, uint32_t g, uint32_t b, bool scaled)
{
	if (scaled)
	{
		const uint32_t bias = 127;
		r = (r * 15U + bias) / 255U;
		g = (g * 15U + bias) / 255U;
		b = (b * 15U + bias) / 255U;
	}

	const uint32_t red = min(r, 15U);
	const uint32_t green = min(g, 15U);
	const uint32_t blue = min(b, 15U);

	return (uint16_t)(blue | (green << 4U) | (red << 8U));
}
// Packs the R, G and B components (.x, .y, .z) of color into 4:4:4 form.
// See etc_block_pack_color4_rgb() for the meaning of scaled. Alpha is ignored.
uint16_t etc_block_pack_color4(color_rgba color, bool scaled)
{
	return etc_block_pack_color4_rgb(color.x, color.y, color.z, scaled);
}
uint16_t etc_block_pack_delta3(int r, int g, int b)
assert((r >= cETC1ColorDeltaMin) && (r <= cETC1ColorDeltaMax));
assert((g >= cETC1ColorDeltaMin) && (g <= cETC1ColorDeltaMax));
assert((b >= cETC1ColorDeltaMin) && (b <= cETC1ColorDeltaMax));
if (r < 0) r += 8;
if (g < 0) g += 8;
if (b < 0) b += 8;
return (uint16_t)(b | (g << 3) | (r << 6));
// Switches the block to absolute mode (diff bit cleared) and stores two 4-bit-per-component
// base colors, one per subblock. The colors are passed through without rescaling.
void etc_block_set_block_color4(etc_block *pBlock, color_rgba c0_unscaled, color_rgba c1_unscaled)
{
	etc_block_set_diff_bit(pBlock, false);

	const uint16_t first = etc_block_pack_color4(c0_unscaled, false);
	etc_block_set_base4_color(pBlock, 0, first);

	const uint16_t second = etc_block_pack_color4(c1_unscaled, false);
	etc_block_set_base4_color(pBlock, 1, second);
}
// Packs three components into 5:5:5 (R in bits 10-14, G in bits 5-9, B in bits 0-4).
// When scaled is true the inputs are treated as 0..255 values and reduced to 0..31
// first; in every case each component is limited to 31 before packing.
uint16_t etc_block_pack_color5_rgb(uint32_t r, uint32_t g, uint32_t b, bool scaled)
{
	if (scaled)
	{
		const uint32_t bias = 127;
		r = (r * 31U + bias) / 255U;
		g = (g * 31U + bias) / 255U;
		b = (b * 31U + bias) / 255U;
	}

	const uint32_t red = min(r, 31U);
	const uint32_t green = min(g, 31U);
	const uint32_t blue = min(b, 31U);

	return (uint16_t)(blue | (green << 5U) | (red << 10U));
}
// Packs the R, G and B components (.x, .y, .z) of c into 5:5:5 form.
// See etc_block_pack_color5_rgb() for the meaning of scaled. Alpha is ignored.
uint16_t etc_block_pack_color5(color_rgba c, bool scaled)
{
	const uint32_t red = c.x;
	const uint32_t green = c.y;
	const uint32_t blue = c.z;
	return etc_block_pack_color5_rgb(red, green, blue, scaled);
}
// Switches the block to differential mode (diff bit set), stores c0_unscaled as the 5-bit
// base color and (c1_unscaled - c0_unscaled) as the 3-bit delta. The per-component
// differences are not range-checked here beyond the asserts in etc_block_pack_delta3().
void etc_block_set_block_color5(etc_block *pBlock, color_rgba c0_unscaled, color_rgba c1_unscaled)
{
	etc_block_set_diff_bit(pBlock, true);
	etc_block_set_base5_color(pBlock, etc_block_pack_color5(c0_unscaled, false));

	const int delta_r = c1_unscaled.x - c0_unscaled.x;
	const int delta_g = c1_unscaled.y - c0_unscaled.y;
	const int delta_b = c1_unscaled.z - c0_unscaled.z;

	const uint16_t packed_delta = etc_block_pack_delta3(delta_r, delta_g, delta_b);
	etc_block_set_delta3_color(pBlock, packed_delta);
}
// Switches the block to differential mode with a zero delta, so both subblocks use the
// single 5-bit base color c_unscaled.
void etc_block_set_block_color5_etc1s(etc_block *pBlock, color_rgba c_unscaled)
{
	etc_block_set_diff_bit(pBlock, true);

	const uint16_t base = etc_block_pack_color5(c_unscaled, false);
	etc_block_set_base5_color(pBlock, base);

	const uint16_t zero_delta = etc_block_pack_delta3(0, 0, 0);
	etc_block_set_delta3_color(pBlock, zero_delta);
}
// Like etc_block_set_block_color5(), but verifies that every component difference fits
// the signed 3-bit delta range first.
// Returns false (delta left unwritten) when a difference is out of range, true otherwise.
// The diff bit and the base color are written in both cases.
bool etc_block_set_block_color5_check(etc_block *pBlock, color_rgba c0_unscaled, color_rgba c1_unscaled)
{
	etc_block_set_diff_bit(pBlock, true);
	etc_block_set_base5_color(pBlock, etc_block_pack_color5(c0_unscaled, false));

	const int delta_r = c1_unscaled.x - c0_unscaled.x;
	const int delta_g = c1_unscaled.y - c0_unscaled.y;
	const int delta_b = c1_unscaled.z - c0_unscaled.z;

	const bool r_fits = (delta_r >= cETC1ColorDeltaMin) && (delta_r <= cETC1ColorDeltaMax);
	const bool g_fits = (delta_g >= cETC1ColorDeltaMin) && (delta_g <= cETC1ColorDeltaMax);
	const bool b_fits = (delta_b >= cETC1ColorDeltaMin) && (delta_b <= cETC1ColorDeltaMax);

	if (r_fits && g_fits && b_fits)
	{
		etc_block_set_delta3_color(pBlock, etc_block_pack_delta3(delta_r, delta_g, delta_b));
		return true;
	}

	return false;
}
// Writes all 16 selectors of a block at once. pSelectors holds raw 2-bit ETC1 selector
// codes row by row ([y*4+x]); inside the block they are stored column-major (bit x*4+y),
// low bits in m_bytes[6..7] and high bits in m_bytes[4..5].
void etc_block_pack_raw_selectors(etc_block *pBlock, const uint8_t *pSelectors)
{
	uint32_t low_plane = 0;
	uint32_t high_plane = 0;

	for (uint32_t i = 0; i < 16; i++)
	{
		const uint32_t x = i & 3;
		const uint32_t y = i >> 2;
		const uint32_t dst_bit = x * 4 + y;
		const uint32_t raw = pSelectors[i];

		low_plane |= ((raw & 1) << dst_bit);
		high_plane |= ((raw >> 1) << dst_bit);
	}

	pBlock->m_bytes[4] = (uint8_t)(high_plane >> 8);
	pBlock->m_bytes[5] = (uint8_t)(high_plane);
	pBlock->m_bytes[6] = (uint8_t)(low_plane >> 8);
	pBlock->m_bytes[7] = (uint8_t)(low_plane);
}
// ---- ETC1S block encoding/endpoint optimization
constant uint8_t g_eval_dist_tables[8][256] =
// 99% threshold
{ 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,},
{ 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,},
{ 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,0,0,1,0,0,0,0,0,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,0,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,},
{ 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,0,0,0,0,0,0,0,0,1,1,0,1,0,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,0,1,0,1,1,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,},
{ 1,1,1,1,1,1,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,1,0,0,0,0,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,0,1,0,1,0,0,0,0,0,0,0,0,0,1,0,0,1,},
{ 1,1,1,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,1,0,0,1,0,0,0,0,1,0,1,1,0,1,1,1,1,1,0,1,1,1,0,1,1,0,0,0,1,1,0,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,},
{ 1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,1,1,1,0,1,1,0,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,0,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,},
{ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,1,1,1,0,0,0,0,0,1,1,0,0,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,}
// Identifies one candidate encoding: a base color plus an intensity table index.
typedef struct etc1s_optimizer_solution_coordinates_tag
// Base color before 8-bit expansion; get_scaled_color() turns it into the 8-bit color.
color_rgba m_unscaled_color;
// Index of the row of g_etc1_inten_tables used with the base color.
uint32_t m_inten_table;
} etc1s_optimizer_solution_coordinates;
// Expands each of the R, G and B components by bit replication ((v << 3) | (v >> 2)),
// truncates the result to 8 bits, and returns the color with alpha set to 255.
color_rgba get_scaled_color(color_rgba unscaled_color)
{
	const int red = (unscaled_color.x << 3) | (unscaled_color.x >> 2);
	const int green = (unscaled_color.y << 3) | (unscaled_color.y >> 2);
	const int blue = (unscaled_color.z << 3) | (unscaled_color.z >> 2);

	return (color_rgba)((uint8_t)red, (uint8_t)green, (uint8_t)blue, 255);
}
// Result of evaluating one candidate base color.
typedef struct etc1s_optimizer_potential_solution_tag
// Total error of the best intensity table found for the candidate.
uint64_t m_error;
etc1s_optimizer_solution_coordinates m_coords;
// Per-pixel selectors; only filled in by the evaluator when there are at most 16 pixels.
uint8_t m_selectors[16];
// Set to true once at least one intensity table produced a result.
bool m_valid;
} etc1s_optimizer_potential_solution;
// Working state of the ETC1S endpoint optimizer, filled in by etc1s_optimizer_init().
typedef struct etc1s_optimizer_state_tag
// Average color quantized to the 0..31 range; the initial base color guess.
int m_br, m_bg, m_bb;
// Average input color.
float3 m_avg_color;
// Largest (max - min) spread over the R, G and B channels of the input pixels.
int m_max_comp_spread;
// Best candidate found so far.
etc1s_optimizer_potential_solution m_best_solution;
} etc1s_optimizer_state;
// Evaluates one candidate base color (coords.m_unscaled_color) against the input pixels.
// For each intensity table it forms the four block colors, assigns every pixel its closest
// color, and accumulates the (optionally weighted) error; the lowest-error table is recorded
// in *pTrial_solution.
//   pWeights       - optional per-pixel weights; when NULL every pixel's error counts once
//   pBest_solution - optional; overwritten with the trial when the trial's error is lower
// Returns true when *pBest_solution was replaced, false otherwise.
bool etc1s_optimizer_evaluate_solution(
etc1s_optimizer_state *pState,
const global encode_etc1s_param_struct *pParams,
uint64_t num_pixels, const global color_rgba *pPixels,
const global uint32_t *pWeights,
etc1s_optimizer_solution_coordinates coords,
etc1s_optimizer_potential_solution* pTrial_solution,
etc1s_optimizer_potential_solution* pBest_solution)
uint8_t temp_selectors[16];
pTrial_solution->m_valid = false;
const color_rgba base_color = get_scaled_color(coords.m_unscaled_color);
pTrial_solution->m_error = INT64_MAX;
for (uint32_t inten_table = 0; inten_table < cETC1IntenModifierValues; inten_table++)
// TODO: This check is equivalent to medium quality in the C++ version.
// NOTE(review): the statement controlled by this check is not visible in this view;
// presumably tables flagged 0 for the current component spread are skipped - confirm.
if (!g_eval_dist_tables[inten_table][pState->m_max_comp_spread])
constant int* pInten_table = g_etc1_inten_tables[inten_table];
// The four colors this table can produce from the base color.
color_rgba block_colors[4];
for (uint32_t s = 0; s < 4; s++)
int yd = pInten_table[s];
block_colors[s] = (color_rgba)(clamp255(base_color.x + yd), clamp255(base_color.y + yd), clamp255(base_color.z + yd), 255);
uint64_t total_error = 0;
for (uint64_t c = 0; c < num_pixels; c++)
color_rgba src_pixel = pPixels[c];
// The selector values recorded below (3, 2, 0, 1 for block colors 0..3) are the same
// values as g_selector_index_to_etc1, i.e. raw ETC1 selector codes.
uint32_t best_selector_index = 3;
uint32_t best_error = color_distance(pParams->m_perceptual, src_pixel, block_colors[0], false);
uint32_t trial_error = color_distance(pParams->m_perceptual, src_pixel, block_colors[1], false);
if (trial_error < best_error)
best_error = trial_error;
best_selector_index = 2;
trial_error = color_distance(pParams->m_perceptual, src_pixel, block_colors[2], false);
if (trial_error < best_error)
best_error = trial_error;
best_selector_index = 0;
trial_error = color_distance(pParams->m_perceptual, src_pixel, block_colors[3], false);
if (trial_error < best_error)
best_error = trial_error;
best_selector_index = 1;
// Selectors are only recorded when they fit the 16-entry scratch array.
if (num_pixels <= 16)
temp_selectors[c] = (uint8_t)(best_selector_index);
total_error += pWeights ? (best_error * (uint64_t)pWeights[c]) : best_error;
// NOTE(review): the statement controlled by this check is not visible in this view;
// presumably the pixel loop stops early once this table cannot beat the trial - confirm.
if (total_error >= pTrial_solution->m_error)
// This table is the best so far for the candidate: record it.
if (total_error < pTrial_solution->m_error)
pTrial_solution->m_error = total_error;
pTrial_solution->m_coords.m_inten_table = inten_table;
if (num_pixels <= 16)
for (uint32_t i = 0; i < num_pixels; i++)
pTrial_solution->m_selectors[i] = temp_selectors[i];
pTrial_solution->m_valid = true;
pTrial_solution->m_coords.m_unscaled_color = coords.m_unscaled_color;
bool success = false;
if (pBest_solution)
if (pTrial_solution->m_error < pBest_solution->m_error)
*pBest_solution = *pTrial_solution;
success = true;
return success;
// Prepares the optimizer state for a set of pixels: computes the (optionally weighted)
// average color, the largest per-channel spread, the initial 5-bit base color guess,
// and resets the best solution.
//   pState     - state to initialize
//   pParams    - encoder parameters (not read here)
//   num_pixels - number of entries in pPixels (and in pWeights when it is given)
//   pPixels    - input pixels
//   pWeights   - optional per-pixel weights; when NULL every pixel has weight 1
void etc1s_optimizer_init(
	etc1s_optimizer_state *pState,
	const global encode_etc1s_param_struct *pParams,
	uint64_t num_pixels, const global color_rgba *pPixels,
	const global uint32_t *pWeights)
{
	const int LIMIT = 31;

	color_rgba min_color = 255;
	color_rgba max_color = 0;
	uint64_t total_weight = 0;
	uint64_t sum_r = 0, sum_g = 0, sum_b = 0;

	for (uint64_t i = 0; i < num_pixels; i++)
	{
		const color_rgba c = pPixels[i];

		min_color = min(min_color, c);
		max_color = max(max_color, c);

		if (pWeights)
		{
			uint64_t weight = pWeights[i];
			sum_r += weight * c.x;
			sum_g += weight * c.y;
			sum_b += weight * c.z;
			total_weight += weight;
		}
		else
		{
			sum_r += c.x;
			sum_g += c.y;
			sum_b += c.z;
			// Unweighted input: each pixel counts once, so the divisor below is the pixel count.
			total_weight += 1;
		}
	}

	// Never divide by zero (no pixels, or all weights zero); the sums are zero in that case too.
	const uint64_t divisor = total_weight ? total_weight : 1;

	float3 avg_color;
	avg_color.x = (float)sum_r / divisor;
	avg_color.y = (float)sum_g / divisor;
	avg_color.z = (float)sum_b / divisor;

	pState->m_avg_color = avg_color;

	// The spread is later used as a table index, so keep it non-negative even for empty input
	// (where min_color stays at 255 and max_color at 0).
	const int spread = max(max((int)max_color.x - (int)min_color.x, (int)max_color.y - (int)min_color.y), (int)max_color.z - (int)min_color.z);
	pState->m_max_comp_spread = max(spread, 0);

	// TODO: The rounding here could be improved, like with DXT1/BC1.
	pState->m_br = clamp((int)(avg_color.x * (LIMIT / 255.0f) + .5f), 0, LIMIT);
	pState->m_bg = clamp((int)(avg_color.y * (LIMIT / 255.0f) + .5f), 0, LIMIT);
	pState->m_bb = clamp((int)(avg_color.z * (LIMIT / 255.0f) + .5f), 0, LIMIT);

	pState->m_best_solution.m_valid = false;
	pState->m_best_solution.m_error = UINT64_MAX;
}
// Refines the base color stored in pState->m_best_solution.
// First evaluates the initial guess (m_br, m_bg, m_bb). Then, for each entry of
// g_cluster_fit_order_tab, it assumes that distribution of selectors, computes the average
// color offset the current best intensity table would contribute, removes that offset from
// the average input color, and evaluates the resulting base color. Stops as soon as a
// zero-error solution is found.
//   total_perms_to_try - number of table entries to try; values beyond the table size
//                        are limited to the table size
void etc1s_optimizer_internal_cluster_fit(
	uint32_t total_perms_to_try,
	etc1s_optimizer_state *pState,
	const global encode_etc1s_param_struct *pParams,
	uint64_t num_pixels, const global color_rgba *pPixels,
	const global uint32_t *pWeights)
{
	const int LIMIT = 31;

	etc1s_optimizer_potential_solution trial_solution;

	etc1s_optimizer_solution_coordinates cur_coords;
	cur_coords.m_unscaled_color = (color_rgba)(pState->m_br, pState->m_bg, pState->m_bb, 255);

	etc1s_optimizer_evaluate_solution(pState, pParams, num_pixels, pPixels, pWeights, cur_coords, &trial_solution, &pState->m_best_solution);

	if (pState->m_best_solution.m_error == 0)
		return;

	// The count comes from host-supplied parameters; never index past the end of the table.
	const uint32_t perms_to_try = min(total_perms_to_try, (uint32_t)(BASISU_ETC1_CLUSTER_FIT_ORDER_TABLE_SIZE));

	for (uint32_t i = 0; i < perms_to_try; i++)
	{
		int delta_sum_r = 0, delta_sum_g = 0, delta_sum_b = 0;

		constant int *pInten_table = g_etc1_inten_tables[pState->m_best_solution.m_coords.m_inten_table];
		const color_rgba base_color = get_scaled_color(pState->m_best_solution.m_coords.m_unscaled_color);

		constant uint8_t *pNum_selectors = g_cluster_fit_order_tab[i].m_v;

		// Sum, per channel, of the (clamped) offsets this selector distribution would apply.
		for (uint32_t q = 0; q < 4; q++)
		{
			const int yd_temp = pInten_table[q];

			delta_sum_r += pNum_selectors[q] * (clamp(base_color.x + yd_temp, 0, 255) - base_color.x);
			delta_sum_g += pNum_selectors[q] * (clamp(base_color.y + yd_temp, 0, 255) - base_color.y);
			delta_sum_b += pNum_selectors[q] * (clamp(base_color.z + yd_temp, 0, 255) - base_color.z);
		}

		// No net offset: the resulting base color would not differ from the plain average.
		if ((!delta_sum_r) && (!delta_sum_g) && (!delta_sum_b))
			continue;

		const float avg_delta_r_f = (float)(delta_sum_r) / 8;
		const float avg_delta_g_f = (float)(delta_sum_g) / 8;
		const float avg_delta_b_f = (float)(delta_sum_b) / 8;

		const int br1 = clamp((int)((pState->m_avg_color.x - avg_delta_r_f) * (LIMIT / 255.0f) + .5f), 0, LIMIT);
		const int bg1 = clamp((int)((pState->m_avg_color.y - avg_delta_g_f) * (LIMIT / 255.0f) + .5f), 0, LIMIT);
		const int bb1 = clamp((int)((pState->m_avg_color.z - avg_delta_b_f) * (LIMIT / 255.0f) + .5f), 0, LIMIT);

		cur_coords.m_unscaled_color = (color_rgba)(br1, bg1, bb1, 255);

		etc1s_optimizer_evaluate_solution(pState, pParams, num_pixels, pPixels, pWeights, cur_coords, &trial_solution, &pState->m_best_solution);

		if (pState->m_best_solution.m_error == 0)
			break;
	}
}
// Encode an ETC1S block given a 4x4 pixel block.
// Each work-item handles the block at index get_global_id(0).
//   pParams        - encoder parameters; m_total_perms is read here, the pointer is also passed to the optimizer.
//   pInput_blocks  - input pixel blocks, indexed by block index.
//   pOutput_blocks - receives one etc_block per input block, at the same index.
// NOTE(review): block_index is not compared against pParams->m_total_blocks here; presumably the host
// launches exactly that many work-items - confirm.
kernel void encode_etc1s_blocks(
const global encode_etc1s_param_struct *pParams,
const global pixel_block *pInput_blocks,
global etc_block *pOutput_blocks)
const uint32_t block_index = get_global_id(0);
const global pixel_block *pInput_block = &pInput_blocks[block_index];
// Run the optimizer over the block's 16 pixels; NULL is passed for the weights.
etc1s_optimizer_state state;
etc1s_optimizer_init(&state, pParams, 16, pInput_block->m_pixels, NULL);
etc1s_optimizer_internal_cluster_fit(pParams->m_total_perms, &state, pParams, 16, pInput_block->m_pixels, NULL);
// Build the output block from the best solution: flip bit, color5, intensity table, then selectors.
// NOTE(review): blk has no initializer; whether the setters below write every byte of it is not visible here - confirm.
etc_block blk;
etc_block_set_flip_bit(&blk, true);
etc_block_set_block_color5_etc1s(&blk, state.m_best_solution.m_coords.m_unscaled_color);
etc_block_set_inten_tables_etc1s(&blk, state.m_best_solution.m_coords.m_inten_table);
etc_block_pack_raw_selectors(&blk, state.m_best_solution.m_selectors);
pOutput_blocks[block_index] = blk;
// Describes one cluster of pixels for encode_etc1s_from_pixel_cluster().
// NOTE(review): packed; presumably mirrors a host-side struct, so field order and sizes must stay in sync - confirm.
typedef struct __attribute__ ((packed)) pixel_cluster_tag
uint64_t m_total_pixels; // number of pixels in the cluster
uint64_t m_first_pixel_index; // offset of the cluster's first entry in both the pixel array and the weight array
} pixel_cluster;
// Determine the optimal ETC1S color5/intensity given an arbitrarily large array of 4x4 input pixel blocks.
// Each work-item handles the cluster at index get_global_id(0).
//   pParams               - encoder parameters; m_total_perms is read here, the pointer is also passed to the optimizer.
//   pInput_pixel_clusters - per-cluster pixel count and first-pixel offset.
//   pInput_pixels         - pixel array; a cluster's pixels start at its m_first_pixel_index.
//   pInput_weights        - weight array, offset by the same m_first_pixel_index as the pixels.
//   pOutput_blocks        - receives one etc_block per cluster, at the cluster's index.
kernel void encode_etc1s_from_pixel_cluster(
const global encode_etc1s_param_struct *pParams,
const global pixel_cluster *pInput_pixel_clusters,
const global color_rgba *pInput_pixels,
const global uint32_t *pInput_weights,
global etc_block *pOutput_blocks)
const uint32_t cluster_index = get_global_id(0);
const global pixel_cluster *pInput_cluster = &pInput_pixel_clusters[cluster_index];
uint64_t total_pixels = pInput_cluster->m_total_pixels;
// Pixels and weights share the same starting offset.
const global color_rgba *pPixels = pInput_pixels + pInput_cluster->m_first_pixel_index;
const global uint32_t *pWeights = pInput_weights + pInput_cluster->m_first_pixel_index;
etc1s_optimizer_state state;
etc1s_optimizer_init(&state, pParams, total_pixels, pPixels, pWeights);
etc1s_optimizer_internal_cluster_fit(pParams->m_total_perms, &state, pParams, total_pixels, pPixels, pWeights);
// Only the flip bit, color5 and intensity table are set; no selectors are packed into blk in this kernel.
// NOTE(review): blk has no initializer, so any bytes the setters below do not write are unspecified - confirm callers ignore them.
etc_block blk;
etc_block_set_flip_bit(&blk, true);
etc_block_set_block_color5_etc1s(&blk, state.m_best_solution.m_coords.m_unscaled_color);
etc_block_set_inten_tables_etc1s(&blk, state.m_best_solution.m_coords.m_inten_table);
pOutput_blocks[cluster_index] = blk;
// ---- refine_endpoint_clusterization
// Per-block input for refine_endpoint_clusterization().
// NOTE(review): packed; presumably mirrors a host-side struct, so field order and sizes must stay in sync - confirm.
typedef struct __attribute__ ((packed)) rec_block_struct_tag
uint16_t m_first_cluster_ofs; // index of the block's first candidate in the endpoint cluster array
uint16_t m_num_clusters; // number of consecutive candidates to evaluate
uint16_t m_cur_cluster_index; // compared against a candidate's m_cluster_index to break error ties
uint8_t m_cur_cluster_etc_inten; // compared against each candidate's m_etc_inten
} rec_block_struct;
// One candidate endpoint cluster for refine_endpoint_clusterization().
// NOTE(review): packed; presumably mirrors a host-side struct, so field order and sizes must stay in sync - confirm.
typedef struct __attribute__ ((packed)) rec_endpoint_cluster_struct_tag
color_rgba m_unscaled_color; // passed to get_block_colors5() together with m_etc_inten
uint8_t m_etc_inten; // intensity value passed to get_block_colors5()
uint16_t m_cluster_index; // value written to the output when this candidate is chosen
} rec_endpoint_cluster_struct;
// Parameters for refine_endpoint_clusterization(), passed to the kernel by value.
typedef struct __attribute__ ((packed)) rec_param_struct_tag
uint32_t m_total_blocks; // not read by the kernel itself
int m_perceptual; // passed as the first argument of color_distance()
} rec_param_struct;
// For each input block: find the best endpoint cluster that encodes it.
// Each work-item takes get_global_id(0) as an index into pSorted_block_indices to find its block.
//   params                - by-value parameters; only m_perceptual is read.
//   pInput_blocks         - input pixel blocks, indexed by block index.
//   pInput_block_info     - per-block candidate range plus the block's current cluster index/intensity.
//   pInput_clusters       - candidate endpoint clusters.
//   pSorted_block_indices - maps the work-item index to a block index.
//   pOutput_indices       - receives the chosen m_cluster_index, written at the block index (not the sorted index).
kernel void refine_endpoint_clusterization(
const rec_param_struct params,
const global pixel_block *pInput_blocks,
const global rec_block_struct *pInput_block_info,
const global rec_endpoint_cluster_struct *pInput_clusters,
const global uint32_t *pSorted_block_indices,
global uint32_t *pOutput_indices)
const uint32_t sorted_block_index = get_global_id(0);
const uint32_t block_index = pSorted_block_indices[sorted_block_index];
const int perceptual = params.m_perceptual;
const global pixel_block *pInput_block = &pInput_blocks[block_index];
// Copy the block out of global memory into a function-scope variable before the candidate loop.
pixel_block priv_pixel_block;
priv_pixel_block = *pInput_block;
const uint32_t first_cluster_ofs = pInput_block_info[block_index].m_first_cluster_ofs;
const uint32_t num_clusters = pInput_block_info[block_index].m_num_clusters;
const uint32_t cur_block_cluster_index = pInput_block_info[block_index].m_cur_cluster_index;
const uint32_t cur_block_cluster_etc_inten = pInput_block_info[block_index].m_cur_cluster_etc_inten;
// Best total error so far and the cluster index that produced it; the index starts at 0.
uint64_t overall_best_err = UINT64_MAX;
uint32_t best_cluster_index = 0;
for (uint32_t i = 0; i < num_clusters; i++)
const uint32_t cluster_index = first_cluster_ofs + i;
color_rgba unscaled_color = pInput_clusters[cluster_index].m_unscaled_color;
const uint8_t etc_inten = pInput_clusters[cluster_index].m_etc_inten;
const uint16_t orig_cluster_index = pInput_clusters[cluster_index].m_cluster_index;
// Candidates whose intensity exceeds the block's current cluster intensity are singled out here.
if (etc_inten > cur_block_cluster_etc_inten)
// Derive the candidate's four block colors from its unscaled color and intensity.
color_rgba block_colors[4];
get_block_colors5(block_colors, &unscaled_color, etc_inten, false);
// Sum, over the 16 pixels, the smallest color_distance() to any of the four block colors (alpha flag is false).
uint64_t total_error = 0;
for (uint32_t c = 0; c < 16; c++)
color_rgba src_pixel = priv_pixel_block.m_pixels[c];
uint32_t best_error = color_distance(perceptual, src_pixel, block_colors[0], false);
uint32_t trial_error = color_distance(perceptual, src_pixel, block_colors[1], false);
if (trial_error < best_error)
best_error = trial_error;
trial_error = color_distance(perceptual, src_pixel, block_colors[2], false);
if (trial_error < best_error)
best_error = trial_error;
trial_error = color_distance(perceptual, src_pixel, block_colors[3], false);
if (trial_error < best_error)
best_error = trial_error;
total_error += best_error;
// Accept a strictly lower error, or an equal error when the candidate is the block's current cluster.
if ( (total_error < overall_best_err) ||
((orig_cluster_index == cur_block_cluster_index) && (total_error == overall_best_err))
overall_best_err = total_error;
best_cluster_index = orig_cluster_index;
// Zero-error check on the best total so far.
if (!overall_best_err)
pOutput_indices[block_index] = best_cluster_index;
// ---- find_optimal_selector_clusters_for_each_block
// One candidate selector pattern for find_optimal_selector_clusters_for_each_block().
// The kernel reads pixel i's selector from bits 2*i and 2*i+1 of m_packed_selectors.
typedef struct __attribute__ ((packed)) fosc_selector_struct_tag
uint32_t m_packed_selectors; // 4x4 grid of 2-bit selectors
} fosc_selector_struct;
// Per-block input for find_optimal_selector_clusters_for_each_block().
// m_first_selector is also used as the base offset into the kernel's selector cluster index array.
typedef struct __attribute__ ((packed)) fosc_block_struct_tag
color_rgba m_etc_color5_inten; // unscaled 5-bit block color in RGB, alpha has block's intensity index
uint32_t m_first_selector; // offset into selector table
uint32_t m_num_selectors; // number of selectors to check
} fosc_block_struct;
// Parameters for find_optimal_selector_clusters_for_each_block(), passed to the kernel by value.
typedef struct __attribute__ ((packed)) fosc_param_struct_tag
uint32_t m_total_blocks; // not read by the kernel itself
int m_perceptual; // selects which of the two error-table loops the kernel runs
} fosc_param_struct;
// For each input block: Find the quantized selector which results in the lowest error.
// Each work-item handles the block at index get_global_id(0).
//   params                           - by-value parameters; only m_perceptual is read.
//   pInput_blocks                    - input pixel blocks, indexed by block index.
//   pInput_block_info                - per-block color5/intensity and the range of candidate selectors.
//   pInput_selectors                 - table of packed candidate selector patterns.
//   pSelector_cluster_indices        - indexed by (m_first_selector + best candidate) to produce the output value.
//   pOutput_selector_cluster_indices - receives one value per block, at the block index.
kernel void find_optimal_selector_clusters_for_each_block(
const fosc_param_struct params,
const global pixel_block *pInput_blocks,
const global fosc_block_struct *pInput_block_info,
const global fosc_selector_struct *pInput_selectors,
const global uint32_t *pSelector_cluster_indices,
global uint32_t *pOutput_selector_cluster_indices)
const uint32_t block_index = get_global_id(0);
const global color_rgba *pBlock_pixels = pInput_blocks[block_index].m_pixels;
const global fosc_block_struct *pBlock_info = &pInput_block_info[block_index];
const global fosc_selector_struct *pSelectors = &pInput_selectors[pBlock_info->m_first_selector];
const uint32_t num_selectors = pBlock_info->m_num_selectors;
// Derive the block's four colors; the intensity index comes from the alpha component.
color_rgba trial_block_colors[4];
color_rgba etc_color5_inten = pBlock_info->m_etc_color5_inten;
get_block_colors5(trial_block_colors, &etc_color5_inten, etc_color5_inten.w, false);
// trial_errors[sel][i] is the error of pixel i when it is assigned block color sel.
// The two loop nests differ only in the literal true/false passed to color_distance().
uint32_t trial_errors[4][16];
if (params.m_perceptual)
for (uint32_t sel = 0; sel < 4; ++sel)
for (uint32_t i = 0; i < 16; ++i)
trial_errors[sel][i] = color_distance(true, pBlock_pixels[i], trial_block_colors[sel], false);
for (uint32_t sel = 0; sel < 4; ++sel)
for (uint32_t i = 0; i < 16; ++i)
trial_errors[sel][i] = color_distance(false, pBlock_pixels[i], trial_block_colors[sel], false);
// Score every candidate pattern by table lookup; the strict '<' keeps the earliest candidate on ties.
uint64_t best_err = UINT64_MAX;
uint32_t best_index = 0;
for (uint32_t sel_index = 0; sel_index < num_selectors; sel_index++)
uint32_t sels = pSelectors[sel_index].m_packed_selectors;
uint64_t total_err = 0;
// Consume two selector bits per pixel, lowest bits first.
for (uint32_t i = 0; i < 16; i++, sels >>= 2)
total_err += trial_errors[sels & 3][i];
if (total_err < best_err)
best_err = total_err;
best_index = sel_index;
// Zero-error check on the best total so far.
if (!best_err)
// best_index is relative to m_first_selector, so add the base offset back before the lookup.
pOutput_selector_cluster_indices[block_index] = pSelector_cluster_indices[pBlock_info->m_first_selector + best_index];
// ---- determine_selectors
// Parameters for determine_selectors(), passed to the kernel by value.
typedef struct __attribute__ ((packed)) ds_param_struct_tag
uint32_t m_total_blocks; // not read by the kernel itself
int m_perceptual; // passed as the first argument of color_distance()
} ds_param_struct;
// For each input block: Determine the ETC1S selectors that result in the lowest error, given each block's predetermined ETC1S color5/intensities.
// Each work-item handles the block at index get_global_id(0).
//   params                      - by-value parameters; only m_perceptual is read.
//   pInput_blocks               - input pixel blocks, indexed by block index.
//   pInput_etc_color5_and_inten - per-block color; RGB is the color5 value and .w is used as the intensity table index.
//   pOutput_blocks              - receives one etc_block per input block, at the same index.
kernel void determine_selectors(
const ds_param_struct params,
const global pixel_block *pInput_blocks,
const global color_rgba *pInput_etc_color5_and_inten,
global etc_block *pOutput_blocks)
const uint32_t block_index = get_global_id(0);
const global color_rgba *pBlock_pixels = pInput_blocks[block_index].m_pixels;
color_rgba etc_color5_inten = pInput_etc_color5_and_inten[block_index];
// Derive the block's four colors from its color5 and intensity index.
color_rgba block_colors[4];
get_block_colors5(block_colors, &etc_color5_inten, etc_color5_inten.w, false);
// NOTE(review): output_block has no initializer; whether the setters below write every byte is not visible here - confirm.
etc_block output_block;
etc_block_set_flip_bit(&output_block, true);
etc_block_set_block_color5_etc1s(&output_block, etc_color5_inten);
etc_block_set_inten_tables_etc1s(&output_block, etc_color5_inten.w);
for (uint32_t i = 0; i < 16; i++)
color_rgba pixel_color = pBlock_pixels[i];
// Error of this pixel against each of the four block colors (alpha flag is false).
uint err0 = color_distance(params.m_perceptual, pixel_color, block_colors[0], false);
uint err1 = color_distance(params.m_perceptual, pixel_color, block_colors[1], false);
uint err2 = color_distance(params.m_perceptual, pixel_color, block_colors[2], false);
uint err3 = color_distance(params.m_perceptual, pixel_color, block_colors[3], false);
uint best_err = min(min(min(err0, err1), err2), err3);
// err0 is tested last and so overrides the others: on ties the lowest selector index wins.
uint32_t best_sel = (best_err == err2) ? 2 : 3;
best_sel = (best_err == err1) ? 1 : best_sel;
best_sel = (best_err == err0) ? 0 : best_sel;
// With m_pixels laid out as [y*4+x], (i & 3, i >> 2) is the pixel's (x, y).
etc_block_set_selector(&output_block, i & 3, i >> 2, best_sel);
pOutput_blocks[block_index] = output_block;