Merge pull request #136 from aaronfranke/travis
Add a formatting script for Travis CI
diff --git a/basisu_enc.cpp b/basisu_enc.cpp
index 57aac65..0c5f883 100644
--- a/basisu_enc.cpp
+++ b/basisu_enc.cpp
@@ -678,7 +678,7 @@
if ((s >= num_syms) || (A[r].m_key < A[s].m_key))
{
A[next].m_key = A[r].m_key;
- A[r].m_key = static_cast<uint16_t>(next);
+ A[r].m_key = next;
++r;
}
else
@@ -689,13 +689,13 @@
if ((s >= num_syms) || ((r < next) && A[r].m_key < A[s].m_key))
{
- A[next].m_key = static_cast<uint16_t>(A[next].m_key + A[r].m_key);
- A[r].m_key = static_cast<uint16_t>(next);
+ A[next].m_key = A[next].m_key + A[r].m_key;
+ A[r].m_key = next;
++r;
}
else
{
- A[next].m_key = static_cast<uint16_t>(A[next].m_key + A[s].m_key);
+ A[next].m_key = A[next].m_key + A[s].m_key;
++s;
}
}
@@ -715,7 +715,7 @@
;
for ( ; num_avail > num_used; --next, --num_avail)
- A[next].m_key = static_cast<uint16_t>(depth);
+ A[next].m_key = depth;
num_avail = 2 * num_used;
num_used = 0;
@@ -763,6 +763,10 @@
for (i = 0; i < num_syms; i++)
{
uint32_t freq = pSyms0[i].m_key;
+
+	// Input frequencies must already have been scaled to fit in 16 bits.
+ assert(freq <= UINT16_MAX);
+
hist[freq & 0xFF]++;
hist[256 + ((freq >> 8) & 0xFF)]++;
}
@@ -884,8 +888,13 @@
else
{
for (uint32_t i = 0; i < num_syms; i++)
+ {
if (pSym_freq[i])
- sym_freq[i] = static_cast<uint16_t>(maximum<uint32_t>((pSym_freq[i] * 65534U + (max_freq >> 1)) / max_freq, 1));
+ {
+ uint32_t f = static_cast<uint32_t>((static_cast<uint64_t>(pSym_freq[i]) * 65534U + (max_freq >> 1)) / max_freq);
+ sym_freq[i] = static_cast<uint16_t>(clamp<uint32_t>(f, 1, 65534));
+ }
+ }
}
return init(num_syms, &sym_freq[0], max_code_size);
diff --git a/basisu_enc.h b/basisu_enc.h
index b1abba0..80a8074 100644
--- a/basisu_enc.h
+++ b/basisu_enc.h
@@ -1927,7 +1927,8 @@
struct sym_freq
{
- uint16_t m_key, m_sym_index;
+ uint32_t m_key;
+ uint16_t m_sym_index;
};
sym_freq *canonical_huffman_radix_sort_syms(uint32_t num_syms, sym_freq *pSyms0, sym_freq *pSyms1);
@@ -2008,7 +2009,7 @@
{
if (m_bit_buffer_size)
{
- m_total_bits += 8;
+ m_total_bits += 8 - (m_bit_buffer_size & 7);
append_byte(static_cast<uint8_t>(m_bit_buffer));
m_bit_buffer = 0;
diff --git a/basisu_gpu_texture.cpp b/basisu_gpu_texture.cpp
index 8c92516..a9e3d92 100644
--- a/basisu_gpu_texture.cpp
+++ b/basisu_gpu_texture.cpp
@@ -95,19 +95,18 @@
bc1_block::unpack_color(l, r0, g0, b0);
bc1_block::unpack_color(h, r1, g1, b1);
+ c[0].set_noclamp_rgba(r0, g0, b0, 255);
+ c[1].set_noclamp_rgba(r1, g1, b1, 255);
+
bool used_punchthrough = false;
if (l > h)
{
- c[0].set_noclamp_rgba(r0, g0, b0, 255);
- c[1].set_noclamp_rgba(r1, g1, b1, 255);
c[2].set_noclamp_rgba((r0 * 2 + r1) / 3, (g0 * 2 + g1) / 3, (b0 * 2 + b1) / 3, 255);
c[3].set_noclamp_rgba((r1 * 2 + r0) / 3, (g1 * 2 + g0) / 3, (b1 * 2 + b0) / 3, 255);
}
else
{
- c[0].set_noclamp_rgba(r0, g0, b0, 255);
- c[1].set_noclamp_rgba(r1, g1, b1, 255);
c[2].set_noclamp_rgba((r0 + r1) / 2, (g0 + g1) / 2, (b0 + b1) / 2, 255);
c[3].set_noclamp_rgba(0, 0, 0, 0);
used_punchthrough = true;
@@ -137,6 +136,142 @@
return used_punchthrough;
}
+ bool unpack_bc1_nv(const void *pBlock_bits, color_rgba *pPixels, bool set_alpha)
+ {
+ static_assert(sizeof(bc1_block) == 8, "sizeof(bc1_block) == 8");
+
+ const bc1_block *pBlock = static_cast<const bc1_block *>(pBlock_bits);
+
+ const uint32_t l = pBlock->get_low_color();
+ const uint32_t h = pBlock->get_high_color();
+
+ color_rgba c[4];
+
+ int r0 = (l >> 11) & 31;
+ int g0 = (l >> 5) & 63;
+ int b0 = l & 31;
+ int r1 = (h >> 11) & 31;
+ int g1 = (h >> 5) & 63;
+ int b1 = h & 31;
+
+ c[0].b = (uint8_t)((3 * b0 * 22) / 8);
+ c[0].g = (uint8_t)((g0 << 2) | (g0 >> 4));
+ c[0].r = (uint8_t)((3 * r0 * 22) / 8);
+ c[0].a = 0xFF;
+
+ c[1].r = (uint8_t)((3 * r1 * 22) / 8);
+ c[1].g = (uint8_t)((g1 << 2) | (g1 >> 4));
+ c[1].b = (uint8_t)((3 * b1 * 22) / 8);
+ c[1].a = 0xFF;
+
+ int gdiff = c[1].g - c[0].g;
+
+ bool used_punchthrough = false;
+
+ if (l > h)
+ {
+ c[2].r = (uint8_t)(((2 * r0 + r1) * 22) / 8);
+ c[2].g = (uint8_t)(((256 * c[0].g + gdiff/4 + 128 + gdiff * 80) / 256));
+ c[2].b = (uint8_t)(((2 * b0 + b1) * 22) / 8);
+ c[2].a = 0xFF;
+
+ c[3].r = (uint8_t)(((2 * r1 + r0) * 22) / 8);
+ c[3].g = (uint8_t)((256 * c[1].g - gdiff/4 + 128 - gdiff * 80) / 256);
+ c[3].b = (uint8_t)(((2 * b1 + b0) * 22) / 8);
+ c[3].a = 0xFF;
+ }
+ else
+ {
+ c[2].r = (uint8_t)(((r0 + r1) * 33) / 8);
+ c[2].g = (uint8_t)((256 * c[0].g + gdiff/4 + 128 + gdiff * 128) / 256);
+ c[2].b = (uint8_t)(((b0 + b1) * 33) / 8);
+ c[2].a = 0xFF;
+
+ c[3].set_noclamp_rgba(0, 0, 0, 0);
+ used_punchthrough = true;
+ }
+
+ if (set_alpha)
+ {
+ for (uint32_t y = 0; y < 4; y++, pPixels += 4)
+ {
+ pPixels[0] = c[pBlock->get_selector(0, y)];
+ pPixels[1] = c[pBlock->get_selector(1, y)];
+ pPixels[2] = c[pBlock->get_selector(2, y)];
+ pPixels[3] = c[pBlock->get_selector(3, y)];
+ }
+ }
+ else
+ {
+ for (uint32_t y = 0; y < 4; y++, pPixels += 4)
+ {
+ pPixels[0].set_rgb(c[pBlock->get_selector(0, y)]);
+ pPixels[1].set_rgb(c[pBlock->get_selector(1, y)]);
+ pPixels[2].set_rgb(c[pBlock->get_selector(2, y)]);
+ pPixels[3].set_rgb(c[pBlock->get_selector(3, y)]);
+ }
+ }
+
+ return used_punchthrough;
+ }
+
+ static inline int interp_5_6_amd(int c0, int c1) { assert(c0 < 256 && c1 < 256); return (c0 * 43 + c1 * 21 + 32) >> 6; }
+ static inline int interp_half_5_6_amd(int c0, int c1) { assert(c0 < 256 && c1 < 256); return (c0 + c1 + 1) >> 1; }
+
+ bool unpack_bc1_amd(const void *pBlock_bits, color_rgba *pPixels, bool set_alpha)
+ {
+ const bc1_block *pBlock = static_cast<const bc1_block *>(pBlock_bits);
+
+ const uint32_t l = pBlock->get_low_color();
+ const uint32_t h = pBlock->get_high_color();
+
+ color_rgba c[4];
+
+ uint32_t r0, g0, b0, r1, g1, b1;
+ bc1_block::unpack_color(l, r0, g0, b0);
+ bc1_block::unpack_color(h, r1, g1, b1);
+
+ c[0].set_noclamp_rgba(r0, g0, b0, 255);
+ c[1].set_noclamp_rgba(r1, g1, b1, 255);
+
+ bool used_punchthrough = false;
+
+ if (l > h)
+ {
+ c[2].set_noclamp_rgba(interp_5_6_amd(r0, r1), interp_5_6_amd(g0, g1), interp_5_6_amd(b0, b1), 255);
+ c[3].set_noclamp_rgba(interp_5_6_amd(r1, r0), interp_5_6_amd(g1, g0), interp_5_6_amd(b1, b0), 255);
+ }
+ else
+ {
+ c[2].set_noclamp_rgba(interp_half_5_6_amd(r0, r1), interp_half_5_6_amd(g0, g1), interp_half_5_6_amd(b0, b1), 255);
+ c[3].set_noclamp_rgba(0, 0, 0, 0);
+ used_punchthrough = true;
+ }
+
+ if (set_alpha)
+ {
+ for (uint32_t y = 0; y < 4; y++, pPixels += 4)
+ {
+ pPixels[0] = c[pBlock->get_selector(0, y)];
+ pPixels[1] = c[pBlock->get_selector(1, y)];
+ pPixels[2] = c[pBlock->get_selector(2, y)];
+ pPixels[3] = c[pBlock->get_selector(3, y)];
+ }
+ }
+ else
+ {
+ for (uint32_t y = 0; y < 4; y++, pPixels += 4)
+ {
+ pPixels[0].set_rgb(c[pBlock->get_selector(0, y)]);
+ pPixels[1].set_rgb(c[pBlock->get_selector(1, y)]);
+ pPixels[2].set_rgb(c[pBlock->get_selector(2, y)]);
+ pPixels[3].set_rgb(c[pBlock->get_selector(3, y)]);
+ }
+ }
+
+ return used_punchthrough;
+ }
+
struct bc4_block
{
enum { cBC4SelectorBits = 3, cTotalSelectorBytes = 6, cMaxSelectorValues = 8 };
@@ -964,6 +1099,16 @@
unpack_bc1(pBlock, pPixels, true);
break;
}
+ case texture_format::cBC1_NV:
+ {
+ unpack_bc1_nv(pBlock, pPixels, true);
+ break;
+ }
+ case texture_format::cBC1_AMD:
+ {
+ unpack_bc1_amd(pBlock, pPixels, true);
+ break;
+ }
case texture_format::cBC3:
{
return unpack_bc3(pBlock, pPixels);
@@ -1234,6 +1379,8 @@
switch (fmt)
{
case texture_format::cBC1:
+ case texture_format::cBC1_NV:
+ case texture_format::cBC1_AMD:
{
internal_fmt = KTX_COMPRESSED_RGB_S3TC_DXT1_EXT;
break;
diff --git a/transcoder/basisu.h b/transcoder/basisu.h
index c4d5bfc..6e6f46d 100644
--- a/transcoder/basisu.h
+++ b/transcoder/basisu.h
@@ -88,7 +88,7 @@
#define BASISU_ARRAY_SIZE(x) (sizeof(x) / sizeof(x[0]))
#define BASISU_NO_EQUALS_OR_COPY_CONSTRUCT(x) x(const x &) = delete; x& operator= (const x &) = delete;
#define BASISU_ASSUME(x) static_assert(x, #x);
-#define BASISU_OFFSETOF(s, m) (uint32_t)(intptr_t)(&((s *)(0))->m)
+#define BASISU_OFFSETOF(s, m) offsetof(s, m)
#define BASISU_STRINGIZE(x) #x
#define BASISU_STRINGIZE2(x) BASISU_STRINGIZE(x)
@@ -293,7 +293,7 @@
enum
{
cHuffmanMaxSupportedCodeSize = 16, cHuffmanMaxSupportedInternalCodeSize = 31,
- cHuffmanFastLookupBits = 10, cHuffmanFastLookupSize = 1 << cHuffmanFastLookupBits,
+ cHuffmanFastLookupBits = 10,
cHuffmanMaxSymsLog2 = 14, cHuffmanMaxSyms = 1 << cHuffmanMaxSymsLog2,
// Small zero runs
@@ -341,6 +341,8 @@
cETC2_R11_EAC,
cETC2_RG11_EAC,
cUASTC4x4,
+ cBC1_NV,
+ cBC1_AMD,
// Uncompressed/raw pixels
cRGBA32,
@@ -359,6 +361,8 @@
case texture_format::cETC2_RGB:
case texture_format::cETC2_ALPHA:
case texture_format::cBC1:
+ case texture_format::cBC1_NV:
+ case texture_format::cBC1_AMD:
case texture_format::cBC4:
case texture_format::cPVRTC1_4_RGB:
case texture_format::cPVRTC1_4_RGBA:
diff --git a/transcoder/basisu_transcoder.cpp b/transcoder/basisu_transcoder.cpp
index 1d6a46e..654ac82 100644
--- a/transcoder/basisu_transcoder.cpp
+++ b/transcoder/basisu_transcoder.cpp
@@ -17,17 +17,22 @@
#include <limits.h>
#include <vector>
-#ifndef IS_BIG_ENDIAN
+#ifndef BASISD_IS_BIG_ENDIAN
// TODO: This doesn't work on OSX. How can this be so difficult?
//#if defined(__BIG_ENDIAN__) || defined(_BIG_ENDIAN) || defined(BIG_ENDIAN)
-// #define IS_BIG_ENDIAN (1)
+// #define BASISD_IS_BIG_ENDIAN (1)
//#else
- #define IS_BIG_ENDIAN (0)
+ #define BASISD_IS_BIG_ENDIAN (0)
//#endif
#endif
-#ifndef USE_UNALIGNED_WORD_READS
-#define USE_UNALIGNED_WORD_READS (1)
+#ifndef BASISD_USE_UNALIGNED_WORD_READS
+ #ifdef __EMSCRIPTEN__
+ // Can't use unaligned loads/stores with WebAssembly.
+ #define BASISD_USE_UNALIGNED_WORD_READS (0)
+ #else
+ #define BASISD_USE_UNALIGNED_WORD_READS (1)
+ #endif
#endif
#define BASISD_SUPPORTED_BASIS_VERSION (0x13)
@@ -190,7 +195,7 @@
{
crc = ~crc;
- const uint8_t* p = reinterpret_cast<const uint8_t*>(r);
+ const uint8_t* p = static_cast<const uint8_t*>(r);
for (; size; --size)
{
const uint16_t q = *p++ ^ (crc >> 8);
@@ -8510,7 +8515,7 @@
for (uint32_t i = 0; i < 4; i++)
{
packed_colors[i] = static_cast<uint16_t>((mul_8(colors[i].r, 31) << 11) | (mul_8(colors[i].g, 63) << 5) | mul_8(colors[i].b, 31));
- if (IS_BIG_ENDIAN)
+ if (BASISD_IS_BIG_ENDIAN)
packed_colors[i] = byteswap_uint16(packed_colors[i]);
}
}
@@ -8519,7 +8524,7 @@
for (uint32_t i = 0; i < 4; i++)
{
packed_colors[i] = static_cast<uint16_t>((mul_8(colors[i].b, 31) << 11) | (mul_8(colors[i].g, 63) << 5) | mul_8(colors[i].r, 31));
- if (IS_BIG_ENDIAN)
+ if (BASISD_IS_BIG_ENDIAN)
packed_colors[i] = byteswap_uint16(packed_colors[i]);
}
}
@@ -8560,12 +8565,12 @@
for (uint32_t x = 0; x < max_x; x++)
{
uint16_t cur = reinterpret_cast<uint16_t*>(pDst_pixels)[x];
- if (IS_BIG_ENDIAN)
+ if (BASISD_IS_BIG_ENDIAN)
cur = byteswap_uint16(cur);
cur = (cur & 0xF) | packed_colors[(s >> (x * 2)) & 3];
- if (IS_BIG_ENDIAN)
+ if (BASISD_IS_BIG_ENDIAN)
cur = byteswap_uint16(cur);
reinterpret_cast<uint16_t*>(pDst_pixels)[x] = cur;
@@ -8591,7 +8596,7 @@
for (uint32_t i = 0; i < 4; i++)
{
packed_colors[i] = static_cast<uint16_t>((mul_8(colors[i].r, 15) << 12) | (mul_8(colors[i].g, 15) << 8) | (mul_8(colors[i].b, 15) << 4) | 0xF);
- if (IS_BIG_ENDIAN)
+ if (BASISD_IS_BIG_ENDIAN)
packed_colors[i] = byteswap_uint16(packed_colors[i]);
}
@@ -8622,7 +8627,7 @@
for (uint32_t i = 0; i < 4; i++)
{
packed_colors[i] = mul_8(colors[i].g, 15);
- if (IS_BIG_ENDIAN)
+ if (BASISD_IS_BIG_ENDIAN)
packed_colors[i] = byteswap_uint16(packed_colors[i]);
}
@@ -11795,7 +11800,7 @@
if (!codesize)
return 0;
- if ((IS_BIG_ENDIAN) || (!USE_UNALIGNED_WORD_READS) || (bit_offset >= 112))
+ if ((BASISD_IS_BIG_ENDIAN) || (!BASISD_USE_UNALIGNED_WORD_READS) || (bit_offset >= 112))
{
const uint8_t* pBytes = &pBuf[bit_offset >> 3U];
@@ -11849,7 +11854,7 @@
return 0;
assert(bit_offset < 112);
- if ((IS_BIG_ENDIAN) || (!USE_UNALIGNED_WORD_READS))
+ if ((BASISD_IS_BIG_ENDIAN) || (!BASISD_USE_UNALIGNED_WORD_READS))
{
const uint8_t* pBytes = &pBuf[bit_offset >> 3U];
@@ -12179,7 +12184,7 @@
uint64_t bits;
// Read the weight bits
- if ((IS_BIG_ENDIAN) || (!USE_UNALIGNED_WORD_READS))
+ if ((BASISD_IS_BIG_ENDIAN) || (!BASISD_USE_UNALIGNED_WORD_READS))
bits = read_bits64(blk.m_bytes, bit_ofs, std::min<int>(64, 128 - (int)bit_ofs));
else
{
diff --git a/transcoder/basisu_transcoder_internal.h b/transcoder/basisu_transcoder_internal.h
index dc234bd..80e43e6 100644
--- a/transcoder/basisu_transcoder_internal.h
+++ b/transcoder/basisu_transcoder_internal.h
@@ -122,7 +122,7 @@
basisu::clear_vector(m_tree);
}
- bool init(uint32_t total_syms, const uint8_t *pCode_sizes)
+ bool init(uint32_t total_syms, const uint8_t *pCode_sizes, uint32_t fast_lookup_bits = basisu::cHuffmanFastLookupBits)
{
if (!total_syms)
{
@@ -133,8 +133,10 @@
m_code_sizes.resize(total_syms);
memcpy(&m_code_sizes[0], pCode_sizes, total_syms);
+ const uint32_t huffman_fast_lookup_size = 1 << fast_lookup_bits;
+
m_lookup.resize(0);
- m_lookup.resize(basisu::cHuffmanFastLookupSize);
+ m_lookup.resize(huffman_fast_lookup_size);
m_tree.resize(0);
m_tree.resize(total_syms * 2);
@@ -172,10 +174,10 @@
for (l = code_size; l > 0; l--, cur_code >>= 1)
rev_code = (rev_code << 1) | (cur_code & 1);
- if (code_size <= basisu::cHuffmanFastLookupBits)
+ if (code_size <= fast_lookup_bits)
{
uint32_t k = (code_size << 16) | sym_index;
- while (rev_code < basisu::cHuffmanFastLookupSize)
+ while (rev_code < huffman_fast_lookup_size)
{
if (m_lookup[rev_code] != 0)
{
@@ -190,9 +192,9 @@
}
int tree_cur;
- if (0 == (tree_cur = m_lookup[rev_code & (basisu::cHuffmanFastLookupSize - 1)]))
+ if (0 == (tree_cur = m_lookup[rev_code & (huffman_fast_lookup_size - 1)]))
{
- const uint32_t idx = rev_code & (basisu::cHuffmanFastLookupSize - 1);
+ const uint32_t idx = rev_code & (huffman_fast_lookup_size - 1);
if (m_lookup[idx] != 0)
{
// Supplied codesizes can't create a valid prefix code.
@@ -210,9 +212,9 @@
return false;
}
- rev_code >>= (basisu::cHuffmanFastLookupBits - 1);
+ rev_code >>= (fast_lookup_bits - 1);
- for (int j = code_size; j > (basisu::cHuffmanFastLookupBits + 1); j--)
+ for (int j = code_size; j > ((int)fast_lookup_bits + 1); j--)
{
tree_cur -= ((rev_code >>= 1) & 1);
@@ -260,6 +262,8 @@
}
const basisu::uint8_vec &get_code_sizes() const { return m_code_sizes; }
+ const basisu::int_vec get_lookup() const { return m_lookup; }
+ const basisu::int16_vec get_tree() const { return m_tree; }
bool is_valid() const { return m_code_sizes.size() > 0; }
@@ -436,9 +440,11 @@
return v;
}
- inline uint32_t decode_huffman(const huffman_decoding_table &ct)
+ inline uint32_t decode_huffman(const huffman_decoding_table &ct, int fast_lookup_bits = basisu::cHuffmanFastLookupBits)
{
assert(ct.m_code_sizes.size());
+
+ const uint32_t huffman_fast_lookup_size = 1 << fast_lookup_bits;
while (m_bit_buf_size < 16)
{
@@ -454,14 +460,14 @@
int code_len;
int sym;
- if ((sym = ct.m_lookup[m_bit_buf & (basisu::cHuffmanFastLookupSize - 1)]) >= 0)
+ if ((sym = ct.m_lookup[m_bit_buf & (huffman_fast_lookup_size - 1)]) >= 0)
{
code_len = sym >> 16;
sym &= 0xFFFF;
}
else
{
- code_len = basisu::cHuffmanFastLookupBits;
+ code_len = fast_lookup_bits;
do
{
sym = ct.m_tree[~sym + ((m_bit_buf >> code_len++) & 1)]; // ~sym = -sym - 1