Merge pull request #134 from cwoffenden/transcoder-xcode

Xcode 11 transcoder warnings removed
diff --git a/.travis.yml b/.travis.yml
new file mode 100644
index 0000000..7106fe5
--- /dev/null
+++ b/.travis.yml
@@ -0,0 +1,18 @@
+dist: bionic
+
+stages:
+  - build
+
+matrix:
+  include:
+    - name: Static checks (format.sh)
+      stage: build
+      os: linux
+      addons:
+        apt:
+          packages:
+            - dos2unix
+            - recode
+
+script:
+  - bash ./format.sh
diff --git a/basisu_enc.cpp b/basisu_enc.cpp
index 57aac65..0c5f883 100644
--- a/basisu_enc.cpp
+++ b/basisu_enc.cpp
@@ -678,7 +678,7 @@
 			if ((s >= num_syms) || (A[r].m_key < A[s].m_key))
 			{
 				A[next].m_key = A[r].m_key;
-				A[r].m_key = static_cast<uint16_t>(next);
+				A[r].m_key = next;
 				++r;
 			}
 			else
@@ -689,13 +689,13 @@
 
 			if ((s >= num_syms) || ((r < next) && A[r].m_key < A[s].m_key))
 			{
-				A[next].m_key = static_cast<uint16_t>(A[next].m_key + A[r].m_key);
-				A[r].m_key = static_cast<uint16_t>(next);
+				A[next].m_key = A[next].m_key + A[r].m_key;
+				A[r].m_key = next;
 				++r;
 			}
 			else
 			{
-				A[next].m_key = static_cast<uint16_t>(A[next].m_key + A[s].m_key);
+				A[next].m_key = A[next].m_key + A[s].m_key;
 				++s;
 			}
 		}
@@ -715,7 +715,7 @@
 				;
 
 			for ( ; num_avail > num_used; --next, --num_avail)
-				A[next].m_key = static_cast<uint16_t>(depth);
+				A[next].m_key = depth;
 
 			num_avail = 2 * num_used;
 			num_used = 0;
@@ -763,6 +763,10 @@
 		for (i = 0; i < num_syms; i++)
 		{
 			uint32_t freq = pSyms0[i].m_key;
+			
+			// We scale all input frequencies to 16-bits.
+			assert(freq <= UINT16_MAX);
+
 			hist[freq & 0xFF]++;
 			hist[256 + ((freq >> 8) & 0xFF)]++;
 		}
@@ -884,8 +888,13 @@
 		else
 		{
 			for (uint32_t i = 0; i < num_syms; i++)
+			{
 				if (pSym_freq[i])
-					sym_freq[i] = static_cast<uint16_t>(maximum<uint32_t>((pSym_freq[i] * 65534U + (max_freq >> 1)) / max_freq, 1));
+				{
+					uint32_t f = static_cast<uint32_t>((static_cast<uint64_t>(pSym_freq[i]) * 65534U + (max_freq >> 1)) / max_freq);
+					sym_freq[i] = static_cast<uint16_t>(clamp<uint32_t>(f, 1, 65534));
+				}
+			}
 		}
 
 		return init(num_syms, &sym_freq[0], max_code_size);
diff --git a/basisu_enc.h b/basisu_enc.h
index b1abba0..80a8074 100644
--- a/basisu_enc.h
+++ b/basisu_enc.h
@@ -1927,7 +1927,8 @@
 		
 	struct sym_freq
 	{
-		uint16_t m_key, m_sym_index;
+		uint32_t m_key;
+		uint16_t m_sym_index;
 	};
 
 	sym_freq *canonical_huffman_radix_sort_syms(uint32_t num_syms, sym_freq *pSyms0, sym_freq *pSyms1);
@@ -2008,7 +2009,7 @@
 		{
 			if (m_bit_buffer_size)
 			{
-				m_total_bits += 8;
+				m_total_bits += 8 - (m_bit_buffer_size & 7);
 				append_byte(static_cast<uint8_t>(m_bit_buffer));
 
 				m_bit_buffer = 0;
diff --git a/basisu_gpu_texture.cpp b/basisu_gpu_texture.cpp
index 8c92516..a9e3d92 100644
--- a/basisu_gpu_texture.cpp
+++ b/basisu_gpu_texture.cpp
@@ -95,19 +95,18 @@
 		bc1_block::unpack_color(l, r0, g0, b0);
 		bc1_block::unpack_color(h, r1, g1, b1);
 
+		c[0].set_noclamp_rgba(r0, g0, b0, 255);
+		c[1].set_noclamp_rgba(r1, g1, b1, 255);
+
 		bool used_punchthrough = false;
 
 		if (l > h)
 		{
-			c[0].set_noclamp_rgba(r0, g0, b0, 255);
-			c[1].set_noclamp_rgba(r1, g1, b1, 255);
 			c[2].set_noclamp_rgba((r0 * 2 + r1) / 3, (g0 * 2 + g1) / 3, (b0 * 2 + b1) / 3, 255);
 			c[3].set_noclamp_rgba((r1 * 2 + r0) / 3, (g1 * 2 + g0) / 3, (b1 * 2 + b0) / 3, 255);
 		}
 		else
 		{
-			c[0].set_noclamp_rgba(r0, g0, b0, 255);
-			c[1].set_noclamp_rgba(r1, g1, b1, 255);
 			c[2].set_noclamp_rgba((r0 + r1) / 2, (g0 + g1) / 2, (b0 + b1) / 2, 255);
 			c[3].set_noclamp_rgba(0, 0, 0, 0);
 			used_punchthrough = true;
@@ -137,6 +136,142 @@
 		return used_punchthrough;
 	}
 
+	bool unpack_bc1_nv(const void *pBlock_bits, color_rgba *pPixels, bool set_alpha)
+	{
+		static_assert(sizeof(bc1_block) == 8, "sizeof(bc1_block) == 8");
+		// Decodes the 8-byte BC1 block at pBlock_bits into 16 pixels at pPixels (4 rows of 4); the _nv suffix presumably means NVIDIA-style rounding - TODO confirm.
+		const bc1_block *pBlock = static_cast<const bc1_block *>(pBlock_bits);
+		// l and h are the block's two packed endpoint colors; comparing them below selects the 4-color or 3-color palette.
+		const uint32_t l = pBlock->get_low_color();
+		const uint32_t h = pBlock->get_high_color();
+
+		color_rgba c[4];
+		// Split each packed endpoint into its raw 5-bit red, 6-bit green and 5-bit blue fields (no expansion yet).
+		int r0 = (l >> 11) & 31;
+		int g0 = (l >> 5) & 63;
+		int b0 = l & 31;
+		int r1 = (h >> 11) & 31;
+		int g1 = (h >> 5) & 63;
+		int b1 = h & 31;
+		// Palette entry 0: red/blue expand the 5-bit value as (3 * v * 22) / 8; green shifts left by 2 and ORs in its top two bits.
+		c[0].b = (uint8_t)((3 * b0 * 22) / 8);
+		c[0].g = (uint8_t)((g0 << 2) | (g0 >> 4));
+		c[0].r = (uint8_t)((3 * r0 * 22) / 8);
+		c[0].a = 0xFF;
+		// Palette entry 1: same expansion applied to the second endpoint.
+		c[1].r = (uint8_t)((3 * r1 * 22) / 8);
+		c[1].g = (uint8_t)((g1 << 2) | (g1 >> 4));
+		c[1].b = (uint8_t)((3 * b1 * 22) / 8);
+		c[1].a = 0xFF;
+		// Green is interpolated from the already-expanded 8-bit endpoint values, so keep their signed difference.
+		int gdiff = c[1].g - c[0].g;
+
+		bool used_punchthrough = false;
+		// l > h: entries 2 and 3 are opaque blends weighted toward endpoint 0 and 1 respectively; otherwise entry 2 weights both endpoints equally and entry 3 is all zeros.
+		if (l > h)
+		{
+			c[2].r = (uint8_t)(((2 * r0 + r1) * 22) / 8);
+			c[2].g = (uint8_t)(((256 * c[0].g + gdiff/4 + 128 + gdiff * 80) / 256));
+			c[2].b = (uint8_t)(((2 * b0 + b1) * 22) / 8);
+			c[2].a = 0xFF;
+
+			c[3].r = (uint8_t)(((2 * r1 + r0) * 22) / 8);
+			c[3].g = (uint8_t)((256 * c[1].g - gdiff/4 + 128 - gdiff * 80) / 256);
+			c[3].b = (uint8_t)(((2 * b1 + b0) * 22) / 8);
+			c[3].a = 0xFF;
+		}
+		else
+		{
+			c[2].r = (uint8_t)(((r0 + r1) * 33) / 8);
+			c[2].g = (uint8_t)((256 * c[0].g + gdiff/4 + 128 + gdiff * 128) / 256);
+			c[2].b = (uint8_t)(((b0 + b1) * 33) / 8);
+			c[2].a = 0xFF;
+
+			c[3].set_noclamp_rgba(0, 0, 0, 0);
+			used_punchthrough = true;
+		}
+		// set_alpha: copy whole palette entries; otherwise only set_rgb() is applied (presumably leaving the destination alpha untouched - verify against color_rgba).
+		if (set_alpha)
+		{
+			for (uint32_t y = 0; y < 4; y++, pPixels += 4)
+			{
+				pPixels[0] = c[pBlock->get_selector(0, y)]; 
+				pPixels[1] = c[pBlock->get_selector(1, y)]; 
+				pPixels[2] = c[pBlock->get_selector(2, y)]; 
+				pPixels[3] = c[pBlock->get_selector(3, y)];
+			}
+		}
+		else
+		{
+			for (uint32_t y = 0; y < 4; y++, pPixels += 4)
+			{
+				pPixels[0].set_rgb(c[pBlock->get_selector(0, y)]); 
+				pPixels[1].set_rgb(c[pBlock->get_selector(1, y)]); 
+				pPixels[2].set_rgb(c[pBlock->get_selector(2, y)]); 
+				pPixels[3].set_rgb(c[pBlock->get_selector(3, y)]);
+			}
+		}
+		// True only when the l <= h branch ran, i.e. the palette contains the all-zero entry.
+		return used_punchthrough;
+	}
+
+	static inline int interp_5_6_amd(int c0, int c1) { assert(c0 < 256 && c1 < 256); return (c0 * 43 + c1 * 21 + 32) >> 6; } // (43*c0 + 21*c1 + 32) / 64: roughly 2/3 c0 + 1/3 c1, rounded
+	static inline int interp_half_5_6_amd(int c0, int c1) { assert(c0 < 256 && c1 < 256); return (c0 + c1 + 1) >> 1; } // average of c0 and c1, rounded up
+
+	bool unpack_bc1_amd(const void *pBlock_bits, color_rgba *pPixels, bool set_alpha)
+	{
+		const bc1_block *pBlock = static_cast<const bc1_block *>(pBlock_bits);
+		// Decodes the BC1 block at pBlock_bits into 16 pixels at pPixels (4 rows of 4); the _amd suffix presumably means AMD-style interpolation - TODO confirm.
+		const uint32_t l = pBlock->get_low_color();
+		const uint32_t h = pBlock->get_high_color();
+
+		color_rgba c[4];
+		// Both endpoints are unpacked by bc1_block::unpack_color and stored as palette entries 0 and 1 with alpha 255.
+		uint32_t r0, g0, b0, r1, g1, b1;
+		bc1_block::unpack_color(l, r0, g0, b0);
+		bc1_block::unpack_color(h, r1, g1, b1);
+
+		c[0].set_noclamp_rgba(r0, g0, b0, 255);
+		c[1].set_noclamp_rgba(r1, g1, b1, 255);
+		// l > h: entries 2 and 3 are the two weighted blends; otherwise entry 2 is the rounded average and entry 3 is all zeros.
+		bool used_punchthrough = false;
+
+		if (l > h)
+		{
+			c[2].set_noclamp_rgba(interp_5_6_amd(r0, r1), interp_5_6_amd(g0, g1), interp_5_6_amd(b0, b1), 255);
+			c[3].set_noclamp_rgba(interp_5_6_amd(r1, r0), interp_5_6_amd(g1, g0), interp_5_6_amd(b1, b0), 255);
+		}
+		else
+		{
+			c[2].set_noclamp_rgba(interp_half_5_6_amd(r0, r1), interp_half_5_6_amd(g0, g1), interp_half_5_6_amd(b0, b1), 255);
+			c[3].set_noclamp_rgba(0, 0, 0, 0);
+			used_punchthrough = true;
+		}
+		// set_alpha: copy whole palette entries; otherwise only set_rgb() is applied (presumably leaving the destination alpha untouched - verify against color_rgba).
+		if (set_alpha)
+		{
+			for (uint32_t y = 0; y < 4; y++, pPixels += 4)
+			{
+				pPixels[0] = c[pBlock->get_selector(0, y)]; 
+				pPixels[1] = c[pBlock->get_selector(1, y)]; 
+				pPixels[2] = c[pBlock->get_selector(2, y)]; 
+				pPixels[3] = c[pBlock->get_selector(3, y)];
+			}
+		}
+		else
+		{
+			for (uint32_t y = 0; y < 4; y++, pPixels += 4)
+			{
+				pPixels[0].set_rgb(c[pBlock->get_selector(0, y)]); 
+				pPixels[1].set_rgb(c[pBlock->get_selector(1, y)]); 
+				pPixels[2].set_rgb(c[pBlock->get_selector(2, y)]); 
+				pPixels[3].set_rgb(c[pBlock->get_selector(3, y)]);
+			}
+		}
+		// True only when the l <= h branch ran, i.e. the palette contains the all-zero entry.
+		return used_punchthrough;
+	}
+
 	struct bc4_block
 	{
 		enum { cBC4SelectorBits = 3, cTotalSelectorBytes = 6, cMaxSelectorValues = 8 };
@@ -964,6 +1099,16 @@
 			unpack_bc1(pBlock, pPixels, true);
 			break;
 		}
+		case texture_format::cBC1_NV:
+		{
+			unpack_bc1_nv(pBlock, pPixels, true);
+			break;
+		}
+		case texture_format::cBC1_AMD:
+		{
+			unpack_bc1_amd(pBlock, pPixels, true);
+			break;
+		}
 		case texture_format::cBC3:
 		{
 			return unpack_bc3(pBlock, pPixels);
@@ -1234,6 +1379,8 @@
 		switch (fmt)
 		{
 		case texture_format::cBC1:
+		case texture_format::cBC1_NV:
+		case texture_format::cBC1_AMD:
 		{
 			internal_fmt = KTX_COMPRESSED_RGB_S3TC_DXT1_EXT;
 			break;
diff --git a/contrib/previewers/lib/README.md b/contrib/previewers/lib/README.md
index 4b5a7b8..bbd2508 100644
--- a/contrib/previewers/lib/README.md
+++ b/contrib/previewers/lib/README.md
@@ -1,3 +1,3 @@
 # Single File Basis Universal Transcoder
 
-Header and implementation generated using the [single file transcoder](../../single_file_transcoder) post-process script. Unlike the examples in that subproject here the transcoder header was kept as a seperate file (using the `-k` option, see the [README](../../single_file_transcoder/README.md)).
\ No newline at end of file
+Header and implementation generated using the [single file transcoder](../../single_file_transcoder) post-process script. Unlike the examples in that subproject, here the transcoder header was kept as a separate file (using the `-k` option, see the [README](../../single_file_transcoder/README.md)).
diff --git a/contrib/single_file_transcoder/examples/testcard-rgba.basis.inc b/contrib/single_file_transcoder/examples/testcard-rgba.basis.inc
index 7529728..6b9df6e 100644
--- a/contrib/single_file_transcoder/examples/testcard-rgba.basis.inc
+++ b/contrib/single_file_transcoder/examples/testcard-rgba.basis.inc
@@ -1040,4 +1040,4 @@
 0x85, 0x35, 0xe1, 0x51, 0x6a, 0xa9, 0x19, 0x6b, 0x00, 0x9b, 0xcb, 0x5f,
 0xfb, 0xd8, 0xf8, 0x56, 0xfc, 0x3f, 0xe1, 0xda, 0xfa, 0xff, 0xf6, 0xe3,
 0x9c, 0x0e, 0x16, 0xff, 0x31, 0x76, 0x1f, 0x00, 0xf6, 0x64, 0x13, 0xb6,
-0xda, 0x38, 0x76, 0xb1, 0x11, 0xb6, 0xda, 0x38
\ No newline at end of file
+0xda, 0x38, 0x76, 0xb1, 0x11, 0xb6, 0xda, 0x38
diff --git a/contrib/single_file_transcoder/examples/testcard.basis.inc b/contrib/single_file_transcoder/examples/testcard.basis.inc
index 68102c1..d343ab2 100644
--- a/contrib/single_file_transcoder/examples/testcard.basis.inc
+++ b/contrib/single_file_transcoder/examples/testcard.basis.inc
@@ -700,4 +700,4 @@
 0xbf, 0xc0, 0x2f, 0x73, 0x60, 0x9f, 0x73, 0xf4, 0x1f, 0x93, 0xf9, 0x0b,
 0xf6, 0x8f, 0xdb, 0xfb, 0x12, 0x3e, 0x8a, 0x83, 0x00, 0xa7, 0x1b, 0x6f,
 0x76, 0x40, 0x5c, 0xe7, 0x7f, 0x02, 0xb9, 0xf3, 0x85, 0x79, 0x6b, 0x0f,
-0x79, 0x6b, 0x79
\ No newline at end of file
+0x79, 0x6b, 0x79
diff --git a/format.sh b/format.sh
new file mode 100755
index 0000000..5d90393
--- /dev/null
+++ b/format.sh
@@ -0,0 +1,45 @@
+#!/bin/bash
+
+# Loops through all text files tracked by Git.
+git grep -zIl '' |
+while IFS= read -rd '' f; do
+    # Exclude some types of files.
+    if [[ $f == *"proj" ]]; then
+        continue
+    elif [[ $f == *"filters" ]]; then
+        continue
+    elif [[ $f == *"sln" ]]; then
+        continue
+    elif [[ $f == *"json" ]]; then
+        continue
+    elif [[ $f == *"min.js" ]]; then
+        continue
+    fi
+    # Ensures that files are UTF-8 formatted.
+    recode UTF-8 $f 2> /dev/null
+    # Ensures that files have LF line endings.
+    dos2unix $f 2> /dev/null
+    # Ensures that files do not contain a BOM.
+    sed -i '1s/^\xEF\xBB\xBF//' "$f"
+    # Ensures that files end with newline characters.
+    tail -c1 < "$f" | read -r _ || echo >> "$f";
+done
+
+git diff > patch.patch
+FILESIZE=$(stat -c%s patch.patch)
+MAXSIZE=5
+
+# If no patch has been generated all is OK, clean up, and exit.
+if (( FILESIZE < MAXSIZE )); then
+    printf "Files in this commit comply with the formatting rules.\n"
+    rm -f patch.patch
+    exit 0
+fi
+
+# A patch has been created, notify the user, clean up, and exit.
+printf "\n*** The following differences were found between the code "
+printf "and the formatting rules:\n\n"
+cat patch.patch
+printf "\n*** Aborting, please fix your commit(s) with 'git commit --amend' or 'git rebase -i <hash>'\n"
+rm -f patch.patch
+exit 1
diff --git a/spec/basis_spec.txt b/spec/basis_spec.txt
new file mode 100644
index 0000000..93ec749
--- /dev/null
+++ b/spec/basis_spec.txt
@@ -0,0 +1,389 @@
+[WORK IN PROGRESS]
+
+File: basis_spec.txt
+Version 1.00
+
+1.0 Introduction
+----------------
+
+The Basis Universal GPU texture codec supports reading and writing ".basis" files. 
+The .basis file format supports ETC1S or UASTC 4x4 texture data.
+
+* ETC1S is a simplified subset of ETC1.
+
+The mode is always differential (diff bit=1), the Rd, Gd, and Bd color deltas 
+are always (0,0,0), and the flip bit is always set. ETC1S texture data is fully 
+compliant with all existing software and hardware ETC1 decoders. Existing encoders 
+can be easily modified to limit their output to ETC1S.
+
+* UASTC 4x4 is a 19 mode subset of the ASTC texture format. Its specification is 
+[here](https://github.com/BinomialLLC/basis_universal/wiki/UASTC-Texture-Specification). UASTC texture data can always be losslessly transcoded to ASTC.
+
+2.0 High-Level File Structure
+-----------------------------
+
+A .basis file consists of multiple sections. Apart from the header, which must always
+be at the start of the file, the other sections may appear in any order. 
+
+Here's the high level organization of a typical .basis file:
+
+* The file header
+* Optional ETC1S compressed endpoint/selector codebooks
+* Optional ETC1S Huffman table information
+* A required "slice" description array describing the resolutions and file offset/compressed sizes of each texture slice present in the file
+* 1 or more slices containing ETC1S or UASTC compressed texture data. 
+* For future expansion, the format supports an "extended" header which may be located anywhere in the file. This section contains .PNG-like chunked data. 
+
+3.0 File Enums
+--------------
+
+// basis_file_header::m_tex_type
+enum basis_texture_type
+{
+  cBASISTexType2D = 0,     
+  cBASISTexType2DArray = 1,   
+  cBASISTexTypeCubemapArray = 2, 
+  cBASISTexTypeVideoFrames = 3, 
+  cBASISTexTypeVolume = 4,  
+  cBASISTexTypeTotal
+};
+
+// basis_slice_desc::flags
+enum basis_slice_desc_flags
+{
+  cSliceDescFlagsHasAlpha = 1,
+  cSliceDescFlagsFrameIsIFrame = 2   
+};
+
+enum basis_tex_format // basis_file_header::m_tex_format
+{
+  cETC1S = 0,
+  cUASTC4x4 = 1
+};
+
+enum basis_header_flags // basis_file_header::m_flags
+{
+  cBASISHeaderFlagETC1S = 1,
+  cBASISHeaderFlagYFlipped = 2,
+  cBASISHeaderFlagHasAlphaSlices = 4
+};
+
+
+4.0 File Structures
+-------------------
+
+All individual members in all file structures are byte aligned and little endian. The structs 
+have no padding (i.e. they are declared with #pragma pack(1)).
+
+4.1 "basis_file_header" structure
+---------------------------------
+
+The file header must always be at the beginning of the file.
+
+struct basis_file_header
+{
+  uint16      m_sig;              // 2 byte file signature
+  uint16      m_ver;              // File version
+  uint16      m_header_size;      // Header size in bytes, sizeof(basis_file_header) or 0x4D
+  uint16      m_header_crc16;     // CRC16/genibus of the remaining header data
+
+  uint32      m_data_size;        // The total size of all data after the header
+  uint16      m_data_crc16;       // The CRC16 of all data after the header
+
+  uint24      m_total_slices;     // The number of compressed slices 
+  uint24      m_total_images;     // The total # of images
+
+  byte        m_tex_format;       // enum basis_tex_format
+  uint16      m_flags;            // enum basis_header_flags
+  byte        m_tex_type;         // enum basis_texture_type
+  uint24      m_us_per_frame;     // Video: microseconds per frame
+
+  uint32      m_reserved;         // For future use
+  uint32      m_userdata0;        // For client use
+  uint32      m_userdata1;        // For client use
+
+  uint16      m_total_endpoints;          // ETC1S: The number of endpoints in the endpoint codebook 
+  uint32      m_endpoint_cb_file_ofs;     // ETC1S: The compressed endpoint codebook's file offset relative to the header
+  uint24      m_endpoint_cb_file_size;    // ETC1S: The compressed endpoint codebook's size in bytes
+
+  uint16      m_total_selectors;          // ETC1S: The number of selectors in the selector codebook 
+  uint32      m_selector_cb_file_ofs;     // ETC1S: The compressed selector codebook's file offset relative to the header
+  uint24      m_selector_cb_file_size;    // ETC1S: The compressed selector codebook's size in bytes
+
+  uint32      m_tables_file_ofs;          // ETC1S: The file offset of the compressed Huffman codelength tables.
+  uint32      m_tables_file_size;         // ETC1S: The file size in bytes of the compressed Huffman codelength tables.
+
+  uint32      m_slice_desc_file_ofs;      // The file offset to the slice description array, usually follows the header
+  uint32      m_extended_file_ofs;        // The file offset of the "extended" header and compressed data, for future use
+  uint32      m_extended_file_size;       // The file size in bytes of the "extended" header and compressed data, for future use
+};
+
+4.1.1 Details:
+
+* m_sig is always 'B' * 256 + 's', or 0x4273.
+* m_ver is currently always 0x10.
+* m_header_size is sizeof(basis_file_header). It's always 0x4D.
+* m_header_crc16 is the CRC-16 of the remaining header data. See the "CRC-16" section for more information.
+* m_data_size, m_data_crc16: The size of all data following the header, and its CRC-16.
+* m_total_slices: The total number of slices, in the range [1, 2^24-1].
+* m_total_images: The total number of images (where one image can contain multiple mipmap levels, and each mipmap level is a different slice).
+* m_tex_format: basis_tex_format. Either cETC1S (0), or cUASTC4x4 (1).
+* m_flags: A combination of flags from the basis_header_flags enum.
+* m_tex_type: The texture type, from enum basis_texture_type
+* m_us_per_frame: Microseconds per frame, only valid for cBASISTexTypeVideoFrames texture types.
+* m_total_endpoints, m_endpoint_cb_file_ofs, m_endpoint_cb_file_size: Information about the compressed ETC1S endpoint codebook: The total # of entries, the offset to the compressed data, and the compressed data's size.
+* m_total_selectors, m_selector_cb_file_ofs, m_selector_cb_file_size: Information about the compressed ETC1S selector codebook: The total # of entries, the offset to the compressed data, and the compressed data's size.
+* m_tables_file_ofs, m_tables_file_size: The file offset and size of the compressed Huffman tables for ETC1S format files. 
+* m_slice_desc_file_ofs: The file offset to the array of slice description structures. There will be m_total_slices structures at this file offset.
+* m_extended_file_ofs, m_extended_file_size: The "extended" header, for future expansion. Currently unused.
+
+4.2 "basis_slice_desc" structure
+--------------------------------
+
+struct basis_slice_desc
+{
+    uint24 m_image_index;  
+    uint8 m_level_index;   
+    uint8 m_flags;         
+
+    uint16 m_orig_width;   
+    uint16 m_orig_height;  
+
+    uint16 m_num_blocks_x; 
+    uint16 m_num_blocks_y; 
+
+    uint32 m_file_ofs;     
+    uint32 m_file_size;    
+
+    uint16 m_slice_data_crc16; 
+};
+
+4.2.1 Details:
+
+* m_image_index: The index of the source image provided to the encoder (will always appear in order from first to last, first image index is 0, no skipping allowed)
+* m_level_index: The mipmap level index (mipmaps will always appear from largest to smallest)
+* m_flags: enum basis_slice_desc_flags
+* m_orig_width: The original image width (may not be a multiple of 4 pixels)
+* m_orig_height: The original image height (may not be a multiple of 4 pixels)
+* m_num_blocks_x: The slice's block X dimensions. Each block is 4x4 pixels. The slice's pixel resolution may or may not be a power of 2.
+* m_num_blocks_y: The slice's block Y dimensions. 
+* m_file_ofs: Offset from the header to the start of the slice's data
+* m_file_size: The size of the compressed slice data in bytes
+* m_slice_data_crc16: The CRC16 of the compressed slice data, for extra-paranoid use cases
+
+5.0 CRC-16 Function
+-------------------
+
+.basis files use CRC-16/genibus (aka CRC-16 EPC, CRC-16 I-CODE, CRC-16 DARC) format CRC-16's.
+
+Here's an example function in C++:
+
+uint16_t crc16(const void* r, size_t size, uint16_t crc)
+{
+  crc = ~crc;
+  const uint8_t* p = static_cast<const uint8_t*>(r);
+  for ( ; size; --size)
+  {
+    const uint16_t q = *p++ ^ (crc >> 8);
+    uint16_t k = (q >> 4) ^ q;
+    crc = (((crc << 8) ^ k) ^ (k << 5)) ^ (k << 12);
+  }
+
+  return static_cast<uint16_t>(~crc);
+}
+
+This function is called with 0 in the final "crc" parameter when computing CRC-16's of file data.
+
+6.0 Compressed Huffman Tables
+-----------------------------
+
+ETC1S format .basis files rely heavily on static [canonical Huffman
+prefix coding](https://en.wikipedia.org/wiki/Canonical_Huffman_code).  Multiple
+Huffman tables are used by each compressed section. Huffman codes are stored in
+each output byte in LSB to MSB order. (This is opposite of the JPEG format,
+which stores the codes in MSB to LSB order.)
+
+Huffman coding in .basis is compatible with the canonical Huffman methods used
+by Deflate encoders/decoders. See section 3.2.2 of [Deflate - RFC
+1951](https://tools.ietf.org/html/rfc1951), which describes how to compute the
+value of each Huffman code given an array of symbol codelengths. This document
+assumes familiarity with how Huffman coding works in Deflate.
+
+First, some enums:
+
+enum
+{
+    // Max supported Huffman code size is 16-bits
+    cHuffmanMaxSupportedCodeSize = 16, 
+
+    // The maximum number of symbols is 2^14
+    cHuffmanMaxSymsLog2 = 14, 
+    cHuffmanMaxSyms = 1 << cHuffmanMaxSymsLog2,
+
+    // Small zero runs may range from 3-10 entries
+    cHuffmanSmallZeroRunSizeMin = 3, 
+    cHuffmanSmallZeroRunSizeMax = 10, 
+    cHuffmanSmallZeroRunExtraBits = 3,
+
+    // Big zero runs may range from 11-138 entries
+    cHuffmanBigZeroRunSizeMin = 11, 
+    cHuffmanBigZeroRunSizeMax = 138, 
+    cHuffmanBigZeroRunExtraBits = 7,
+
+    // Small non-zero runs may range from 3-6 entries
+    cHuffmanSmallRepeatSizeMin = 3, 
+    cHuffmanSmallRepeatSizeMax = 6, 
+    cHuffmanSmallRepeatExtraBits = 2,
+
+    // Big non-zero run may range from 7-134 entries
+    cHuffmanBigRepeatSizeMin = 7, 
+    cHuffmanBigRepeatSizeMax = 134, 
+    cHuffmanBigRepeatExtraBits = 7,
+
+    // There are a maximum of 21 symbols in a compressed Huffman code length table.
+    cHuffmanTotalCodelengthCodes = 21, 
+    
+    // Symbols [0,16] indicate code sizes. Other symbols indicate zero runs or repeats:
+    cHuffmanSmallZeroRunCode = 17, 
+    cHuffmanBigZeroRunCode = 18, 
+    cHuffmanSmallRepeatCode = 19, 
+    cHuffmanBigRepeatCode = 20
+};
+
+A .basis Huffman table consists of 1 to cHuffmanMaxSyms symbols. Each compressed
+Huffman table is described by an array of symbol code lengths in bits.
+
+The table's symbol code lengths are themselves RLE+Huffman coded, just like
+Deflate. (Note this can be confusing to developers unfamiliar with Deflate.)
+Each table begins with a small fixed header:
+
+    14 bits: total_used_syms [1, cHuffmanMaxSyms]
+    5 bits: num_codelength_codes [1, cHuffmanTotalCodelengthCodes]
+    
+Next, the code lengths for the small Huffman table which is used to send the compressed codelengths (and RLE/repeat codes) are sent uncompressed but in a reordered manner:
+    
+    3*num_codelength_codes bits: Code size of each Huffman symbol for the compressed Huffman codelength table.
+    
+    These code lengths are sent in this order (to help reduce the number that must be sent):
+    
+    { 
+        cHuffmanSmallZeroRunCode, cHuffmanBigZeroRunCode, cHuffmanSmallRepeatCode, cHuffmanBigRepeatCode, 
+        0, 8, 7, 9, 6, 0xA, 5, 0xB, 4, 0xC, 3, 0xD, 2, 0xE, 1, 0xF, 0x10 
+    };
+            
+A canonical Huffman decoding table (of up to 21 symbols) should be built from
+these code lengths. Immediately following this data are the Huffman symbols
+(sometimes intermixed with raw bits) which describe how to unpack the
+codelengths of each symbol in the Huffman table:
+
+    - Symbols [0,16] indicate a specific symbol code length in bits.
+    
+    - Symbol cHuffmanSmallZeroRunCode (17) indicates a short run of symbols with 0 bit code lengths.
+      cHuffmanSmallZeroRunExtraBits (3) bits are sent after this symbol, which indicates the run's size after adding the minimum size (cHuffmanSmallZeroRunSizeMin).
+      
+    - Symbol cHuffmanBigZeroRunCode (18) indicates a long run of symbols with 0 bit code lengths. 
+      cHuffmanBigZeroRunExtraBits (7) bits are sent after this symbol, which indicates the run's size after adding the minimum size (cHuffmanBigZeroRunSizeMin)
+
+    - Symbol cHuffmanSmallRepeatCode (19) indicates a short run of symbols that repeat the previous symbol's code length.
+      cHuffmanSmallRepeatExtraBits (2) bits are sent after this symbol, which indicates the number of times to repeat the previous symbol's code length, 
+      after adding the minimum size (cHuffmanSmallRepeatSizeMin).
+      Cannot be the first symbol, and the previous symbol cannot have a code length of 0.
+      
+    - Symbol cHuffmanBigRepeatCode (20) indicates a long run of symbols that repeat the previous symbol's code length.
+      cHuffmanBigRepeatExtraBits (7) bits are sent after this symbol, which indicates the number of times to repeat the previous symbol's code length,
+      after adding the minimum size (cHuffmanBigRepeatSizeMin).
+      Cannot be the first symbol, and the previous symbol cannot have a code length of 0.
+      
+There should be exactly total_used_syms code lengths stored in the compressed Huffman table. If not, the stream is either corrupted or invalid.
+
+After all the symbol codelengths are uncompressed, the symbol codes can be computed and the canonical Huffman decoding tables can be built.
+
+7.0 ETC1S Endpoint Codebooks
+----------------------------
+
+The endpoint codebook section starts at file offset
+basis_file_header::m_endpoint_cb_file_ofs and is m_endpoint_cb_file_size bytes
+long. The endpoint codebook will have basis_file_header::m_total_endpoints total
+entries.
+
+At the beginning of the compressed endpoint codebook section are four compressed
+Huffman tables, stored using the procedure outlined in section 6.0. The Huffman tables
+appear in this order:
+
+    1. color5_delta_model0
+    2. color5_delta_model1
+    3. color5_delta_model2
+    4. inten_delta_model
+
+Following the data for these Huffman tables is a single 1-bit code which
+indicates if the color endpoint codebook is grayscale or not. 
+
+Immediately following this code is the compressed color endpoint codebook data. 
+A simple form of DPCM coding is used to send the ETC1S intensity table indices and
+color values. Here is the procedure to decode the endpoint codebook:
+
+    const int COLOR5_PAL0_PREV_HI = 9, COLOR5_PAL0_DELTA_LO = -9, COLOR5_PAL0_DELTA_HI = 31;
+    const int COLOR5_PAL1_PREV_HI = 21, COLOR5_PAL1_DELTA_LO = -21, COLOR5_PAL1_DELTA_HI = 21;
+    const int COLOR5_PAL2_PREV_HI = 31, COLOR5_PAL2_DELTA_LO = -31, COLOR5_PAL2_DELTA_HI = 9;
+
+    // Assume previous endpoint color is (16, 16, 16), and the previous intensity is 0.
+    color32 prev_color5(16, 16, 16, 0);
+    uint32_t prev_inten = 0;
+
+    // For each endpoint codebook entry
+    for (uint32_t i = 0; i < num_endpoints; i++)
+    {
+        // Decode the intensity delta Huffman code
+        uint32_t inten_delta = sym_codec.decode_huffman(inten_delta_model);
+        m_endpoints[i].m_inten5 = static_cast<uint8_t>((inten_delta + prev_inten) & 7);
+        prev_inten = m_endpoints[i].m_inten5;
+
+        // Now decode the endpoint entry's color or intensity value
+        for (uint32_t c = 0; c < (endpoints_are_grayscale ? 1U : 3U); c++)
+        {
+            // The Huffman table we use to decode the delta depends on the previous color's value
+            int delta;
+            if (prev_color5[c] <= basist::COLOR5_PAL0_PREV_HI)
+                delta = sym_codec.decode_huffman(color5_delta_model0);
+            else if (prev_color5[c] <= basist::COLOR5_PAL1_PREV_HI)
+                delta = sym_codec.decode_huffman(color5_delta_model1);
+            else
+                delta = sym_codec.decode_huffman(color5_delta_model2);
+
+            // Apply the delta
+            int v = (prev_color5[c] + delta) & 31;
+
+            m_endpoints[i].m_color5[c] = static_cast<uint8_t>(v);
+
+            prev_color5[c] = static_cast<uint8_t>(v);
+        }
+
+        // If the endpoints are grayscale, set G and B to match R.
+        if (endpoints_are_grayscale)
+        {
+            m_endpoints[i].m_color5[1] = m_endpoints[i].m_color5[0];
+            m_endpoints[i].m_color5[2] = m_endpoints[i].m_color5[0];
+        }
+    }
+
+The rest of the section's data (if any) can be ignored.
+
+8.0 ETC1S Selector Codebooks
+----------------------------
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
diff --git a/transcoder/basisu.h b/transcoder/basisu.h
index c4d5bfc..6e6f46d 100644
--- a/transcoder/basisu.h
+++ b/transcoder/basisu.h
@@ -88,7 +88,7 @@
 #define BASISU_ARRAY_SIZE(x) (sizeof(x) / sizeof(x[0]))
 #define BASISU_NO_EQUALS_OR_COPY_CONSTRUCT(x) x(const x &) = delete; x& operator= (const x &) = delete;
 #define BASISU_ASSUME(x) static_assert(x, #x);
-#define BASISU_OFFSETOF(s, m) (uint32_t)(intptr_t)(&((s *)(0))->m)
+#define BASISU_OFFSETOF(s, m) offsetof(s, m)
 #define BASISU_STRINGIZE(x) #x
 #define BASISU_STRINGIZE2(x) BASISU_STRINGIZE(x)
 
@@ -293,7 +293,7 @@
 	enum
 	{
 		cHuffmanMaxSupportedCodeSize = 16, cHuffmanMaxSupportedInternalCodeSize = 31, 
-		cHuffmanFastLookupBits = 10, cHuffmanFastLookupSize = 1 << cHuffmanFastLookupBits,
+		cHuffmanFastLookupBits = 10, 
 		cHuffmanMaxSymsLog2 = 14, cHuffmanMaxSyms = 1 << cHuffmanMaxSymsLog2,
 
 		// Small zero runs
@@ -341,6 +341,8 @@
 		cETC2_R11_EAC,
 		cETC2_RG11_EAC,
 		cUASTC4x4,		
+		cBC1_NV,
+		cBC1_AMD,
 		
 		// Uncompressed/raw pixels
 		cRGBA32,
@@ -359,6 +361,8 @@
 		case texture_format::cETC2_RGB:
 		case texture_format::cETC2_ALPHA:
 		case texture_format::cBC1:
+		case texture_format::cBC1_NV:
+		case texture_format::cBC1_AMD:
 		case texture_format::cBC4:
 		case texture_format::cPVRTC1_4_RGB:
 		case texture_format::cPVRTC1_4_RGBA:
diff --git a/transcoder/basisu_transcoder.cpp b/transcoder/basisu_transcoder.cpp
index b6d29b8..056767f 100644
--- a/transcoder/basisu_transcoder.cpp
+++ b/transcoder/basisu_transcoder.cpp
@@ -17,17 +17,22 @@
 #include <limits.h>
 #include <vector>
 
-#ifndef IS_BIG_ENDIAN
+#ifndef BASISD_IS_BIG_ENDIAN
 // TODO: This doesn't work on OSX. How can this be so difficult?
 //#if defined(__BIG_ENDIAN__) || defined(_BIG_ENDIAN) || defined(BIG_ENDIAN)
-//	#define IS_BIG_ENDIAN (1)
+//	#define BASISD_IS_BIG_ENDIAN (1)
 //#else
-	#define IS_BIG_ENDIAN (0)
+	#define BASISD_IS_BIG_ENDIAN (0)
 //#endif
 #endif
 
-#ifndef USE_UNALIGNED_WORD_READS
-#define USE_UNALIGNED_WORD_READS (1)
+#ifndef BASISD_USE_UNALIGNED_WORD_READS
+	#ifdef __EMSCRIPTEN__
+		// Can't use unaligned loads/stores with WebAssembly.
+		#define BASISD_USE_UNALIGNED_WORD_READS (0)
+	#else
+		#define BASISD_USE_UNALIGNED_WORD_READS (1)
+	#endif
 #endif
 
 #define BASISD_SUPPORTED_BASIS_VERSION (0x13)
@@ -190,7 +195,7 @@
 	{
 		crc = ~crc;
 
-		const uint8_t* p = reinterpret_cast<const uint8_t*>(r);
+		const uint8_t* p = static_cast<const uint8_t*>(r);
 		for (; size; --size)
 		{
 			const uint16_t q = *p++ ^ (crc >> 8);
@@ -8510,7 +8515,7 @@
 						for (uint32_t i = 0; i < 4; i++)
 						{
 							packed_colors[i] = static_cast<uint16_t>((mul_8(colors[i].r, 31) << 11) | (mul_8(colors[i].g, 63) << 5) | mul_8(colors[i].b, 31));
-							if (IS_BIG_ENDIAN)
+							if (BASISD_IS_BIG_ENDIAN)
 								packed_colors[i] = byteswap_uint16(packed_colors[i]);
 						}
 					}
@@ -8519,7 +8524,7 @@
 						for (uint32_t i = 0; i < 4; i++)
 						{
 							packed_colors[i] = static_cast<uint16_t>((mul_8(colors[i].b, 31) << 11) | (mul_8(colors[i].g, 63) << 5) | mul_8(colors[i].r, 31));
-							if (IS_BIG_ENDIAN)
+							if (BASISD_IS_BIG_ENDIAN)
 								packed_colors[i] = byteswap_uint16(packed_colors[i]);
 						}
 					}
@@ -8560,12 +8565,12 @@
 						for (uint32_t x = 0; x < max_x; x++)
 						{
 							uint16_t cur = reinterpret_cast<uint16_t*>(pDst_pixels)[x];
-							if (IS_BIG_ENDIAN)
+							if (BASISD_IS_BIG_ENDIAN)
 								cur = byteswap_uint16(cur);
 
 							cur = (cur & 0xF) | packed_colors[(s >> (x * 2)) & 3];
 							
-							if (IS_BIG_ENDIAN)
+							if (BASISD_IS_BIG_ENDIAN)
 								cur = byteswap_uint16(cur);
 
 							reinterpret_cast<uint16_t*>(pDst_pixels)[x] = cur;
@@ -8591,7 +8596,7 @@
 					for (uint32_t i = 0; i < 4; i++)
 					{
 						packed_colors[i] = static_cast<uint16_t>((mul_8(colors[i].r, 15) << 12) | (mul_8(colors[i].g, 15) << 8) | (mul_8(colors[i].b, 15) << 4) | 0xF);
-						if (IS_BIG_ENDIAN)
+						if (BASISD_IS_BIG_ENDIAN)
 							packed_colors[i] = byteswap_uint16(packed_colors[i]);
 					}
 
@@ -8622,7 +8627,7 @@
 					for (uint32_t i = 0; i < 4; i++)
 					{
 						packed_colors[i] = mul_8(colors[i].g, 15);
-						if (IS_BIG_ENDIAN)
+						if (BASISD_IS_BIG_ENDIAN)
 							packed_colors[i] = byteswap_uint16(packed_colors[i]);
 					}
 
@@ -11795,7 +11800,7 @@
 		if (!codesize)
 			return 0;
 
-		if ((IS_BIG_ENDIAN) || (!USE_UNALIGNED_WORD_READS) || (bit_offset >= 112))
+		if ((BASISD_IS_BIG_ENDIAN) || (!BASISD_USE_UNALIGNED_WORD_READS) || (bit_offset >= 112))
 		{
 			const uint8_t* pBytes = &pBuf[bit_offset >> 3U];
 
@@ -11849,7 +11854,7 @@
 			return 0;
 		assert(bit_offset < 112);
 
-		if ((IS_BIG_ENDIAN) || (!USE_UNALIGNED_WORD_READS))
+		if ((BASISD_IS_BIG_ENDIAN) || (!BASISD_USE_UNALIGNED_WORD_READS))
 		{
 			const uint8_t* pBytes = &pBuf[bit_offset >> 3U];
 
@@ -12179,7 +12184,7 @@
 			uint64_t bits;
 			
 			// Read the weight bits
-			if ((IS_BIG_ENDIAN) || (!USE_UNALIGNED_WORD_READS))
+			if ((BASISD_IS_BIG_ENDIAN) || (!BASISD_USE_UNALIGNED_WORD_READS))
 				bits = read_bits64(blk.m_bytes, bit_ofs, std::min<int>(64, 128 - (int)bit_ofs));
 			else
 			{
diff --git a/transcoder/basisu_transcoder_internal.h b/transcoder/basisu_transcoder_internal.h
index dc234bd..80e43e6 100644
--- a/transcoder/basisu_transcoder_internal.h
+++ b/transcoder/basisu_transcoder_internal.h
@@ -122,7 +122,7 @@
 			basisu::clear_vector(m_tree);
 		}
 
-		bool init(uint32_t total_syms, const uint8_t *pCode_sizes)
+		bool init(uint32_t total_syms, const uint8_t *pCode_sizes, uint32_t fast_lookup_bits = basisu::cHuffmanFastLookupBits)
 		{
 			if (!total_syms)
 			{
@@ -133,8 +133,10 @@
 			m_code_sizes.resize(total_syms);
 			memcpy(&m_code_sizes[0], pCode_sizes, total_syms);
 
+			const uint32_t huffman_fast_lookup_size = 1 << fast_lookup_bits;
+
 			m_lookup.resize(0);
-			m_lookup.resize(basisu::cHuffmanFastLookupSize);
+			m_lookup.resize(huffman_fast_lookup_size);
 
 			m_tree.resize(0);
 			m_tree.resize(total_syms * 2);
@@ -172,10 +174,10 @@
 				for (l = code_size; l > 0; l--, cur_code >>= 1)
 					rev_code = (rev_code << 1) | (cur_code & 1);
 
-				if (code_size <= basisu::cHuffmanFastLookupBits)
+				if (code_size <= fast_lookup_bits)
 				{
 					uint32_t k = (code_size << 16) | sym_index;
-					while (rev_code < basisu::cHuffmanFastLookupSize)
+					while (rev_code < huffman_fast_lookup_size)
 					{
 						if (m_lookup[rev_code] != 0)
 						{
@@ -190,9 +192,9 @@
 				}
 
 				int tree_cur;
-				if (0 == (tree_cur = m_lookup[rev_code & (basisu::cHuffmanFastLookupSize - 1)]))
+				if (0 == (tree_cur = m_lookup[rev_code & (huffman_fast_lookup_size - 1)]))
 				{
-					const uint32_t idx = rev_code & (basisu::cHuffmanFastLookupSize - 1);
+					const uint32_t idx = rev_code & (huffman_fast_lookup_size - 1);
 					if (m_lookup[idx] != 0)
 					{
 						// Supplied codesizes can't create a valid prefix code.
@@ -210,9 +212,9 @@
 					return false;
 				}
 
-				rev_code >>= (basisu::cHuffmanFastLookupBits - 1);
+				rev_code >>= (fast_lookup_bits - 1);
 
-				for (int j = code_size; j > (basisu::cHuffmanFastLookupBits + 1); j--)
+				for (int j = code_size; j > ((int)fast_lookup_bits + 1); j--)
 				{
 					tree_cur -= ((rev_code >>= 1) & 1);
 
@@ -260,6 +262,8 @@
 		}
 
 		const basisu::uint8_vec &get_code_sizes() const { return m_code_sizes; }
+		const basisu::int_vec &get_lookup() const { return m_lookup; } // Fast lookup table sized by init(); returned by const reference (no copy), like get_code_sizes().
+		const basisu::int16_vec &get_tree() const { return m_tree; } // Tree walked by decode_huffman() when the fast lookup misses; returned by const reference (no copy).
 
 		bool is_valid() const { return m_code_sizes.size() > 0; }
 
@@ -436,9 +440,11 @@
 			return v;
 		}
 
-		inline uint32_t decode_huffman(const huffman_decoding_table &ct)
+		inline uint32_t decode_huffman(const huffman_decoding_table &ct, int fast_lookup_bits = basisu::cHuffmanFastLookupBits)
 		{
 			assert(ct.m_code_sizes.size());
+
+			const uint32_t huffman_fast_lookup_size = 1 << fast_lookup_bits;
 						
 			while (m_bit_buf_size < 16)
 			{
@@ -454,14 +460,14 @@
 			int code_len;
 
 			int sym;
-			if ((sym = ct.m_lookup[m_bit_buf & (basisu::cHuffmanFastLookupSize - 1)]) >= 0)
+			if ((sym = ct.m_lookup[m_bit_buf & (huffman_fast_lookup_size - 1)]) >= 0)
 			{
 				code_len = sym >> 16;
 				sym &= 0xFFFF;
 			}
 			else
 			{
-				code_len = basisu::cHuffmanFastLookupBits;
+				code_len = fast_lookup_bits;
 				do
 				{
 					sym = ct.m_tree[~sym + ((m_bit_buf >> code_len++) & 1)]; // ~sym = -sym - 1
diff --git a/webgl/transcoder/build/basis_loader.js b/webgl/transcoder/build/basis_loader.js
index f5cd071..8cd4a0e 100644
--- a/webgl/transcoder/build/basis_loader.js
+++ b/webgl/transcoder/build/basis_loader.js
@@ -491,4 +491,4 @@
       fail(id, `No url or buffer specified`);

     }

   };

-}
\ No newline at end of file
+}