UASTC HDR merge
diff --git a/transcoder/basisu.h b/transcoder/basisu.h
index 1230b59..939ee79 100644
--- a/transcoder/basisu.h
+++ b/transcoder/basisu.h
@@ -1,5 +1,5 @@
 // basisu.h
-// Copyright (C) 2019-2021 Binomial LLC. All Rights Reserved.
+// Copyright (C) 2019-2024 Binomial LLC. All Rights Reserved.
 // Important: If compiling with gcc, be sure strict aliasing is disabled: -fno-strict-aliasing
 //
 // Licensed under the Apache License, Version 2.0 (the "License");
@@ -117,13 +117,26 @@
 	typedef basisu::vector<uint64_t> uint64_vec;
 	typedef basisu::vector<int> int_vec;
 	typedef basisu::vector<bool> bool_vec;
+	typedef basisu::vector<float> float_vec;
 
 	void enable_debug_printf(bool enabled);
 	void debug_printf(const char *pFmt, ...);
-		
 
+#ifndef __EMSCRIPTEN__
+#ifdef __GNUC__
+#pragma GCC diagnostic push
+#pragma GCC diagnostic ignored "-Wclass-memaccess"            
+#endif                  
+#endif
+		
 	template <typename T> inline void clear_obj(T& obj) { memset(&obj, 0, sizeof(obj)); }
 
+#ifndef __EMSCRIPTEN__
+#ifdef __GNUC__
+#pragma GCC diagnostic pop
+#endif                            
+#endif
+
 	template <typename T0, typename T1> inline T0 lerp(T0 a, T0 b, T1 c) { return a + (b - a) * c; }
 
 	template <typename S> inline S maximum(S a, S b) { return (a > b) ? a : b; }
@@ -162,10 +175,45 @@
 	template<typename T> inline T open_range_check(T v, T minv, T maxv) { assert(v >= minv && v < maxv); BASISU_NOTE_UNUSED(minv); BASISU_NOTE_UNUSED(maxv); return v; }
 	template<typename T> inline T open_range_check(T v, T maxv) { assert(v < maxv); BASISU_NOTE_UNUSED(maxv); return v; }
 
+	// Open interval
+	inline bool in_bounds(int v, int l, int h)
+	{
+		return (v >= l) && (v < h);
+	}
+
+	// Closed interval
+	inline bool in_range(int v, int l, int h)
+	{
+		return (v >= l) && (v <= h);
+	}
+
 	inline uint32_t total_bits(uint32_t v) { uint32_t l = 0; for ( ; v > 0U; ++l) v >>= 1; return l; }
 
 	template<typename T> inline T saturate(T val) { return clamp(val, 0.0f, 1.0f); }
 
+	inline uint32_t get_bit(uint32_t src, int ndx)
+	{
+		assert(in_bounds(ndx, 0, 32));
+		return (src >> ndx) & 1;
+	}
+
+	inline bool is_bit_set(uint32_t src, int ndx)
+	{
+		return get_bit(src, ndx) != 0;
+	}
+
+	inline uint32_t get_bits(uint32_t val, int low, int high)
+	{
+		const int num_bits = (high - low) + 1;
+		assert(in_range(num_bits, 1, 32));
+
+		val >>= low;
+		if (num_bits != 32)
+			val &= ((1u << num_bits) - 1);
+
+		return val;
+	}
+
 	template<typename T, typename R> inline void append_vector(T &vec, const R *pObjs, size_t n) 
 	{ 
 		if (n)
@@ -267,6 +315,11 @@
 		return true;
 	}
 
+	static inline uint32_t read_le_word(const uint8_t* pBytes)
+	{
+		return (pBytes[1] << 8U) | (pBytes[0]);
+	}
+
 	static inline uint32_t read_le_dword(const uint8_t *pBytes)
 	{
 		return (pBytes[3] << 24U) | (pBytes[2] << 16U) | (pBytes[1] << 8U) | (pBytes[0]);
@@ -303,6 +356,10 @@
 			return *this;
 		}
 
+#ifdef __GNUC__
+#pragma GCC diagnostic push
+#pragma GCC diagnostic ignored "-Warray-bounds"            
+#endif  
 		inline operator uint32_t() const
 		{
 			switch (NumBytes)
@@ -354,6 +411,9 @@
 				}
 			}
 		}
+#ifdef __GNUC__
+#pragma GCC diagnostic pop
+#endif
 	};
 
 	enum eZero { cZero };
@@ -402,8 +462,11 @@
 		cBC3,				// DXT5 (BC4/DXT5A block followed by a BC1/DXT1 block)
 		cBC4,				// DXT5A
 		cBC5,				// 3DC/DXN (two BC4/DXT5A blocks)
+		cBC6HSigned,		// HDR
+		cBC6HUnsigned,		// HDR
 		cBC7,
-		cASTC4x4,		// LDR only
+		cASTC_LDR_4x4,		// ASTC 4x4 LDR only
+		cASTC_HDR_4x4,		// ASTC 4x4 HDR only (but may use LDR ASTC blocks internally)
 		cPVRTC1_4_RGB,
 		cPVRTC1_4_RGBA,
 		cATC_RGB,
@@ -413,17 +476,22 @@
 		cETC2_R11_EAC,
 		cETC2_RG11_EAC,
 		cUASTC4x4,		
+		cUASTC_HDR_4x4,
 		cBC1_NV,
 		cBC1_AMD,
-		
+				
 		// Uncompressed/raw pixels
 		cRGBA32,
 		cRGB565,
 		cBGR565,
 		cRGBA4444,
-		cABGR4444
+		cABGR4444,
+		cRGBA_HALF,
+		cRGB_HALF,
+		cRGB_9E5
 	};
 
+	// This is bytes per block for GPU formats, or bytes per texel for uncompressed formats.
 	inline uint32_t get_bytes_per_block(texture_format fmt)
 	{
 		switch (fmt)
@@ -443,13 +511,27 @@
 		case texture_format::cETC2_R11_EAC:
 			return 8;
 		case texture_format::cRGBA32:
-			return sizeof(uint32_t) * 16;
+		case texture_format::cRGB_9E5:
+			return sizeof(uint32_t);
+		case texture_format::cRGB_HALF:
+			return sizeof(uint16_t) * 3;
+		case texture_format::cRGBA_HALF:
+			return sizeof(uint16_t) * 4;
+		case texture_format::cRGB565:
+		case texture_format::cBGR565:
+		case texture_format::cRGBA4444:
+		case texture_format::cABGR4444:
+			return sizeof(uint16_t);
+
 		default:
 			break;
 		}
+		
+		// Everything else is 16 bytes/block.
 		return 16;
 	}
 
+	// This is qwords per block for GPU formats, or not valid for uncompressed formats.
 	inline uint32_t get_qwords_per_block(texture_format fmt)
 	{
 		return get_bytes_per_block(fmt) >> 3;
@@ -473,6 +555,17 @@
 		BASISU_NOTE_UNUSED(fmt);
 		return 4;
 	}
+
+	inline bool is_hdr_texture_format(texture_format fmt)
+	{
+		if (fmt == texture_format::cASTC_HDR_4x4)
+			return true;
+		if (fmt == texture_format::cUASTC_HDR_4x4)
+			return true;
+		if ((fmt == texture_format::cBC6HSigned) || (fmt == texture_format::cBC6HUnsigned))
+			return true;
+		return false;
+	}
 							
 } // namespace basisu
 
diff --git a/transcoder/basisu_containers.h b/transcoder/basisu_containers.h
index 67416ee..bfc51bb 100644
--- a/transcoder/basisu_containers.h
+++ b/transcoder/basisu_containers.h
@@ -189,7 +189,8 @@
 #define BASISU_IS_SCALAR_TYPE(T) (scalar_type<T>::cFlag)
 
 #if !defined(BASISU_HAVE_STD_TRIVIALLY_COPYABLE) && defined(__GNUC__) && __GNUC__<5
-   #define BASISU_IS_TRIVIALLY_COPYABLE(...) __has_trivial_copy(__VA_ARGS__)
+   //#define BASISU_IS_TRIVIALLY_COPYABLE(...) __has_trivial_copy(__VA_ARGS__)
+    #define BASISU_IS_TRIVIALLY_COPYABLE(...) __is_trivially_copyable(__VA_ARGS__)
 #else
    #define BASISU_IS_TRIVIALLY_COPYABLE(...) std::is_trivially_copyable<__VA_ARGS__>::value
 #endif
@@ -286,8 +287,19 @@
 
          if (BASISU_IS_BITWISE_COPYABLE(T))
          {
+#ifndef __EMSCRIPTEN__
+#ifdef __GNUC__
+#pragma GCC diagnostic push
+#pragma GCC diagnostic ignored "-Wclass-memaccess"            
+#endif                  
+#endif
              if ((m_p) && (other.m_p))
                 memcpy(m_p, other.m_p, m_size * sizeof(T));
+#ifndef __EMSCRIPTEN__
+#ifdef __GNUC__
+#pragma GCC diagnostic pop
+#endif                
+#endif
          }
          else
          {
@@ -330,8 +342,19 @@
 
          if (BASISU_IS_BITWISE_COPYABLE(T))
          {
+#ifndef __EMSCRIPTEN__
+#ifdef __GNUC__
+#pragma GCC diagnostic push
+#pragma GCC diagnostic ignored "-Wclass-memaccess"            
+#endif         
+#endif
              if ((m_p) && (other.m_p))
                 memcpy(m_p, other.m_p, other.m_size * sizeof(T));
+#ifndef __EMSCRIPTEN__          
+#ifdef __GNUC__
+#pragma GCC diagnostic pop
+#endif                            
+#endif
          }
          else
          {
@@ -501,7 +524,7 @@
 
          if (new_capacity > m_capacity)
          {
-            if (!increase_capacity(new_capacity, false))
+            if (!increase_capacity(new_capacity, false, true))
                return false;
          }
          else if (new_capacity < m_capacity)
@@ -509,7 +532,8 @@
             // Must work around the lack of a "decrease_capacity()" method.
             // This case is rare enough in practice that it's probably not worth implementing an optimized in-place resize.
             vector tmp;
-            tmp.increase_capacity(helpers::maximum(m_size, new_capacity), false);
+            if (!tmp.increase_capacity(helpers::maximum(m_size, new_capacity), false, true))
+                return false;
             tmp = *this;
             swap(tmp);
          }
@@ -750,7 +774,21 @@
             }
 
             // Copy "down" the objects to preserve, filling in the empty slots.
+
+#ifndef __EMSCRIPTEN__
+#ifdef __GNUC__
+#pragma GCC diagnostic push
+#pragma GCC diagnostic ignored "-Wclass-memaccess"            
+#endif
+#endif
+
             memmove(pDst, pSrc, num_to_move * sizeof(T));
+
+#ifndef __EMSCRIPTEN__
+#ifdef __GNUC__
+#pragma GCC diagnostic pop
+#endif            
+#endif
          }
          else
          {
@@ -1003,7 +1041,21 @@
       inline void set_all(const T& o)
       {
          if ((sizeof(T) == 1) && (scalar_type<T>::cFlag))
+         {
+#ifndef __EMSCRIPTEN__
+#ifdef __GNUC__
+#pragma GCC diagnostic push
+#pragma GCC diagnostic ignored "-Wclass-memaccess"            
+#endif              
+#endif
             memset(m_p, *reinterpret_cast<const uint8_t*>(&o), m_size);
+
+#ifndef __EMSCRIPTEN__            
+#ifdef __GNUC__
+#pragma GCC diagnostic pop
+#endif                        
+#endif
+         }
          else
          {
             T* pDst = m_p;
@@ -1029,7 +1081,7 @@
       // Important: This method is used in Basis Universal. If you change how this container allocates memory, you'll need to change any users of this method.
       inline bool grant_ownership(T* p, uint32_t size, uint32_t capacity)
       {
-         // To to prevent the caller from obviously shooting themselves in the foot.
+         // To prevent the caller from obviously shooting themselves in the foot.
          if (((p + capacity) > m_p) && (p < (m_p + m_capacity)))
          {
             // Can grant ownership of a block inside the container itself!
diff --git a/transcoder/basisu_containers_impl.h b/transcoder/basisu_containers_impl.h
index d5cb615..60c0b3d 100644
--- a/transcoder/basisu_containers_impl.h
+++ b/transcoder/basisu_containers_impl.h
@@ -19,23 +19,30 @@
       if (m_capacity >= min_new_capacity)
          return true;
 
-      size_t new_capacity = min_new_capacity;
-      if ((grow_hint) && (!helpers::is_power_of_2((uint64_t)new_capacity)))
+      uint64_t new_capacity_u64 = min_new_capacity;
+      if ((grow_hint) && (!helpers::is_power_of_2(new_capacity_u64)))
+          new_capacity_u64 = helpers::next_pow2(new_capacity_u64);
+
+      size_t new_capacity = (size_t)new_capacity_u64;
+      if (new_capacity != new_capacity_u64)
       {
-         new_capacity = (size_t)helpers::next_pow2((uint64_t)new_capacity);
-
-         assert(new_capacity && (new_capacity > m_capacity));
-
-         if (new_capacity < min_new_capacity)
-         {
-            if (nofail)
-               return false;
-            fprintf(stderr, "vector too large\n");
-            abort();
-         }
+          if (nofail)
+              return false;
+          fprintf(stderr, "elemental_vector::increase_capacity: vector too large\n");
+          abort();
       }
             
-      const size_t desired_size = element_size * new_capacity;
+      const uint64_t desired_size_u64 = (uint64_t)element_size * new_capacity;
+            
+      const size_t desired_size = (size_t)desired_size_u64;
+      if (desired_size_u64 != desired_size)
+      {
+          if (nofail)
+              return false;
+          fprintf(stderr, "elemental_vector::increase_capacity: vector too large\n");
+          abort();
+      }
+
       size_t actual_size = 0;
       if (!pMover)
       {
@@ -46,11 +53,7 @@
                return false;
 
             char buf[256];
-#ifdef _MSC_VER
-            sprintf_s(buf, sizeof(buf), "vector: realloc() failed allocating %u bytes", (uint32_t)desired_size);
-#else
-            sprintf(buf, "vector: realloc() failed allocating %u bytes", (uint32_t)desired_size);
-#endif
+            snprintf(buf, sizeof(buf), "elemental_vector::increase_capacity: realloc() failed allocating %zu bytes", desired_size);
             fprintf(stderr, "%s", buf);
             abort();
          }
@@ -75,11 +78,7 @@
                return false;
 
             char buf[256];
-#ifdef _MSC_VER
-            sprintf_s(buf, sizeof(buf), "vector: malloc() failed allocating %u bytes", (uint32_t)desired_size);
-#else
-            sprintf(buf, "vector: malloc() failed allocating %u bytes", (uint32_t)desired_size);
-#endif
+            snprintf(buf, sizeof(buf), "elemental_vector::increase_capacity: malloc() failed allocating %zu bytes", desired_size);
             fprintf(stderr, "%s", buf);
             abort();
          }
diff --git a/transcoder/basisu_file_headers.h b/transcoder/basisu_file_headers.h
index 4316d73..d29e3fe 100644
--- a/transcoder/basisu_file_headers.h
+++ b/transcoder/basisu_file_headers.h
@@ -1,5 +1,5 @@
 // basis_file_headers.h
-// Copyright (C) 2019-2020 Binomial LLC. All Rights Reserved.
+// Copyright (C) 2019-2024 Binomial LLC. All Rights Reserved.
 //
 // Licensed under the Apache License, Version 2.0 (the "License");
 // you may not use this file except in compliance with the License.
@@ -89,7 +89,8 @@
 	enum class basis_tex_format
 	{
 		cETC1S = 0,
-		cUASTC4x4 = 1
+		cUASTC4x4 = 1,
+		cUASTC_HDR_4x4 = 2
 	};
 
 	struct basis_file_header
diff --git a/transcoder/basisu_transcoder.cpp b/transcoder/basisu_transcoder.cpp
index 3aeba0e..ea994b0 100644
--- a/transcoder/basisu_transcoder.cpp
+++ b/transcoder/basisu_transcoder.cpp
@@ -1,5 +1,5 @@
 // basisu_transcoder.cpp
-// Copyright (C) 2019-2021 Binomial LLC. All Rights Reserved.
+// Copyright (C) 2019-2024 Binomial LLC. All Rights Reserved.
 //
 // Licensed under the Apache License, Version 2.0 (the "License");
 // you may not use this file except in compliance with the License.
@@ -17,6 +17,11 @@
 #include <limits.h>
 #include "basisu_containers_impl.h"
 
+#define BASISU_ASTC_HELPERS_IMPLEMENTATION
+#include "basisu_astc_helpers.h"
+
+#include "basisu_astc_hdr_core.h"
+
 #ifndef BASISD_IS_BIG_ENDIAN
 // TODO: This doesn't work on OSX. How can this be so difficult?
 //#if defined(__BIG_ENDIAN__) || defined(_BIG_ENDIAN) || defined(BIG_ENDIAN)
@@ -139,6 +144,10 @@
 	#endif
 #endif
 
+#ifndef BASISD_SUPPORT_UASTC_HDR
+	#define BASISD_SUPPORT_UASTC_HDR 1
+#endif
+
 #define BASISD_WRITE_NEW_BC7_MODE5_TABLES			0
 #define BASISD_WRITE_NEW_DXT1_TABLES				0
 #define BASISD_WRITE_NEW_ETC2_EAC_A8_TABLES		0
@@ -1908,17 +1917,24 @@
 	void basisu_transcoder_init()
 	{
 		if (g_transcoder_initialized)
-      {
-         BASISU_DEVEL_ERROR("basisu_transcoder::basisu_transcoder_init: Called more than once\n");      
+		{
+			BASISU_DEVEL_ERROR("basisu_transcoder::basisu_transcoder_init: Called more than once\n");      
 			return;
-      }
+		}
          
-     BASISU_DEVEL_ERROR("basisu_transcoder::basisu_transcoder_init: Initializing (this is not an error)\n");      
+		BASISU_DEVEL_ERROR("basisu_transcoder::basisu_transcoder_init: Initializing (this is not an error)\n");      
 
 #if BASISD_SUPPORT_UASTC
 		uastc_init();
 #endif
 
+#if BASISD_SUPPORT_UASTC_HDR
+		// TODO: Examine this, optimize for startup time/mem utilization.
+		astc_helpers::init_tables(false);
+
+		astc_hdr_core_init();
+#endif
+
 #if BASISD_SUPPORT_ASTC
 		transcoder_init_astc();
 #endif
@@ -2027,6 +2043,10 @@
 		transcoder_init_pvrtc2();
 #endif
 
+#if BASISD_SUPPORT_UASTC_HDR
+		bc6h_enc_init();
+#endif
+
 		g_transcoder_initialized = true;
 	}
 
@@ -6928,7 +6948,7 @@
 
 	static inline int sq(int x) { return x * x; }
 						
-	// PVRTC2 is a slightly borked format for alpha: In Non-Interpolated mode, the way AlphaB8 is exanded from 4 to 8 bits means it can never be 0. 
+	// PVRTC2 is a slightly borked format for alpha: In Non-Interpolated mode, the way AlphaB8 is expanded from 4 to 8 bits means it can never be 0. 
 	// This is actually very bad, because on 100% transparent blocks which have non-trivial color pixels, part of the color channel will leak into alpha! 
 	// And there's nothing straightforward we can do because using the other modes is too expensive/complex. I can see why Apple didn't adopt it.
 	static void convert_etc1s_to_pvrtc2_rgba(void* pDst, const endpoint* pEndpoints, const selector* pSelector, const endpoint* pEndpoint_codebook, const selector* pSelector_codebook)
@@ -7515,6 +7535,8 @@
 	}
 #endif // BASISD_SUPPORT_PVRTC2
 
+	//------------------------------------------------------------------------------------------------
+
 	basisu_lowlevel_etc1s_transcoder::basisu_lowlevel_etc1s_transcoder() :
 		m_pGlobal_codebook(nullptr),
 		m_selector_history_buf_size(0)
@@ -8620,7 +8642,7 @@
 			// Now make sure the output buffer is large enough, or we'll overwrite memory.
 			if (output_blocks_buf_size_in_blocks_or_pixels < (output_rows_in_pixels * output_row_pitch_in_blocks_or_pixels))
 			{
-				BASISU_DEVEL_ERROR("basisu_lowlevel_etc1s_transcoder::transcode_image: output_blocks_buf_size_in_blocks_or_pixels < (output_rows_in_pixels * output_row_pitch_in_blocks_or_pixels)\n");
+				BASISU_DEVEL_ERROR("basis_validate_output_buffer_size: output_blocks_buf_size_in_blocks_or_pixels < (output_rows_in_pixels * output_row_pitch_in_blocks_or_pixels)\n");
 				return false;
 			}
 		}
@@ -8632,7 +8654,7 @@
 
 			if (output_blocks_buf_size_in_blocks_or_pixels < total_blocks_fxt1)
 			{
-				BASISU_DEVEL_ERROR("basisu_lowlevel_etc1s_transcoder::transcode_image: output_blocks_buf_size_in_blocks_or_pixels < total_blocks_fxt1\n");
+				BASISU_DEVEL_ERROR("basis_validate_output_buffer_size: output_blocks_buf_size_in_blocks_or_pixels < total_blocks_fxt1\n");
 				return false;
 			}
 		}
@@ -8640,7 +8662,7 @@
 		{
 			if (output_blocks_buf_size_in_blocks_or_pixels < total_slice_blocks)
 			{
-				BASISU_DEVEL_ERROR("basisu_lowlevel_etc1s_transcoder::transcode_image: output_blocks_buf_size_in_blocks_or_pixels < transcode_image\n");
+				BASISU_DEVEL_ERROR("basis_validate_output_buffer_size: output_blocks_buf_size_in_blocks_or_pixels < transcode_image\n");
 				return false;
 			}
 		}
@@ -9242,13 +9264,17 @@
 
 		return status;
 	}
+
+	//------------------------------------------------------------------------------------------------
 	
 	basisu_lowlevel_uastc_transcoder::basisu_lowlevel_uastc_transcoder()
 	{
 	}
 
-	bool basisu_lowlevel_uastc_transcoder::transcode_slice(void* pDst_blocks, uint32_t num_blocks_x, uint32_t num_blocks_y, const uint8_t* pImage_data, uint32_t image_data_size, block_format fmt,
-        uint32_t output_block_or_pixel_stride_in_bytes, bool bc1_allow_threecolor_blocks, bool has_alpha, const uint32_t orig_width, const uint32_t orig_height, uint32_t output_row_pitch_in_blocks_or_pixels,
+	bool basisu_lowlevel_uastc_transcoder::transcode_slice(
+		void* pDst_blocks, uint32_t num_blocks_x, uint32_t num_blocks_y, const uint8_t* pImage_data, uint32_t image_data_size, block_format fmt,
+        uint32_t output_block_or_pixel_stride_in_bytes, bool bc1_allow_threecolor_blocks, bool has_alpha, 
+		const uint32_t orig_width, const uint32_t orig_height, uint32_t output_row_pitch_in_blocks_or_pixels,
 		basisu_transcoder_state* pState, uint32_t output_rows_in_pixels, int channel0, int channel1, uint32_t decode_flags)
 	{
 		BASISU_NOTE_UNUSED(pState);
@@ -9784,6 +9810,317 @@
 
 		return status;
 	}
+
+	//------------------------------------------------------------------------------------------------
+
+	basisu_lowlevel_uastc_hdr_transcoder::basisu_lowlevel_uastc_hdr_transcoder()
+	{
+	}
+
+	bool basisu_lowlevel_uastc_hdr_transcoder::transcode_slice(
+		void* pDst_blocks, uint32_t num_blocks_x, uint32_t num_blocks_y, const uint8_t* pImage_data, uint32_t image_data_size, block_format fmt,
+		uint32_t output_block_or_pixel_stride_in_bytes, bool bc1_allow_threecolor_blocks, bool has_alpha, 
+		const uint32_t orig_width, const uint32_t orig_height, uint32_t output_row_pitch_in_blocks_or_pixels,
+		basisu_transcoder_state* pState, uint32_t output_rows_in_pixels, int channel0, int channel1, uint32_t decode_flags)
+	{
+		BASISU_NOTE_UNUSED(pState);
+		BASISU_NOTE_UNUSED(bc1_allow_threecolor_blocks);
+		BASISU_NOTE_UNUSED(has_alpha);
+		BASISU_NOTE_UNUSED(channel0);
+		BASISU_NOTE_UNUSED(channel1);
+		BASISU_NOTE_UNUSED(decode_flags);
+
+		assert(g_transcoder_initialized);
+		if (!g_transcoder_initialized)
+		{
+			BASISU_DEVEL_ERROR("basisu_lowlevel_uastc_hdr_transcoder::transcode_slice: Transcoder not globally initialized.\n");
+			return false;
+		}
+
+#if BASISD_SUPPORT_UASTC_HDR
+		const uint32_t total_blocks = num_blocks_x * num_blocks_y;
+
+		if (!output_row_pitch_in_blocks_or_pixels)
+		{
+			if (basis_block_format_is_uncompressed(fmt))
+				output_row_pitch_in_blocks_or_pixels = orig_width;
+			else
+				output_row_pitch_in_blocks_or_pixels = num_blocks_x;
+		}
+
+		if (basis_block_format_is_uncompressed(fmt))
+		{
+			if (!output_rows_in_pixels)
+				output_rows_in_pixels = orig_height;
+		}
+
+		uint32_t total_expected_block_bytes = sizeof(astc_blk) * total_blocks;
+		if (image_data_size < total_expected_block_bytes)
+		{
+			BASISU_DEVEL_ERROR("basisu_lowlevel_uastc_hdr_transcoder::transcode_slice: image_data_size < total_expected_block_bytes The file is corrupted or this is a bug.\n");
+			return false;
+		}
+
+		const astc_blk* pSource_block = reinterpret_cast<const astc_blk*>(pImage_data);
+
+		bool status = false;
+
+		// TODO: Optimize pure memcpy() case.
+			
+		for (uint32_t block_y = 0; block_y < num_blocks_y; ++block_y)
+		{
+			void* pDst_block = (uint8_t*)pDst_blocks + block_y * output_row_pitch_in_blocks_or_pixels * output_block_or_pixel_stride_in_bytes;
+
+			for (uint32_t block_x = 0; block_x < num_blocks_x; ++block_x, ++pSource_block, pDst_block = (uint8_t*)pDst_block + output_block_or_pixel_stride_in_bytes)
+			{
+				switch (fmt)
+				{
+				case block_format::cUASTC_HDR_4x4:
+				case block_format::cASTC_HDR_4x4:
+				{
+					// Nothing to do, UASTC HDR is just ASTC.
+					memcpy(pDst_block, pSource_block, sizeof(uastc_block));
+					status = true;
+					break;
+				}
+				case block_format::cBC6H:
+				{
+					status = astc_hdr_transcode_to_bc6h(*pSource_block, *(bc6h_block *)pDst_block);
+					break;
+				}
+				case block_format::cRGB_9E5:
+				{
+					astc_helpers::log_astc_block log_blk;
+					status = astc_helpers::unpack_block(pSource_block, log_blk, 4, 4);
+					if (status)
+					{
+						uint32_t* pDst_pixels = reinterpret_cast<uint32_t*>(
+							static_cast<uint8_t*>(pDst_blocks) + (block_x * 4 + block_y * 4 * output_row_pitch_in_blocks_or_pixels) * sizeof(uint32_t)
+							);
+
+						uint32_t blk_texels[4][4];
+
+						status = astc_helpers::decode_block(log_blk, blk_texels, 4, 4, astc_helpers::cDecodeModeRGB9E5);
+						
+						if (status)
+						{
+							const uint32_t max_x = basisu::minimum<int>(4, (int)output_row_pitch_in_blocks_or_pixels - (int)block_x * 4);
+							const uint32_t max_y = basisu::minimum<int>(4, (int)output_rows_in_pixels - (int)block_y * 4);
+
+							for (uint32_t y = 0; y < max_y; y++)
+							{
+								memcpy(pDst_pixels, &blk_texels[y][0], sizeof(uint32_t) * max_x);
+
+								pDst_pixels += output_row_pitch_in_blocks_or_pixels;
+							} // y
+						}
+					}
+					
+					break;
+				}
+				case block_format::cRGBA_HALF:
+				{
+					astc_helpers::log_astc_block log_blk;
+					status = astc_helpers::unpack_block(pSource_block, log_blk, 4, 4);
+					if (status)
+					{
+						half_float* pDst_pixels = reinterpret_cast<half_float*>(
+							static_cast<uint8_t*>(pDst_blocks) + (block_x * 4 + block_y * 4 * output_row_pitch_in_blocks_or_pixels) * sizeof(half_float) * 4
+							);
+												
+						half_float blk_texels[4][4][4];
+						status = astc_helpers::decode_block(log_blk, blk_texels, 4, 4, astc_helpers::cDecodeModeHDR16);
+
+						if (status)
+						{
+							const uint32_t max_x = basisu::minimum<int>(4, (int)output_row_pitch_in_blocks_or_pixels - (int)block_x * 4);
+							const uint32_t max_y = basisu::minimum<int>(4, (int)output_rows_in_pixels - (int)block_y * 4);
+
+							for (uint32_t y = 0; y < max_y; y++)
+							{
+								for (uint32_t x = 0; x < max_x; x++)
+								{
+									pDst_pixels[0 + 4 * x] = blk_texels[y][x][0];
+									pDst_pixels[1 + 4 * x] = blk_texels[y][x][1];
+									pDst_pixels[2 + 4 * x] = blk_texels[y][x][2];
+									pDst_pixels[3 + 4 * x] = blk_texels[y][x][3];
+								} // x
+
+								pDst_pixels += output_row_pitch_in_blocks_or_pixels * 4;
+							} // y
+						}
+					}
+
+					break;
+				}
+				case block_format::cRGB_HALF:
+				{
+					astc_helpers:: log_astc_block log_blk;
+					status = astc_helpers::unpack_block(pSource_block, log_blk, 4, 4);
+					if (status)
+					{
+						half_float* pDst_pixels =
+							reinterpret_cast<half_float*>(static_cast<uint8_t*>(pDst_blocks) + (block_x * 4 + block_y * 4 * output_row_pitch_in_blocks_or_pixels) * sizeof(half_float) * 3);
+
+						half_float blk_texels[4][4][4];
+						status = astc_helpers::decode_block(log_blk, blk_texels, 4, 4, astc_helpers::cDecodeModeHDR16);
+						if (status)
+						{
+							const uint32_t max_x = basisu::minimum<int>(4, (int)output_row_pitch_in_blocks_or_pixels - (int)block_x * 4);
+							const uint32_t max_y = basisu::minimum<int>(4, (int)output_rows_in_pixels - (int)block_y * 4);
+
+							for (uint32_t y = 0; y < max_y; y++)
+							{
+								for (uint32_t x = 0; x < max_x; x++)
+								{
+									pDst_pixels[0 + 3 * x] = blk_texels[y][x][0];
+									pDst_pixels[1 + 3 * x] = blk_texels[y][x][1];
+									pDst_pixels[2 + 3 * x] = blk_texels[y][x][2];
+								} // x
+
+								pDst_pixels += output_row_pitch_in_blocks_or_pixels * 3;
+							} // y
+						}
+					}
+
+					break;
+				}
+				default:
+					assert(0);
+					break;
+
+				}
+
+				if (!status)
+				{
+					BASISU_DEVEL_ERROR("basisu_lowlevel_uastc_hdr_transcoder::transcode_slice: Transcoder failed to unpack a UASTC HDR block - this is a bug, or the data was corrupted\n");					return false;
+				}
+
+			} // block_x
+
+		} // block_y
+
+		return true;
+#else
+		BASISU_DEVEL_ERROR("basisu_lowlevel_uastc_hdr_transcoder::transcode_slice: UASTC_HDR is unsupported\n");
+
+		BASISU_NOTE_UNUSED(decode_flags);
+		BASISU_NOTE_UNUSED(channel0);
+		BASISU_NOTE_UNUSED(channel1);
+		BASISU_NOTE_UNUSED(output_rows_in_pixels);
+		BASISU_NOTE_UNUSED(output_row_pitch_in_blocks_or_pixels);
+		BASISU_NOTE_UNUSED(output_block_or_pixel_stride_in_bytes);
+		BASISU_NOTE_UNUSED(fmt);
+		BASISU_NOTE_UNUSED(image_data_size);
+		BASISU_NOTE_UNUSED(pImage_data);
+		BASISU_NOTE_UNUSED(num_blocks_x);
+		BASISU_NOTE_UNUSED(num_blocks_y);
+		BASISU_NOTE_UNUSED(pDst_blocks);
+
+		return false;
+#endif
+	}
+
+	bool basisu_lowlevel_uastc_hdr_transcoder::transcode_image(
+		transcoder_texture_format target_format,
+		void* pOutput_blocks, uint32_t output_blocks_buf_size_in_blocks_or_pixels,
+		const uint8_t* pCompressed_data, uint32_t compressed_data_length,
+		uint32_t num_blocks_x, uint32_t num_blocks_y, uint32_t orig_width, uint32_t orig_height, uint32_t level_index,
+		uint32_t slice_offset, uint32_t slice_length,
+		uint32_t decode_flags,
+		bool has_alpha,
+		bool is_video,
+		uint32_t output_row_pitch_in_blocks_or_pixels,
+		basisu_transcoder_state* pState,
+		uint32_t output_rows_in_pixels,
+		int channel0, int channel1)
+	{
+		BASISU_NOTE_UNUSED(is_video);
+		BASISU_NOTE_UNUSED(level_index);
+		BASISU_NOTE_UNUSED(decode_flags);
+
+		if (((uint64_t)slice_offset + slice_length) > (uint64_t)compressed_data_length)
+		{
+			BASISU_DEVEL_ERROR("basisu_lowlevel_uastc_hdr_transcoder::transcode_image: source data buffer too small\n");
+			return false;
+		}
+
+		const uint32_t bytes_per_block_or_pixel = basis_get_bytes_per_block_or_pixel(target_format);
+		const uint32_t total_slice_blocks = num_blocks_x * num_blocks_y;
+
+		if (!basis_validate_output_buffer_size(target_format, output_blocks_buf_size_in_blocks_or_pixels, orig_width, orig_height, output_row_pitch_in_blocks_or_pixels, output_rows_in_pixels, total_slice_blocks))
+		{
+			BASISU_DEVEL_ERROR("basisu_lowlevel_uastc_hdr_transcoder::transcode_image: output buffer size too small\n");
+			return false;
+		}
+
+		bool status = false;
+
+		switch (target_format)
+		{
+		case transcoder_texture_format::cTFASTC_HDR_4x4_RGBA:
+		{
+			status = transcode_slice(pOutput_blocks, num_blocks_x, num_blocks_y, pCompressed_data + slice_offset, slice_length, block_format::cASTC_HDR_4x4,
+				bytes_per_block_or_pixel, false, has_alpha, orig_width, orig_height, output_row_pitch_in_blocks_or_pixels, pState, output_rows_in_pixels, channel0, channel1);
+
+			if (!status)
+			{
+				BASISU_DEVEL_ERROR("basisu_lowlevel_uastc_hdr_transcoder::transcode_image: transcode_slice() to ASTC_HDR failed\n");
+			}
+			break;
+		}
+		case transcoder_texture_format::cTFBC6H:
+		{
+			status = transcode_slice(pOutput_blocks, num_blocks_x, num_blocks_y, pCompressed_data + slice_offset, slice_length, block_format::cBC6H,
+				bytes_per_block_or_pixel, false, has_alpha, orig_width, orig_height, output_row_pitch_in_blocks_or_pixels, pState, output_rows_in_pixels, channel0, channel1);
+			if (!status)
+			{
+				BASISU_DEVEL_ERROR("basisu_lowlevel_uastc_hdr_transcoder::transcode_image: transcode_slice() to BC6H failed\n");
+			}
+			break;
+		}
+		case transcoder_texture_format::cTFRGB_HALF:
+		{
+			status = transcode_slice(pOutput_blocks, num_blocks_x, num_blocks_y, pCompressed_data + slice_offset, slice_length, block_format::cRGB_HALF,
+				bytes_per_block_or_pixel, false, has_alpha, orig_width, orig_height, output_row_pitch_in_blocks_or_pixels, pState, output_rows_in_pixels);
+			if (!status)
+			{
+				BASISU_DEVEL_ERROR("basisu_lowlevel_uastc_hdr_transcoder::transcode_image: transcode_slice() to RGB_HALF failed\n");
+			}
+			break;
+		}
+		case transcoder_texture_format::cTFRGBA_HALF:
+		{
+			status = transcode_slice(pOutput_blocks, num_blocks_x, num_blocks_y, pCompressed_data + slice_offset, slice_length, block_format::cRGBA_HALF,
+				bytes_per_block_or_pixel, false, has_alpha, orig_width, orig_height, output_row_pitch_in_blocks_or_pixels, pState, output_rows_in_pixels);
+			if (!status)
+			{
+				BASISU_DEVEL_ERROR("basisu_lowlevel_uastc_hdr_transcoder::transcode_image: transcode_slice() to RGBA_HALF failed\n");
+			}
+			break;
+		}
+		case transcoder_texture_format::cTFRGB_9E5:
+		{
+			status = transcode_slice(pOutput_blocks, num_blocks_x, num_blocks_y, pCompressed_data + slice_offset, slice_length, block_format::cRGB_9E5,
+				bytes_per_block_or_pixel, false, has_alpha, orig_width, orig_height, output_row_pitch_in_blocks_or_pixels, pState, output_rows_in_pixels);
+			if (!status)
+			{
+				BASISU_DEVEL_ERROR("basisu_lowlevel_uastc_hdr_transcoder::transcode_image: transcode_slice() to RGBA_HALF failed\n");
+			}
+			break;
+		}
+		default:
+		{
+			assert(0);
+			BASISU_DEVEL_ERROR("basisu_lowlevel_uastc_hdr_transcoder::transcode_image: Invalid format\n");
+			break;
+		}
+		}
+
+		return status;
+	}
+
+	//------------------------------------------------------------------------------------------------
 	
 	basisu_transcoder::basisu_transcoder() :
 		m_ready_to_transcode(false)
@@ -10390,7 +10727,7 @@
 		}
 		else
 		{
-			// Nothing special to do for UASTC.
+			// Nothing special to do for UASTC/UASTC HDR.
 			if (m_lowlevel_etc1s_decoder.m_local_endpoints.size())
 			{
 				m_lowlevel_etc1s_decoder.clear();
@@ -10510,7 +10847,14 @@
 			return false;
 		}
 				
-		if (pHeader->m_tex_format == (int)basis_tex_format::cUASTC4x4)
+		if (pHeader->m_tex_format == (int)basis_tex_format::cUASTC_HDR_4x4)
+		{
+			return m_lowlevel_uastc_hdr_decoder.transcode_slice(pOutput_blocks, slice_desc.m_num_blocks_x, slice_desc.m_num_blocks_y,
+				pDataU8 + slice_desc.m_file_ofs, slice_desc.m_file_size,
+				fmt, output_block_or_pixel_stride_in_bytes, (decode_flags & cDecodeFlagsBC1ForbidThreeColorBlocks) == 0, *pHeader, slice_desc, output_row_pitch_in_blocks_or_pixels, pState,
+				output_rows_in_pixels, channel0, channel1, decode_flags);
+		}
+		else if (pHeader->m_tex_format == (int)basis_tex_format::cUASTC4x4)
 		{
 			return m_lowlevel_uastc_decoder.transcode_slice(pOutput_blocks, slice_desc.m_num_blocks_x, slice_desc.m_num_blocks_y,
 				pDataU8 + slice_desc.m_file_ofs, slice_desc.m_file_size,
@@ -10742,7 +11086,18 @@
 			memset(static_cast<uint8_t*>(pOutput_blocks) + total_slice_blocks * bytes_per_block_or_pixel, 0, (output_blocks_buf_size_in_blocks_or_pixels - total_slice_blocks) * bytes_per_block_or_pixel);
 		}
 		
-		if (pHeader->m_tex_format == (int)basis_tex_format::cUASTC4x4)
+		if (pHeader->m_tex_format == (int)basis_tex_format::cUASTC_HDR_4x4)
+		{
+			const basis_slice_desc* pSlice_desc = &pSlice_descs[slice_index];
+
+			// Use the container independent image transcode method.
+			status = m_lowlevel_uastc_hdr_decoder.transcode_image(fmt,
+				pOutput_blocks, output_blocks_buf_size_in_blocks_or_pixels,
+				(const uint8_t*)pData, data_size, pSlice_desc->m_num_blocks_x, pSlice_desc->m_num_blocks_y, pSlice_desc->m_orig_width, pSlice_desc->m_orig_height, pSlice_desc->m_level_index,
+				pSlice_desc->m_file_ofs, pSlice_desc->m_file_size,
+				decode_flags, basis_file_has_alpha_slices, pHeader->m_tex_type == cBASISTexTypeVideoFrames, output_row_pitch_in_blocks_or_pixels, pState, output_rows_in_pixels);
+		}
+		else if (pHeader->m_tex_format == (int)basis_tex_format::cUASTC4x4)
 		{
 			const basis_slice_desc* pSlice_desc = &pSlice_descs[slice_index];
 
@@ -10808,20 +11163,27 @@
 			return 8;
 		case transcoder_texture_format::cTFBC7_RGBA:
 		case transcoder_texture_format::cTFBC7_ALT:
+		case transcoder_texture_format::cTFBC6H:
 		case transcoder_texture_format::cTFETC2_RGBA:
 		case transcoder_texture_format::cTFBC3_RGBA:
 		case transcoder_texture_format::cTFBC5_RG:
 		case transcoder_texture_format::cTFASTC_4x4_RGBA:
+		case transcoder_texture_format::cTFASTC_HDR_4x4_RGBA:
 		case transcoder_texture_format::cTFATC_RGBA:
 		case transcoder_texture_format::cTFFXT1_RGB:
 		case transcoder_texture_format::cTFETC2_EAC_RG11:
 			return 16;
 		case transcoder_texture_format::cTFRGBA32:
+		case transcoder_texture_format::cTFRGB_9E5:
 			return sizeof(uint32_t);
 		case transcoder_texture_format::cTFRGB565:
 		case transcoder_texture_format::cTFBGR565:
 		case transcoder_texture_format::cTFRGBA4444:
 			return sizeof(uint16_t);
+		case transcoder_texture_format::cTFRGB_HALF:
+			return sizeof(half_float) * 3;
+		case transcoder_texture_format::cTFRGBA_HALF:
+			return sizeof(half_float) * 4;
 		default:
 			assert(0);
 			BASISU_DEVEL_ERROR("basis_get_basisu_texture_format: Invalid fmt\n");
@@ -10845,17 +11207,22 @@
 		case transcoder_texture_format::cTFBC3_RGBA: return "BC3_RGBA";
 		case transcoder_texture_format::cTFBC5_RG: return "BC5_RG";
 		case transcoder_texture_format::cTFASTC_4x4_RGBA: return "ASTC_RGBA";
+		case transcoder_texture_format::cTFASTC_HDR_4x4_RGBA: return "ASTC_HDR_RGBA";
 		case transcoder_texture_format::cTFATC_RGB: return "ATC_RGB";
 		case transcoder_texture_format::cTFATC_RGBA: return "ATC_RGBA";
 		case transcoder_texture_format::cTFRGBA32: return "RGBA32";
 		case transcoder_texture_format::cTFRGB565: return "RGB565";
 		case transcoder_texture_format::cTFBGR565: return "BGR565";
 		case transcoder_texture_format::cTFRGBA4444: return "RGBA4444";
+		case transcoder_texture_format::cTFRGBA_HALF: return "RGBA_HALF";
+		case transcoder_texture_format::cTFRGB_9E5: return "RGB_9E5";
+		case transcoder_texture_format::cTFRGB_HALF: return "RGB_HALF";
 		case transcoder_texture_format::cTFFXT1_RGB: return "FXT1_RGB";
 		case transcoder_texture_format::cTFPVRTC2_4_RGB: return "PVRTC2_4_RGB";
 		case transcoder_texture_format::cTFPVRTC2_4_RGBA: return "PVRTC2_4_RGBA";
 		case transcoder_texture_format::cTFETC2_EAC_R11: return "ETC2_EAC_R11";
 		case transcoder_texture_format::cTFETC2_EAC_RG11: return "ETC2_EAC_RG11";
+		case transcoder_texture_format::cTFBC6H: return "BC6H";
 		default:
 			assert(0);
 			BASISU_DEVEL_ERROR("basis_get_basisu_texture_format: Invalid fmt\n");
@@ -10881,7 +11248,13 @@
 		case block_format::cRGB565: return "RGB565";
 		case block_format::cBGR565: return "BGR565";
 		case block_format::cRGBA4444: return "RGBA4444";
+		case block_format::cRGBA_HALF: return "RGBA_HALF";
+		case block_format::cRGB_HALF: return "RGB_HALF";
+		case block_format::cRGB_9E5: return "RGB_9E5";
 		case block_format::cUASTC_4x4: return "UASTC_4x4";
+		case block_format::cUASTC_HDR_4x4: return "UASTC_HDR_4x4";
+		case block_format::cBC6H: return "BC6H";
+		case block_format::cASTC_HDR_4x4: return "ASTC_HDR_4x4";
 		case block_format::cFXT1_RGB: return "FXT1_RGB";
 		case block_format::cPVRTC2_4_RGB: return "PVRTC2_4_RGB";
 		case block_format::cPVRTC2_4_RGBA: return "PVRTC2_4_RGBA";
@@ -10914,11 +11287,13 @@
 
 	bool basis_transcoder_format_has_alpha(transcoder_texture_format fmt)
 	{
+		// TODO: Technically ASTC_HDR does support alpha, but UASTC_HDR doesn't yet support it. Unsure what to do here.
 		switch (fmt)
 		{
 		case transcoder_texture_format::cTFETC2_RGBA:
 		case transcoder_texture_format::cTFBC3_RGBA:
 		case transcoder_texture_format::cTFASTC_4x4_RGBA:
+		case transcoder_texture_format::cTFASTC_HDR_4x4_RGBA:
 		case transcoder_texture_format::cTFBC7_RGBA:
 		case transcoder_texture_format::cTFBC7_ALT:
 		case transcoder_texture_format::cTFPVRTC1_4_RGBA:
@@ -10926,6 +11301,23 @@
 		case transcoder_texture_format::cTFATC_RGBA:
 		case transcoder_texture_format::cTFRGBA32:
 		case transcoder_texture_format::cTFRGBA4444:
+		case transcoder_texture_format::cTFRGBA_HALF:
+			return true;
+		default:
+			break;
+		}
+		return false;
+	}
+
+	bool basis_transcoder_format_is_hdr(transcoder_texture_format fmt)
+	{
+		switch (fmt)
+		{
+		case transcoder_texture_format::cTFASTC_HDR_4x4_RGBA:
+		case transcoder_texture_format::cTFBC6H:
+		case transcoder_texture_format::cTFRGBA_HALF:
+		case transcoder_texture_format::cTFRGB_HALF:
+		case transcoder_texture_format::cTFRGB_9E5:
 			return true;
 		default:
 			break;
@@ -10947,13 +11339,18 @@
 		case transcoder_texture_format::cTFETC2_RGBA: return basisu::texture_format::cETC2_RGBA;
 		case transcoder_texture_format::cTFBC3_RGBA: return basisu::texture_format::cBC3;
 		case transcoder_texture_format::cTFBC5_RG: return basisu::texture_format::cBC5;
-		case transcoder_texture_format::cTFASTC_4x4_RGBA: return basisu::texture_format::cASTC4x4;
+		case transcoder_texture_format::cTFASTC_4x4_RGBA: return basisu::texture_format::cASTC_LDR_4x4;
+		case transcoder_texture_format::cTFASTC_HDR_4x4_RGBA: return basisu::texture_format::cASTC_HDR_4x4;
+		case transcoder_texture_format::cTFBC6H: return basisu::texture_format::cBC6HUnsigned;
 		case transcoder_texture_format::cTFATC_RGB: return basisu::texture_format::cATC_RGB;
 		case transcoder_texture_format::cTFATC_RGBA: return basisu::texture_format::cATC_RGBA_INTERPOLATED_ALPHA;
 		case transcoder_texture_format::cTFRGBA32: return basisu::texture_format::cRGBA32;
 		case transcoder_texture_format::cTFRGB565: return basisu::texture_format::cRGB565;
 		case transcoder_texture_format::cTFBGR565: return basisu::texture_format::cBGR565;
 		case transcoder_texture_format::cTFRGBA4444: return basisu::texture_format::cRGBA4444;
+		case transcoder_texture_format::cTFRGBA_HALF: return basisu::texture_format::cRGBA_HALF;
+		case transcoder_texture_format::cTFRGB_9E5: return basisu::texture_format::cRGB_9E5;
+		case transcoder_texture_format::cTFRGB_HALF: return basisu::texture_format::cRGB_HALF;
 		case transcoder_texture_format::cTFFXT1_RGB: return basisu::texture_format::cFXT1_RGB;
 		case transcoder_texture_format::cTFPVRTC2_4_RGB: return basisu::texture_format::cPVRTC2_4_RGBA;
 		case transcoder_texture_format::cTFPVRTC2_4_RGBA: return basisu::texture_format::cPVRTC2_4_RGBA;
@@ -10975,6 +11372,9 @@
 		case transcoder_texture_format::cTFRGB565:
 		case transcoder_texture_format::cTFBGR565:
 		case transcoder_texture_format::cTFRGBA4444:
+		case transcoder_texture_format::cTFRGB_HALF:
+		case transcoder_texture_format::cTFRGBA_HALF:
+		case transcoder_texture_format::cTFRGB_9E5:
 			return true;
 		default:
 			break;
@@ -10995,6 +11395,9 @@
 		case block_format::cRGBA4444_COLOR:
 		case block_format::cRGBA4444_ALPHA:
 		case block_format::cRGBA4444_COLOR_OPAQUE:
+		case block_format::cRGBA_HALF:
+		case block_format::cRGB_HALF:
+		case block_format::cRGB_9E5:
 			return true;
 		default:
 			break;
@@ -11007,11 +11410,16 @@
 		switch (fmt)
 		{
 		case transcoder_texture_format::cTFRGBA32:
+		case transcoder_texture_format::cTFRGB_9E5:
 			return sizeof(uint32_t); 
 		case transcoder_texture_format::cTFRGB565:
 		case transcoder_texture_format::cTFBGR565:
 		case transcoder_texture_format::cTFRGBA4444:
 			return sizeof(uint16_t);
+		case transcoder_texture_format::cTFRGB_HALF:
+			return sizeof(half_float) * 3;
+		case transcoder_texture_format::cTFRGBA_HALF:
+			return sizeof(half_float) * 4;
 		default:
 			break;
 		}
@@ -11038,8 +11446,26 @@
 	
 	bool basis_is_format_supported(transcoder_texture_format tex_type, basis_tex_format fmt)
 	{
-		if (fmt == basis_tex_format::cUASTC4x4)
+		if (fmt == basis_tex_format::cUASTC_HDR_4x4)
 		{
+			// UASTC HDR
+#if BASISD_SUPPORT_UASTC_HDR
+			switch (tex_type)
+			{
+			case transcoder_texture_format::cTFASTC_HDR_4x4_RGBA:
+			case transcoder_texture_format::cTFBC6H:
+			case transcoder_texture_format::cTFRGBA_HALF:
+			case transcoder_texture_format::cTFRGB_HALF:
+			case transcoder_texture_format::cTFRGB_9E5:
+				return true;
+			default:
+				break;
+			}
+#endif
+		}
+		else if (fmt == basis_tex_format::cUASTC4x4)
+		{
+			// UASTC LDR
 #if BASISD_SUPPORT_UASTC
 			switch (tex_type)
 			{
@@ -11049,6 +11475,12 @@
 			case transcoder_texture_format::cTFATC_RGB:
 			case transcoder_texture_format::cTFATC_RGBA:
 			case transcoder_texture_format::cTFFXT1_RGB:
+			// UASTC LDR doesn't support transcoding to HDR formats
+			case transcoder_texture_format::cTFASTC_HDR_4x4_RGBA:
+			case transcoder_texture_format::cTFBC6H:
+			case transcoder_texture_format::cTFRGBA_HALF:
+			case transcoder_texture_format::cTFRGB_HALF:
+			case transcoder_texture_format::cTFRGB_9E5:
 				return false;
 			default:
 				return true;
@@ -11057,6 +11489,7 @@
 		}
 		else
 		{
+			// ETC1S
 			switch (tex_type)
 			{
 				// ETC1 and uncompressed are always supported.
@@ -11812,7 +12245,7 @@
 	// Encodes 3 values to output, usable for any range that uses quints and bits
 	static inline void astc_encode_quints(uint32_t* pOutput, const uint8_t* pValues, int& bit_pos, int n)
 	{
-		// First extract the trits and the bits from the 5 input values
+		// First extract the quints and the bits from the 3 input values
 		int quints = 0, bits[3];
 		const uint32_t bit_mask = (1 << n) - 1;
 		for (int i = 0; i < 3; i++)
@@ -12131,11 +12564,13 @@
 
 			return bits & ((1U << codesize) - 1U);
 		}
-
-		uint32_t byte_bit_offset = bit_offset & 7U;
-		const uint16_t w = *(const uint16_t*)(&pBuf[bit_offset >> 3U]);
-		bit_offset += codesize;
-		return (w >> byte_bit_offset)& ((1U << codesize) - 1U);
+		else
+		{
+			uint32_t byte_bit_offset = bit_offset & 7U;
+			const uint16_t w = *(const uint16_t*)(&pBuf[bit_offset >> 3U]);
+			bit_offset += codesize;
+			return (w >> byte_bit_offset) & ((1U << codesize) - 1U);
+		}
 	}
 
 	bool unpack_uastc(const uastc_block& blk, unpacked_uastc_block& unpacked, bool blue_contract_check, bool read_hints)
@@ -12170,6 +12605,7 @@
 			return false;
 
 		unpacked.m_mode = mode;
+		unpacked.m_common_pattern = 0;
 
 		uint32_t bit_ofs = g_uastc_mode_huff_codes[mode][1];
 
@@ -16663,10 +17099,12 @@
 
 		memcpy(&m_header, pData, sizeof(m_header));
 
-		// We only support UASTC and ETC1S
-		if (m_header.m_vk_format != KTX2_VK_FORMAT_UNDEFINED)
+		// We only support UASTC LDR, UASTC HDR and ETC1S.
+		// Note the DFD's contents are what we are guided by for decoding the KTX2 file, not this format field (currently).
+		if ((m_header.m_vk_format != KTX2_VK_FORMAT_UNDEFINED) && 
+			(m_header.m_vk_format != basist::KTX2_FORMAT_UASTC_4x4_SFLOAT_BLOCK))
 		{
-			BASISU_DEVEL_ERROR("ktx2_transcoder::init: KTX2 file must be in ETC1S or UASTC format\n");
+			BASISU_DEVEL_ERROR("ktx2_transcoder::init: KTX2 file must be in ETC1S or UASTC LDR/HDR format\n");
 			return false;
 		}
 
@@ -16890,6 +17328,16 @@
 			// We're assuming "DATA" means RGBA so it has alpha.
 			m_has_alpha = (m_dfd_chan0 == KTX2_DF_CHANNEL_UASTC_RGBA) || (m_dfd_chan0 == KTX2_DF_CHANNEL_UASTC_RRRG);
 		}
+		else if (m_dfd_color_model == KTX2_KDF_DF_MODEL_UASTC_HDR)
+		{
+			m_format = basist::basis_tex_format::cUASTC_HDR_4x4;
+
+			m_dfd_samples = 1;
+			m_dfd_chan0 = (ktx2_df_channel_id)((sample_channel0 >> 24) & 15);
+
+			// We're assuming "DATA" means RGBA so it has alpha.
+			m_has_alpha = (m_dfd_chan0 == KTX2_DF_CHANNEL_UASTC_RGBA) || (m_dfd_chan0 == KTX2_DF_CHANNEL_UASTC_RRRG);
+		}
 		else
 		{
 			// Unsupported DFD color model.
@@ -17167,7 +17615,8 @@
 				return false;
 			}
 		}
-		else if (m_format == basist::basis_tex_format::cUASTC4x4)
+		else if ((m_format == basist::basis_tex_format::cUASTC4x4) ||
+			     (m_format == basist::basis_tex_format::cUASTC_HDR_4x4))
 		{
 			// Compute length and offset to uncompressed 2D UASTC texture data, given the face/layer indices.
 			assert(uncomp_level_data_size == m_levels[level_index].m_uncompressed_byte_length);
@@ -17188,14 +17637,29 @@
 				return false;
 			}
 
-			if (!m_uastc_transcoder.transcode_image(fmt,
-				pOutput_blocks, output_blocks_buf_size_in_blocks_or_pixels,
-				(const uint8_t*)pUncomp_level_data + uncomp_ofs, (uint32_t)total_2D_image_size, num_blocks_x, num_blocks_y, level_width, level_height, level_index,
-				0, (uint32_t)total_2D_image_size,
-				decode_flags, m_has_alpha, m_is_video, output_row_pitch_in_blocks_or_pixels, nullptr, output_rows_in_pixels, channel0, channel1))
+			if (m_format == basist::basis_tex_format::cUASTC_HDR_4x4)
 			{
-				BASISU_DEVEL_ERROR("ktx2_transcoder::transcode_image_2D: UASTC transcode_image() failed, this is either a bug or the file is corrupted/invalid\n");
-				return false;
+				if (!m_uastc_hdr_transcoder.transcode_image(fmt,
+					pOutput_blocks, output_blocks_buf_size_in_blocks_or_pixels,
+					(const uint8_t*)pUncomp_level_data + uncomp_ofs, (uint32_t)total_2D_image_size, num_blocks_x, num_blocks_y, level_width, level_height, level_index,
+					0, (uint32_t)total_2D_image_size,
+					decode_flags, m_has_alpha, m_is_video, output_row_pitch_in_blocks_or_pixels, nullptr, output_rows_in_pixels, channel0, channel1))
+				{
+					BASISU_DEVEL_ERROR("ktx2_transcoder::transcode_image_2D: UASTC HDR transcode_image() failed, this is either a bug or the file is corrupted/invalid\n");
+					return false;
+				}
+			}
+			else
+			{
+				if (!m_uastc_transcoder.transcode_image(fmt,
+					pOutput_blocks, output_blocks_buf_size_in_blocks_or_pixels,
+					(const uint8_t*)pUncomp_level_data + uncomp_ofs, (uint32_t)total_2D_image_size, num_blocks_x, num_blocks_y, level_width, level_height, level_index,
+					0, (uint32_t)total_2D_image_size,
+					decode_flags, m_has_alpha, m_is_video, output_row_pitch_in_blocks_or_pixels, nullptr, output_rows_in_pixels, channel0, channel1))
+				{
+					BASISU_DEVEL_ERROR("ktx2_transcoder::transcode_image_2D: UASTC transcode_image() failed, this is either a bug or the file is corrupted/invalid\n");
+					return false;
+				}
 			}
 		}
 		else
@@ -17476,4 +17940,1531 @@
 #endif
 	}
 
+	//-------------------------------
+
+#ifdef BASISD_SUPPORT_UASTC_HDR
+	// This float->half conversion matches how "F32TO16" works on Intel GPU's.
+	basist::half_float float_to_half(float val)
+	{
+		union { float f; int32_t i; uint32_t u; } fi = { val };
+		const int flt_m = fi.i & 0x7FFFFF, flt_e = (fi.i >> 23) & 0xFF, flt_s = (fi.i >> 31) & 0x1;
+		int s = flt_s, e = 0, m = 0;
+
+		// inf/NaN
+		if (flt_e == 0xff)
+		{
+			e = 31;
+			if (flt_m != 0) // NaN
+				m = 1;
+		}
+		// not zero or denormal
+		else if (flt_e != 0)
+		{
+			int new_exp = flt_e - 127;
+			if (new_exp > 15)
+				e = 31;
+			else if (new_exp < -14)
+				m = lrintf((1 << 24) * fabsf(fi.f));
+			else
+			{
+				e = new_exp + 15;
+				m = lrintf(flt_m * (1.0f / ((float)(1 << 13))));
+			}
+		}
+
+		assert((0 <= m) && (m <= 1024));
+		if (m == 1024)
+		{
+			e++;
+			m = 0;
+		}
+
+		assert((s >= 0) && (s <= 1));
+		assert((e >= 0) && (e <= 31));
+		assert((m >= 0) && (m <= 1023));
+
+		basist::half_float result = (basist::half_float)((s << 15) | (e << 10) | m);
+		return result;
+	}
+		
+	//------------------------------------------------------------------------------------------------
+	// HDR support
+	// 
+	// Originally from bc6h_enc.cpp
+	// BC6H decoder fuzzed vs. DirectXTex's for unsigned/signed
+
+	const uint8_t g_bc6h_mode_sig_bits[NUM_BC6H_MODES][4] = // base bits, r, g, b
+	{
+		// 2 subsets
+		{ 10, 5, 5, 5, },	// 0, mode 1 in MS/D3D docs
+		{ 7, 6, 6, 6, },	// 1
+		{ 11, 5, 4, 4, },	// 2
+		{ 11, 4, 5, 4, },	// 3
+		{ 11, 4, 4, 5, },	// 4
+		{ 9, 5, 5, 5, },	// 5
+		{ 8, 6, 5, 5, },	// 6
+		{ 8, 5, 6, 5, },	// 7
+		{ 8, 5, 5, 6, },	// 8
+		{ 6, 6, 6, 6, },	// 9, endpoints not delta encoded, mode 10 in MS/D3D docs
+		// 1 subset
+		{ 10, 10, 10, 10, }, // 10, endpoints not delta encoded, mode 11 in MS/D3D docs
+		{ 11, 9, 9, 9, },	// 11
+		{ 12, 8, 8, 8, },	// 12
+		{ 16, 4, 4, 4, }	// 13, also useful for solid blocks
+	};
+
+	const int8_t g_bc6h_mode_lookup[32] = { 0, 1, 2, 10, 0, 1, 3, 11, 0, 1, 4, 12, 0, 1, 5, 13, 0, 1, 6, -1, 0, 1, 7, -1, 0, 1, 8, -1, 0, 1, 9, -1 };
+
+	const bc6h_bit_layout g_bc6h_bit_layouts[NUM_BC6H_MODES][MAX_BC6H_LAYOUT_INDEX] =
+	{
+		// comp_index, subset*2+lh_index, last_bit, first_bit
+		//------------------------        mode 0: 2 subsets, Weight bits: 46 bits, Endpoint bits: 75 bits (10.555, 10.555, 10.555), delta            
+		{ { 1, 2, 4, -1 }, { 2, 2, 4, -1 }, { 2, 3, 4, -1 }, { 0, 0, 9, 0 }, { 1, 0, 9, 0 }, { 2, 0, 9, 0 }, { 0, 1, 4, 0 },
+		{ 1, 3, 4, -1 }, { 1, 2, 3, 0 }, { 1, 1, 4, 0 }, { 2, 3, 0, -1 }, { 1, 3, 3, 0 }, { 2, 1, 4, 0 }, { 2, 3, 1, -1 },
+		{ 2, 2, 3, 0 }, { 0, 2, 4, 0 }, { 2, 3, 2, -1 }, { 0, 3, 4, 0 }, { 2, 3, 3, -1 }, { 3, -1, 4, 0 }, {-1, 0, 0, 0} },
+		//------------------------        mode 1: 2 subsets, Weight bits: 46 bits, Endpoint bits: 75 bits (7.666, 7.666, 7.666), delta
+		{ { 1, 2, 5, -1 },{ 1, 3, 4, -1 },{ 1, 3, 5, -1 },{ 0, 0, 6, 0 },{ 2, 3, 0, -1 },{ 2, 3, 1, -1 },{ 2, 2, 4, -1 },
+		{ 1, 0, 6, 0 },{ 2, 2, 5, -1 },{ 2, 3, 2, -1 },{ 1, 2, 4, -1 },{ 2, 0, 6, 0 },{ 2, 3, 3, -1 },{ 2, 3, 5, -1 },
+		{ 2, 3, 4, -1 },{ 0, 1, 5, 0 },{ 1, 2, 3, 0 },{ 1, 1, 5, 0 },{ 1, 3, 3, 0 },{ 2, 1, 5, 0 },{ 2, 2, 3, 0 },{ 0, 2, 5, 0 },
+		{ 0, 3, 5, 0 },{ 3, -1, 4, 0 }, {-1, 0, 0, 0} },
+		//------------------------        mode 2: 2 subsets, Weight bits: 46 bits, Endpoint bits: 72 bits (11.555, 11.444, 11.444), delta
+		{ { 0, 0, 9, 0 },{ 1, 0, 9, 0 },{ 2, 0, 9, 0 },{ 0, 1, 4, 0 },{ 0, 0, 10, -1 },{ 1, 2, 3, 0 },{ 1, 1, 3, 0 },{ 1, 0, 10, -1 },
+		{ 2, 3, 0, -1 },{ 1, 3, 3, 0 },{ 2, 1, 3, 0 },{ 2, 0, 10, -1 },{ 2, 3, 1, -1 },{ 2, 2, 3, 0 },{ 0, 2, 4, 0 },{ 2, 3, 2, -1 },
+		{ 0, 3, 4, 0 },{ 2, 3, 3, -1 },{ 3, -1, 4, 0 }, {-1, 0, 0, 0} },
+		//------------------------        mode 3: 2 subsets, Weight bits: 46 bits, Endpoint bits: 72 bits (11.444, 11.555, 11.444), delta
+		{ { 0, 0, 9, 0 },{ 1, 0, 9, 0 },{ 2, 0, 9, 0 },{ 0, 1, 3, 0 },{ 0, 0, 10, -1 },{ 1, 3, 4, -1 },{ 1, 2, 3, 0 },{ 1, 1, 4, 0 },
+		{ 1, 0, 10, -1 },{ 1, 3, 3, 0 },{ 2, 1, 3, 0 },{ 2, 0, 10, -1 },{ 2, 3, 1, -1 },{ 2, 2, 3, 0 },{ 0, 2, 3, 0 },{ 2, 3, 0, -1 },
+		{ 2, 3, 2, -1 },{ 0, 3, 3, 0 },{ 1, 2, 4, -1 },{ 2, 3, 3, -1 },{ 3, -1, 4, 0 }, {-1, 0, 0, 0} },
+		//------------------------        mode 4: 2 subsets, Weight bits: 46 bits, Endpoint bits: 72 bits (11.444, 11.444, 11.555), delta
+		{ { 0, 0, 9, 0 },{ 1, 0, 9, 0 },{ 2, 0, 9, 0 },{ 0, 1, 3, 0 },{ 0, 0, 10, -1 },{ 2, 2, 4, -1 },{ 1, 2, 3, 0 },{ 1, 1, 3, 0 },
+		{ 1, 0, 10, -1 },{ 2, 3, 0, -1 },{ 1, 3, 3, 0 },{ 2, 1, 4, 0 },{ 2, 0, 10, -1 },{ 2, 2, 3, 0 },{ 0, 2, 3, 0 },{ 2, 3, 1, -1 },
+		{ 2, 3, 2, -1 },{ 0, 3, 3, 0 },{ 2, 3, 4, -1 },{ 2, 3, 3, -1 },{ 3, -1, 4, 0 }, {-1, 0, 0, 0} },
+		//------------------------        mode 5: 2 subsets, Weight bits: 46 bits, Endpoint bits: 72 bits (9.555, 9.555, 9.555), delta
+		{ { 0, 0, 8, 0 },{ 2, 2, 4, -1 },{ 1, 0, 8, 0 },{ 1, 2, 4, -1 },{ 2, 0, 8, 0 },{ 2, 3, 4, -1 },{ 0, 1, 4, 0 },{ 1, 3, 4, -1 },
+		{ 1, 2, 3, 0 },{ 1, 1, 4, 0 },{ 2, 3, 0, -1 },{ 1, 3, 3, 0 },{ 2, 1, 4, 0 },{ 2, 3, 1, -1 },{ 2, 2, 3, 0 },{ 0, 2, 4, 0 },
+		{ 2, 3, 2, -1 },{ 0, 3, 4, 0 },{ 2, 3, 3, -1 },{ 3, -1, 4, 0 }, {-1, 0, 0, 0} },
+		//------------------------        mode 6: 2 subsets, Weight bits: 46 bits, Endpoint bits: 72 bits (8.666, 8.555, 8.555), delta
+		{ { 0, 0, 7, 0 },{ 1, 3, 4, -1 },{ 2, 2, 4, -1 },{ 1, 0, 7, 0 },{ 2, 3, 2, -1 },{ 1, 2, 4, -1 },{ 2, 0, 7, 0 },{ 2, 3, 3, -1 },
+		{ 2, 3, 4, -1 },{ 0, 1, 5, 0 },{ 1, 2, 3, 0 },{ 1, 1, 4, 0 },{ 2, 3, 0, -1 },{ 1, 3, 3, 0 },{ 2, 1, 4, 0 },{ 2, 3, 1, -1 },
+		{ 2, 2, 3, 0 },{ 0, 2, 5, 0 },{ 0, 3, 5, 0 },{ 3, -1, 4, 0 }, {-1, 0, 0, 0} },
+		//------------------------        mode 7: 2 subsets, Weight bits: 46 bits, Endpoints bits: 72 bits (8.555, 8.666, 8.555), delta
+		{ { 0, 0, 7, 0 },{ 2, 3, 0, -1 },{ 2, 2, 4, -1 },{ 1, 0, 7, 0 },{ 1, 2, 5, -1 },{ 1, 2, 4, -1 },{ 2, 0, 7, 0 },{ 1, 3, 5, -1 },
+		{ 2, 3, 4, -1 },{ 0, 1, 4, 0 },{ 1, 3, 4, -1 },{ 1, 2, 3, 0 },{ 1, 1, 5, 0 },{ 1, 3, 3, 0 },{ 2, 1, 4, 0 },{ 2, 3, 1, -1 },
+		{ 2, 2, 3, 0 },{ 0, 2, 4, 0 },{ 2, 3, 2, -1 },{ 0, 3, 4, 0 },{ 2, 3, 3, -1 },{ 3, -1, 4, 0 }, {-1, 0, 0, 0} },
+		//------------------------        mode 8: 2 subsets, Weight bits: 46 bits, Endpoint bits: 72 bits (8.555, 8.555, 8.666), delta
+		{ { 0, 0, 7, 0 },{ 2, 3, 1, -1 },{ 2, 2, 4, -1 },{ 1, 0, 7, 0 },{ 2, 2, 5, -1 },{ 1, 2, 4, -1 },{ 2, 0, 7, 0 },{ 2, 3, 5, -1 },
+		{ 2, 3, 4, -1 },{ 0, 1, 4, 0 },{ 1, 3, 4, -1 },{ 1, 2, 3, 0 },{ 1, 1, 4, 0 },{ 2, 3, 0, -1 },{ 1, 3, 3, 0 },{ 2, 1, 5, 0 },
+		{ 2, 2, 3, 0 },{ 0, 2, 4, 0 },{ 2, 3, 2, -1 },{ 0, 3, 4, 0 },{ 2, 3, 3, -1 },{ 3, -1, 4, 0 }, {-1, 0, 0, 0} },
+		//------------------------        mode 9: 2 subsets, Weight bits: 46 bits, Endpoint bits: 72 bits (6.6.6.6, 6.6.6.6, 6.6.6.6), NO delta
+		{ { 0, 0, 5, 0 },{ 1, 3, 4, -1 },{ 2, 3, 0, -1 },{ 2, 3, 1, -1 },{ 2, 2, 4, -1 },{ 1, 0, 5, 0 },{ 1, 2, 5, -1 },{ 2, 2, 5, -1 },
+		{ 2, 3, 2, -1 },{ 1, 2, 4, -1 },{ 2, 0, 5, 0 },{ 1, 3, 5, -1 },{ 2, 3, 3, -1 },{ 2, 3, 5, -1 },{ 2, 3, 4, -1 },{ 0, 1, 5, 0 },
+		{ 1, 2, 3, 0 },{ 1, 1, 5, 0 },{ 1, 3, 3, 0 },{ 2, 1, 5, 0 },{ 2, 2, 3, 0 },{ 0, 2, 5, 0 },{ 0, 3, 5, 0 },{ 3, -1, 4, 0 }, {-1, 0, 0, 0} },
+		//------------------------        mode 10: 1 subset, Weight bits: 63 bits, Endpoint bits: 60 bits (10.10, 10.10, 10.10), NO delta
+		{ { 0, 0, 9, 0 },{ 1, 0, 9, 0 },{ 2, 0, 9, 0 },{ 0, 1, 9, 0 },{ 1, 1, 9, 0 },{ 2, 1, 9, 0 }, {-1, 0, 0, 0} },
+		//------------------------        mode 11: 1 subset, Weight bits: 63 bits, Endpoint bits: 60 bits (11.9, 11.9, 11.9), delta
+		{ { 0, 0, 9, 0 },{ 1, 0, 9, 0 },{ 2, 0, 9, 0 },{ 0, 1, 8, 0 },{ 0, 0, 10, -1 },{ 1, 1, 8, 0 },{ 1, 0, 10, -1 },{ 2, 1, 8, 0 },{ 2, 0, 10, -1 }, {-1, 0, 0, 0} },
+		//------------------------        mode 12: 1 subset, Weight bits: 63 bits, Endpoint bits: 60 bits (12.8, 12.8, 12.8), delta
+		{ { 0, 0, 9, 0 },{ 1, 0, 9, 0 },{ 2, 0, 9, 0 },{ 0, 1, 7, 0 },{ 0, 0, 10, 11 },{ 1, 1, 7, 0 },{ 1, 0, 10, 11 },{ 2, 1, 7, 0 },{ 2, 0, 10, 11 }, {-1, 0, 0, 0} },
+		//------------------------        mode 13: 1 subset, Weight bits: 63 bits, Endpoint bits: 60 bits (16.4, 16.4, 16.4), delta
+		{ { 0, 0, 9, 0 },{ 1, 0, 9, 0 },{ 2, 0, 9, 0 },{ 0, 1, 3, 0 },{ 0, 0, 10, 15 },{ 1, 1, 3, 0 },{ 1, 0, 10, 15 },{ 2, 1, 3, 0 },{ 2, 0, 10, 15 }, {-1, 0, 0, 0} }
+	};
+
+	// The same as the first 32 2-subset patterns in BC7. 
+	// Bit 7 is a flag indicating that the weight uses 1 less bit than usual.
+	const uint8_t g_bc6h_2subset_patterns[TOTAL_BC6H_PARTITION_PATTERNS][4][4] = // [pat][y][x]
+	{
+		{ {0x80, 0, 1, 1}, { 0, 0, 1, 1 }, { 0, 0, 1, 1 }, { 0, 0, 1, 0x81 }}, { {0x80, 0, 0, 1}, {0, 0, 0, 1}, {0, 0, 0, 1}, {0, 0, 0, 0x81} },
+		{ {0x80, 1, 1, 1}, {0, 1, 1, 1}, {0, 1, 1, 1}, {0, 1, 1, 0x81} }, { {0x80, 0, 0, 1}, {0, 0, 1, 1}, {0, 0, 1, 1}, {0, 1, 1, 0x81} },
+		{ {0x80, 0, 0, 0}, {0, 0, 0, 1}, {0, 0, 0, 1}, {0, 0, 1, 0x81} }, { {0x80, 0, 1, 1}, {0, 1, 1, 1}, {0, 1, 1, 1}, {1, 1, 1, 0x81} },
+		{ {0x80, 0, 0, 1}, {0, 0, 1, 1}, {0, 1, 1, 1}, {1, 1, 1, 0x81} }, { {0x80, 0, 0, 0}, {0, 0, 0, 1}, {0, 0, 1, 1}, {0, 1, 1, 0x81} },
+		{ {0x80, 0, 0, 0}, {0, 0, 0, 0}, {0, 0, 0, 1}, {0, 0, 1, 0x81} }, { {0x80, 0, 1, 1}, {0, 1, 1, 1}, {1, 1, 1, 1}, {1, 1, 1, 0x81} },
+		{ {0x80, 0, 0, 0}, {0, 0, 0, 1}, {0, 1, 1, 1}, {1, 1, 1, 0x81} }, { {0x80, 0, 0, 0}, {0, 0, 0, 0}, {0, 0, 0, 1}, {0, 1, 1, 0x81} },
+		{ {0x80, 0, 0, 1}, {0, 1, 1, 1}, {1, 1, 1, 1}, {1, 1, 1, 0x81} }, { {0x80, 0, 0, 0}, {0, 0, 0, 0}, {1, 1, 1, 1}, {1, 1, 1, 0x81} },
+		{ {0x80, 0, 0, 0}, {1, 1, 1, 1}, {1, 1, 1, 1}, {1, 1, 1, 0x81} }, { {0x80, 0, 0, 0}, {0, 0, 0, 0}, {0, 0, 0, 0}, {1, 1, 1, 0x81} },
+		{ {0x80, 0, 0, 0}, {1, 0, 0, 0}, {1, 1, 1, 0}, {1, 1, 1, 0x81} }, { {0x80, 1, 0x81, 1}, {0, 0, 0, 1}, {0, 0, 0, 0}, {0, 0, 0, 0} },
+		{ {0x80, 0, 0, 0}, {0, 0, 0, 0}, {0x81, 0, 0, 0}, {1, 1, 1, 0} }, { {0x80, 1, 0x81, 1}, {0, 0, 1, 1}, {0, 0, 0, 1}, {0, 0, 0, 0} },
+		{ {0x80, 0, 0x81, 1}, {0, 0, 0, 1}, {0, 0, 0, 0}, {0, 0, 0, 0} }, { {0x80, 0, 0, 0}, {1, 0, 0, 0}, {0x81, 1, 0, 0}, {1, 1, 1, 0} },
+		{ {0x80, 0, 0, 0}, {0, 0, 0, 0}, {0x81, 0, 0, 0}, {1, 1, 0, 0} }, { {0x80, 1, 1, 1}, {0, 0, 1, 1}, {  0, 0, 1, 1}, {0, 0, 0, 0x81} },
+		{ {0x80, 0, 0x81, 1}, {0, 0, 0, 1}, {0, 0, 0, 1}, {0, 0, 0, 0} }, { {0x80, 0, 0, 0}, {1, 0, 0, 0}, {0x81, 0, 0, 0}, {1, 1, 0, 0} },
+		{ {0x80, 1, 0x81, 0}, {0, 1, 1, 0}, {0, 1, 1, 0}, {0, 1, 1, 0} }, { {0x80, 0, 0x81, 1}, {0, 1, 1, 0}, {0, 1, 1, 0}, {1, 1, 0, 0} },
+		{ {0x80, 0, 0, 1}, {0, 1, 1, 1}, {0x81, 1, 1, 0}, {1, 0, 0, 0} }, { {0x80, 0, 0, 0}, {1, 1, 1, 1}, {0x81, 1, 1, 1}, {0, 0, 0, 0} },
+		{ {0x80, 1, 0x81, 1}, {0, 0, 0, 1}, {1, 0, 0, 0}, {1, 1, 1, 0} }, { {0x80, 0, 0x81, 1}, {1, 0, 0, 1}, {1, 0, 0, 1}, {1, 1, 0, 0} }
+	};
+
+	const uint8_t g_bc6h_weight3[8] = { 0, 9, 18, 27, 37, 46, 55, 64 };
+	const uint8_t g_bc6h_weight4[16] = { 0, 4, 9, 13, 17, 21, 26, 30, 34, 38, 43, 47, 51, 55, 60, 64 };
+		
+	struct bc6h_logical_block
+	{
+		uint32_t m_mode;
+		uint32_t m_partition_pattern;	// must be 0 if 1 subset
+		uint32_t m_endpoints[3][4];		// [comp][subset*2+lh_index] - must be already properly packed
+		uint8_t m_weights[16];			// weights must be of the proper size, taking into account skipped MSB's which must be 0
+
+		void clear()
+		{
+			basisu::clear_obj(*this);
+		}
+	};
+
+	static inline void write_bits(uint64_t val, uint32_t num_bits, uint32_t& bit_pos, uint64_t& l, uint64_t& h)
+	{
+		assert((num_bits) && (num_bits < 64) && (bit_pos < 128));
+		assert(val < (1ULL << num_bits));
+
+		if (bit_pos < 64)
+		{
+			l |= (val << bit_pos);
+
+			if ((bit_pos + num_bits) > 64)
+				h |= (val >> (64 - bit_pos));
+		}
+		else
+		{
+			h |= (val << (bit_pos - 64));
+		}
+
+		bit_pos += num_bits;
+		assert(bit_pos <= 128);
+	}
+
+	static inline void write_rev_bits(uint64_t val, uint32_t num_bits, uint32_t& bit_pos, uint64_t& l, uint64_t& h)
+	{
+		assert((num_bits) && (num_bits < 64) && (bit_pos < 128));
+		assert(val < (1ULL << num_bits));
+
+		for (uint32_t i = 0; i < num_bits; i++)
+			write_bits((val >> (num_bits - 1u - i)) & 1, 1, bit_pos, l, h);
+	}
+
+	static void pack_bc6h_block(bc6h_block& dst_blk, bc6h_logical_block& log_blk)
+	{
+		const uint8_t s_mode_bits[NUM_BC6H_MODES] = { 0b00, 0b01, 0b00010, 0b00110, 0b01010, 0b01110, 0b10010, 0b10110, 0b11010, 0b11110, 0b00011, 0b00111, 0b01011, 0b01111 };
+
+		const uint32_t mode = log_blk.m_mode;
+		assert(mode < NUM_BC6H_MODES);
+
+		uint64_t l = s_mode_bits[mode], h = 0;
+		uint32_t bit_pos = (mode >= 2) ? 5 : 2;
+
+		const uint32_t num_subsets = (mode >= BC6H_FIRST_1SUBSET_MODE_INDEX) ? 1 : 2;
+
+		assert(((num_subsets == 2) && (log_blk.m_partition_pattern < TOTAL_BC6H_PARTITION_PATTERNS)) ||
+			((num_subsets == 1) && (!log_blk.m_partition_pattern)));
+
+		// Sanity checks
+		for (uint32_t c = 0; c < 3; c++)
+		{
+			assert(log_blk.m_endpoints[c][0] < (1u << g_bc6h_mode_sig_bits[mode][0]));	   // 1st subset l, base bits
+			assert(log_blk.m_endpoints[c][1] < (1u << g_bc6h_mode_sig_bits[mode][c + 1])); // 1st subset h, these are deltas except for modes 9,10
+			assert(log_blk.m_endpoints[c][2] < (1u << g_bc6h_mode_sig_bits[mode][c + 1])); // 2nd subset l
+			assert(log_blk.m_endpoints[c][3] < (1u << g_bc6h_mode_sig_bits[mode][c + 1])); // 2nd subset h
+		}
+
+		const bc6h_bit_layout* pLayout = &g_bc6h_bit_layouts[mode][0];
+
+		while (pLayout->m_comp != -1)
+		{
+			uint32_t v = (pLayout->m_comp == 3) ? log_blk.m_partition_pattern : log_blk.m_endpoints[pLayout->m_comp][pLayout->m_index];
+
+			if (pLayout->m_first_bit == -1)
+			{
+				write_bits((v >> pLayout->m_last_bit) & 1, 1, bit_pos, l, h);
+			}
+			else
+			{
+				const uint32_t total_bits = basisu::iabs(pLayout->m_last_bit - pLayout->m_first_bit) + 1;
+
+				v >>= basisu::minimum(pLayout->m_first_bit, pLayout->m_last_bit);
+				v &= ((1 << total_bits) - 1);
+
+				if (pLayout->m_first_bit > pLayout->m_last_bit)
+					write_rev_bits(v, total_bits, bit_pos, l, h);
+				else
+					write_bits(v, total_bits, bit_pos, l, h);
+			}
+
+			pLayout++;
+		}
+
+		const uint32_t num_mode_sel_bits = (num_subsets == 1) ? 4 : 3;
+		const uint8_t* pPat = &g_bc6h_2subset_patterns[log_blk.m_partition_pattern][0][0];
+
+		for (uint32_t i = 0; i < 16; i++)
+		{
+			const uint32_t sel = log_blk.m_weights[i];
+
+			uint32_t num_bits = num_mode_sel_bits;
+			if (num_subsets == 2)
+			{
+				const uint32_t subset_index = pPat[i];
+				num_bits -= (subset_index >> 7);
+			}
+			else if (!i)
+			{
+				num_bits--;
+			}
+
+			assert(sel < (1u << num_bits));
+
+			write_bits(sel, num_bits, bit_pos, l, h);
+		}
+
+		assert(bit_pos == 128);
+
+		basisu::write_le_dword(&dst_blk.m_bytes[0], (uint32_t)l);
+		basisu::write_le_dword(&dst_blk.m_bytes[4], (uint32_t)(l >> 32u));
+		basisu::write_le_dword(&dst_blk.m_bytes[8], (uint32_t)h);
+		basisu::write_le_dword(&dst_blk.m_bytes[12], (uint32_t)(h >> 32u));
+	}
+
+#if 0
+	static inline uint32_t bc6h_blog_dequantize_to_blog16(uint32_t comp, uint32_t bits_per_comp)
+	{
+		int unq;
+
+		if (bits_per_comp >= 15)
+			unq = comp;
+		else if (comp == 0)
+			unq = 0;
+		else if (comp == ((1u << bits_per_comp) - 1u))
+			unq = 0xFFFFu;
+		else
+			unq = ((comp << 16u) + 0x8000u) >> bits_per_comp;
+
+		return unq;
+	}
+#endif
+
+	// Suboptimal, but very close.
+	static inline uint32_t bc6h_half_to_blog(half_float h, uint32_t num_bits)
+	{
+		assert(h <= MAX_BC6H_HALF_FLOAT_AS_UINT);
+		return (h * 64 + 30) / (31 * (1 << (16 - num_bits)));
+	}
+
+	// 6,7,8,9,10,11,12
+	const uint32_t BC6H_BLOG_TAB_MIN = 6;
+	const uint32_t BC6H_BLOG_TAB_MAX = 12;
+	//const uint32_t BC6H_BLOG_TAB_NUM = BC6H_BLOG_TAB_MAX - BC6H_BLOG_TAB_MIN + 1;
+	
+	// Handles 16, or 6-12 bits. Others assert.
+	static inline uint32_t half_to_blog_tab(half_float h, uint32_t num_bits)
+	{
+		BASISU_NOTE_UNUSED(BC6H_BLOG_TAB_MIN);
+		BASISU_NOTE_UNUSED(BC6H_BLOG_TAB_MAX);
+
+		assert(h <= MAX_BC6H_HALF_FLOAT_AS_UINT);
+
+		if (num_bits == 16)
+		{
+			return bc6h_half_to_blog(h, 16);
+		}
+		else
+		{
+			assert((num_bits >= BC6H_BLOG_TAB_MIN) && (num_bits <= BC6H_BLOG_TAB_MAX));
+			
+			// Note: This used to be done using a table lookup, but it required ~224KB of tables. This isn't quite as accurate, but the error is very slight (+-1 half values as ints).
+			return bc6h_half_to_blog(h, num_bits);
+		}
+	}
+
+	bool g_bc6h_enc_initialized;
+
+	void bc6h_enc_init()
+	{
+		if (g_bc6h_enc_initialized)
+			return;
+
+		g_bc6h_enc_initialized = true;
+	}
+
+	// mode 10, 4-bit weights
+	void bc6h_enc_block_mode10(bc6h_block* pPacked_block, const half_float pEndpoints[3][2], const uint8_t* pWeights)
+	{
+		assert(g_bc6h_enc_initialized);
+
+		for (uint32_t i = 0; i < 16; i++)
+		{
+			assert(pWeights[i] <= 15);
+		}
+
+		bc6h_logical_block log_blk;
+		log_blk.clear();
+
+		// Convert half endpoints to blog10 (mode 10 doesn't use delta encoding)
+		for (uint32_t c = 0; c < 3; c++)
+		{
+			log_blk.m_endpoints[c][0] = half_to_blog_tab(pEndpoints[c][0], 10);
+			log_blk.m_endpoints[c][1] = half_to_blog_tab(pEndpoints[c][1], 10);
+		}
+
+		memcpy(log_blk.m_weights, pWeights, 16);
+
+		if (log_blk.m_weights[0] & 8)
+		{
+			for (uint32_t i = 0; i < 16; i++)
+				log_blk.m_weights[i] = 15 - log_blk.m_weights[i];
+
+			for (uint32_t c = 0; c < 3; c++)
+			{
+				std::swap(log_blk.m_endpoints[c][0], log_blk.m_endpoints[c][1]);
+			}
+		}
+
+		log_blk.m_mode = BC6H_FIRST_1SUBSET_MODE_INDEX;
+		pack_bc6h_block(*pPacked_block, log_blk);
+	}
+
+	// Tries modes 11-13 (delta endpoint) encoding, falling back to mode 10 only when necessary, 4-bit weights
+	void bc6h_enc_block_1subset_4bit_weights(bc6h_block* pPacked_block, const half_float pEndpoints[3][2], const uint8_t* pWeights)
+	{
+		assert(g_bc6h_enc_initialized);
+
+		for (uint32_t i = 0; i < 16; i++)
+		{
+			assert(pWeights[i] <= 15);
+		}
+
+		bc6h_logical_block log_blk;
+		log_blk.clear();
+
+		for (uint32_t mode = BC6H_LAST_MODE_INDEX; mode > BC6H_FIRST_1SUBSET_MODE_INDEX; mode--)
+		{
+			const uint32_t num_base_bits = g_bc6h_mode_sig_bits[mode][0], num_delta_bits = g_bc6h_mode_sig_bits[mode][1];
+			const int base_bitmask = (1 << num_base_bits) - 1;
+			const int delta_bitmask = (1 << num_delta_bits) - 1;
+			BASISU_NOTE_UNUSED(base_bitmask);
+
+			assert(num_delta_bits < num_base_bits);
+			assert((num_delta_bits == g_bc6h_mode_sig_bits[mode][2]) && (num_delta_bits == g_bc6h_mode_sig_bits[mode][3]));
+
+			uint32_t blog_endpoints[3][2];
+
+			// Convert half endpoints to blog 16, 12, or 11
+			for (uint32_t c = 0; c < 3; c++)
+			{
+				blog_endpoints[c][0] = half_to_blog_tab(pEndpoints[c][0], num_base_bits);
+				assert((int)blog_endpoints[c][0] <= base_bitmask);
+
+				blog_endpoints[c][1] = half_to_blog_tab(pEndpoints[c][1], num_base_bits);
+				assert((int)blog_endpoints[c][1] <= base_bitmask);
+			}
+
+			// Copy weights
+			memcpy(log_blk.m_weights, pWeights, 16);
+
+			// Ensure first weight MSB is 0
+			if (log_blk.m_weights[0] & 8)
+			{
+				// Invert weights
+				for (uint32_t i = 0; i < 16; i++)
+					log_blk.m_weights[i] = 15 - log_blk.m_weights[i];
+
+				// Swap blog quantized endpoints
+				for (uint32_t c = 0; c < 3; c++)
+				{
+					std::swap(blog_endpoints[c][0], blog_endpoints[c][1]);
+				}
+			}
+
+			const int max_delta = (1 << (num_delta_bits - 1)) - 1;
+			const int min_delta = -(max_delta + 1);
+			assert((max_delta - min_delta) == delta_bitmask);
+
+			bool failed_flag = false;
+			for (uint32_t c = 0; c < 3; c++)
+			{
+				log_blk.m_endpoints[c][0] = blog_endpoints[c][0];
+
+				int delta = (int)blog_endpoints[c][1] - (int)blog_endpoints[c][0];
+				if ((delta < min_delta) || (delta > max_delta))
+				{
+					failed_flag = true;
+					break;
+				}
+
+				log_blk.m_endpoints[c][1] = delta & delta_bitmask;
+			}
+
+			if (failed_flag)
+				continue;
+
+			log_blk.m_mode = mode;
+			pack_bc6h_block(*pPacked_block, log_blk);
+						
+			return;
+		}
+
+		// Worst case fall back to mode 10, which can handle any endpoints
+		bc6h_enc_block_mode10(pPacked_block, pEndpoints, pWeights);
+	}
+
+	// Mode 9 (direct endpoint encoding), 3-bit weights, but only 1 subset
+	void bc6h_enc_block_1subset_mode9_3bit_weights(bc6h_block* pPacked_block, const half_float pEndpoints[3][2], const uint8_t* pWeights)
+	{
+		assert(g_bc6h_enc_initialized);
+
+		for (uint32_t i = 0; i < 16; i++)
+		{
+			assert(pWeights[i] <= 7);
+		}
+
+		bc6h_logical_block log_blk;
+		log_blk.clear();
+
+		// Convert half endpoints to blog6 (mode 9 doesn't use delta encoding)
+		for (uint32_t c = 0; c < 3; c++)
+		{
+			log_blk.m_endpoints[c][0] = half_to_blog_tab(pEndpoints[c][0], 6);
+			log_blk.m_endpoints[c][2] = log_blk.m_endpoints[c][0];
+
+			log_blk.m_endpoints[c][1] = half_to_blog_tab(pEndpoints[c][1], 6);
+			log_blk.m_endpoints[c][3] = log_blk.m_endpoints[c][1];
+		}
+
+		memcpy(log_blk.m_weights, pWeights, 16);
+
+		const uint32_t pat_index = 0;
+		const uint8_t* pPat = &g_bc6h_2subset_patterns[pat_index][0][0];
+
+		if (log_blk.m_weights[0] & 4)
+		{
+			for (uint32_t c = 0; c < 3; c++)
+				std::swap(log_blk.m_endpoints[c][0], log_blk.m_endpoints[c][1]);
+
+			for (uint32_t i = 0; i < 16; i++)
+				if ((pPat[i] & 0x7F) == 0)
+					log_blk.m_weights[i] = 7 - log_blk.m_weights[i];
+		}
+
+		if (log_blk.m_weights[15] & 4)
+		{
+			for (uint32_t c = 0; c < 3; c++)
+				std::swap(log_blk.m_endpoints[c][2], log_blk.m_endpoints[c][3]);
+
+			for (uint32_t i = 0; i < 16; i++)
+				if ((pPat[i] & 0x7F) == 1)
+					log_blk.m_weights[i] = 7 - log_blk.m_weights[i];
+		}
+
+		log_blk.m_mode = 9;
+		log_blk.m_partition_pattern = pat_index;
+		pack_bc6h_block(*pPacked_block, log_blk);
+	}
+
+	// Tries modes 0-8, falls back to mode 9
+	void bc6h_enc_block_1subset_3bit_weights(bc6h_block* pPacked_block, const half_float pEndpoints[3][2], const uint8_t* pWeights)
+	{
+		assert(g_bc6h_enc_initialized);
+
+		for (uint32_t i = 0; i < 16; i++)
+		{
+			assert(pWeights[i] <= 7);
+		}
+
+		bc6h_logical_block log_blk;
+		log_blk.clear();
+
+		for (uint32_t mode_iter = 0; mode_iter <= 8; mode_iter++)
+		{
+			static const int s_mode_order[9] = { 2, 3, 4, 0,  5, 6, 7, 8,  1 }; // ordered from largest base bits to least
+			const uint32_t mode = s_mode_order[mode_iter];
+
+			const uint32_t num_base_bits = g_bc6h_mode_sig_bits[mode][0];
+			const int base_bitmask = (1 << num_base_bits) - 1;
+			BASISU_NOTE_UNUSED(base_bitmask);
+
+			const uint32_t num_delta_bits[3] = { g_bc6h_mode_sig_bits[mode][1], g_bc6h_mode_sig_bits[mode][2], g_bc6h_mode_sig_bits[mode][3] };
+			const int delta_bitmasks[3] = { (1 << num_delta_bits[0]) - 1, (1 << num_delta_bits[1]) - 1, (1 << num_delta_bits[2]) - 1 };
+
+			uint32_t blog_endpoints[3][4];
+
+			// Convert half endpoints to blog 7-11
+			for (uint32_t c = 0; c < 3; c++)
+			{
+				blog_endpoints[c][0] = half_to_blog_tab(pEndpoints[c][0], num_base_bits);
+				blog_endpoints[c][2] = blog_endpoints[c][0];
+				assert((int)blog_endpoints[c][0] <= base_bitmask);
+
+				blog_endpoints[c][1] = half_to_blog_tab(pEndpoints[c][1], num_base_bits);
+				blog_endpoints[c][3] = blog_endpoints[c][1];
+				assert((int)blog_endpoints[c][1] <= base_bitmask);
+			}
+
+			const uint32_t pat_index = 0;
+			const uint8_t* pPat = &g_bc6h_2subset_patterns[pat_index][0][0];
+
+			memcpy(log_blk.m_weights, pWeights, 16);
+
+			if (log_blk.m_weights[0] & 4)
+			{
+				// Swap part 0's endpoints/weights
+				for (uint32_t c = 0; c < 3; c++)
+					std::swap(blog_endpoints[c][0], blog_endpoints[c][1]);
+
+				for (uint32_t i = 0; i < 16; i++)
+					if ((pPat[i] & 0x7F) == 0)
+						log_blk.m_weights[i] = 7 - log_blk.m_weights[i];
+			}
+
+			if (log_blk.m_weights[15] & 4)
+			{
+				// Swap part 1's endpoints/weights
+				for (uint32_t c = 0; c < 3; c++)
+					std::swap(blog_endpoints[c][2], blog_endpoints[c][3]);
+
+				for (uint32_t i = 0; i < 16; i++)
+					if ((pPat[i] & 0x7F) == 1)
+						log_blk.m_weights[i] = 7 - log_blk.m_weights[i];
+			}
+
+			bool failed_flag = false;
+
+			for (uint32_t c = 0; c < 3; c++)
+			{
+				const int max_delta = (1 << (num_delta_bits[c] - 1)) - 1;
+
+				const int min_delta = -(max_delta + 1);
+				assert((max_delta - min_delta) == delta_bitmasks[c]);
+
+				log_blk.m_endpoints[c][0] = blog_endpoints[c][0];
+
+				int delta0 = (int)blog_endpoints[c][1] - (int)blog_endpoints[c][0];
+				int delta1 = (int)blog_endpoints[c][2] - (int)blog_endpoints[c][0];
+				int delta2 = (int)blog_endpoints[c][3] - (int)blog_endpoints[c][0];
+
+				if ((delta0 < min_delta) || (delta0 > max_delta) ||
+					(delta1 < min_delta) || (delta1 > max_delta) ||
+					(delta2 < min_delta) || (delta2 > max_delta))
+				{
+					failed_flag = true;
+					break;
+				}
+
+				log_blk.m_endpoints[c][1] = delta0 & delta_bitmasks[c];
+				log_blk.m_endpoints[c][2] = delta1 & delta_bitmasks[c];
+				log_blk.m_endpoints[c][3] = delta2 & delta_bitmasks[c];
+
+				if (failed_flag)
+					break;
+			}
+			if (failed_flag)
+				continue;
+
+			log_blk.m_mode = mode;
+			log_blk.m_partition_pattern = pat_index;
+			pack_bc6h_block(*pPacked_block, log_blk);
+
+			return;
+
+		} // mode_iter
+
+		bc6h_enc_block_1subset_mode9_3bit_weights(pPacked_block, pEndpoints, pWeights);
+	}
+
+	// pEndpoints[subset][comp][lh_index]
+	void bc6h_enc_block_2subset_mode9_3bit_weights(bc6h_block* pPacked_block, uint32_t common_part_index, const half_float pEndpoints[2][3][2], const uint8_t* pWeights)
+	{
+		assert(g_bc6h_enc_initialized);
+		assert(common_part_index < basist::TOTAL_ASTC_BC7_COMMON_PARTITIONS2);
+
+		for (uint32_t i = 0; i < 16; i++)
+		{
+			assert(pWeights[i] <= 7);
+		}
+
+		bc6h_logical_block log_blk;
+		log_blk.clear();
+
+		// Convert half endpoints to blog6 (mode 9 doesn't use delta encoding)
+		for (uint32_t s = 0; s < 2; s++)
+		{
+			for (uint32_t c = 0; c < 3; c++)
+			{
+				log_blk.m_endpoints[c][0 + s * 2] = half_to_blog_tab(pEndpoints[s][c][0], 6);
+				log_blk.m_endpoints[c][1 + s * 2] = half_to_blog_tab(pEndpoints[s][c][1], 6);
+			}
+		}
+
+		memcpy(log_blk.m_weights, pWeights, 16);
+
+		//const uint32_t astc_pattern = basist::g_astc_bc7_common_partitions2[common_part_index].m_astc;
+		const uint32_t bc7_pattern = basist::g_astc_bc7_common_partitions2[common_part_index].m_bc7;
+
+		const bool invert_flag = basist::g_astc_bc7_common_partitions2[common_part_index].m_invert;
+		if (invert_flag)
+		{
+			for (uint32_t c = 0; c < 3; c++)
+			{
+				std::swap(log_blk.m_endpoints[c][0], log_blk.m_endpoints[c][2]);
+				std::swap(log_blk.m_endpoints[c][1], log_blk.m_endpoints[c][3]);
+			}
+		}
+
+		const uint32_t pat_index = bc7_pattern;
+		assert(pat_index < 32);
+		const uint8_t* pPat = &g_bc6h_2subset_patterns[pat_index][0][0];
+
+		bool swap_flags[2] = { false, false };
+		for (uint32_t i = 0; i < 16; i++)
+		{
+			if ((pPat[i] & 0x80) == 0)
+				continue;
+
+			if (log_blk.m_weights[i] & 4)
+			{
+				const uint32_t p = pPat[i] & 1;
+				swap_flags[p] = true;
+			}
+		}
+
+		if (swap_flags[0])
+		{
+			for (uint32_t c = 0; c < 3; c++)
+				std::swap(log_blk.m_endpoints[c][0], log_blk.m_endpoints[c][1]);
+
+			for (uint32_t i = 0; i < 16; i++)
+				if ((pPat[i] & 0x7F) == 0)
+					log_blk.m_weights[i] = 7 - log_blk.m_weights[i];
+		}
+
+		if (swap_flags[1])
+		{
+			for (uint32_t c = 0; c < 3; c++)
+				std::swap(log_blk.m_endpoints[c][2], log_blk.m_endpoints[c][3]);
+
+			for (uint32_t i = 0; i < 16; i++)
+				if ((pPat[i] & 0x7F) == 1)
+					log_blk.m_weights[i] = 7 - log_blk.m_weights[i];
+		}
+
+		log_blk.m_mode = 9;
+		log_blk.m_partition_pattern = pat_index;
+		pack_bc6h_block(*pPacked_block, log_blk);
+	}
+
+	void bc6h_enc_block_2subset_3bit_weights(bc6h_block* pPacked_block, uint32_t common_part_index, const half_float pEndpoints[2][3][2], const uint8_t* pWeights)
+	{
+		assert(g_bc6h_enc_initialized);
+
+		for (uint32_t i = 0; i < 16; i++)
+		{
+			assert(pWeights[i] <= 7);
+		}
+
+		bc6h_logical_block log_blk;
+		log_blk.clear();
+
+		for (uint32_t mode_iter = 0; mode_iter <= 8; mode_iter++)
+		{
+			static const int s_mode_order[9] = { 2, 3, 4, 0,  5, 6, 7, 8,  1 }; // ordered from largest base bits to least
+			const uint32_t mode = s_mode_order[mode_iter];
+
+			const uint32_t num_base_bits = g_bc6h_mode_sig_bits[mode][0];
+			const int base_bitmask = (1 << num_base_bits) - 1;
+			BASISU_NOTE_UNUSED(base_bitmask);
+
+			const uint32_t num_delta_bits[3] = { g_bc6h_mode_sig_bits[mode][1], g_bc6h_mode_sig_bits[mode][2], g_bc6h_mode_sig_bits[mode][3] };
+			const int delta_bitmasks[3] = { (1 << num_delta_bits[0]) - 1, (1 << num_delta_bits[1]) - 1, (1 << num_delta_bits[2]) - 1 };
+
+			uint32_t blog_endpoints[3][4];
+
+			// Convert half endpoints to blog 7-11
+			for (uint32_t s = 0; s < 2; s++)
+			{
+				for (uint32_t c = 0; c < 3; c++)
+				{
+					blog_endpoints[c][0 + s * 2] = half_to_blog_tab(pEndpoints[s][c][0], num_base_bits);
+					blog_endpoints[c][1 + s * 2] = half_to_blog_tab(pEndpoints[s][c][1], num_base_bits);
+				}
+			}
+
+			memcpy(log_blk.m_weights, pWeights, 16);
+
+			//const uint32_t astc_pattern = basist::g_astc_bc7_common_partitions2[common_part_index].m_astc;
+			const uint32_t bc7_pattern = basist::g_astc_bc7_common_partitions2[common_part_index].m_bc7;
+
+			const bool invert_flag = basist::g_astc_bc7_common_partitions2[common_part_index].m_invert;
+			if (invert_flag)
+			{
+				for (uint32_t c = 0; c < 3; c++)
+				{
+					std::swap(blog_endpoints[c][0], blog_endpoints[c][2]);
+					std::swap(blog_endpoints[c][1], blog_endpoints[c][3]);
+				}
+			}
+
+			const uint32_t pat_index = bc7_pattern;
+			assert(pat_index < 32);
+			const uint8_t* pPat = &g_bc6h_2subset_patterns[pat_index][0][0];
+
+			bool swap_flags[2] = { false, false };
+			for (uint32_t i = 0; i < 16; i++)
+			{
+				if ((pPat[i] & 0x80) == 0)
+					continue;
+
+				if (log_blk.m_weights[i] & 4)
+				{
+					const uint32_t p = pPat[i] & 1;
+					swap_flags[p] = true;
+				}
+			}
+
+			if (swap_flags[0])
+			{
+				for (uint32_t c = 0; c < 3; c++)
+					std::swap(blog_endpoints[c][0], blog_endpoints[c][1]);
+
+				for (uint32_t i = 0; i < 16; i++)
+					if ((pPat[i] & 0x7F) == 0)
+						log_blk.m_weights[i] = 7 - log_blk.m_weights[i];
+			}
+
+			if (swap_flags[1])
+			{
+				for (uint32_t c = 0; c < 3; c++)
+					std::swap(blog_endpoints[c][2], blog_endpoints[c][3]);
+
+				for (uint32_t i = 0; i < 16; i++)
+					if ((pPat[i] & 0x7F) == 1)
+						log_blk.m_weights[i] = 7 - log_blk.m_weights[i];
+			}
+
+			// Try packing the endpoints
+			bool failed_flag = false;
+
+			for (uint32_t c = 0; c < 3; c++)
+			{
+				const int max_delta = (1 << (num_delta_bits[c] - 1)) - 1;
+
+				const int min_delta = -(max_delta + 1);
+				assert((max_delta - min_delta) == delta_bitmasks[c]);
+
+				log_blk.m_endpoints[c][0] = blog_endpoints[c][0];
+
+				int delta0 = (int)blog_endpoints[c][1] - (int)blog_endpoints[c][0];
+				int delta1 = (int)blog_endpoints[c][2] - (int)blog_endpoints[c][0];
+				int delta2 = (int)blog_endpoints[c][3] - (int)blog_endpoints[c][0];
+
+				if ((delta0 < min_delta) || (delta0 > max_delta) ||
+					(delta1 < min_delta) || (delta1 > max_delta) ||
+					(delta2 < min_delta) || (delta2 > max_delta))
+				{
+					failed_flag = true;
+					break;
+				}
+
+				log_blk.m_endpoints[c][1] = delta0 & delta_bitmasks[c];
+				log_blk.m_endpoints[c][2] = delta1 & delta_bitmasks[c];
+				log_blk.m_endpoints[c][3] = delta2 & delta_bitmasks[c];
+
+				if (failed_flag)
+					break;
+			}
+			if (failed_flag)
+				continue;
+
+			log_blk.m_mode = mode;
+			log_blk.m_partition_pattern = pat_index;
+			pack_bc6h_block(*pPacked_block, log_blk);
+
+			//half_float blk[16 * 3];
+			//unpack_bc6h(pPacked_block, blk, false);
+
+			return;
+		}
+
+		bc6h_enc_block_2subset_mode9_3bit_weights(pPacked_block, common_part_index, pEndpoints, pWeights);
+	}
+
+	bool bc6h_enc_block_solid_color(bc6h_block* pPacked_block, const half_float pColor[3])
+	{
+		assert(g_bc6h_enc_initialized);
+
+		if ((pColor[0] | pColor[1] | pColor[2]) & 0x8000)
+			return false;
+
+		// ASTC block unpacker won't allow Inf/NaN's to come through.
+		//if (is_half_inf_or_nan(pColor[0]) || is_half_inf_or_nan(pColor[1]) || is_half_inf_or_nan(pColor[2]))
+		//	return false;
+
+		uint8_t weights[16];
+		memset(weights, 0, sizeof(weights));
+
+		half_float endpoints[3][2];
+		endpoints[0][0] = pColor[0];
+		endpoints[0][1] = pColor[0];
+				
+		endpoints[1][0] = pColor[1];
+		endpoints[1][1] = pColor[1];
+
+		endpoints[2][0] = pColor[2];
+		endpoints[2][1] = pColor[2];
+				
+		bc6h_enc_block_1subset_4bit_weights(pPacked_block, endpoints, weights);
+
+		return true;
+	}
+
+	//--------------------------------------------------------------------------------------------------------------------------
+	// basisu_astc_hdr_core.cpp
+
+	static bool g_astc_hdr_core_initialized;
+	static int8_t g_astc_partition_id_to_common_bc7_pat_index[1024];
+
+	//--------------------------------------------------------------------------------------------------------------------------
+
+	void astc_hdr_core_init()
+	{
+		if (g_astc_hdr_core_initialized)
+			return;
+
+		memset(g_astc_partition_id_to_common_bc7_pat_index, 0xFF, sizeof(g_astc_partition_id_to_common_bc7_pat_index));
+
+		for (uint32_t part_index = 0; part_index < basist::TOTAL_ASTC_BC6H_COMMON_PARTITIONS2; ++part_index)
+		{
+			const uint32_t astc_pattern = basist::g_astc_bc7_common_partitions2[part_index].m_astc;
+			//const uint32_t bc7_pattern = basist::g_astc_bc7_common_partitions2[part_index].m_bc7;
+
+			assert(astc_pattern < 1024);
+			g_astc_partition_id_to_common_bc7_pat_index[astc_pattern] = (int8_t)part_index;
+		}
+
+		g_astc_hdr_core_initialized = true;
+	}
+
+	//--------------------------------------------------------------------------------------------------------------------------
+
+	static inline int astc_hdr_sign_extend(int src, int num_src_bits)
+	{
+		assert(basisu::in_range(num_src_bits, 2, 31));
+
+		const bool negative = (src & (1 << (num_src_bits - 1))) != 0;
+		if (negative)
+			return src | ~((1 << num_src_bits) - 1);
+		else
+			return src & ((1 << num_src_bits) - 1);
+	}
+
+	static inline void astc_hdr_pack_bit(
+		int& dst, int dst_bit,
+		int src_val, int src_bit = 0)
+	{
+		assert(dst_bit >= 0 && dst_bit <= 31);
+		int bit = basisu::get_bit(src_val, src_bit);
+		dst |= (bit << dst_bit);
+	}
+
+	//--------------------------------------------------------------------------------------------------------------------------
+
+	void decode_mode7_to_qlog12_ise20(
+		const uint8_t* pEndpoints,
+		int e[2][3],
+		int* pScale)
+	{
+		assert(g_astc_hdr_core_initialized);
+
+		for (uint32_t i = 0; i < NUM_MODE7_ENDPOINTS; i++)
+		{
+			assert(pEndpoints[i] <= 255);
+		}
+
+		const int v0 = pEndpoints[0], v1 = pEndpoints[1], v2 = pEndpoints[2], v3 = pEndpoints[3];
+
+		// Extract mode bits and unpack to major component and mode.
+		const int modeval = ((v0 & 0xC0) >> 6) | ((v1 & 0x80) >> 5) | ((v2 & 0x80) >> 4);
+
+		int majcomp, mode;
+		if ((modeval & 0xC) != 0xC)
+		{
+			majcomp = modeval >> 2;
+			mode = modeval & 3;
+		}
+		else if (modeval != 0xF)
+		{
+			majcomp = modeval & 3;
+			mode = 4;
+		}
+		else
+		{
+			majcomp = 0;
+			mode = 5;
+		}
+
+		// Extract low-order bits of r, g, b, and s.
+		int red = v0 & 0x3f;
+		int green = v1 & 0x1f;
+		int blue = v2 & 0x1f;
+		int scale = v3 & 0x1f;
+
+		// Extract high-order bits, which may be assigned depending on mode
+		int x0 = (v1 >> 6) & 1;
+		int x1 = (v1 >> 5) & 1;
+		int x2 = (v2 >> 6) & 1;
+		int x3 = (v2 >> 5) & 1;
+		int x4 = (v3 >> 7) & 1;
+		int x5 = (v3 >> 6) & 1;
+		int x6 = (v3 >> 5) & 1;
+
+		// Now move the high-order xs into the right place.
+		const int ohm = 1 << mode;
+		if (ohm & 0x30) green |= x0 << 6;
+		if (ohm & 0x3A) green |= x1 << 5;
+		if (ohm & 0x30) blue |= x2 << 6;
+		if (ohm & 0x3A) blue |= x3 << 5;
+		if (ohm & 0x3D) scale |= x6 << 5;
+		if (ohm & 0x2D) scale |= x5 << 6;
+		if (ohm & 0x04) scale |= x4 << 7;
+		if (ohm & 0x3B) red |= x4 << 6;
+		if (ohm & 0x04) red |= x3 << 6;
+		if (ohm & 0x10) red |= x5 << 7;
+		if (ohm & 0x0F) red |= x2 << 7;
+		if (ohm & 0x05) red |= x1 << 8;
+		if (ohm & 0x0A) red |= x0 << 8;
+		if (ohm & 0x05) red |= x0 << 9;
+		if (ohm & 0x02) red |= x6 << 9;
+		if (ohm & 0x01) red |= x3 << 10;
+		if (ohm & 0x02) red |= x5 << 10;
+
+		// Shift the bits to the top of the 12-bit result.
+		static const int s_shamts[6] = { 1,1,2,3,4,5 };
+
+		const int shamt = s_shamts[mode];
+		red <<= shamt;
+		green <<= shamt;
+		blue <<= shamt;
+		scale <<= shamt;
+
+		// Minor components are stored as differences
+		if (mode != 5)
+		{
+			green = red - green;
+			blue = red - blue;
+		}
+
+		// Swizzle major component into place
+		if (majcomp == 1)
+			std::swap(red, green);
+
+		if (majcomp == 2)
+			std::swap(red, blue);
+
+		// Clamp output values, set alpha to 1.0
+		e[1][0] = basisu::clamp(red, 0, 0xFFF);
+		e[1][1] = basisu::clamp(green, 0, 0xFFF);
+		e[1][2] = basisu::clamp(blue, 0, 0xFFF);
+
+		e[0][0] = basisu::clamp(red - scale, 0, 0xFFF);
+		e[0][1] = basisu::clamp(green - scale, 0, 0xFFF);
+		e[0][2] = basisu::clamp(blue - scale, 0, 0xFFF);
+
+		if (pScale)
+			*pScale = scale;
+	}
+
+	//--------------------------------------------------------------------------------------------------------------------------
+
+	bool decode_mode7_to_qlog12(
+		const uint8_t* pEndpoints,
+		int e[2][3],
+		int* pScale,
+		uint32_t ise_endpoint_range)
+	{
+		assert(g_astc_hdr_core_initialized);
+
+		if (ise_endpoint_range == astc_helpers::BISE_256_LEVELS)
+		{
+			decode_mode7_to_qlog12_ise20(pEndpoints, e, pScale);
+		}
+		else
+		{
+			uint8_t dequantized_endpoints[NUM_MODE7_ENDPOINTS];
+
+			for (uint32_t i = 0; i < NUM_MODE7_ENDPOINTS; i++)
+				dequantized_endpoints[i] = astc_helpers::g_dequant_tables.get_endpoint_tab(ise_endpoint_range).m_ISE_to_val[pEndpoints[i]];
+
+			decode_mode7_to_qlog12_ise20(dequantized_endpoints, e, pScale);
+		}
+
+		for (uint32_t i = 0; i < 2; i++)
+		{
+			if (e[i][0] > (int)MAX_QLOG12)
+				return false;
+
+			if (e[i][1] > (int)MAX_QLOG12)
+				return false;
+
+			if (e[i][2] > (int)MAX_QLOG12)
+				return false;
+		}
+
+		return true;
+	}
+
+	//--------------------------------------------------------------------------------------------------------------------------
+
+	void decode_mode11_to_qlog12_ise20(
+		const uint8_t* pEndpoints,
+		int e[2][3])
+	{
+#ifdef _DEBUG
+		for (uint32_t i = 0; i < NUM_MODE11_ENDPOINTS; i++)
+		{
+			assert(pEndpoints[i] <= 255);
+		}
+#endif
+
+		const uint32_t maj_comp = basisu::get_bit(pEndpoints[4], 7) | (basisu::get_bit(pEndpoints[5], 7) << 1);
+
+		if (maj_comp == 3)
+		{
+			// Direct, qlog8 and qlog7
+			e[0][0] = pEndpoints[0] << 4;
+			e[1][0] = pEndpoints[1] << 4;
+
+			e[0][1] = pEndpoints[2] << 4;
+			e[1][1] = pEndpoints[3] << 4;
+
+			e[0][2] = (pEndpoints[4] & 127) << 5;
+			e[1][2] = (pEndpoints[5] & 127) << 5;
+		}
+		else
+		{
+			int v0 = pEndpoints[0];
+			int v1 = pEndpoints[1];
+			int v2 = pEndpoints[2];
+			int v3 = pEndpoints[3];
+			int v4 = pEndpoints[4];
+			int v5 = pEndpoints[5];
+
+			int mode = 0;
+			astc_hdr_pack_bit(mode, 0, v1, 7);
+			astc_hdr_pack_bit(mode, 1, v2, 7);
+			astc_hdr_pack_bit(mode, 2, v3, 7);
+
+			int va = v0;
+			astc_hdr_pack_bit(va, 8, v1, 6);
+
+			int vb0 = v2 & 63;
+			int vb1 = v3 & 63;
+			int vc = v1 & 63;
+
+			int vd0 = v4 & 0x7F; // this takes more bits than is sometimes needed
+			int vd1 = v5 & 0x7F; // this takes more bits than is sometimes needed
+			static const int8_t dbitstab[8] = { 7,6,7,6,5,6,5,6 };
+			vd0 = astc_hdr_sign_extend(vd0, dbitstab[mode]);
+			vd1 = astc_hdr_sign_extend(vd1, dbitstab[mode]);
+
+			int x0 = basisu::get_bit(v2, 6);
+			int x1 = basisu::get_bit(v3, 6);
+			int x2 = basisu::get_bit(v4, 6);
+			int x3 = basisu::get_bit(v5, 6);
+			int x4 = basisu::get_bit(v4, 5);
+			int x5 = basisu::get_bit(v5, 5);
+
+			const uint32_t ohm = 1U << mode;
+			if (ohm & 0xA4) va |= (x0 << 9);
+			if (ohm & 0x08) va |= (x2 << 9);
+			if (ohm & 0x50) va |= (x4 << 9);
+			if (ohm & 0x50) va |= (x5 << 10);
+			if (ohm & 0xA0) va |= (x1 << 10);
+			if (ohm & 0xC0) va |= (x2 << 11);
+			if (ohm & 0x04) vc |= (x1 << 6);
+			if (ohm & 0xE8) vc |= (x3 << 6);
+			if (ohm & 0x20) vc |= (x2 << 7);
+			if (ohm & 0x5B) vb0 |= (x0 << 6);
+			if (ohm & 0x5B) vb1 |= (x1 << 6);
+			if (ohm & 0x12) vb0 |= (x2 << 7);
+			if (ohm & 0x12) vb1 |= (x3 << 7);
+
+			const int shamt = (mode >> 1) ^ 3;
+			
+			va  = (uint32_t)va  << shamt;
+			vb0 = (uint32_t)vb0 << shamt;
+			vb1 = (uint32_t)vb1 << shamt;
+			vc  = (uint32_t)vc  << shamt;
+			vd0 = (uint32_t)vd0 << shamt;
+			vd1 = (uint32_t)vd1 << shamt;
+
+			// qlog12
+			e[1][0] = basisu::clamp<int>(va, 0, 0xFFF);
+			e[1][1] = basisu::clamp<int>(va - vb0, 0, 0xFFF);
+			e[1][2] = basisu::clamp<int>(va - vb1, 0, 0xFFF);
+
+			e[0][0] = basisu::clamp<int>(va - vc, 0, 0xFFF);
+			e[0][1] = basisu::clamp<int>(va - vb0 - vc - vd0, 0, 0xFFF);
+			e[0][2] = basisu::clamp<int>(va - vb1 - vc - vd1, 0, 0xFFF);
+
+			if (maj_comp)
+			{
+				std::swap(e[0][0], e[0][maj_comp]);
+				std::swap(e[1][0], e[1][maj_comp]);
+			}
+		}
+	}
+
+	//--------------------------------------------------------------------------------------------------------------------------
+
+	bool decode_mode11_to_qlog12(
+		const uint8_t* pEndpoints,
+		int e[2][3],
+		uint32_t ise_endpoint_range)
+	{
+		assert(g_astc_hdr_core_initialized);
+		assert((ise_endpoint_range >= astc_helpers::FIRST_VALID_ENDPOINT_ISE_RANGE) && (ise_endpoint_range <= astc_helpers::LAST_VALID_ENDPOINT_ISE_RANGE));
+
+		if (ise_endpoint_range == astc_helpers::BISE_256_LEVELS)
+		{
+			decode_mode11_to_qlog12_ise20(pEndpoints, e);
+		}
+		else
+		{
+			uint8_t dequantized_endpoints[NUM_MODE11_ENDPOINTS];
+
+			for (uint32_t i = 0; i < NUM_MODE11_ENDPOINTS; i++)
+				dequantized_endpoints[i] = astc_helpers::g_dequant_tables.get_endpoint_tab(ise_endpoint_range).m_ISE_to_val[pEndpoints[i]];
+
+			decode_mode11_to_qlog12_ise20(dequantized_endpoints, e);
+		}
+
+		for (uint32_t i = 0; i < 2; i++)
+		{
+			if (e[i][0] > (int)MAX_QLOG12)
+				return false;
+
+			if (e[i][1] > (int)MAX_QLOG12)
+				return false;
+
+			if (e[i][2] > (int)MAX_QLOG12)
+				return false;
+		}
+
+		return true;
+	}
+
+	//--------------------------------------------------------------------------------------------------------------------------
+
+	bool transcode_bc6h_1subset(half_float h_e[3][2], const astc_helpers::log_astc_block& best_blk, bc6h_block& transcoded_bc6h_blk)
+	{
+		assert(g_astc_hdr_core_initialized);
+		assert((best_blk.m_weight_ise_range >= 1) && (best_blk.m_weight_ise_range <= 8));
+		
+		if (best_blk.m_weight_ise_range == 5)
+		{
+			// Use 3-bit BC6H weights which are a perfect match for 3-bit ASTC weights, but encode 1-subset as 2 equal subsets
+			bc6h_enc_block_1subset_3bit_weights(&transcoded_bc6h_blk, h_e, best_blk.m_weights);
+		}
+		else
+		{
+			uint8_t bc6h_weights[16];
+
+			if (best_blk.m_weight_ise_range == 1)
+			{
+				// weight ISE 1: 3 levels
+				static const uint8_t s_astc1_to_bc6h_3[3] = { 0, 8, 15 };
+
+				for (uint32_t i = 0; i < 16; i++)
+					bc6h_weights[i] = s_astc1_to_bc6h_3[best_blk.m_weights[i]];
+			}
+			else if (best_blk.m_weight_ise_range == 2)
+			{
+				// weight ISE 2: 4 levels
+				static const uint8_t s_astc2_to_bc6h_4[4] = { 0, 5, 10, 15 };
+
+				for (uint32_t i = 0; i < 16; i++)
+					bc6h_weights[i] = s_astc2_to_bc6h_4[best_blk.m_weights[i]];
+			}
+			else if (best_blk.m_weight_ise_range == 3)
+			{
+				// weight ISE 3: 5 levels
+				static const uint8_t s_astc3_to_bc6h_4[5] = { 0, 4, 7, 11, 15 };
+
+				for (uint32_t i = 0; i < 16; i++)
+					bc6h_weights[i] = s_astc3_to_bc6h_4[best_blk.m_weights[i]];
+			}
+			else if (best_blk.m_weight_ise_range == 4)
+			{
+				// weight ISE 4: 6 levels
+				static const uint8_t s_astc4_to_bc6h_4[6] = { 0, 15, 3, 12, 6, 9 };
+
+				for (uint32_t i = 0; i < 16; i++)
+					bc6h_weights[i] = s_astc4_to_bc6h_4[best_blk.m_weights[i]];
+			}
+			else if (best_blk.m_weight_ise_range == 6)
+			{
+				// weight ISE 6: 10 levels
+				static const uint8_t s_astc6_to_bc6h_4[10] = { 0, 15, 2, 13, 3, 12, 5, 10, 6, 9 };
+
+				for (uint32_t i = 0; i < 16; i++)
+					bc6h_weights[i] = s_astc6_to_bc6h_4[best_blk.m_weights[i]];
+			}
+			else if (best_blk.m_weight_ise_range == 7)
+			{
+				// weight ISE 7: 12 levels
+				static const uint8_t s_astc7_to_bc6h_4[12] = { 0, 15, 4, 11, 1, 14, 5, 10, 2, 13, 6, 9 };
+
+				for (uint32_t i = 0; i < 16; i++)
+					bc6h_weights[i] = s_astc7_to_bc6h_4[best_blk.m_weights[i]];
+			}
+			else if (best_blk.m_weight_ise_range == 8)
+			{
+				// 16 levels
+				memcpy(bc6h_weights, best_blk.m_weights, 16);
+			}
+			else
+			{
+				assert(0);
+				return false;
+			}
+
+			bc6h_enc_block_1subset_4bit_weights(&transcoded_bc6h_blk, h_e, bc6h_weights);
+		}
+
+		return true;
+	}
+
+	//--------------------------------------------------------------------------------------------------------------------------
+
+	bool transcode_bc6h_2subsets(uint32_t common_part_index, const astc_helpers::log_astc_block& best_blk, bc6h_block& transcoded_bc6h_blk)
+	{
+		assert(g_astc_hdr_core_initialized);
+		assert(best_blk.m_num_partitions == 2);
+		assert(common_part_index < basist::TOTAL_ASTC_BC6H_COMMON_PARTITIONS2);
+		
+		half_float bc6h_endpoints[2][3][2]; // [subset][comp][lh_index]
+
+		// UASTC HDR checks
+		// Both CEM's must be equal in 2-subset UASTC HDR.
+		if (best_blk.m_color_endpoint_modes[0] != best_blk.m_color_endpoint_modes[1])
+			return false;
+		if ((best_blk.m_color_endpoint_modes[0] != 7) && (best_blk.m_color_endpoint_modes[0] != 11))
+			return false;
+				
+		if (best_blk.m_color_endpoint_modes[0] == 7)
+		{
+			if (!(((best_blk.m_weight_ise_range == 1) && (best_blk.m_endpoint_ise_range == 20)) ||
+		 		  ((best_blk.m_weight_ise_range == 2) && (best_blk.m_endpoint_ise_range == 20)) ||
+				  ((best_blk.m_weight_ise_range == 3) && (best_blk.m_endpoint_ise_range == 19)) ||
+				  ((best_blk.m_weight_ise_range == 4) && (best_blk.m_endpoint_ise_range == 17)) ||
+				  ((best_blk.m_weight_ise_range == 5) && (best_blk.m_endpoint_ise_range == 15))))
+			{
+				return false;
+			}
+		}
+		else
+		{
+			if (!(((best_blk.m_weight_ise_range == 1) && (best_blk.m_endpoint_ise_range == 14)) ||
+				  ((best_blk.m_weight_ise_range == 2) && (best_blk.m_endpoint_ise_range == 12))))
+			{
+				return false;
+			}
+		}
+
+		for (uint32_t s = 0; s < 2; s++)
+		{
+			int e[2][3];
+			if (best_blk.m_color_endpoint_modes[0] == 7)
+			{
+				bool success = decode_mode7_to_qlog12(best_blk.m_endpoints + s * NUM_MODE7_ENDPOINTS, e, nullptr, best_blk.m_endpoint_ise_range);
+				if (!success)
+					return false;
+			}
+			else
+			{
+				bool success = decode_mode11_to_qlog12(best_blk.m_endpoints + s * NUM_MODE11_ENDPOINTS, e, best_blk.m_endpoint_ise_range);
+				if (!success)
+					return false;
+			}
+
+			for (uint32_t c = 0; c < 3; c++)
+			{
+				bc6h_endpoints[s][c][0] = qlog_to_half_slow(e[0][c], 12);
+				if (is_half_inf_or_nan(bc6h_endpoints[s][c][0]))
+					return false;
+
+				bc6h_endpoints[s][c][1] = qlog_to_half_slow(e[1][c], 12);
+				if (is_half_inf_or_nan(bc6h_endpoints[s][c][1]))
+					return false;
+			}
+		}
+
+		uint8_t bc6h_weights[16];
+		if (best_blk.m_weight_ise_range == 1)
+		{
+			static const uint8_t s_astc1_to_bc6h_3[3] = { 0, 4, 7 };
+
+			for (uint32_t i = 0; i < 16; i++)
+				bc6h_weights[i] = s_astc1_to_bc6h_3[best_blk.m_weights[i]];
+		}
+		else if (best_blk.m_weight_ise_range == 2)
+		{
+			static const uint8_t s_astc2_to_bc6h_3[4] = { 0, 2, 5, 7 };
+
+			for (uint32_t i = 0; i < 16; i++)
+				bc6h_weights[i] = s_astc2_to_bc6h_3[best_blk.m_weights[i]];
+		}
+		else if (best_blk.m_weight_ise_range == 3)
+		{
+			static const uint8_t s_astc3_to_bc6h_3[5] = { 0, 2, 4, 5, 7 };
+
+			for (uint32_t i = 0; i < 16; i++)
+				bc6h_weights[i] = s_astc3_to_bc6h_3[best_blk.m_weights[i]];
+		}
+		else if (best_blk.m_weight_ise_range == 4)
+		{
+			static const uint8_t s_astc4_to_bc6h_3[6] = { 0, 7, 1, 6, 3, 4 };
+
+			for (uint32_t i = 0; i < 16; i++)
+				bc6h_weights[i] = s_astc4_to_bc6h_3[best_blk.m_weights[i]];
+		}
+		else if (best_blk.m_weight_ise_range == 5)
+		{
+			memcpy(bc6h_weights, best_blk.m_weights, 16);
+		}
+		else
+		{
+			assert(0);
+			return false;
+		}
+
+		bc6h_enc_block_2subset_3bit_weights(&transcoded_bc6h_blk, common_part_index, bc6h_endpoints, bc6h_weights);
+
+		return true;
+	}
+
+	//--------------------------------------------------------------------------------------------------------------------------
+	// Transcodes an UASTC HDR block to BC6H. Must have been encoded to UASTC HDR, or this fails.
+	bool astc_hdr_transcode_to_bc6h(const astc_blk& src_blk, bc6h_block& dst_blk)
+	{
+		assert(g_astc_hdr_core_initialized);
+		if (!g_astc_hdr_core_initialized)
+		{
+			assert(0);
+			return false;
+		}
+
+		astc_helpers::log_astc_block log_blk;
+
+		if (!astc_helpers::unpack_block(&src_blk, log_blk, 4, 4))
+		{
+			// Failed unpacking ASTC data
+			return false;
+		}
+
+		return astc_hdr_transcode_to_bc6h(log_blk, dst_blk);
+	}
+
+	//--------------------------------------------------------------------------------------------------------------------------
+	// Transcodes an UASTC HDR block to BC6H. Must have been encoded to UASTC HDR, or this fails.
+	bool astc_hdr_transcode_to_bc6h(const astc_helpers::log_astc_block& log_blk, bc6h_block& dst_blk)
+	{
+		assert(g_astc_hdr_core_initialized);
+		if (!g_astc_hdr_core_initialized)
+		{
+			assert(0);
+			return false;
+		}
+				
+		if (log_blk.m_solid_color_flag_ldr)
+		{
+			// Don't support LDR solid colors.
+			return false;
+		}
+
+		if (log_blk.m_solid_color_flag_hdr)
+		{
+			// Solid color HDR block
+			return bc6h_enc_block_solid_color(&dst_blk, log_blk.m_solid_color);
+		}
+
+		// Only support 4x4 grid sizes
+		if ((log_blk.m_grid_width != 4) || (log_blk.m_grid_height != 4))
+			return false;
+				
+		// Don't support dual plane encoding
+		if (log_blk.m_dual_plane)
+			return false;
+
+		if (log_blk.m_num_partitions == 1)
+		{
+			// Handle 1 partition (or subset)
+			
+			// UASTC HDR checks
+			if ((log_blk.m_weight_ise_range < 1) || (log_blk.m_weight_ise_range > 8))
+				return false;
+									
+			int e[2][3];
+			bool success;
+
+			if (log_blk.m_color_endpoint_modes[0] == 7)
+			{
+				if (log_blk.m_endpoint_ise_range != 20)
+					return false;
+
+				success = decode_mode7_to_qlog12(log_blk.m_endpoints, e, nullptr, log_blk.m_endpoint_ise_range);
+			}
+			else if (log_blk.m_color_endpoint_modes[0] == 11)
+			{
+				// UASTC HDR checks
+				if (log_blk.m_weight_ise_range <= 7)
+				{
+					if (log_blk.m_endpoint_ise_range != 20)
+						return false;
+				}
+				else if (log_blk.m_endpoint_ise_range != 19)
+				{
+					return false;
+				}
+
+				success = decode_mode11_to_qlog12(log_blk.m_endpoints, e, log_blk.m_endpoint_ise_range);
+			}
+			else
+			{
+				return false;
+			}
+
+			if (!success)
+				return false;
+
+			// Transform endpoints to half float
+			half_float h_e[3][2] =
+			{
+				{ qlog_to_half_slow(e[0][0], 12), qlog_to_half_slow(e[1][0], 12) },
+				{ qlog_to_half_slow(e[0][1], 12), qlog_to_half_slow(e[1][1], 12) },
+				{ qlog_to_half_slow(e[0][2], 12), qlog_to_half_slow(e[1][2], 12) }
+			};
+
+			// Sanity check for NaN/Inf
+			for (uint32_t i = 0; i < 2; i++)
+				if (is_half_inf_or_nan(h_e[0][i]) || is_half_inf_or_nan(h_e[1][i]) || is_half_inf_or_nan(h_e[2][i]))
+					return false;
+			
+			// Transcode to bc6h
+			if (!transcode_bc6h_1subset(h_e, log_blk, dst_blk))
+				return false;
+		}
+		else if (log_blk.m_num_partitions == 2)
+		{
+			// Handle 2 partition (or subset)
+			int common_bc7_pat_index = g_astc_partition_id_to_common_bc7_pat_index[log_blk.m_partition_id];
+			if (common_bc7_pat_index < 0)
+				return false;
+
+			assert(common_bc7_pat_index < (int)basist::TOTAL_ASTC_BC6H_COMMON_PARTITIONS2);
+						
+			if (!transcode_bc6h_2subsets(common_bc7_pat_index, log_blk, dst_blk))
+				return false;
+		}
+		else
+		{
+			// Only supports 1 or 2 partitions (or subsets)
+			return false;
+		}
+
+		return true;
+	}
+#endif // BASISD_SUPPORT_UASTC_HDR
+
 } // namespace basist
diff --git a/transcoder/basisu_transcoder.h b/transcoder/basisu_transcoder.h
index 3327e8d..8324e99 100644
--- a/transcoder/basisu_transcoder.h
+++ b/transcoder/basisu_transcoder.h
@@ -1,5 +1,5 @@
 // basisu_transcoder.h
-// Copyright (C) 2019-2021 Binomial LLC. All Rights Reserved.
+// Copyright (C) 2019-2024 Binomial LLC. All Rights Reserved.
 // Important: If compiling with gcc, be sure strict aliasing is disabled: -fno-strict-aliasing
 //
 // Licensed under the Apache License, Version 2.0 (the "License");
@@ -29,6 +29,7 @@
 
 // Set BASISU_FORCE_DEVEL_MESSAGES to 1 to enable debug printf()'s whenever an error occurs, for easier debugging during development.
 #ifndef BASISU_FORCE_DEVEL_MESSAGES
+	// TODO - disable before checking in
 	#define BASISU_FORCE_DEVEL_MESSAGES 0
 #endif
 
@@ -55,7 +56,7 @@
 		cTFETC2_RGBA = 1,							// Opaque+alpha, ETC2_EAC_A8 block followed by a ETC1 block, alpha channel will be opaque for opaque .basis files
 
 		// BC1-5, BC7 (desktop, some mobile devices)
-		cTFBC1_RGB = 2,							// Opaque only, no punchthrough alpha support yet, transcodes alpha slice if cDecodeFlagsTranscodeAlphaDataToOpaqueFormats flag is specified
+		cTFBC1_RGB = 2,								// Opaque only, no punchthrough alpha support yet, transcodes alpha slice if cDecodeFlagsTranscodeAlphaDataToOpaqueFormats flag is specified
 		cTFBC3_RGBA = 3, 							// Opaque+alpha, BC4 followed by a BC1 block, alpha channel will be opaque for opaque .basis files
 		cTFBC4_R = 4,								// Red only, alpha slice is transcoded to output if cDecodeFlagsTranscodeAlphaDataToOpaqueFormats flag is specified
 		cTFBC5_RG = 5,								// XY: Two BC4 blocks, X=R and Y=Alpha, .basis file should have alpha data (if not Y will be all 255's)
@@ -63,10 +64,11 @@
 
 		// PVRTC1 4bpp (mobile, PowerVR devices)
 		cTFPVRTC1_4_RGB = 8,						// Opaque only, RGB or alpha if cDecodeFlagsTranscodeAlphaDataToOpaqueFormats flag is specified, nearly lowest quality of any texture format.
-		cTFPVRTC1_4_RGBA = 9,					// Opaque+alpha, most useful for simple opacity maps. If .basis file doesn't have alpha cTFPVRTC1_4_RGB will be used instead. Lowest quality of any supported texture format.
+		cTFPVRTC1_4_RGBA = 9,						// Opaque+alpha, most useful for simple opacity maps. If .basis file doesn't have alpha cTFPVRTC1_4_RGB will be used instead. Lowest quality of any supported texture format.
 
 		// ASTC (mobile, Intel devices, hopefully all desktop GPU's one day)
-		cTFASTC_4x4_RGBA = 10,					// Opaque+alpha, ASTC 4x4, alpha channel will be opaque for opaque .basis files. Transcoder uses RGB/RGBA/L/LA modes, void extent, and up to two ([0,47] and [0,255]) endpoint precisions.
+		cTFASTC_4x4_RGBA = 10,						// LDR. Opaque+alpha, ASTC 4x4, alpha channel will be opaque for opaque .basis files. 
+													// LDR: Transcoder uses RGB/RGBA/L/LA modes, void extent, and up to two ([0,47] and [0,255]) endpoint precisions.
 
 		// ATC (mobile, Adreno devices, this is a niche format)
 		cTFATC_RGB = 11,							// Opaque, RGB or alpha if cDecodeFlagsTranscodeAlphaDataToOpaqueFormats flag is specified. ATI ATC (GL_ATC_RGB_AMD)
@@ -74,8 +76,8 @@
 
 		// FXT1 (desktop, Intel devices, this is a super obscure format)
 		cTFFXT1_RGB = 17,							// Opaque only, uses exclusively CC_MIXED blocks. Notable for having a 8x4 block size. GL_3DFX_texture_compression_FXT1 is supported on Intel integrated GPU's (such as HD 630).
-														// Punch-through alpha is relatively easy to support, but full alpha is harder. This format is only here for completeness so opaque-only is fine for now.
-														// See the BASISU_USE_ORIGINAL_3DFX_FXT1_ENCODING macro in basisu_transcoder_internal.h.
+													// Punch-through alpha is relatively easy to support, but full alpha is harder. This format is only here for completeness so opaque-only is fine for now.
+													// See the BASISU_USE_ORIGINAL_3DFX_FXT1_ENCODING macro in basisu_transcoder_internal.h.
 
 		cTFPVRTC2_4_RGB = 18,					// Opaque-only, almost BC1 quality, much faster to transcode and supports arbitrary texture dimensions (unlike PVRTC1 RGB).
 		cTFPVRTC2_4_RGBA = 19,					// Opaque+alpha, slower to encode than cTFPVRTC2_4_RGB. Premultiplied alpha is highly recommended, otherwise the color channel can leak into the alpha channel on transparent blocks.
@@ -83,13 +85,22 @@
 		cTFETC2_EAC_R11 = 20,					// R only (ETC2 EAC R11 unsigned)
 		cTFETC2_EAC_RG11 = 21,					// RG only (ETC2 EAC RG11 unsigned), R=opaque.r, G=alpha - for tangent space normal maps
 
+		cTFBC6H = 22,							// HDR, RGB only, unsigned
+		cTFASTC_HDR_4x4_RGBA = 23,				// HDR, RGBA (currently UASTC HDR is only RGB), unsigned
+
 		// Uncompressed (raw pixel) formats
+		// Note these uncompressed formats (RGBA32, 565, and 4444) can only be transcoded to from LDR input files (ETC1S or UASTC LDR).
 		cTFRGBA32 = 13,							// 32bpp RGBA image stored in raster (not block) order in memory, R is first byte, A is last byte.
 		cTFRGB565 = 14,							// 16bpp RGB image stored in raster (not block) order in memory, R at bit position 11
 		cTFBGR565 = 15,							// 16bpp RGB image stored in raster (not block) order in memory, R at bit position 0
-		cTFRGBA4444 = 16,							// 16bpp RGBA image stored in raster (not block) order in memory, R at bit position 12, A at bit position 0
+		cTFRGBA4444 = 16,						// 16bpp RGBA image stored in raster (not block) order in memory, R at bit position 12, A at bit position 0
+		
+		// Note these uncompressed formats (HALF and 9E5) can only be transcoded to from HDR input files (UASTC HDR).
+		cTFRGB_HALF = 24,						// 48bpp RGB half (16-bits/component, 3 components)
+		cTFRGBA_HALF = 25,						// 64bpp RGBA half (16-bits/component, 4 components) (A will always currently 1.0, UASTC_HDR doesn't support alpha)
+		cTFRGB_9E5 = 26,						// 32bpp RGB 9E5 (shared exponent, positive only, see GL_EXT_texture_shared_exponent)
 
-		cTFTotalTextureFormats = 22,
+		cTFTotalTextureFormats = 27,
 
 		// Old enums for compatibility with code compiled against previous versions
 		cTFETC1 = cTFETC1_RGB,
@@ -124,6 +135,9 @@
 	// Returns true if the format supports an alpha channel.
 	bool basis_transcoder_format_has_alpha(transcoder_texture_format fmt);
 
+	// Returns true if the format is HDR.
+	bool basis_transcoder_format_is_hdr(transcoder_texture_format fmt);
+
 	// Returns the basisu::texture_format corresponding to the specified transcoder_texture_format.
 	basisu::texture_format basis_get_basisu_texture_format(transcoder_texture_format fmt);
 
@@ -142,7 +156,7 @@
 	// Returns the block height for the specified texture format, which is currently always 4.
 	uint32_t basis_get_block_height(transcoder_texture_format tex_type);
 
-	// Returns true if the specified format was enabled at compile time.
+	// Returns true if the specified format was enabled at compile time, and is supported for the specific basis/ktx2 texture format (ETC1S, UASTC, or UASTC HDR).
 	bool basis_is_format_supported(transcoder_texture_format tex_type, basis_tex_format fmt = basis_tex_format::cETC1S);
 
 	// Validates that the output buffer is large enough to hold the entire transcoded texture.
@@ -317,6 +331,42 @@
 			int channel0 = -1, int channel1 = -1);
 	};
 
+	class basisu_lowlevel_uastc_hdr_transcoder
+	{
+		friend class basisu_transcoder;
+
+	public:
+		basisu_lowlevel_uastc_hdr_transcoder();
+
+		bool transcode_slice(void* pDst_blocks, uint32_t num_blocks_x, uint32_t num_blocks_y, const uint8_t* pImage_data, uint32_t image_data_size, block_format fmt,
+			uint32_t output_block_or_pixel_stride_in_bytes, bool bc1_allow_threecolor_blocks, bool has_alpha, const uint32_t orig_width, const uint32_t orig_height, uint32_t output_row_pitch_in_blocks_or_pixels = 0,
+			basisu_transcoder_state* pState = nullptr, uint32_t output_rows_in_pixels = 0, int channel0 = -1, int channel1 = -1, uint32_t decode_flags = 0);
+
+		bool transcode_slice(void* pDst_blocks, uint32_t num_blocks_x, uint32_t num_blocks_y, const uint8_t* pImage_data, uint32_t image_data_size, block_format fmt,
+			uint32_t output_block_or_pixel_stride_in_bytes, bool bc1_allow_threecolor_blocks, const basis_file_header& header, const basis_slice_desc& slice_desc, uint32_t output_row_pitch_in_blocks_or_pixels = 0,
+			basisu_transcoder_state* pState = nullptr, uint32_t output_rows_in_pixels = 0, int channel0 = -1, int channel1 = -1, uint32_t decode_flags = 0)
+		{
+			return transcode_slice(pDst_blocks, num_blocks_x, num_blocks_y, pImage_data, image_data_size, fmt,
+				output_block_or_pixel_stride_in_bytes, bc1_allow_threecolor_blocks, (header.m_flags & cBASISHeaderFlagHasAlphaSlices) != 0, slice_desc.m_orig_width, slice_desc.m_orig_height, output_row_pitch_in_blocks_or_pixels,
+				pState, output_rows_in_pixels, channel0, channel1, decode_flags);
+		}
+
+		// Container independent transcoding
+		bool transcode_image(
+			transcoder_texture_format target_format,
+			void* pOutput_blocks, uint32_t output_blocks_buf_size_in_blocks_or_pixels,
+			const uint8_t* pCompressed_data, uint32_t compressed_data_length,
+			uint32_t num_blocks_x, uint32_t num_blocks_y, uint32_t orig_width, uint32_t orig_height, uint32_t level_index,
+			uint32_t slice_offset, uint32_t slice_length,
+			uint32_t decode_flags = 0,
+			bool has_alpha = false,
+			bool is_video = false,
+			uint32_t output_row_pitch_in_blocks_or_pixels = 0,
+			basisu_transcoder_state* pState = nullptr,
+			uint32_t output_rows_in_pixels = 0,
+			int channel0 = -1, int channel1 = -1);
+	};
+
 	struct basisu_slice_info
 	{
 		uint32_t m_orig_width;
@@ -530,6 +580,7 @@
 	private:
 		mutable basisu_lowlevel_etc1s_transcoder m_lowlevel_etc1s_decoder;
 		mutable basisu_lowlevel_uastc_transcoder m_lowlevel_uastc_decoder;
+		mutable basisu_lowlevel_uastc_hdr_transcoder m_lowlevel_uastc_hdr_decoder;
 
 		bool m_ready_to_transcode;
 
@@ -612,10 +663,12 @@
 #pragma pack(pop)
 
 	const uint32_t KTX2_VK_FORMAT_UNDEFINED = 0;
+	const uint32_t KTX2_FORMAT_UASTC_4x4_SFLOAT_BLOCK = 1000066000; // TODO, is this correct?
 	const uint32_t KTX2_KDF_DF_MODEL_UASTC = 166;
+	const uint32_t KTX2_KDF_DF_MODEL_UASTC_HDR = 167;
 	const uint32_t KTX2_KDF_DF_MODEL_ETC1S = 163;
 	const uint32_t KTX2_IMAGE_IS_P_FRAME = 2;
-	const uint32_t KTX2_UASTC_BLOCK_SIZE = 16;
+	const uint32_t KTX2_UASTC_BLOCK_SIZE = 16; // also the block size for UASTC_HDR
 	const uint32_t KTX2_MAX_SUPPORTED_LEVEL_COUNT = 16; // this is an implementation specific constraint and can be increased
 
 	// The KTX2 transfer functions supported by KTX2
@@ -800,13 +853,15 @@
 		// Returns 0 or the number of layers in the texture array or texture video. Valid after init().
 		uint32_t get_layers() const { return m_header.m_layer_count; }
 
-		// Returns cETC1S or cUASTC4x4. Valid after init().
+		// Returns cETC1S, cUASTC4x4, or cUASTC_HDR_4x4. Valid after init().
 		basist::basis_tex_format get_format() const { return m_format; } 
-
+				
 		bool is_etc1s() const { return get_format() == basist::basis_tex_format::cETC1S; }
 
 		bool is_uastc() const { return get_format() == basist::basis_tex_format::cUASTC4x4; }
 
+		bool is_hdr() const { return get_format() == basist::basis_tex_format::cUASTC_HDR_4x4; }
+
 		// Returns true if the ETC1S file has two planes (typically RGBA, or RRRG), or true if the UASTC file has alpha data. Valid after init().
 		uint32_t get_has_alpha() const { return m_has_alpha; }
 
@@ -913,6 +968,7 @@
 								
 		basist::basisu_lowlevel_etc1s_transcoder m_etc1s_transcoder;
 		basist::basisu_lowlevel_uastc_transcoder m_uastc_transcoder;
+		basist::basisu_lowlevel_uastc_hdr_transcoder m_uastc_hdr_transcoder;
 				
 		ktx2_transcoder_state m_def_transcoder_state;
 
diff --git a/transcoder/basisu_transcoder_internal.h b/transcoder/basisu_transcoder_internal.h
index aa3bee3..17c9dc7 100644
--- a/transcoder/basisu_transcoder_internal.h
+++ b/transcoder/basisu_transcoder_internal.h
@@ -1,5 +1,5 @@
 // basisu_transcoder_internal.h - Universal texture format transcoder library.
-// Copyright (C) 2019-2021 Binomial LLC. All Rights Reserved.
+// Copyright (C) 2019-2024 Binomial LLC. All Rights Reserved.
 //
 // Important: If compiling with gcc, be sure strict aliasing is disabled: -fno-strict-aliasing
 //
@@ -20,8 +20,9 @@
 #pragma warning (disable: 4127) //  conditional expression is constant
 #endif
 
-#define BASISD_LIB_VERSION 116
-#define BASISD_VERSION_STRING "01.16"
+// v1.50: Added UASTC HDR support
+#define BASISD_LIB_VERSION 150
+#define BASISD_VERSION_STRING "01.50"
 
 #ifdef _DEBUG
 #define BASISD_BUILD_DEBUG
@@ -82,9 +83,15 @@
 		cRGBA4444_ALPHA,
 		cRGBA4444_COLOR_OPAQUE,
 		cRGBA4444,
+		cRGBA_HALF,
+		cRGB_HALF,
+		cRGB_9E5,
 
-		cUASTC_4x4,
-						
+		cUASTC_4x4,							// LDR, universal
+		cUASTC_HDR_4x4,						// HDR, transcodes only to 4x4 HDR ASTC, BC6H, or uncompressed
+		cBC6H,
+		cASTC_HDR_4x4,
+								
 		cTotalBlockFormats
 	};
 
@@ -789,7 +796,198 @@
 	};
 
 	bool basis_block_format_is_uncompressed(block_format tex_type);
-	
+
+	//------------------------------------
+
+	typedef uint16_t half_float;
+
+	const double MIN_DENORM_HALF_FLOAT = 0.000000059604645; // smallest positive subnormal number
+	const double MIN_HALF_FLOAT = 0.00006103515625; // smallest positive normal number
+	const double MAX_HALF_FLOAT = 65504.0; // largest normal number
+
+	inline uint32_t get_bits(uint32_t val, int low, int high)
+	{
+		const int num_bits = (high - low) + 1;
+		assert((num_bits >= 1) && (num_bits <= 32));
+
+		val >>= low;
+		if (num_bits != 32)
+			val &= ((1u << num_bits) - 1);
+
+		return val;
+	}
+
+	inline bool is_half_inf_or_nan(half_float v)
+	{
+		return get_bits(v, 10, 14) == 31;
+	}
+
+	inline bool is_half_denorm(half_float v)
+	{
+		int e = (v >> 10) & 31;
+		return !e;
+	}
+
+	inline int get_half_exp(half_float v)
+	{
+		int e = ((v >> 10) & 31);
+		return e ? (e - 15) : -14;
+	}
+
+	inline int get_half_mantissa(half_float v)
+	{
+		if (is_half_denorm(v))
+			return v & 0x3FF;
+		return (v & 0x3FF) | 0x400;
+	}
+
+	inline float get_half_mantissaf(half_float v)
+	{
+		return ((float)get_half_mantissa(v)) / 1024.0f;
+	}
+
+	inline int get_half_sign(half_float v)
+	{
+		return v ? ((v & 0x8000) ? -1 : 1) : 0;
+	}
+
+	inline bool half_is_signed(half_float v)
+	{
+		return (v & 0x8000) != 0;
+	}
+
+#if 0
+	int hexp = get_half_exp(Cf);
+	float hman = get_half_mantissaf(Cf);
+	int hsign = get_half_sign(Cf);
+	float k = powf(2.0f, hexp) * hman * hsign;
+	if (is_half_inf_or_nan(Cf))
+		k = std::numeric_limits<float>::quiet_NaN();
+#endif
+
+	half_float float_to_half(float val);
+
+	inline float half_to_float(half_float hval)
+	{
+		union { float f; uint32_t u; } x = { 0 };
+
+		uint32_t s = ((uint32_t)hval >> 15) & 1;
+		uint32_t e = ((uint32_t)hval >> 10) & 0x1F;
+		uint32_t m = (uint32_t)hval & 0x3FF;
+
+		if (!e)
+		{
+			if (!m)
+			{
+				// +- 0
+				x.u = s << 31;
+				return x.f;
+			}
+			else
+			{
+				// denormalized
+				while (!(m & 0x00000400))
+				{
+					m <<= 1;
+					--e;
+				}
+
+				++e;
+				m &= ~0x00000400;
+			}
+		}
+		else if (e == 31)
+		{
+			if (m == 0)
+			{
+				// +/- INF
+				x.u = (s << 31) | 0x7f800000;
+				return x.f;
+			}
+			else
+			{
+				// +/- NaN
+				x.u = (s << 31) | 0x7f800000 | (m << 13);
+				return x.f;
+			}
+		}
+
+		e = e + (127 - 15);
+		m = m << 13;
+
+		assert(s <= 1);
+		assert(m <= 0x7FFFFF);
+		assert(e <= 255);
+
+		x.u = m | (e << 23) | (s << 31);
+		return x.f;
+	}
+
+	// Originally from bc6h_enc.h
+
+	void bc6h_enc_init();
+
+	const uint32_t MAX_BLOG16_VAL = 0xFFFF;
+
+	// BC6H internals
+	const uint32_t NUM_BC6H_MODES = 14;
+	const uint32_t BC6H_LAST_MODE_INDEX = 13;
+	const uint32_t BC6H_FIRST_1SUBSET_MODE_INDEX = 10; // in the MS docs, this is "mode 11" (where the first mode is 1), 60 bits for endpoints (10.10, 10.10, 10.10), 63 bits for weights
+	const uint32_t TOTAL_BC6H_PARTITION_PATTERNS = 32;
+
+	extern const uint8_t g_bc6h_mode_sig_bits[NUM_BC6H_MODES][4]; // base, r, g, b
+
+	struct bc6h_bit_layout
+	{
+		int8_t m_comp; // R=0,G=1,B=2,D=3 (D=partition index)
+		int8_t m_index; // 0-3, 0-1 Low/High subset 1, 2-3 Low/High subset 2, -1=partition index (d)
+		int8_t m_last_bit;
+		int8_t m_first_bit; // may be -1 if a single bit, may be >m_last_bit if reversed
+	};
+
+	const uint32_t MAX_BC6H_LAYOUT_INDEX = 25;
+	extern const bc6h_bit_layout g_bc6h_bit_layouts[NUM_BC6H_MODES][MAX_BC6H_LAYOUT_INDEX];
+
+	extern const uint8_t g_bc6h_2subset_patterns[TOTAL_BC6H_PARTITION_PATTERNS][4][4]; // [y][x]
+
+	extern const uint8_t g_bc6h_weight3[8];
+	extern const uint8_t g_bc6h_weight4[16];
+
+	extern const int8_t g_bc6h_mode_lookup[32];
+		
+	// Converts b16 to half float
+	inline half_float bc6h_blog16_to_half(uint32_t comp)
+	{
+		assert(comp <= 0xFFFF);
+
+		// scale the magnitude by 31/64
+		comp = (comp * 31u) >> 6u;
+		return (half_float)comp;
+	}
+
+	const uint32_t MAX_BC6H_HALF_FLOAT_AS_UINT = 0x7BFF;
+
+	// Inverts bc6h_blog16_to_half().
+	// Returns the nearest blog16 given a half value. 
+	inline uint32_t bc6h_half_to_blog16(half_float h)
+	{
+		assert(h <= MAX_BC6H_HALF_FLOAT_AS_UINT);
+		return (h * 64 + 30) / 31;
+	}
+
+	struct bc6h_block
+	{
+		uint8_t m_bytes[16];
+	};
+
+	void bc6h_enc_block_mode10(bc6h_block* pPacked_block, const half_float pEndpoints[3][2], const uint8_t* pWeights);
+	void bc6h_enc_block_1subset_4bit_weights(bc6h_block* pPacked_block, const half_float pEndpoints[3][2], const uint8_t* pWeights);
+	void bc6h_enc_block_1subset_mode9_3bit_weights(bc6h_block* pPacked_block, const half_float pEndpoints[3][2], const uint8_t* pWeights);
+	void bc6h_enc_block_1subset_3bit_weights(bc6h_block* pPacked_block, const half_float pEndpoints[3][2], const uint8_t* pWeights);
+	void bc6h_enc_block_2subset_mode9_3bit_weights(bc6h_block* pPacked_block, uint32_t common_part_index, const half_float pEndpoints[2][3][2], const uint8_t* pWeights); // pEndpoints[subset][comp][lh_index]
+	void bc6h_enc_block_2subset_3bit_weights(bc6h_block* pPacked_block, uint32_t common_part_index, const half_float pEndpoints[2][3][2], const uint8_t* pWeights); // pEndpoints[subset][comp][lh_index]
+	bool bc6h_enc_block_solid_color(bc6h_block* pPacked_block, const half_float pColor[3]);
+		
 } // namespace basist
 
 
diff --git a/transcoder/basisu_transcoder_tables_dxt1_5.inc b/transcoder/basisu_transcoder_tables_dxt1_5.inc
index 8244550..205758b 100644
--- a/transcoder/basisu_transcoder_tables_dxt1_5.inc
+++ b/transcoder/basisu_transcoder_tables_dxt1_5.inc
@@ -1,4 +1,4 @@
-// Copyright (C) 2017-2019 Binomial LLC. All Rights Reserved.
+// Copyright (C) 2017-2024 Binomial LLC. All Rights Reserved.
 //
 // Licensed under the Apache License, Version 2.0 (the "License");
 // you may not use this file except in compliance with the License.
diff --git a/transcoder/basisu_transcoder_tables_dxt1_6.inc b/transcoder/basisu_transcoder_tables_dxt1_6.inc
index fad45fe..f2d324f 100644
--- a/transcoder/basisu_transcoder_tables_dxt1_6.inc
+++ b/transcoder/basisu_transcoder_tables_dxt1_6.inc
@@ -1,4 +1,4 @@
-// Copyright (C) 2017-2019 Binomial LLC. All Rights Reserved.
+// Copyright (C) 2017-2024 Binomial LLC. All Rights Reserved.
 //
 // Licensed under the Apache License, Version 2.0 (the "License");
 // you may not use this file except in compliance with the License.
diff --git a/transcoder/basisu_transcoder_uastc.h b/transcoder/basisu_transcoder_uastc.h
index f91314f..457bd51 100644
--- a/transcoder/basisu_transcoder_uastc.h
+++ b/transcoder/basisu_transcoder_uastc.h
@@ -13,6 +13,7 @@
 	const uint32_t UASTC_MODE_INDEX_SOLID_COLOR = 8;
 
 	const uint32_t TOTAL_ASTC_BC7_COMMON_PARTITIONS2 = 30;
+	const uint32_t TOTAL_ASTC_BC6H_COMMON_PARTITIONS2 = 27; // BC6H only supports only 5-bit pattern indices, BC7 supports 4-bit or 6-bit
 	const uint32_t TOTAL_ASTC_BC7_COMMON_PARTITIONS3 = 11;
 	const uint32_t TOTAL_BC7_3_ASTC2_COMMON_PARTITIONS = 19;