Merge pull request #234 from omar-polo/master

fix the build on OpenBSD
diff --git a/README.md b/README.md
index 17a76cc..1f97139 100644
--- a/README.md
+++ b/README.md
@@ -50,6 +50,8 @@
 
 The command line tool used to create, validate, and transcode/unpack .basis/.KTX2 files is named "basisu". Run basisu without any parameters for help. 
 
+The library and command line tool have no third-party dependencies beyond those already included in the repo, so they are easy to build.
+
 To build basisu (without SSE 4.1 support - the default):
 
 ```
@@ -86,11 +88,11 @@
 
 To compress a image to a higher quality UASTC .basis file with RDO post processing, so the .basis file is more compressible:
 
-`basisu -uastc -uastc_level 2 -uastc_rdo_q .75 x.png`
+`basisu -uastc -uastc_level 2 -uastc_rdo_l .75 x.png`
 
 -uastc_level X ranges from 0-4 and controls the UASTC encoder's performance vs. quality tradeoff. Level 0 is very fast, but low quality, level 2 is the default quality, while level 3 is the highest practical quality. Level 4 is impractically slow, but highest quality.
 
--uastc_rdo_q X controls the rate distortion stage's quality setting. The lower this value, the higher the quality, but the larger the compressed file size. Good values to try are between .2-3.0. The default is 1.0. RDO post-processing is currently pretty slow, but we'll be optimizing it over time.
+-uastc_rdo_l X controls the rate-distortion stage's quality setting. The lower this value, the higher the quality, but the larger the compressed file size. Good values to try are between 0.2 and 3.0. The default is 1.0. RDO post-processing is currently pretty slow, but we'll be optimizing it over time.
 
 UASTC texture video is supported and has been tested. In RDO mode with 7zip LZMA, we've seen average bitrates between 1-2 bpp. ETC1S mode is recommended for texture video, which gets bitrates around .25-.3 bpp.
 
diff --git a/contrib/single_file_transcoder/basisu_transcoder-in.cpp b/contrib/single_file_transcoder/basisu_transcoder-in.cpp
index 19d3589..830cd35 100644
--- a/contrib/single_file_transcoder/basisu_transcoder-in.cpp
+++ b/contrib/single_file_transcoder/basisu_transcoder-in.cpp
@@ -1,17 +1,14 @@
 /**
  * Basis Universal single file library. Generated using:
  * \code
- *	./combine.sh -r ../../transcoder -x basisu_transcoder_tables_bc7_m6.inc -o basisu_transcoder.cpp basisu_transcoder-in.cpp
+ *	./combine.sh -r ../../transcoder -o basisu_transcoder.cpp basisu_transcoder-in.cpp
  * \endcode
- * 
- * \note The script above excludes the BC7 mode 6 tables, a choice reflected in
- * the build options.
  */
 
 /*
  * Transcoder build options for known platforms (iOS has ETC, ASTC and PVRTC;
  * Emscripten adds DXT to iOS's options; Android adds PVRTC2 to Emscripten's
- * options; other platforms build all except BC7 mode 6 and FXT1).
+ * options; other platforms build all except FXT1).
  * 
  * See https://github.com/BinomialLLC/basis_universal#shrinking-the-transcoders-compiled-size
  */
@@ -28,11 +25,14 @@
 	#ifndef __ANDROID__
 		#define BASISD_SUPPORT_PVRTC2 0
 	#endif
-#else
-	#define BASISD_SUPPORT_BC7_MODE6_OPAQUE_ONLY 0
 #endif
 #define BASISD_SUPPORT_FXT1 0
 
+/*
+ * KTX2 support disabled.
+ */
+#define BASISD_SUPPORT_KTX2 0
+
 #include "basisu_transcoder.cpp"
 
 /**
diff --git a/contrib/single_file_transcoder/create_transcoder.sh b/contrib/single_file_transcoder/create_transcoder.sh
index 160f5fd..a041d2a 100755
--- a/contrib/single_file_transcoder/create_transcoder.sh
+++ b/contrib/single_file_transcoder/create_transcoder.sh
@@ -4,8 +4,7 @@
 OUT_FILE="tempbin"
 
 echo "Amalgamating files... this can take a while"
-echo "Note: basisu_transcoder_tables_bc7_m6.inc is excluded"
-./combine.sh -r ../../transcoder -x basisu_transcoder_tables_bc7_m6.inc -o basisu_transcoder.cpp basisu_transcoder-in.cpp
+./combine.sh -r ../../transcoder -o basisu_transcoder.cpp basisu_transcoder-in.cpp
 # Did combining work?
 if [ $? -ne 0 ]; then
   echo "Combine script: FAILED"
diff --git a/contrib/single_file_transcoder/examples/emscripten.cpp b/contrib/single_file_transcoder/examples/emscripten.cpp
index 740bd26..e56ae6a 100644
--- a/contrib/single_file_transcoder/examples/emscripten.cpp
+++ b/contrib/single_file_transcoder/examples/emscripten.cpp
@@ -5,8 +5,8 @@
  * \n
  * Compile using:
  * \code
- *	export CC_FLAGS="-std=c++11 -Wall -Wextra -Werror -Os -g0 -flto --llvm-lto 3 -fno-exceptions -fno-rtti -lGL -DNDEBUG=1"
- *	export EM_FLAGS="-s ENVIRONMENT=web -s WASM=1 --shell-file shell.html --closure 1"
+ *	export "CC_FLAGS=-std=c++11 -Wall -Wextra -Werror -Os -g0 -flto --llvm-lto 3 -fno-exceptions -fno-rtti -lGL -DNDEBUG=1"
+ *	export "EM_FLAGS=-s ENVIRONMENT=web -s WASM=1 --shell-file shell.html --closure 1"
  *	emcc $CC_FLAGS $EM_FLAGS -o out.html emscripten.cpp
  * \endcode
  * Alternatively include \c basisu_transcoder.h and compile \c
diff --git a/encoder/apg_bmp.c b/encoder/apg_bmp.c
index ef3d015..d342b20 100644
--- a/encoder/apg_bmp.c
+++ b/encoder/apg_bmp.c
@@ -247,7 +247,7 @@
   }
 
   // allocate memory for the output pixels block. cast to size_t in case width and height are both the max of 65536 and n_dst_chans > 1
-  unsigned char* dst_img_ptr = malloc( (size_t)width * (size_t)height * (size_t)n_dst_chans );
+  unsigned char* dst_img_ptr = (unsigned char*)malloc( (size_t)width * (size_t)height * (size_t)n_dst_chans );
   if ( !dst_img_ptr ) {
     free( record.data );
     return NULL;
@@ -480,7 +480,7 @@
     dib_hdr.bitmask_b = 0x0000FF00;
   }
 
-  uint8_t* dst_pixels_ptr = malloc( dst_pixels_padded_sz );
+  uint8_t* dst_pixels_ptr = (uint8_t*)malloc( dst_pixels_padded_sz );
   if ( !dst_pixels_ptr ) { return 0; }
   {
     size_t dst_byte_idx = 0;
diff --git a/encoder/basisu_bc7enc.cpp b/encoder/basisu_bc7enc.cpp
index 06aa7eb..22fdfa6 100644
--- a/encoder/basisu_bc7enc.cpp
+++ b/encoder/basisu_bc7enc.cpp
@@ -174,9 +174,8 @@
 	} // range
 }
 
-static inline uint32_t astc_interpolate(uint32_t l, uint32_t h, uint32_t w)
+static inline uint32_t astc_interpolate_linear(uint32_t l, uint32_t h, uint32_t w)
 {
-	// This is for linear values, not sRGB.
 	l = (l << 8) | l;
 	h = (h << 8) | h;
 	uint32_t k = (l * (64 - w) + h * w + 32) >> 6;
@@ -230,7 +229,7 @@
 			{
 				uint32_t high = (h << 4) | h;
 				
-				const int k = astc_interpolate(low, high, g_bc7_weights3[BC7ENC_ASTC_4BIT_3BIT_OPTIMAL_INDEX]);
+				const int k = astc_interpolate_linear(low, high, g_bc7_weights3[BC7ENC_ASTC_4BIT_3BIT_OPTIMAL_INDEX]);
 				const int err = (k - c) * (k - c);
 
 				if (err < best.m_error)
@@ -259,7 +258,7 @@
 			{
 				uint32_t high = (h << 4) | h;
 				
-				const int k = astc_interpolate(low, high, g_bc7_weights2[BC7ENC_ASTC_4BIT_2BIT_OPTIMAL_INDEX]);
+				const int k = astc_interpolate_linear(low, high, g_bc7_weights2[BC7ENC_ASTC_4BIT_2BIT_OPTIMAL_INDEX]);
 				const int err = (k - c) * (k - c);
 
 				if (err < best.m_error)
@@ -288,7 +287,7 @@
 			{
 				uint32_t high = g_astc_sorted_order_unquant[7][h].m_unquant;
 				
-				const int k = astc_interpolate(low, high, g_bc7_weights2[BC7ENC_ASTC_RANGE7_2BIT_OPTIMAL_INDEX]);
+				const int k = astc_interpolate_linear(low, high, g_bc7_weights2[BC7ENC_ASTC_RANGE7_2BIT_OPTIMAL_INDEX]);
 				const int err = (k - c) * (k - c);
 
 				if (err < best.m_error)
@@ -317,7 +316,7 @@
 			{
 				uint32_t high = g_astc_sorted_order_unquant[13][h].m_unquant;
 				
-				const int k = astc_interpolate(low, high, g_astc_weights4[BC7ENC_ASTC_RANGE13_4BIT_OPTIMAL_INDEX]);
+				const int k = astc_interpolate_linear(low, high, g_astc_weights4[BC7ENC_ASTC_RANGE13_4BIT_OPTIMAL_INDEX]);
 				const int err = (k - c) * (k - c);
 
 				if (err < best.m_error)
@@ -346,7 +345,7 @@
 			{
 				uint32_t high = g_astc_sorted_order_unquant[13][h].m_unquant;
 				
-				const int k = astc_interpolate(low, high, g_bc7_weights2[BC7ENC_ASTC_RANGE13_2BIT_OPTIMAL_INDEX]);
+				const int k = astc_interpolate_linear(low, high, g_bc7_weights2[BC7ENC_ASTC_RANGE13_2BIT_OPTIMAL_INDEX]);
 				const int err = (k - c) * (k - c);
 
 				if (err < best.m_error)
@@ -375,7 +374,7 @@
 			{
 				uint32_t high = g_astc_sorted_order_unquant[11][h].m_unquant;
 
-				const int k = astc_interpolate(low, high, g_astc_weights5[BC7ENC_ASTC_RANGE11_5BIT_OPTIMAL_INDEX]);
+				const int k = astc_interpolate_linear(low, high, g_astc_weights5[BC7ENC_ASTC_RANGE11_5BIT_OPTIMAL_INDEX]);
 				const int err = (k - c) * (k - c);
 
 				if (err < best.m_error)
@@ -650,7 +649,7 @@
 		uint32_t low = (pResults->m_low_endpoint.m_c[i] << 4) | pResults->m_low_endpoint.m_c[i];
 		uint32_t high = (pResults->m_high_endpoint.m_c[i] << 4) | pResults->m_high_endpoint.m_c[i];
 		
-		p.m_c[i] = (uint8_t)astc_interpolate(low, high, g_bc7_weights3[BC7ENC_ASTC_4BIT_3BIT_OPTIMAL_INDEX]);
+		p.m_c[i] = (uint8_t)astc_interpolate_linear(low, high, g_bc7_weights3[BC7ENC_ASTC_4BIT_3BIT_OPTIMAL_INDEX]);
 	}
 	p.m_c[3] = 255;
 
@@ -689,7 +688,7 @@
 		uint32_t low = (pResults->m_low_endpoint.m_c[i] << 4) | pResults->m_low_endpoint.m_c[i];
 		uint32_t high = (pResults->m_high_endpoint.m_c[i] << 4) | pResults->m_high_endpoint.m_c[i];
 		
-		p.m_c[i] = (uint8_t)astc_interpolate(low, high, g_bc7_weights2[BC7ENC_ASTC_4BIT_2BIT_OPTIMAL_INDEX]);
+		p.m_c[i] = (uint8_t)astc_interpolate_linear(low, high, g_bc7_weights2[BC7ENC_ASTC_4BIT_2BIT_OPTIMAL_INDEX]);
 	}
 	
 	uint64_t total_err = 0;
@@ -728,7 +727,7 @@
 		uint32_t low = g_astc_sorted_order_unquant[7][pResults->m_low_endpoint.m_c[i]].m_unquant;
 		uint32_t high = g_astc_sorted_order_unquant[7][pResults->m_high_endpoint.m_c[i]].m_unquant;
 		
-		p.m_c[i] = (uint8_t)astc_interpolate(low, high, g_bc7_weights2[BC7ENC_ASTC_RANGE7_2BIT_OPTIMAL_INDEX]);
+		p.m_c[i] = (uint8_t)astc_interpolate_linear(low, high, g_bc7_weights2[BC7ENC_ASTC_RANGE7_2BIT_OPTIMAL_INDEX]);
 	}
 	p.m_c[3] = 255;
 
@@ -768,7 +767,7 @@
 		uint32_t low = g_astc_sorted_order_unquant[13][pResults->m_low_endpoint.m_c[i]].m_unquant;
 		uint32_t high = g_astc_sorted_order_unquant[13][pResults->m_high_endpoint.m_c[i]].m_unquant;
 		
-		p.m_c[i] = (uint8_t)astc_interpolate(low, high, g_bc7_weights2[BC7ENC_ASTC_RANGE13_2BIT_OPTIMAL_INDEX]);
+		p.m_c[i] = (uint8_t)astc_interpolate_linear(low, high, g_bc7_weights2[BC7ENC_ASTC_RANGE13_2BIT_OPTIMAL_INDEX]);
 	}
 	
 	uint64_t total_err = 0;
@@ -807,7 +806,7 @@
 		uint32_t low = g_astc_sorted_order_unquant[11][pResults->m_low_endpoint.m_c[i]].m_unquant;
 		uint32_t high = g_astc_sorted_order_unquant[11][pResults->m_high_endpoint.m_c[i]].m_unquant;
 
-		p.m_c[i] = (uint8_t)astc_interpolate(low, high, g_astc_weights5[BC7ENC_ASTC_RANGE11_5BIT_OPTIMAL_INDEX]);
+		p.m_c[i] = (uint8_t)astc_interpolate_linear(low, high, g_astc_weights5[BC7ENC_ASTC_RANGE11_5BIT_OPTIMAL_INDEX]);
 	}
 
 	uint64_t total_err = 0;
@@ -863,7 +862,7 @@
 		for (uint32_t i = 1; i < (N - 1); i++)
 		{
 			for (uint32_t j = 0; j < nc; j++)
-				weightedColors[i].m_c[j] = (uint8_t)(astc_interpolate(actualMinColor.m_c[j], actualMaxColor.m_c[j], pParams->m_pSelector_weights[i]));
+				weightedColors[i].m_c[j] = (uint8_t)(astc_interpolate_linear(actualMinColor.m_c[j], actualMaxColor.m_c[j], pParams->m_pSelector_weights[i]));
 		}
 	}
 	else
@@ -1300,7 +1299,7 @@
 	
 	for (uint32_t i = 1; i < pParams->m_num_selector_weights - 1; i++)
 		for (uint32_t c = 0; c < 4; c++)
-			colors[i].m_c[c] = (uint8_t)astc_interpolate(colors[0].m_c[c], colors[n - 1].m_c[c], pParams->m_pSelector_weights[i]);
+			colors[i].m_c[c] = (uint8_t)astc_interpolate_linear(colors[0].m_c[c], colors[n - 1].m_c[c], pParams->m_pSelector_weights[i]);
 
 	uint64_t total_err = 0;
 	for (uint32_t p = 0; p < pParams->m_num_pixels; p++)
@@ -1815,10 +1814,10 @@
 	weightedColors[num_weights - 1] = highColor;
 	for (uint32_t i = 1; i < (num_weights - 1); i++)
 	{
-		weightedColors[i].m_c[0] = (uint8_t)astc_interpolate(lowColor.m_c[0], highColor.m_c[0], pWeight_table[i]);
-		weightedColors[i].m_c[1] = (uint8_t)astc_interpolate(lowColor.m_c[1], highColor.m_c[1], pWeight_table[i]);
-		weightedColors[i].m_c[2] = (uint8_t)astc_interpolate(lowColor.m_c[2], highColor.m_c[2], pWeight_table[i]);
-		weightedColors[i].m_c[3] = (num_comps == 4) ? (uint8_t)astc_interpolate(lowColor.m_c[3], highColor.m_c[3], pWeight_table[i]) : 255;
+		weightedColors[i].m_c[0] = (uint8_t)astc_interpolate_linear(lowColor.m_c[0], highColor.m_c[0], pWeight_table[i]);
+		weightedColors[i].m_c[1] = (uint8_t)astc_interpolate_linear(lowColor.m_c[1], highColor.m_c[1], pWeight_table[i]);
+		weightedColors[i].m_c[2] = (uint8_t)astc_interpolate_linear(lowColor.m_c[2], highColor.m_c[2], pWeight_table[i]);
+		weightedColors[i].m_c[3] = (num_comps == 4) ? (uint8_t)astc_interpolate_linear(lowColor.m_c[3], highColor.m_c[3], pWeight_table[i]) : 255;
 	}
 
 	// Compute dots and thresholds
diff --git a/encoder/basisu_bc7enc.h b/encoder/basisu_bc7enc.h
index 2346991..8d8b788 100644
--- a/encoder/basisu_bc7enc.h
+++ b/encoder/basisu_bc7enc.h
@@ -12,6 +12,7 @@
 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 // See the License for the specific language governing permissions and
 // limitations under the License.
+#pragma once
 #include "basisu_enc.h"
 #include "../transcoder/basisu_transcoder_uastc.h"
 
diff --git a/encoder/basisu_comp.cpp b/encoder/basisu_comp.cpp
index dc4ae11..10f96ce 100644
--- a/encoder/basisu_comp.cpp
+++ b/encoder/basisu_comp.cpp
@@ -467,7 +467,10 @@
 					return false;
 				}
 
-				printf("Read source image \"%s\", %ux%u\n", pSource_filename, file_image.get_width(), file_image.get_height());
+				if (m_params.m_status_output)
+				{
+					printf("Read source image \"%s\", %ux%u\n", pSource_filename, file_image.get_width(), file_image.get_height());
+				}
 
 				// Optionally load another image and put a grayscale version of it into the alpha channel.
 				if ((source_file_index < m_params.m_source_alpha_filenames.size()) && (m_params.m_source_alpha_filenames[source_file_index].size()))
@@ -1427,7 +1430,10 @@
 				return false;
 			}
 
-			printf("Wrote output .basis/.ktx2 file \"%s\"\n", output_filename.c_str());
+			if (m_params.m_status_output)
+			{
+				printf("Wrote output .basis/.ktx2 file \"%s\"\n", output_filename.c_str());
+			}
 		}
 
 		size_t comp_size = 0;
diff --git a/encoder/basisu_enc.cpp b/encoder/basisu_enc.cpp
index 12e7df1..daaf65b 100644
--- a/encoder/basisu_enc.cpp
+++ b/encoder/basisu_enc.cpp
@@ -1779,8 +1779,6 @@
 			return nullptr;
 		}
 
-		const uint32_t bytes_per_line = hdr.m_width * tga_bytes_per_pixel;
-
 		const uint8_t *pSrc = pBuf + sizeof(tga_header);
 		uint32_t bytes_remaining = buf_size - sizeof(tga_header);
 
diff --git a/encoder/basisu_enc.h b/encoder/basisu_enc.h
index 05c95cb..0ce0114 100644
--- a/encoder/basisu_enc.h
+++ b/encoder/basisu_enc.h
@@ -1634,6 +1634,14 @@
 
 				if ((!l_weight) || (!r_weight))
 				{
+					l_children.resize(0);
+					new_l_child.set(0.0f);
+					l_ttsum = 0.0f;
+					l_weight = 0;
+					r_children.resize(0);
+					new_r_child.set(0.0f);
+					r_ttsum = 0.0f;
+					r_weight = 0;
 					TrainingVectorType firstVec;
 					for (uint32_t i = 0; i < node.m_training_vecs.size(); i++)
 					{
@@ -1660,7 +1668,7 @@
 						}
 					}
 
-					if (!l_weight)
+					if ((!l_weight) || (!r_weight))
 						return false;
 				}
 
diff --git a/encoder/basisu_resampler.cpp b/encoder/basisu_resampler.cpp
index e193ce8..f4cedf0 100644
--- a/encoder/basisu_resampler.cpp
+++ b/encoder/basisu_resampler.cpp
@@ -15,14 +15,6 @@
 #include "basisu_resampler.h"
 #include "basisu_resampler_filters.h"
 
-#ifndef max
-#define max(a, b) (((a) > (b)) ? (a) : (b))
-#endif
-
-#ifndef min
-#define min(a, b) (((a) < (b)) ? (a) : (b))
-#endif
-
 #define RESAMPLER_DEBUG 0
 
 namespace basisu
diff --git a/transcoder/basisu_transcoder.cpp b/transcoder/basisu_transcoder.cpp
index 29eb3c0..0b37333 100644
--- a/transcoder/basisu_transcoder.cpp
+++ b/transcoder/basisu_transcoder.cpp
@@ -10778,8 +10778,6 @@
 			return false;
 		}
 
-		const bool transcode_alpha_data_to_opaque_formats = (decode_flags & cDecodeFlagsTranscodeAlphaDataToOpaqueFormats) != 0;
-
 		if (decode_flags & cDecodeFlagsPVRTCDecodeToNextPow2)
 		{
 			BASISU_DEVEL_ERROR("basisu_transcoder::transcode_image_level: cDecodeFlagsPVRTCDecodeToNextPow2 currently unsupported\n");
@@ -17336,7 +17334,6 @@
 		
 	bool ktx2_transcoder::decompress_level_data(uint32_t level_index, basisu::uint8_vec& uncomp_data)
 	{
-		const uint8_t* pComp_data = m_levels[level_index].m_byte_offset + m_pData;
 		const uint64_t comp_size = m_levels[level_index].m_byte_length;
 		
 		const uint64_t uncomp_size = m_levels[level_index].m_uncompressed_byte_length;
@@ -17361,6 +17358,7 @@
 		if (m_header.m_supercompression_scheme == KTX2_SS_ZSTANDARD)
 		{
 #if BASISD_SUPPORT_KTX2_ZSTD
+			const uint8_t* pComp_data = m_levels[level_index].m_byte_offset + m_pData;
 			size_t actualUncompSize = ZSTD_decompress(uncomp_data.data(), (size_t)uncomp_size, pComp_data, (size_t)comp_size);
 			if (ZSTD_isError(actualUncompSize))
 			{