Minor transcoder update: Adding support for arbitrary output texture row pitches, for usage with the Vulkan rendering API.
diff --git a/basisu_enc.cpp b/basisu_enc.cpp
index 8b6c211..48f724d 100644
--- a/basisu_enc.cpp
+++ b/basisu_enc.cpp
@@ -1177,4 +1177,24 @@
 		m_psnr = m_rms ? (float)clamp<double>(log10(255.0 / m_rms) * 20.0, 0.0f, 300.0f) : 1e+10f;
 	}
 
+	void fill_buffer_with_random_bytes(void *pBuf, size_t size, uint32_t seed)
+	{
+		// Overwrites the first `size` bytes at pBuf with output from a rand generator constructed with `seed`.
+		rand r(seed);
+		uint8_t *pDst = static_cast<uint8_t *>(pBuf);
+		// Bulk path: one urand32() call per 4 output bytes. pBuf carries no alignment guarantee, so store via memcpy rather than through a uint32_t* cast.
+		while (size >= sizeof(uint32_t))
+		{
+			const uint32_t bits = r.urand32();
+			memcpy(pDst, &bits, sizeof(bits));
+			pDst += sizeof(bits);
+			size -= sizeof(bits);
+		}
+		while (size) // Tail: the remaining 0-3 bytes come from r.byte(), one at a time.
+		{
+			*pDst++ = r.byte();
+			size--;
+		}
+	}
+
 } // namespace basisu
diff --git a/basisu_enc.h b/basisu_enc.h
index 6788d03..b723c5a 100644
--- a/basisu_enc.h
+++ b/basisu_enc.h
@@ -2309,6 +2309,8 @@
 		return fopen(pFilename, pMode);
 #endif
 	}
+
+	void fill_buffer_with_random_bytes(void *pBuf, size_t size, uint32_t seed = 1); // Overwrites the first 'size' bytes at pBuf with output from a rand generator constructed with 'seed'.
 		
 } // namespace basisu
 
diff --git a/basisu_gpu_texture.cpp b/basisu_gpu_texture.cpp
index b8690ad..3224895 100644
--- a/basisu_gpu_texture.cpp
+++ b/basisu_gpu_texture.cpp
@@ -428,7 +428,7 @@
 
 	bool gpu_image::unpack(image& img, bool pvrtc_wrap_addressing) const
 	{
-		img.resize(get_width(), get_height());
+		img.resize(get_pixel_width(), get_pixel_height());
 		img.set_all(g_black_color);
 
 		if (!img.get_width() || !img.get_height())
@@ -549,15 +549,15 @@
 
 			if (!array_index)
 			{
-				width = levels[0].get_width();
-				height = levels[0].get_height();
+				width = levels[0].get_pixel_width();
+				height = levels[0].get_pixel_height();
 				total_levels = (uint32_t)levels.size();
 				fmt = levels[0].get_format();
 			}
 			else
 			{
-				if ((width != levels[0].get_width()) ||
-				    (height != levels[0].get_height()) ||
+				if ((width != levels[0].get_pixel_width()) ||
+				    (height != levels[0].get_pixel_height()) ||
 				    (total_levels != levels.size()))
 				{
 					// All cubemap/texture array faces must be the same dimension
@@ -570,8 +570,8 @@
 			{
 				if (level_index)
 				{
-					if ( (levels[level_index].get_width() != maximum<uint32_t>(1, levels[0].get_width() >> level_index)) ||
-							(levels[level_index].get_height() != maximum<uint32_t>(1, levels[0].get_height() >> level_index)) )
+					if ( (levels[level_index].get_pixel_width() != maximum<uint32_t>(1, levels[0].get_pixel_width() >> level_index)) ||
+							(levels[level_index].get_pixel_height() != maximum<uint32_t>(1, levels[0].get_pixel_height() >> level_index)) )
 					{
 						// Malformed mipmap chain
 						assert(0);
diff --git a/basisu_gpu_texture.h b/basisu_gpu_texture.h
index 7e0a4b6..d5fad36 100644
--- a/basisu_gpu_texture.h
+++ b/basisu_gpu_texture.h
@@ -49,14 +49,23 @@
 		}
 
 		inline texture_format get_format() const { return m_fmt; }
-		inline uint32_t get_width() const { return m_width; }
-		inline uint32_t get_height() const { return m_height; }
+		
+		// Width/height in pixels
+		inline uint32_t get_pixel_width() const { return m_width; }
+		inline uint32_t get_pixel_height() const { return m_height; }
+		
+		// Width/height in blocks, row pitch is assumed to be m_blocks_x.
 		inline uint32_t get_blocks_x() const { return m_blocks_x; }
 		inline uint32_t get_blocks_y() const { return m_blocks_y; }
+
+		// Size of each block in pixels
 		inline uint32_t get_block_width() const { return m_block_width; }
 		inline uint32_t get_block_height() const { return m_block_height; }
+
 		inline uint32_t get_qwords_per_block() const { return m_qwords_per_block; }
 		inline uint32_t get_total_blocks() const { return m_blocks_x * m_blocks_y; }
+		inline uint32_t get_bytes_per_block() const { return get_qwords_per_block() * static_cast<uint32_t>(sizeof(uint64_t)); } // Block size in bytes; the multiply is kept in uint32_t to avoid an implicit size_t narrowing on return.
+		inline uint32_t get_row_pitch_in_bytes() const { return get_blocks_x() * get_bytes_per_block(); } // Bytes in one row of blocks: blocks per row times bytes per block.
 
 		inline const uint64_vec &get_blocks() const { return m_blocks; }
 		
diff --git a/basisu_tool.cpp b/basisu_tool.cpp
index 57b68ff..0cc7193 100644
--- a/basisu_tool.cpp
+++ b/basisu_tool.cpp
@@ -910,11 +910,42 @@
 					gpu_image &gi = gpu_images[transcoder_tex_fmt][image_index][level_index];
 					gi.init(tex_fmt, level_info.m_orig_width, level_info.m_orig_height);
 
+					// Fill the buffer with pseudo-random bytes, to help more visibly detect cases where the transcoder fails to write to part of the output.
+					fill_buffer_with_random_bytes(gi.get_ptr(), gi.get_size_in_bytes());
+
+#if 1
 					if (!dec.transcode_image_level(&basis_data[0], (uint32_t)basis_data.size(), image_index, level_index, gi.get_ptr(), gi.get_total_blocks(), transcoder_tex_fmt, 0))
 					{
 						error_printf("Failed transcoding image level (%u %u %u)!\n", image_index, level_index, format_iter);
 						return false;
 					}
+#else
+					// quick and dirty row pitch parameter test, to be moved into a unit test
+					uint8_vec temp;
+					uint32_t block_pitch_to_test = level_info.m_num_blocks_x;
+					if (transcoder_tex_fmt != basist::cTFPVRTC1_4_OPAQUE_ONLY)
+						block_pitch_to_test += 5;
+
+					temp.resize(level_info.m_num_blocks_y * block_pitch_to_test * gi.get_bytes_per_block());
+					fill_buffer_with_random_bytes(&temp[0], temp.size());
+
+					if (!dec.transcode_image_level(&basis_data[0], (uint32_t)basis_data.size(), image_index, level_index, &temp[0], (uint32_t)(temp.size() / gi.get_bytes_per_block()), transcoder_tex_fmt, 0, block_pitch_to_test))
+					{
+						error_printf("Failed transcoding image level (%u %u %u)!\n", image_index, level_index, format_iter);
+						return false;
+					}
+
+					if (transcoder_tex_fmt == basist::cTFPVRTC1_4_OPAQUE_ONLY)
+					{
+						assert(temp.size() == gi.get_size_in_bytes());
+						memcpy(gi.get_ptr(), &temp[0], gi.get_size_in_bytes());
+					}
+					else
+					{
+						for (uint32_t y = 0; y < level_info.m_num_blocks_y; y++)
+							memcpy(gi.get_block_ptr(0, y), &temp[y * block_pitch_to_test * gi.get_bytes_per_block()], gi.get_row_pitch_in_bytes());
+					}
+#endif
 
 					printf("Transcode of image %u level %u res %ux%u format %s succeeded\n", image_index, level_index, level_info.m_orig_width, level_info.m_orig_height, basist::basis_get_format_name(transcoder_tex_fmt));
 
diff --git a/transcoder/basisu_transcoder.cpp b/transcoder/basisu_transcoder.cpp
index d0ec678..40896dc 100644
--- a/transcoder/basisu_transcoder.cpp
+++ b/transcoder/basisu_transcoder.cpp
@@ -3683,10 +3683,13 @@
 	}
 			
 	bool basisu_lowlevel_transcoder::transcode_slice(void *pDst_blocks, uint32_t num_blocks_x, uint32_t num_blocks_y, const uint8_t *pImage_data, uint32_t image_data_size, block_format fmt, 
-		uint32_t output_stride, bool pvrtc_wrap_addressing, bool bc1_allow_threecolor_blocks)
+		uint32_t output_block_stride_in_bytes, bool pvrtc_wrap_addressing, bool bc1_allow_threecolor_blocks, uint32_t output_row_pitch_in_blocks)
 	{
 		const uint32_t total_blocks = num_blocks_x * num_blocks_y;
 
+		if (!output_row_pitch_in_blocks)
+			output_row_pitch_in_blocks = num_blocks_x;
+
 		basist::bitwise_decoder sym_codec;
 				
 		if (!sym_codec.init(pImage_data, image_data_size))
@@ -3920,10 +3923,7 @@
 				{
 				case cETC1:
 				{
-					//block.set_raw_selector_bits(pSelector->m_bytes[0], pSelector->m_bytes[1], pSelector->m_bytes[2], pSelector->m_bytes[3]);
-					//memcpy(pDst_block, &block, sizeof(block));
-
-					decoder_etc_block* pDst_block = reinterpret_cast<decoder_etc_block*>(static_cast<uint8_t*>(pDst_blocks) + (block_x + block_y * num_blocks_x) * output_stride);
+					decoder_etc_block* pDst_block = reinterpret_cast<decoder_etc_block*>(static_cast<uint8_t*>(pDst_blocks) + (block_x + block_y * output_row_pitch_in_blocks) * output_block_stride_in_bytes);
 					pDst_block->m_uint32[0] = block.m_uint32[0];
 					pDst_block->set_raw_selector_bits(pSelector->m_bytes[0], pSelector->m_bytes[1], pSelector->m_bytes[2], pSelector->m_bytes[3]);
 
@@ -3933,7 +3933,7 @@
 				{
 					block.set_raw_selector_bits(pSelector->m_bytes[0], pSelector->m_bytes[1], pSelector->m_bytes[2], pSelector->m_bytes[3]);
 
-					void* pDst_block = static_cast<uint8_t*>(pDst_blocks) + (block_x + block_y * num_blocks_x) * output_stride;
+					void* pDst_block = static_cast<uint8_t*>(pDst_blocks) + (block_x + block_y * output_row_pitch_in_blocks) * output_block_stride_in_bytes;
 #if BASISD_SUPPORT_DXT1
 					convert_etc1s_to_dxt1(static_cast<dxt1_block*>(pDst_block), &block, pSelector, bc1_allow_threecolor_blocks);
 #else
@@ -3945,7 +3945,7 @@
 				{
 					block.set_raw_selector_bits(pSelector->m_bytes[0], pSelector->m_bytes[1], pSelector->m_bytes[2], pSelector->m_bytes[3]);
 
-					void* pDst_block = static_cast<uint8_t*>(pDst_blocks) + (block_x + block_y * num_blocks_x) * output_stride;
+					void* pDst_block = static_cast<uint8_t*>(pDst_blocks) + (block_x + block_y * output_row_pitch_in_blocks) * output_block_stride_in_bytes;
 #if BASISD_SUPPORT_DXT5A
 					convert_etc1s_to_dxt5a(static_cast<dxt5a_block*>(pDst_block), &block, pSelector);
 #else
@@ -3992,7 +3992,7 @@
 #if BASISD_SUPPORT_BC7
 					block.set_raw_selector_bits(pSelector->m_bytes[0], pSelector->m_bytes[1], pSelector->m_bytes[2], pSelector->m_bytes[3]);
 					
-					void* pDst_block = static_cast<uint8_t*>(pDst_blocks) + (block_x + block_y * num_blocks_x) * output_stride;
+					void* pDst_block = static_cast<uint8_t*>(pDst_blocks) + (block_x + block_y * output_row_pitch_in_blocks) * output_block_stride_in_bytes;
 					convert_etc1s_to_bc7_m6(static_cast<bc7_mode_6*>(pDst_block), &block, pSelector);
 #else	
 					assert(0);
@@ -4002,7 +4002,7 @@
 				case cETC2_EAC_A8:
 				{
 					block.set_raw_selector_bits(pSelector->m_bytes[0], pSelector->m_bytes[1], pSelector->m_bytes[2], pSelector->m_bytes[3]);
-					void* pDst_block = static_cast<uint8_t*>(pDst_blocks) + (block_x + block_y * num_blocks_x) * output_stride;
+					void* pDst_block = static_cast<uint8_t*>(pDst_blocks) + (block_x + block_y * output_row_pitch_in_blocks) * output_block_stride_in_bytes;
 #if BASISD_SUPPORT_ETC2_EAC_A8
 					convert_etc1s_to_etc2_eac_a8(static_cast<eac_a8_block*>(pDst_block), &block, pSelector);
 #else
@@ -4550,7 +4550,7 @@
 	}
 
 	bool basisu_transcoder::transcode_slice(const void *pData, uint32_t data_size, uint32_t slice_index, void *pOutput_blocks, uint32_t output_blocks_buf_size_in_blocks, block_format fmt, 
-		uint32_t output_stride, uint32_t decode_flags) const
+		uint32_t output_block_stride_in_bytes, uint32_t decode_flags, uint32_t output_row_pitch_in_blocks) const
 	{
 		if (!m_lowlevel_decoder.m_endpoints.size())
 		{
@@ -4618,7 +4618,7 @@
 				
 		return m_lowlevel_decoder.transcode_slice(pOutput_blocks, slice_desc.m_num_blocks_x, slice_desc.m_num_blocks_y,
 			pDataU8 + slice_desc.m_file_ofs, slice_desc.m_file_size,
-			fmt, output_stride, (decode_flags & cDecodeFlagsPVRTCWrapAddressing) != 0, (decode_flags & cDecodeFlagsBC1ForbidThreeColorBlocks) == 0);
+			fmt, output_block_stride_in_bytes, (decode_flags & cDecodeFlagsPVRTCWrapAddressing) != 0, (decode_flags & cDecodeFlagsBC1ForbidThreeColorBlocks) == 0, output_row_pitch_in_blocks);
 	}
 
 	int basisu_transcoder::find_first_slice_index(const void *pData, uint32_t data_size, uint32_t image_index, uint32_t level_index) const
@@ -4672,11 +4672,16 @@
 		return -1;
 	}
 
-	static void write_opaque_alpha_blocks(uint32_t total_slice_blocks, void *pOutput_blocks, uint32_t output_blocks_buf_size_in_blocks, block_format fmt, uint32_t stride)
+	static void write_opaque_alpha_blocks(
+		uint32_t num_blocks_x, uint32_t num_blocks_y, 
+		void *pOutput_blocks, uint32_t output_blocks_buf_size_in_blocks, block_format fmt, 
+		uint32_t block_stride_in_bytes, uint32_t output_row_pitch_in_blocks)
 	{
 		BASISU_NOTE_UNUSED(output_blocks_buf_size_in_blocks);
-		assert(total_slice_blocks <= output_blocks_buf_size_in_blocks);
 
+		if (!output_row_pitch_in_blocks)
+			output_row_pitch_in_blocks = num_blocks_x;
+		
 		if (fmt == cETC2_EAC_A8)
 		{
 #if BASISD_SUPPORT_ETC2_EAC_A8
@@ -4689,9 +4694,14 @@
 			static const uint8_t s_etc2_eac_a8_sel4[6] = { 0x92, 0x49, 0x24, 0x92, 0x49, 0x24 };
 			memcpy(&blk.m_selectors, s_etc2_eac_a8_sel4, sizeof(s_etc2_eac_a8_sel4));
 
-			for (uint32_t i = 0; i < total_slice_blocks; i++)
+			for (uint32_t y = 0; y < num_blocks_y; y++)
 			{
-				memcpy((uint8_t *)pOutput_blocks + stride * i, &blk, sizeof(blk));
+				uint32_t dst_ofs = y * output_row_pitch_in_blocks * block_stride_in_bytes;
+				for (uint32_t x = 0; x < num_blocks_x; x++)
+				{
+					memcpy((uint8_t *)pOutput_blocks + dst_ofs, &blk, sizeof(blk));
+					dst_ofs += block_stride_in_bytes;
+				}
 			}
 #endif
 		}
@@ -4703,9 +4713,14 @@
 			blk.m_endpoints[1] = 255;
 			memset(blk.m_selectors, 0, sizeof(blk.m_selectors));
 			
-			for (uint32_t i = 0; i < total_slice_blocks; i++)
+			for (uint32_t y = 0; y < num_blocks_y; y++)
 			{
-				memcpy((uint8_t *)pOutput_blocks + stride * i, &blk, sizeof(blk));
+				uint32_t dst_ofs = y * output_row_pitch_in_blocks * block_stride_in_bytes;
+				for (uint32_t x = 0; x < num_blocks_x; x++)
+				{
+					memcpy((uint8_t *)pOutput_blocks + dst_ofs, &blk, sizeof(blk));
+					dst_ofs += block_stride_in_bytes;
+				}
 			}
 #endif
 		}
@@ -4716,7 +4731,7 @@
 		uint32_t image_index, uint32_t level_index, 
 		void *pOutput_blocks, uint32_t output_blocks_buf_size_in_blocks,
 		transcoder_texture_format fmt,
-		uint32_t decode_flags) const
+		uint32_t decode_flags, uint32_t output_row_pitch_in_blocks) const
 	{
 		if (!m_lowlevel_decoder.m_endpoints.size())
 		{
@@ -4829,7 +4844,7 @@
 			if ((basis_file_has_alpha_slices) && (transcode_alpha_data_to_opaque_formats))
 				slice_index_to_decode++;
 
-			status = transcode_slice(pData, data_size, slice_index_to_decode, pOutput_blocks, output_blocks_buf_size_in_blocks, cETC1, bytes_per_block, decode_flags);
+			status = transcode_slice(pData, data_size, slice_index_to_decode, pOutput_blocks, output_blocks_buf_size_in_blocks, cETC1, bytes_per_block, decode_flags, output_row_pitch_in_blocks);
 			if (!status)
 			{
 				BASISU_DEVEL_ERROR("basisu_transcoder::transcode_image_level: transcode_slice() to ETC1 failed\n");
@@ -4848,7 +4863,7 @@
 			if ((basis_file_has_alpha_slices) && (transcode_alpha_data_to_opaque_formats))
 				slice_index_to_decode++;
 
-			status = transcode_slice(pData, data_size, slice_index_to_decode, pOutput_blocks, output_blocks_buf_size_in_blocks, cBC1, bytes_per_block, decode_flags);
+			status = transcode_slice(pData, data_size, slice_index_to_decode, pOutput_blocks, output_blocks_buf_size_in_blocks, cBC1, bytes_per_block, decode_flags, output_row_pitch_in_blocks);
 			if (!status)
 			{
 				BASISU_DEVEL_ERROR("basisu_transcoder::transcode_image_level: transcode_slice() to BC1 failed\n");
@@ -4867,7 +4882,7 @@
 			if ((basis_file_has_alpha_slices) && (transcode_alpha_data_to_opaque_formats))
 				slice_index_to_decode++;
 
-			status = transcode_slice(pData, data_size, slice_index_to_decode, pOutput_blocks, output_blocks_buf_size_in_blocks, cBC4, bytes_per_block, decode_flags);
+			status = transcode_slice(pData, data_size, slice_index_to_decode, pOutput_blocks, output_blocks_buf_size_in_blocks, cBC4, bytes_per_block, decode_flags, output_row_pitch_in_blocks);
 			if (!status)
 			{
 				BASISU_DEVEL_ERROR("basisu_transcoder::transcode_image_level: transcode_slice() to BC4 failed\n");
@@ -4886,7 +4901,8 @@
 			if ((basis_file_has_alpha_slices) && (transcode_alpha_data_to_opaque_formats))
 				slice_index_to_decode++;
 
-			status = transcode_slice(pData, data_size, slice_index_to_decode, pOutput_blocks, output_blocks_buf_size_in_blocks, cPVRTC1_4_OPAQUE_ONLY, bytes_per_block, decode_flags);
+			// output_row_pitch_in_blocks is actually ignored because we're transcoding to PVRTC1. (Print a dev warning if it's != 0?)
+			status = transcode_slice(pData, data_size, slice_index_to_decode, pOutput_blocks, output_blocks_buf_size_in_blocks, cPVRTC1_4_OPAQUE_ONLY, bytes_per_block, decode_flags, output_row_pitch_in_blocks);
 			if (!status)
 			{
 				BASISU_DEVEL_ERROR("basisu_transcoder::transcode_image_level: transcode_slice() to PVRTC1 4 opaque only failed\n");
@@ -4905,7 +4921,7 @@
 			if ((basis_file_has_alpha_slices) && (transcode_alpha_data_to_opaque_formats))
 				slice_index_to_decode++;
 
-			status = transcode_slice(pData, data_size, slice_index_to_decode, pOutput_blocks, output_blocks_buf_size_in_blocks, cBC7_M6_OPAQUE_ONLY, bytes_per_block, decode_flags);
+			status = transcode_slice(pData, data_size, slice_index_to_decode, pOutput_blocks, output_blocks_buf_size_in_blocks, cBC7_M6_OPAQUE_ONLY, bytes_per_block, decode_flags, output_row_pitch_in_blocks);
 			if (!status)
 			{
 				BASISU_DEVEL_ERROR("basisu_transcoder::transcode_image_level: transcode_slice() to BC7 m6 opaque only failed\n");
@@ -4922,18 +4938,18 @@
 			if (basis_file_has_alpha_slices)
 			{
 				// First decode the alpha data 
-				status = transcode_slice(pData, data_size, slice_index + 1, pOutput_blocks, output_blocks_buf_size_in_blocks, cETC2_EAC_A8, 16, decode_flags);
+				status = transcode_slice(pData, data_size, slice_index + 1, pOutput_blocks, output_blocks_buf_size_in_blocks, cETC2_EAC_A8, 16, decode_flags, output_row_pitch_in_blocks);
 			}
 			else
 			{
-				write_opaque_alpha_blocks(total_slice_blocks, pOutput_blocks, output_blocks_buf_size_in_blocks, cETC2_EAC_A8, 16);
+				write_opaque_alpha_blocks(pSlice_descs[slice_index].m_num_blocks_x, pSlice_descs[slice_index].m_num_blocks_y, pOutput_blocks, output_blocks_buf_size_in_blocks, cETC2_EAC_A8, 16, output_row_pitch_in_blocks);
 				status = true;
 			}
 
 			if (status)
 			{
 				// Now decode the color data
-				status = transcode_slice(pData, data_size, slice_index, (uint8_t*)pOutput_blocks + 8, output_blocks_buf_size_in_blocks, cETC1, 16, decode_flags);
+				status = transcode_slice(pData, data_size, slice_index, (uint8_t*)pOutput_blocks + 8, output_blocks_buf_size_in_blocks, cETC1, 16, decode_flags, output_row_pitch_in_blocks);
 				if (!status)
 				{
 					BASISU_DEVEL_ERROR("basisu_transcoder::transcode_image_level: transcode_slice() to ETC2 RGB failed\n");
@@ -4958,18 +4974,18 @@
 			// First decode the alpha data 
 			if (basis_file_has_alpha_slices)
 			{
-				status = transcode_slice(pData, data_size, slice_index + 1, pOutput_blocks, output_blocks_buf_size_in_blocks, cBC4, 16, decode_flags);
+				status = transcode_slice(pData, data_size, slice_index + 1, pOutput_blocks, output_blocks_buf_size_in_blocks, cBC4, 16, decode_flags, output_row_pitch_in_blocks);
 			}
 			else
 			{
-				write_opaque_alpha_blocks(total_slice_blocks, pOutput_blocks, output_blocks_buf_size_in_blocks, cBC4, 16);
+				write_opaque_alpha_blocks(pSlice_descs[slice_index].m_num_blocks_x, pSlice_descs[slice_index].m_num_blocks_y, pOutput_blocks, output_blocks_buf_size_in_blocks, cBC4, 16, output_row_pitch_in_blocks);
 				status = true;
 			}
 
 			if (status)
 			{
 				// Now decode the color data. Forbid 3 color blocks, which aren't allowed in BC3.
-				status = transcode_slice(pData, data_size, slice_index, (uint8_t*)pOutput_blocks + 8, output_blocks_buf_size_in_blocks, cBC1, 16, decode_flags | cDecodeFlagsBC1ForbidThreeColorBlocks);
+				status = transcode_slice(pData, data_size, slice_index, (uint8_t*)pOutput_blocks + 8, output_blocks_buf_size_in_blocks, cBC1, 16, decode_flags | cDecodeFlagsBC1ForbidThreeColorBlocks, output_row_pitch_in_blocks);
 				if (!status)
 				{
 					BASISU_DEVEL_ERROR("basisu_transcoder::transcode_image_level: transcode_slice() to BC3 RGB failed\n");
@@ -4989,13 +5005,13 @@
 #endif
 			assert(total_slices == 2);
 			// Decode the R data (actually the green channel of the color data slice in the basis file)
-			status = transcode_slice(pData, data_size, slice_index, pOutput_blocks, output_blocks_buf_size_in_blocks, cBC4, 16, decode_flags);
+			status = transcode_slice(pData, data_size, slice_index, pOutput_blocks, output_blocks_buf_size_in_blocks, cBC4, 16, decode_flags, output_row_pitch_in_blocks);
 			if (status)
 			{
 				if (basis_file_has_alpha_slices)
 				{
 					// Decode the G data (actually the green channel of the alpha data slice in the basis file)
-					status = transcode_slice(pData, data_size, slice_index + 1, (uint8_t*)pOutput_blocks + 8, output_blocks_buf_size_in_blocks, cBC4, 16, decode_flags);
+					status = transcode_slice(pData, data_size, slice_index + 1, (uint8_t*)pOutput_blocks + 8, output_blocks_buf_size_in_blocks, cBC4, 16, decode_flags, output_row_pitch_in_blocks);
 					if (!status)
 					{
 						BASISU_DEVEL_ERROR("basisu_transcoder::transcode_image_level: transcode_slice() to BC5 1 failed\n");
@@ -5003,7 +5019,7 @@
 				}
 				else
 				{
-					write_opaque_alpha_blocks(total_slice_blocks, (uint8_t*)pOutput_blocks + 8, output_blocks_buf_size_in_blocks, cBC4, 16);
+					write_opaque_alpha_blocks(pSlice_descs[slice_index].m_num_blocks_x, pSlice_descs[slice_index].m_num_blocks_y, (uint8_t*)pOutput_blocks + 8, output_blocks_buf_size_in_blocks, cBC4, 16, output_row_pitch_in_blocks);
 					status = true;
 				}
 			}
diff --git a/transcoder/basisu_transcoder.h b/transcoder/basisu_transcoder.h
index 5927f0a..c70845a 100644
--- a/transcoder/basisu_transcoder.h
+++ b/transcoder/basisu_transcoder.h
@@ -73,7 +73,8 @@
 
 		bool decode_tables(const uint8_t *pTable_data, uint32_t table_data_size);
 
-		bool transcode_slice(void *pDst_blocks, uint32_t num_blocks_x, uint32_t num_blocks_y, const uint8_t *pImage_data, uint32_t image_data_size, block_format fmt, uint32_t output_stride, bool wrap_addressing, bool bc1_allow_threecolor_blocks);
+		bool transcode_slice(void *pDst_blocks, uint32_t num_blocks_x, uint32_t num_blocks_y, const uint8_t *pImage_data, uint32_t image_data_size, block_format fmt, 
+			uint32_t output_block_stride_in_bytes, bool pvrtc_wrap_addressing, bool bc1_allow_threecolor_blocks, uint32_t output_row_pitch_in_blocks = 0); // output_row_pitch_in_blocks == 0 selects num_blocks_x as the row pitch.
 
 	private:
 		struct endpoint
@@ -263,12 +264,13 @@
 		// Currently, to decode to PVRTC1 the basis texture's dimensions in pixels must be a power of 2, due to PVRTC1 format requirements. 
 		// output_blocks_buf_size_in_blocks should be at least the image level's total_blocks (num_blocks_x * num_blocks_y)
 		// If fmt isn't cETC1, basisu_transcoder_init() must have been called first to initialize the transcoder lookup tables.
+		// output_row_pitch_in_blocks: Number of blocks per row. If 0, the transcoder uses the slice's num_blocks_x. Ignored for PVRTC1 (due to texture swizzling).
 		bool transcode_image_level(
 			const void *pData, uint32_t data_size, 
 			uint32_t image_index, uint32_t level_index, 
 			void *pOutput_blocks, uint32_t output_blocks_buf_size_in_blocks,
 			transcoder_texture_format fmt,
-			uint32_t decode_flags = cDecodeFlagsPVRTCWrapAddressing) const;
+			uint32_t decode_flags = cDecodeFlagsPVRTCWrapAddressing, uint32_t output_row_pitch_in_blocks = 0) const;
 
 		// Finds the basis slice corresponding to the specified image/level/alpha params, or -1 if the slice can't be found.
 		int find_slice(const void *pData, uint32_t data_size, uint32_t image_index, uint32_t level_index, bool alpha_data) const;
@@ -278,9 +280,11 @@
 		// output_blocks_buf_size_in_blocks is just used for verification to make sure the output buffer is large enough.
 		// output_blocks_buf_size_in_blocks should be at least the slice's total_blocks (num_blocks_x * num_blocks_y)
 		// If fmt isn't cETC1, basisu_transcoder_init() must have been called first to initialize the transcoder lookup tables.
+		// output_block_stride_in_bytes: Number of bytes between each output block.
+		// output_row_pitch_in_blocks: Number of blocks per row. If 0, the transcoder uses the slice's num_blocks_x. Ignored for PVRTC1 (due to texture swizzling).
 		bool transcode_slice(const void *pData, uint32_t data_size, uint32_t slice_index, 
 			void *pOutput_blocks, uint32_t output_blocks_buf_size_in_blocks, 
-			block_format fmt, uint32_t output_stride, uint32_t decode_flags = cDecodeFlagsPVRTCWrapAddressing) const;
+			block_format fmt, uint32_t output_block_stride_in_bytes, uint32_t decode_flags = cDecodeFlagsPVRTCWrapAddressing, uint32_t output_row_pitch_in_blocks = 0) const;
 
 	private:
 		const void *m_pFile_data;