Merge pull request #437 from billhollings/master

Support iOS builds for arm64e architecture.
diff --git a/Docs/Whats_New.md b/Docs/Whats_New.md
index 78395a8..db2a8e1 100644
--- a/Docs/Whats_New.md
+++ b/Docs/Whats_New.md
@@ -31,6 +31,7 @@
 - Support iOS builds for arm64e architecture.
 - Improvements to building external libraries.
 - Print Vulkan semantics when logging converted GLSL.
+- Support uploading S3TC-compressed 3D images.
 
 
 
diff --git a/MoltenVK/MoltenVK.xcodeproj/project.pbxproj b/MoltenVK/MoltenVK.xcodeproj/project.pbxproj
index c3ce45b..9fc8632 100644
--- a/MoltenVK/MoltenVK.xcodeproj/project.pbxproj
+++ b/MoltenVK/MoltenVK.xcodeproj/project.pbxproj
@@ -9,6 +9,10 @@
 /* Begin PBXBuildFile section */
 		45003E73214AD4E500E989CB /* MVKExtensions.def in Headers */ = {isa = PBXBuildFile; fileRef = 45003E6F214AD4C900E989CB /* MVKExtensions.def */; };
 		45003E74214AD4E600E989CB /* MVKExtensions.def in Headers */ = {isa = PBXBuildFile; fileRef = 45003E6F214AD4C900E989CB /* MVKExtensions.def */; };
+		45557A5221C9EFF3008868BD /* MVKCodec.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 45557A4D21C9EFF3008868BD /* MVKCodec.cpp */; };
+		45557A5321C9EFF3008868BD /* MVKCodec.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 45557A4D21C9EFF3008868BD /* MVKCodec.cpp */; };
+		45557A5421C9EFF3008868BD /* MVKCodec.h in Headers */ = {isa = PBXBuildFile; fileRef = 45557A5121C9EFF3008868BD /* MVKCodec.h */; };
+		45557A5521C9EFF3008868BD /* MVKCodec.h in Headers */ = {isa = PBXBuildFile; fileRef = 45557A5121C9EFF3008868BD /* MVKCodec.h */; };
 		83A4AD2A21BD75570006C935 /* MVKVector.h in Headers */ = {isa = PBXBuildFile; fileRef = 83A4AD2521BD75570006C935 /* MVKVector.h */; };
 		83A4AD2B21BD75570006C935 /* MVKVector.h in Headers */ = {isa = PBXBuildFile; fileRef = 83A4AD2521BD75570006C935 /* MVKVector.h */; };
 		83A4AD2C21BD75570006C935 /* MVKVectorAllocator.h in Headers */ = {isa = PBXBuildFile; fileRef = 83A4AD2921BD75570006C935 /* MVKVectorAllocator.h */; };
@@ -265,6 +269,9 @@
 
 /* Begin PBXFileReference section */
 		45003E6F214AD4C900E989CB /* MVKExtensions.def */ = {isa = PBXFileReference; explicitFileType = sourcecode.cpp.h; fileEncoding = 4; path = MVKExtensions.def; sourceTree = "<group>"; };
+		45557A4D21C9EFF3008868BD /* MVKCodec.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = MVKCodec.cpp; sourceTree = "<group>"; };
+		45557A5121C9EFF3008868BD /* MVKCodec.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = MVKCodec.h; sourceTree = "<group>"; };
+		45557A5721CD83C3008868BD /* MVKDXTnCodec.def */ = {isa = PBXFileReference; explicitFileType = sourcecode.cpp.h; fileEncoding = 4; path = MVKDXTnCodec.def; sourceTree = "<group>"; };
 		83A4AD2521BD75570006C935 /* MVKVector.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = MVKVector.h; sourceTree = "<group>"; };
 		83A4AD2921BD75570006C935 /* MVKVectorAllocator.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = MVKVectorAllocator.h; sourceTree = "<group>"; };
 		A9096E5C1F81E16300DFBEA6 /* MVKCmdDispatch.h */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.h; path = MVKCmdDispatch.h; sourceTree = "<group>"; };
@@ -497,6 +504,9 @@
 		A98149401FB6A3F7005F00B4 /* Utility */ = {
 			isa = PBXGroup;
 			children = (
+				45557A5721CD83C3008868BD /* MVKDXTnCodec.def */,
+				45557A4D21C9EFF3008868BD /* MVKCodec.cpp */,
+				45557A5121C9EFF3008868BD /* MVKCodec.h */,
 				83A4AD2521BD75570006C935 /* MVKVector.h */,
 				83A4AD2921BD75570006C935 /* MVKVectorAllocator.h */,
 				A98149411FB6A3F7005F00B4 /* MVKBaseObject.cpp */,
@@ -611,6 +621,7 @@
 				A94FB7E01C7DFB4800632CA3 /* MVKDescriptorSet.h in Headers */,
 				A9E53DE12100B197002781DD /* NSString+MoltenVK.h in Headers */,
 				A9E53DDF2100B197002781DD /* CAMetalLayer+MoltenVK.h in Headers */,
+				45557A5421C9EFF3008868BD /* MVKCodec.h in Headers */,
 				A94FB8041C7DFB4800632CA3 /* MVKRenderPass.h in Headers */,
 				A9F042A61FB4CF83009FCCB8 /* MVKLogging.h in Headers */,
 				A94FB8001C7DFB4800632CA3 /* MVKQueue.h in Headers */,
@@ -674,6 +685,7 @@
 				A94FB7E11C7DFB4800632CA3 /* MVKDescriptorSet.h in Headers */,
 				A9E53DE22100B197002781DD /* NSString+MoltenVK.h in Headers */,
 				A9E53DE02100B197002781DD /* CAMetalLayer+MoltenVK.h in Headers */,
+				45557A5521C9EFF3008868BD /* MVKCodec.h in Headers */,
 				A94FB8051C7DFB4800632CA3 /* MVKRenderPass.h in Headers */,
 				A9F042A71FB4CF83009FCCB8 /* MVKLogging.h in Headers */,
 				A94FB8011C7DFB4800632CA3 /* MVKQueue.h in Headers */,
@@ -949,6 +961,7 @@
 				A94FB7CE1C7DFB4800632CA3 /* MVKCommand.mm in Sources */,
 				A94FB80E1C7DFB4800632CA3 /* MVKShaderModule.mm in Sources */,
 				A94FB81A1C7DFB4800632CA3 /* MVKSync.mm in Sources */,
+				45557A5221C9EFF3008868BD /* MVKCodec.cpp in Sources */,
 				A94FB7BE1C7DFB4800632CA3 /* MVKCmdPipeline.mm in Sources */,
 				A94FB81E1C7DFB4800632CA3 /* MVKLayers.mm in Sources */,
 				A94FB7EE1C7DFB4800632CA3 /* MVKFramebuffer.mm in Sources */,
@@ -1001,6 +1014,7 @@
 				A94FB7CF1C7DFB4800632CA3 /* MVKCommand.mm in Sources */,
 				A94FB80F1C7DFB4800632CA3 /* MVKShaderModule.mm in Sources */,
 				A94FB81B1C7DFB4800632CA3 /* MVKSync.mm in Sources */,
+				45557A5321C9EFF3008868BD /* MVKCodec.cpp in Sources */,
 				A94FB7BF1C7DFB4800632CA3 /* MVKCmdPipeline.mm in Sources */,
 				A94FB81F1C7DFB4800632CA3 /* MVKLayers.mm in Sources */,
 				A94FB7EF1C7DFB4800632CA3 /* MVKFramebuffer.mm in Sources */,
diff --git a/MoltenVK/MoltenVK/Commands/MVKCmdTransfer.mm b/MoltenVK/MoltenVK/Commands/MVKCmdTransfer.mm
index 6bb9790..6b909f3 100644
--- a/MoltenVK/MoltenVK/Commands/MVKCmdTransfer.mm
+++ b/MoltenVK/MoltenVK/Commands/MVKCmdTransfer.mm
@@ -617,6 +617,21 @@
 #pragma mark -
 #pragma mark MVKCmdBufferImageCopy
 
+// Matches shader struct.
+typedef struct {
+    uint32_t srcRowStride;
+    uint32_t srcRowStrideHigh;
+    uint32_t srcDepthStride;
+    uint32_t srcDepthStrideHigh;
+    uint32_t destRowStride;
+    uint32_t destRowStrideHigh;
+    uint32_t destDepthStride;
+    uint32_t destDepthStrideHigh;
+    VkFormat format;
+    VkOffset3D offset;
+    VkExtent3D extent;
+} MVKCmdCopyBufferToImageInfo;
+
 void MVKCmdBufferImageCopy::setContent(VkBuffer buffer,
                                        VkImage image,
                                        VkImageLayout imageLayout,
@@ -649,9 +664,8 @@
     if ( !mtlBuffer || !mtlTexture ) { return; }
 
 	NSUInteger mtlBuffOffset = _buffer->getMTLBufferOffset();
-    MTLPixelFormat mtlPixFmt = mtlTexture.pixelFormat;
+    MTLPixelFormat mtlPixFmt = _image->getMTLPixelFormat();
     MVKCommandUse cmdUse = _toImage ? kMVKCommandUseCopyBufferToImage : kMVKCommandUseCopyImageToBuffer;
-    id<MTLBlitCommandEncoder> mtlBlitEnc = cmdEncoder->getMTLBlitEncoder(cmdUse);
 
     for (auto& cpyRgn : _mtlBuffImgCopyRegions) {
 
@@ -689,11 +703,87 @@
                 blitOptions |= MTLBlitOptionStencilFromDepthStencil;
             }
         }
+#if MVK_MACOS
+        if (_toImage && mvkFormatTypeFromMTLPixelFormat(mtlPixFmt) == kMVKFormatCompressed &&
+            mtlTexture.textureType == MTLTextureType3D) {
+            // If we're copying to a compressed 3D image, the image data need to be decompressed.
+            // If we're copying to mip level 0, we can skip the copy and just decode
+            // directly into the image. Otherwise, we need to use an intermediate
+            // buffer.
+            MVKCmdCopyBufferToImageInfo info;
+            info.srcRowStride = bytesPerRow & 0xffffffff;
+            info.srcRowStrideHigh = bytesPerRow >> 32;
+            info.srcDepthStride = bytesPerImg & 0xffffffff;
+            info.srcDepthStrideHigh = bytesPerImg >> 32;
+            info.destRowStride = info.destRowStrideHigh = 0;
+            info.destDepthStride = info.destDepthStrideHigh = 0;
+            info.format = _image->getVkFormat();
+            info.offset = cpyRgn.imageOffset;
+            info.extent = cpyRgn.imageExtent;
+            bool needsTempBuff = cpyRgn.imageSubresource.mipLevel != 0;
+            id<MTLComputeCommandEncoder> mtlComputeEnc = cmdEncoder->getMTLComputeEncoder(cmdUse);
+            id<MTLComputePipelineState> mtlComputeState = getCommandEncodingPool()->getCmdCopyBufferToImage3DDecompressMTLComputePipelineState(needsTempBuff);
+            [mtlComputeEnc pushDebugGroup: @"vkCmdCopyBufferToImage"];
+            [mtlComputeEnc setComputePipelineState: mtlComputeState];
+            [mtlComputeEnc setBuffer: mtlBuffer offset: mtlBuffOffset + cpyRgn.bufferOffset atIndex: 0];
+            MVKBuffer* tempBuff;
+            if (needsTempBuff) {
+                NSUInteger bytesPerDestRow = mvkMTLPixelFormatBytesPerRow(mtlTexture.pixelFormat, info.extent.width);
+                NSUInteger bytesPerDestImg = mvkMTLPixelFormatBytesPerLayer(mtlTexture.pixelFormat, bytesPerDestRow, info.extent.height);
+                // We're going to copy from the temporary buffer now, so use the
+                // temp buffer parameters in the copy below.
+                bytesPerRow = bytesPerDestRow;
+                bytesPerImg = bytesPerDestImg;
+                MVKBufferDescriptorData tempBuffData;
+                tempBuffData.size = bytesPerDestImg * mtlTxtSize.depth;
+                tempBuffData.usage = VK_BUFFER_USAGE_TRANSFER_SRC_BIT | VK_BUFFER_USAGE_STORAGE_BUFFER_BIT;
+                tempBuff = getCommandEncodingPool()->getTransferMVKBuffer(tempBuffData);
+                mtlBuffer = tempBuff->getMTLBuffer();
+                mtlBuffOffset = tempBuff->getMTLBufferOffset();
+                info.destRowStride = bytesPerDestRow & 0xffffffff;
+                info.destRowStrideHigh = bytesPerDestRow >> 32;
+                info.destDepthStride = bytesPerDestImg & 0xffffffff;
+                info.destDepthStrideHigh = bytesPerDestImg >> 32;
+                [mtlComputeEnc setBuffer: mtlBuffer offset: mtlBuffOffset atIndex: 1];
+            } else {
+                [mtlComputeEnc setTexture: mtlTexture atIndex: 0];
+            }
+            cmdEncoder->setComputeBytes(mtlComputeEnc, &info, sizeof(info), 2);
+
+            // Now work out how big to make the grid, and from there, the size and number of threadgroups.
+            // One thread is run per block. Each block decompresses to an m x n array of texels.
+            // So the size of the grid is (ceil(width/m), ceil(height/n), depth).
+            VkExtent2D blockExtent = mvkMTLPixelFormatBlockTexelSize(mtlPixFmt);
+            MTLSize mtlGridSize = MTLSizeMake(mvkCeilingDivide(mtlTxtSize.width, blockExtent.width),
+                                              mvkCeilingDivide(mtlTxtSize.height, blockExtent.height),
+                                              mtlTxtSize.depth);
+            // Use four times the thread execution width as the threadgroup size.
+            MTLSize mtlTgrpSize = MTLSizeMake(2, 2, mtlComputeState.threadExecutionWidth);
+            // Then the number of threadgroups is (ceil(x/2), ceil(y/2), ceil(z/t)),
+            // where 't' is the thread execution width.
+            mtlGridSize.width = mvkCeilingDivide(mtlGridSize.width, 2);
+            mtlGridSize.height = mvkCeilingDivide(mtlGridSize.height, 2);
+            mtlGridSize.depth = mvkCeilingDivide(mtlGridSize.depth, mtlTgrpSize.depth);
+            // There may be extra threads, but that's OK; the shader does bounds checking to
+            // ensure it doesn't try to write out of bounds.
+            // Alternatively, we could use the newer -[MTLComputeCommandEncoder dispatchThreads:threadsPerThreadgroup:] method,
+            // but that needs Metal 2.0.
+            [mtlComputeEnc dispatchThreadgroups: mtlGridSize threadsPerThreadgroup: mtlTgrpSize];
+            [mtlComputeEnc popDebugGroup];
+
+            if (!needsTempBuff) { continue; }
+        } else {
+            mtlBuffOffset += cpyRgn.bufferOffset;
+        }
+#else
+        mtlBuffOffset += cpyRgn.bufferOffset;
+#endif
+        id<MTLBlitCommandEncoder> mtlBlitEnc = cmdEncoder->getMTLBlitEncoder(cmdUse);
 
         for (uint32_t lyrIdx = 0; lyrIdx < cpyRgn.imageSubresource.layerCount; lyrIdx++) {
             if (_toImage) {
                 [mtlBlitEnc copyFromBuffer: mtlBuffer
-                              sourceOffset: (mtlBuffOffset + cpyRgn.bufferOffset + (bytesPerImg * lyrIdx))
+                              sourceOffset: (mtlBuffOffset + (bytesPerImg * lyrIdx))
                          sourceBytesPerRow: bytesPerRow
                        sourceBytesPerImage: bytesPerImg
                                 sourceSize: mtlTxtSize
@@ -709,7 +799,7 @@
                                sourceOrigin: mtlTxtOrigin
                                  sourceSize: mtlTxtSize
                                    toBuffer: mtlBuffer
-                          destinationOffset: (mtlBuffOffset + cpyRgn.bufferOffset + (bytesPerImg * lyrIdx))
+                          destinationOffset: (mtlBuffOffset + (bytesPerImg * lyrIdx))
                      destinationBytesPerRow: bytesPerRow
                    destinationBytesPerImage: bytesPerImg
                                     options: blitOptions];
diff --git a/MoltenVK/MoltenVK/Commands/MVKCommandEncodingPool.h b/MoltenVK/MoltenVK/Commands/MVKCommandEncodingPool.h
index 1373cb0..ea33451 100644
--- a/MoltenVK/MoltenVK/Commands/MVKCommandEncodingPool.h
+++ b/MoltenVK/MoltenVK/Commands/MVKCommandEncodingPool.h
@@ -91,12 +91,28 @@
      */
     MVKImage* getTransferMVKImage(MVKImageDescriptorData& imgData);
     
+    /**
+     * Returns an MVKBuffer configured from the specified MTLBuffer configuration,
+     * with content held in Private storage. The object returned can be used as a
+     * temporary buffer during buffer-image transfers.
+     *
+     * The same buffer instance will be returned for two calls to this funciton with
+     * the same buffer descriptor data. This implies that the same buffer instance could 
+     * be used by two transfers within the same encoder or queue. This is acceptable 
+     * becuase the content only needs to be valid during the transfer, and it can be 
+     * reused by subsequent transfers in the same encoding run.
+     */
+    MVKBuffer* getTransferMVKBuffer(MVKBufferDescriptorData& buffData);
+    
 	/** Returns a MTLComputePipelineState for copying between two buffers with byte-aligned copy regions. */
     id<MTLComputePipelineState> getCmdCopyBufferBytesMTLComputePipelineState();
 
 	/** Returns a MTLComputePipelineState for filling a buffer. */
 	id<MTLComputePipelineState> getCmdFillBufferMTLComputePipelineState();
 
+	/** Returns a MTLComputePipelineState for decompressing a buffer into a 3D image. */
+	id<MTLComputePipelineState> getCmdCopyBufferToImage3DDecompressMTLComputePipelineState(bool needsTempBuff);
+
 	/** Deletes all the internal resources. */
 	void clear();
 
@@ -114,6 +130,8 @@
 	std::unordered_map<MVKRPSKeyClearAtt, id<MTLRenderPipelineState>> _cmdClearMTLRenderPipelineStates;
     std::unordered_map<MVKMTLDepthStencilDescriptorData, id<MTLDepthStencilState>> _mtlDepthStencilStates;
     std::unordered_map<MVKImageDescriptorData, MVKImage*> _transferImages;
+    std::unordered_map<MVKBufferDescriptorData, MVKBuffer*> _transferBuffers;
+    std::unordered_map<MVKBufferDescriptorData, MVKDeviceMemory*> _transferBufferMemory;
     MVKMTLBufferAllocator _mtlBufferAllocator;
     id<MTLSamplerState> _cmdBlitImageLinearMTLSamplerState = nil;
     id<MTLSamplerState> _cmdBlitImageNearestMTLSamplerState = nil;
@@ -123,5 +141,6 @@
     id<MTLDepthStencilState> _cmdClearDefaultDepthStencilState = nil;
     id<MTLComputePipelineState> _mtlCopyBufferBytesComputePipelineState = nil;
 	id<MTLComputePipelineState> _mtlFillBufferComputePipelineState = nil;
+	id<MTLComputePipelineState> _mtlCopyBufferToImage3DDecompressComputePipelineState[2] = {nil, nil};
 };
 
diff --git a/MoltenVK/MoltenVK/Commands/MVKCommandEncodingPool.mm b/MoltenVK/MoltenVK/Commands/MVKCommandEncodingPool.mm
index f3447d4..fe8d8eb 100644
--- a/MoltenVK/MoltenVK/Commands/MVKCommandEncodingPool.mm
+++ b/MoltenVK/MoltenVK/Commands/MVKCommandEncodingPool.mm
@@ -97,6 +97,10 @@
 	MVK_ENC_REZ_ACCESS(_transferImages[imgData], newMVKImage(imgData));
 }
 
+MVKBuffer* MVKCommandEncodingPool::getTransferMVKBuffer(MVKBufferDescriptorData& buffData) {
+	MVK_ENC_REZ_ACCESS(_transferBuffers[buffData], newMVKBuffer(buffData, _transferBufferMemory[buffData]));
+}
+
 id<MTLComputePipelineState> MVKCommandEncodingPool::getCmdCopyBufferBytesMTLComputePipelineState() {
 	MVK_ENC_REZ_ACCESS(_mtlCopyBufferBytesComputePipelineState, newCmdCopyBufferBytesMTLComputePipelineState());
 }
@@ -105,6 +109,10 @@
 	MVK_ENC_REZ_ACCESS(_mtlFillBufferComputePipelineState, newCmdFillBufferMTLComputePipelineState());
 }
 
+id<MTLComputePipelineState> MVKCommandEncodingPool::getCmdCopyBufferToImage3DDecompressMTLComputePipelineState(bool needsTempBuff) {
+	MVK_ENC_REZ_ACCESS(_mtlCopyBufferToImage3DDecompressComputePipelineState[needsTempBuff ? 1 : 0], newCmdCopyBufferToImage3DDecompressMTLComputePipelineState(needsTempBuff));
+}
+
 void MVKCommandEncodingPool::clear() {
 	lock_guard<mutex> lock(_lock);
 	destroyMetalResources();
@@ -135,6 +143,12 @@
     for (auto& pair : _transferImages) { _device->destroyImage(pair.second, nullptr); }
     _transferImages.clear();
 
+    for (auto& pair : _transferBuffers) { _device->destroyBuffer(pair.second, nullptr); }
+    _transferBuffers.clear();
+
+    for (auto& pair : _transferBufferMemory) { _device->freeMemory(pair.second, nullptr); }
+    _transferBufferMemory.clear();
+
     [_cmdBlitImageLinearMTLSamplerState release];
     _cmdBlitImageLinearMTLSamplerState = nil;
 
@@ -155,5 +169,13 @@
 
     [_mtlCopyBufferBytesComputePipelineState release];
     _mtlCopyBufferBytesComputePipelineState = nil;
+
+    [_mtlFillBufferComputePipelineState release];
+    _mtlFillBufferComputePipelineState = nil;
+
+    [_mtlCopyBufferToImage3DDecompressComputePipelineState[0] release];
+    [_mtlCopyBufferToImage3DDecompressComputePipelineState[1] release];
+    _mtlCopyBufferToImage3DDecompressComputePipelineState[0] = nil;
+    _mtlCopyBufferToImage3DDecompressComputePipelineState[1] = nil;
 }
 
diff --git a/MoltenVK/MoltenVK/Commands/MVKCommandPipelineStateFactoryShaderSource.h b/MoltenVK/MoltenVK/Commands/MVKCommandPipelineStateFactoryShaderSource.h
index 46ac6b6..1950556 100644
--- a/MoltenVK/MoltenVK/Commands/MVKCommandPipelineStateFactoryShaderSource.h
+++ b/MoltenVK/MoltenVK/Commands/MVKCommandPipelineStateFactoryShaderSource.h
@@ -37,6 +37,41 @@
     float2 v_texCoord;                                                                                          \n\
 } VaryingsPosTex;                                                                                               \n\
                                                                                                                 \n\
+typedef size_t VkDeviceSize;                                                                                    \n\
+                                                                                                                \n\
+typedef enum : uint32_t {                                                                                       \n\
+    VK_FORMAT_BC1_RGB_UNORM_BLOCK = 131,                                                                        \n\
+    VK_FORMAT_BC1_RGB_SRGB_BLOCK = 132,                                                                         \n\
+    VK_FORMAT_BC1_RGBA_UNORM_BLOCK = 133,                                                                       \n\
+    VK_FORMAT_BC1_RGBA_SRGB_BLOCK = 134,                                                                        \n\
+    VK_FORMAT_BC2_UNORM_BLOCK = 135,                                                                            \n\
+    VK_FORMAT_BC2_SRGB_BLOCK = 136,                                                                             \n\
+    VK_FORMAT_BC3_UNORM_BLOCK = 137,                                                                            \n\
+    VK_FORMAT_BC3_SRGB_BLOCK = 138,                                                                             \n\
+} VkFormat;                                                                                                     \n\
+                                                                                                                \n\
+typedef struct {                                                                                                \n\
+    uint32_t width;                                                                                             \n\
+    uint32_t height;                                                                                            \n\
+} VkExtent2D;                                                                                                   \n\
+                                                                                                                \n\
+typedef struct {                                                                                                \n\
+    uint32_t width;                                                                                             \n\
+    uint32_t height;                                                                                            \n\
+    uint32_t depth;                                                                                             \n\
+} __attribute__((packed)) VkExtent3D;                                                                           \n\
+                                                                                                                \n\
+typedef struct {                                                                                                \n\
+    int32_t x;                                                                                                  \n\
+    int32_t y;                                                                                                  \n\
+    int32_t z;                                                                                                  \n\
+} __attribute__((packed)) VkOffset3D;                                                                           \n\
+                                                                                                                \n"
+#define MVK_DECOMPRESS_CODE(...) #__VA_ARGS__
+#include "MVKDXTnCodec.def"
+#undef MVK_DECOMPRESS_CODE
+"\n\
+                                                                                                                \n\
 vertex VaryingsPosTex vtxCmdBlitImage(AttributesPosTex attributes [[stage_in]]) {                               \n\
     VaryingsPosTex varyings;                                                                                    \n\
     varyings.v_position = float4(attributes.a_position, 0.0, 1.0);                                              \n\
@@ -70,5 +105,68 @@
     }                                                                                                           \n\
 };                                                                                                              \n\
                                                                                                                 \n\
+typedef struct {                                                                                                \n\
+    uint32_t srcRowStride;                                                                                      \n\
+    uint32_t srcRowStrideHigh;                                                                                  \n\
+    uint32_t srcDepthStride;                                                                                    \n\
+    uint32_t srcDepthStrideHigh;                                                                                \n\
+    uint32_t destRowStride;                                                                                     \n\
+    uint32_t destRowStrideHigh;                                                                                 \n\
+    uint32_t destDepthStride;                                                                                   \n\
+    uint32_t destDepthStrideHigh;                                                                               \n\
+    VkFormat format;                                                                                            \n\
+    VkOffset3D offset;                                                                                          \n\
+    VkExtent3D extent;                                                                                          \n\
+} CmdCopyBufferToImageInfo;                                                                                     \n\
+                                                                                                                \n\
+kernel void cmdCopyBufferToImage3DDecompressDXTn(constant uint8_t* src [[buffer(0)]],                           \n\
+                                                 texture3d<float, access::write> dest [[texture(0)]],           \n\
+                                                 constant CmdCopyBufferToImageInfo& info [[buffer(2)]],         \n\
+                                                 uint3 pos [[thread_position_in_grid]]) {                       \n\
+    uint x = pos.x * 4, y = pos.y * 4, z = pos.z;                                                               \n\
+    VkDeviceSize blockByteCount = isBC1Format(info.format) ? 8 : 16;                                            \n\
+                                                                                                                \n\
+    if (x >= info.extent.width || y >= info.extent.height || z >= info.extent.depth) { return; }                \n\
+                                                                                                                \n\
+    src += z * info.srcDepthStride + y * info.srcRowStride / 4 + x * blockByteCount / 4;                        \n\
+    VkExtent2D blockExtent;                                                                                     \n\
+    blockExtent.width = min(info.extent.width - x, 4u);                                                         \n\
+    blockExtent.height = min(info.extent.height - y, 4u);                                                       \n\
+    uint pixels[16] = {0};                                                                                      \n\
+    decompressDXTnBlock(src, pixels, blockExtent, 4 * sizeof(uint), info.format);                               \n\
+    for (uint j = 0; j < blockExtent.height; ++j) {                                                             \n\
+        for (uint i = 0; i < blockExtent.width; ++i) {                                                          \n\
+            // The pixel components are in BGRA order, but texture::write wants them                            \n\
+            // in RGBA order. We can fix that (ironically) with a BGRA swizzle.                                 \n\
+            dest.write(unpack_unorm4x8_to_float(pixels[j * 4 + i]).bgra,                                        \n\
+                       uint3(info.offset.x + x + i, info.offset.y + y + j, info.offset.z + z));                 \n\
+        }                                                                                                       \n\
+    }                                                                                                           \n\
+}                                                                                                               \n\
+                                                                                                                \n\
+kernel void cmdCopyBufferToImage3DDecompressTempBufferDXTn(constant uint8_t* src [[buffer(0)]],                 \n\
+                                                           device uint8_t* dest [[buffer(1)]],                  \n\
+                                                           constant CmdCopyBufferToImageInfo& info [[buffer(2)]],\n\
+                                                           uint3 pos [[thread_position_in_grid]]) {             \n\
+    uint x = pos.x * 4, y = pos.y * 4, z = pos.z;                                                               \n\
+    VkDeviceSize blockByteCount = isBC1Format(info.format) ? 8 : 16;                                            \n\
+                                                                                                                \n\
+    if (x >= info.extent.width || y >= info.extent.height || z >= info.extent.depth) { return; }                \n\
+                                                                                                                \n\
+    src += z * info.srcDepthStride + y * info.srcRowStride / 4 + x * blockByteCount / 4;                        \n\
+    dest += z * info.destDepthStride + y * info.destRowStride + x * sizeof(uint);                               \n\
+    VkExtent2D blockExtent;                                                                                     \n\
+    blockExtent.width = min(info.extent.width - x, 4u);                                                         \n\
+    blockExtent.height = min(info.extent.height - y, 4u);                                                       \n\
+    uint pixels[16] = {0};                                                                                      \n\
+    decompressDXTnBlock(src, pixels, blockExtent, 4 * sizeof(uint), info.format);                               \n\
+    device uint* destPixel = (device uint*)dest;                                                                \n\
+    for (uint j = 0; j < blockExtent.height; ++j) {                                                             \n\
+        for (uint i = 0; i < blockExtent.width; ++i) {                                                          \n\
+            destPixel[j * info.destRowStride / sizeof(uint) + i] = pixels[j * 4 + i];                           \n\
+        }                                                                                                       \n\
+    }                                                                                                           \n\
+}                                                                                                               \n\
+                                                                                                                \n\
 ";
 
diff --git a/MoltenVK/MoltenVK/Commands/MVKCommandResourceFactory.h b/MoltenVK/MoltenVK/Commands/MVKCommandResourceFactory.h
index 274f2c3..28a331c 100644
--- a/MoltenVK/MoltenVK/Commands/MVKCommandResourceFactory.h
+++ b/MoltenVK/MoltenVK/Commands/MVKCommandResourceFactory.h
@@ -259,6 +259,43 @@
 
 
 #pragma mark -
+#pragma mark MVKBufferDescriptorData
+
+/**
+ * Key to use for looking up cached MVKBuffer instances, and to create a new MVKBuffer when needed.
+ * The contents of this structure is a subset of the contents of the VkBufferCreateInfo structure.
+ *
+ * This structure can be used as a key in a std::map and std::unordered_map.
+ */
+typedef struct MVKBufferDescriptorData_t {
+    VkDeviceSize             size;
+    VkBufferUsageFlags       usage;
+
+    bool operator==(const MVKBufferDescriptorData_t& rhs) const {
+        return (memcmp(this, &rhs, sizeof(*this)) == 0);
+    }
+
+	std::size_t hash() const {
+		return mvkHash((uint64_t*)this, sizeof(*this) / sizeof(uint64_t));
+	}
+
+    MVKBufferDescriptorData_t() { memset(this, 0, sizeof(*this)); }
+
+} __attribute__((aligned(sizeof(uint64_t)))) MVKBufferDescriptorData;
+
+/**
+ * Hash structure implementation for MVKBufferDescriptorData in std namespace, so
+ * MVKBufferDescriptorData can be used as a key in a std::map and std::unordered_map.
+ */
+namespace std {
+    template <>
+    struct hash<MVKBufferDescriptorData> {
+        std::size_t operator()(const MVKBufferDescriptorData& k) const { return k.hash(); }
+    };
+}
+
+
+#pragma mark -
 #pragma mark MVKCommandResourceFactory
 
 /** 
@@ -308,12 +345,23 @@
      */
     MVKImage* newMVKImage(MVKImageDescriptorData& imgData);
     
+    /**
+     * Returns a new MVKBuffer configured with content held in Private storage.
+     * The buffer returned is bound to a new device memory, also returned, and
+     * can be used as a temporary buffer during buffer-image transfers.
+     */
+    MVKBuffer* newMVKBuffer(MVKBufferDescriptorData& buffData, MVKDeviceMemory*& buffMem);
+    
     /** Returns a new MTLComputePipelineState for copying between two buffers with byte-aligned copy regions. */
     id<MTLComputePipelineState> newCmdCopyBufferBytesMTLComputePipelineState();
 
 	/** Returns a new MTLComputePipelineState for filling a buffer. */
 	id<MTLComputePipelineState> newCmdFillBufferMTLComputePipelineState();
 
+	/** Returns a new MTLComputePipelineState for copying between a buffer holding compressed data and a 3D image. */
+	id<MTLComputePipelineState> newCmdCopyBufferToImage3DDecompressMTLComputePipelineState(bool needTempBuf);
+
+
 #pragma mark Construction
 
 	MVKCommandResourceFactory(MVKDevice* device);
diff --git a/MoltenVK/MoltenVK/Commands/MVKCommandResourceFactory.mm b/MoltenVK/MoltenVK/Commands/MVKCommandResourceFactory.mm
index 91602a2..b069977 100644
--- a/MoltenVK/MoltenVK/Commands/MVKCommandResourceFactory.mm
+++ b/MoltenVK/MoltenVK/Commands/MVKCommandResourceFactory.mm
@@ -20,6 +20,7 @@
 #include "MVKCommandPipelineStateFactoryShaderSource.h"
 #include "MVKPipeline.h"
 #include "MVKFoundation.h"
+#include "MVKBuffer.h"
 #include "NSString+MoltenVK.h"
 #include "MTLRenderPipelineDescriptor+MoltenVK.h"
 #include "MVKLogging.h"
@@ -342,6 +343,35 @@
 	return mvkImg;
 }
 
+MVKBuffer* MVKCommandResourceFactory::newMVKBuffer(MVKBufferDescriptorData& buffData, MVKDeviceMemory*& buffMem) {
+    const VkBufferCreateInfo createInfo = {
+        .sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO,
+        .pNext = nullptr,
+        .flags = 0,
+        .size = buffData.size,
+        .usage = buffData.usage,
+        .sharingMode = VK_SHARING_MODE_EXCLUSIVE,
+        .queueFamilyIndexCount = 0,
+        .pQueueFamilyIndices = nullptr,
+    };
+    MVKBuffer* mvkBuff = _device->createBuffer(&createInfo, nullptr);
+    const VkMemoryDedicatedAllocateInfo dedicatedInfo = {
+        .sType = VK_STRUCTURE_TYPE_MEMORY_DEDICATED_ALLOCATE_INFO,
+        .pNext = nullptr,
+        .image = VK_NULL_HANDLE,
+        .buffer = (VkBuffer)mvkBuff,
+    };
+    const VkMemoryAllocateInfo allocInfo = {
+    	.sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO,
+    	.pNext = &dedicatedInfo,
+    	.allocationSize = buffData.size,
+    	.memoryTypeIndex = _device->getVulkanMemoryTypeIndex(MTLStorageModePrivate),
+    };
+    buffMem = _device->allocateMemory(&allocInfo, nullptr);
+    mvkBuff->bindDeviceMemory(buffMem, 0);
+    return mvkBuff;
+}
+
 id<MTLComputePipelineState> MVKCommandResourceFactory::newCmdCopyBufferBytesMTLComputePipelineState() {
 	return newMTLComputePipelineState(getFunctionNamed("cmdCopyBufferBytes"));
 }
@@ -350,6 +380,11 @@
 	return newMTLComputePipelineState(getFunctionNamed("cmdFillBuffer"));
 }
 
+id<MTLComputePipelineState> MVKCommandResourceFactory::newCmdCopyBufferToImage3DDecompressMTLComputePipelineState(bool needTempBuf) {
+	return newMTLComputePipelineState(getFunctionNamed(needTempBuf ? "cmdCopyBufferToImage3DDecompressTempBufferDXTn" :
+																	 "cmdCopyBufferToImage3DDecompressDXTn"));
+}
+
 
 #pragma mark Support methods
 
diff --git a/MoltenVK/MoltenVK/GPUObjects/MVKDevice.mm b/MoltenVK/MoltenVK/GPUObjects/MVKDevice.mm
index 162404b..5335a56 100644
--- a/MoltenVK/MoltenVK/GPUObjects/MVKDevice.mm
+++ b/MoltenVK/MoltenVK/GPUObjects/MVKDevice.mm
@@ -30,6 +30,7 @@
 #include "MVKRenderPass.h"
 #include "MVKCommandPool.h"
 #include "MVKFoundation.h"
+#include "MVKCodec.h"
 #include "MVKEnvironment.h"
 #include "MVKOSExtensions.h"
 #include <MoltenVKSPIRVToMSLConverter/SPIRVToMSLConverter.h>
@@ -261,10 +262,22 @@
                 return VK_ERROR_FORMAT_NOT_SUPPORTED;
             }
 			// Metal does not allow compressed or depth/stencil formats on 3D textures
-			if (mvkFormatTypeFromVkFormat(format) == kMVKFormatDepthStencil ||
-				mvkFormatTypeFromVkFormat(format) == kMVKFormatCompressed) {
+			if (mvkFormatTypeFromVkFormat(format) == kMVKFormatDepthStencil
+#if MVK_IOS
+				|| mvkFormatTypeFromVkFormat(format) == kMVKFormatCompressed
+#endif
+				) {
 				return VK_ERROR_FORMAT_NOT_SUPPORTED;
 			}
+#if MVK_MACOS
+			if (mvkFormatTypeFromVkFormat(format) == kMVKFormatCompressed) {
+				// If this is a compressed format and there's no codec, it isn't
+				// supported.
+				if (!mvkCanDecodeFormat(format) ) { return VK_ERROR_FORMAT_NOT_SUPPORTED; }
+				// Compressed multisampled textures aren't supported.
+				sampleCounts = VK_SAMPLE_COUNT_1_BIT;
+			}
+#endif
             maxExt.width = pLimits->maxImageDimension3D;
             maxExt.height = pLimits->maxImageDimension3D;
             maxExt.depth = pLimits->maxImageDimension3D;
diff --git a/MoltenVK/MoltenVK/GPUObjects/MVKImage.h b/MoltenVK/MoltenVK/GPUObjects/MVKImage.h
index 0a6b2e0..6eaa8a9 100644
--- a/MoltenVK/MoltenVK/GPUObjects/MVKImage.h
+++ b/MoltenVK/MoltenVK/GPUObjects/MVKImage.h
@@ -238,6 +238,7 @@
     bool _hasExpectedTexelSize;
 	bool _usesTexelBuffer;
 	bool _isLinear;
+	bool _is3DCompressed;
 };
 
 
diff --git a/MoltenVK/MoltenVK/GPUObjects/MVKImage.mm b/MoltenVK/MoltenVK/GPUObjects/MVKImage.mm
index c8d7f5b..580afd6 100644
--- a/MoltenVK/MoltenVK/GPUObjects/MVKImage.mm
+++ b/MoltenVK/MoltenVK/GPUObjects/MVKImage.mm
@@ -23,6 +23,7 @@
 #include "MVKFoundation.h"
 #include "MVKEnvironment.h"
 #include "MVKLogging.h"
+#include "MVKCodec.h"
 #import "MTLTextureDescriptor+MoltenVK.h"
 #import "MTLSamplerDescriptor+MoltenVK.h"
 
@@ -387,13 +388,30 @@
 		mvkDisableFlag(usage, MTLTextureUsageRenderTarget);
 	}
 
+#if MVK_MACOS
+	// If this is a 3D compressed texture, tell Metal we might write to it.
+	if (_is3DCompressed) {
+		mvkEnableFlag(usage, MTLTextureUsageShaderWrite);
+	}
+#endif
+
 	return usage;
 }
 
 // Returns an autoreleased Metal texture descriptor constructed from the properties of this image.
 MTLTextureDescriptor* MVKImage::getMTLTextureDescriptor() {
 	MTLTextureDescriptor* mtlTexDesc = [[MTLTextureDescriptor alloc] init];
+#if MVK_MACOS
+	if (_is3DCompressed) {
+		// Metal doesn't yet support 3D compressed textures, so we'll decompress
+		// the texture ourselves. This, then, is the *uncompressed* format.
+		mtlTexDesc.pixelFormat = MTLPixelFormatBGRA8Unorm;
+	} else {
+		mtlTexDesc.pixelFormat = _mtlPixelFormat;
+	}
+#else
 	mtlTexDesc.pixelFormat = _mtlPixelFormat;
+#endif
 	mtlTexDesc.textureType = _mtlTextureType;
 	mtlTexDesc.width = _extent.width;
 	mtlTexDesc.height = _extent.height;
@@ -447,6 +465,27 @@
     mtlRegion.origin = MTLOriginMake(0, 0, 0);
     mtlRegion.size = mvkMTLSizeFromVkExtent3D(mipExtent);
 
+#if MVK_MACOS
+    std::unique_ptr<char[]> decompBuffer;
+    if (_is3DCompressed) {
+        // We cannot upload the texture data directly in this case. But we
+        // can upload the decompressed image data.
+        std::unique_ptr<MVKCodec> codec = mvkCreateCodec(getVkFormat());
+        if (!codec) {
+            mvkNotifyErrorWithText(VK_ERROR_FORMAT_NOT_SUPPORTED, "A 3D texture used a compressed format that MoltenVK does not yet support.");
+            return;
+        }
+        VkSubresourceLayout destLayout;
+        destLayout.rowPitch = 4 * mipExtent.width;
+        destLayout.depthPitch = destLayout.rowPitch * mipExtent.height;
+        destLayout.size = destLayout.depthPitch * mipExtent.depth;
+        decompBuffer = std::unique_ptr<char[]>(new char[destLayout.size]);
+        codec->decompress(decompBuffer.get(), pImgBytes, destLayout, imgLayout, mipExtent);
+        pImgBytes = decompBuffer.get();
+        imgLayout = destLayout;
+    }
+#endif
+
     [getMTLTexture() replaceRegion: mtlRegion
                        mipmapLevel: imgSubRez.mipLevel
                              slice: imgSubRez.arrayLayer
@@ -499,9 +538,15 @@
         setConfigurationResult(mvkNotifyErrorWithText(VK_ERROR_FEATURE_NOT_PRESENT, "vkCreateImage() : Metal does not allow uncompressed views of compressed images."));
     }
 
+#if MVK_IOS
     if ( (pCreateInfo->imageType != VK_IMAGE_TYPE_2D) && (mvkFormatTypeFromVkFormat(pCreateInfo->format) == kMVKFormatCompressed) ) {
         setConfigurationResult(mvkNotifyErrorWithText(VK_ERROR_FEATURE_NOT_PRESENT, "vkCreateImage() : Under Metal, compressed formats may only be used with 2D images."));
     }
+#else
+    if ( (pCreateInfo->imageType != VK_IMAGE_TYPE_2D) && (mvkFormatTypeFromVkFormat(pCreateInfo->format) == kMVKFormatCompressed) && !mvkCanDecodeFormat(pCreateInfo->format) ) {
+        setConfigurationResult(mvkNotifyErrorWithText(VK_ERROR_FEATURE_NOT_PRESENT, "vkCreateImage() : Under Metal, compressed formats may only be used with 2D images."));
+    }
+#endif
     if ( (pCreateInfo->imageType != VK_IMAGE_TYPE_2D) && (mvkFormatTypeFromVkFormat(pCreateInfo->format) == kMVKFormatDepthStencil) ) {
         setConfigurationResult(mvkNotifyErrorWithText(VK_ERROR_FEATURE_NOT_PRESENT, "vkCreateImage() : Under Metal, depth/stencil formats may only be used with 2D images."));
     }
@@ -552,6 +597,7 @@
     _hasExpectedTexelSize = (mvkMTLPixelFormatBytesPerBlock(_mtlPixelFormat) == mvkVkFormatBytesPerBlock(pCreateInfo->format));
 	_isLinear = validateLinear(pCreateInfo);
 	_usesTexelBuffer = false;
+	_is3DCompressed = _mtlTextureType == MTLTextureType3D && mvkFormatTypeFromMTLPixelFormat(_mtlPixelFormat) == kMVKFormatCompressed;
 
 	_byteAlignment = _isLinear ? _device->getVkFormatTexelBufferAlignment(pCreateInfo->format) : mvkEnsurePowerOfTwo(mvkVkFormatBytesPerBlock(pCreateInfo->format));
 
diff --git a/MoltenVK/MoltenVK/Utility/MVKCodec.cpp b/MoltenVK/MoltenVK/Utility/MVKCodec.cpp
new file mode 100644
index 0000000..62b9b2f
--- /dev/null
+++ b/MoltenVK/MoltenVK/Utility/MVKCodec.cpp
@@ -0,0 +1,125 @@
+/*
+ * MVKCodec.cpp
+ *
+ * Copyright (c) 2018 Chip Davis for CodeWeavers
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ * 
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ * 
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+
+#include "MVKCodec.h"
+
+#include <algorithm>
+#include <simd/simd.h>
+
+
+using simd::float3;
+using simd::float4;
+using simd::pow;
+using simd::select;
+
+static uint32_t pack_float_to_unorm4x8(float4 x) {
+	return ((((uint8_t)(x.r * 255)) & 0x000000ff) | ((((uint8_t)(x.g * 255)) << 8) & 0x0000ff00) |
+		((((uint8_t)(x.b * 255)) & 0x00ff0000) << 16) | ((((uint8_t)(x.a * 255)) << 24) & 0xff000000));
+}
+
+static float3 unpack_unorm565_to_float(uint16_t x) {
+	return simd::make_float3(((x >> 11) & 0x1f) / 31.0f, ((x >> 5) & 0x3f) / 63.0f, (x & 0x1f) / 31.0f);
+}
+
+
+/** Texture codec for DXTn (i.e. BC[1-3]) compressed data.
+ *
+ * This implementation is largely derived from Wine, from code originally
+ * written by Connor McAdams.
+ */
+class MVKDXTnCodec : public MVKCodec {
+
+public:
+
+	void decompress(void* pDest, const void* pSrc, const VkSubresourceLayout& destLayout, const VkSubresourceLayout& srcLayout, VkExtent3D extent) override {
+		VkDeviceSize blockByteCount;
+		const uint8_t* pSrcRow;
+		const uint8_t* pSrcSlice = (const uint8_t*)pSrc;
+		uint8_t* pDestRow;
+		uint8_t* pDestSlice = (uint8_t*)pDest;
+
+		blockByteCount = isBC1Format(_format) ? 8 : 16;
+
+		for (uint32_t z = 0; z < extent.depth; ++z) {
+			pSrcRow = pSrcSlice;
+			pDestRow = pDestSlice;
+			for (uint32_t y = 0; y < extent.height; y += 4) {
+				for (uint32_t x = 0; x < extent.width; x += 4) {
+					VkExtent2D blockExtent;
+					blockExtent.width = std::min(extent.width - x, 4u);
+					blockExtent.height = std::min(extent.height - y, 4u);
+					decompressDXTnBlock(pSrcRow + x * (blockByteCount / 4),
+						pDestRow + x * 4, blockExtent, destLayout.rowPitch, _format);
+				}
+				pSrcRow += srcLayout.rowPitch;
+				pDestRow += destLayout.rowPitch * 4;
+			}
+			pSrcSlice += srcLayout.depthPitch;
+			pDestSlice += destLayout.depthPitch;
+		}
+	}
+
+	/** Constructs an instance. */
+	MVKDXTnCodec(VkFormat format) : _format(format) {}
+
+private:
+
+#define constant const
+#define device
+#define thread
+#define MVK_DECOMPRESS_CODE(...) __VA_ARGS__
+#include "MVKDXTnCodec.def"
+#undef MVK_DECOMPRESS_CODE
+
+	VkFormat _format;
+};
+
+std::unique_ptr<MVKCodec> mvkCreateCodec(VkFormat format) {
+	switch (format) {
+	case VK_FORMAT_BC1_RGB_UNORM_BLOCK:
+	case VK_FORMAT_BC1_RGB_SRGB_BLOCK:
+	case VK_FORMAT_BC1_RGBA_UNORM_BLOCK:
+	case VK_FORMAT_BC1_RGBA_SRGB_BLOCK:
+	case VK_FORMAT_BC2_UNORM_BLOCK:
+	case VK_FORMAT_BC2_SRGB_BLOCK:
+	case VK_FORMAT_BC3_UNORM_BLOCK:
+	case VK_FORMAT_BC3_SRGB_BLOCK:
+		return std::unique_ptr<MVKCodec>(new MVKDXTnCodec(format));
+
+	default:
+		return nullptr;
+	}
+}
+
+bool mvkCanDecodeFormat(VkFormat format) {
+	switch (format) {
+	case VK_FORMAT_BC1_RGB_UNORM_BLOCK:
+	case VK_FORMAT_BC1_RGB_SRGB_BLOCK:
+	case VK_FORMAT_BC1_RGBA_UNORM_BLOCK:
+	case VK_FORMAT_BC1_RGBA_SRGB_BLOCK:
+	case VK_FORMAT_BC2_UNORM_BLOCK:
+	case VK_FORMAT_BC2_SRGB_BLOCK:
+	case VK_FORMAT_BC3_UNORM_BLOCK:
+	case VK_FORMAT_BC3_SRGB_BLOCK:
+		return true;
+
+	default:
+		return false;
+	}
+}
diff --git a/MoltenVK/MoltenVK/Utility/MVKCodec.h b/MoltenVK/MoltenVK/Utility/MVKCodec.h
new file mode 100644
index 0000000..ddff1ec
--- /dev/null
+++ b/MoltenVK/MoltenVK/Utility/MVKCodec.h
@@ -0,0 +1,50 @@
+/*
+ * MVKCodec.h
+ *
+ * Copyright (c) 2018 Chip Davis for CodeWeavers
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ * 
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ * 
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+
+#pragma once
+
+#include "MVKFoundation.h"
+
+#include <string>
+
+
+#pragma mark Texture data codecs
+
+/**
+ * This is the base class implemented by all codecs supported by MoltenVK.
+ * Objects of this class are used to decompress texture data for upload to a
+ * 3D texture.
+ */
+class MVKCodec {
+
+public:
+
+	/** Decompresses compressed texture data for upload. */
+	virtual void decompress(void* pDest, const void* pSrc, const VkSubresourceLayout& destLayout, const VkSubresourceLayout& srcLayout, VkExtent3D extent) = 0;
+
+	/** Destructor. */
+	virtual ~MVKCodec() = default;
+
+};
+
+/** Returns an appropriate codec for the given format, or nullptr if the format is not supported. */
+std::unique_ptr<MVKCodec> mvkCreateCodec(VkFormat format);
+
+/** Returns whether or not the given format can be decompressed. */
+bool mvkCanDecodeFormat(VkFormat format);
diff --git a/MoltenVK/MoltenVK/Utility/MVKDXTnCodec.def b/MoltenVK/MoltenVK/Utility/MVKDXTnCodec.def
new file mode 100644
index 0000000..22cf9da
--- /dev/null
+++ b/MoltenVK/MoltenVK/Utility/MVKDXTnCodec.def
@@ -0,0 +1,128 @@
+/*
+ * MVKDXTnCodec.def
+ *
+ * Copyright (c) 2018 Chip Davis for CodeWeavers
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+
+#ifndef MVK_DECOMPRESS_CODE
+#error MVK_DECOMPRESS_CODE must be defined before including this file
+#endif
+
+MVK_DECOMPRESS_CODE(
+	static bool isBC1Format(VkFormat format) {
+		return format == VK_FORMAT_BC1_RGB_UNORM_BLOCK || format == VK_FORMAT_BC1_RGB_SRGB_BLOCK ||
+			format == VK_FORMAT_BC1_RGBA_UNORM_BLOCK || format == VK_FORMAT_BC1_RGBA_SRGB_BLOCK;
+	}
+
+	static bool isBC2Format(VkFormat format) {
+		return format == VK_FORMAT_BC2_UNORM_BLOCK || format == VK_FORMAT_BC2_SRGB_BLOCK;
+	}
+
+	static bool isBC3Format(VkFormat format) {
+		return format == VK_FORMAT_BC3_UNORM_BLOCK || format == VK_FORMAT_BC3_SRGB_BLOCK;
+	}
+
+	static bool isSRGBFormat(VkFormat format) {
+		return format == VK_FORMAT_BC1_RGB_SRGB_BLOCK || format == VK_FORMAT_BC1_RGBA_SRGB_BLOCK ||
+			format == VK_FORMAT_BC2_SRGB_BLOCK || format == VK_FORMAT_BC3_SRGB_BLOCK;
+	}
+
+	static void buildDXTnColourTable(uint16_t colour0, uint16_t colour1, thread float3* pColourTable, VkFormat format) {
+		pColourTable[0] = unpack_unorm565_to_float(colour0);
+		pColourTable[1] = unpack_unorm565_to_float(colour1);
+
+		if (isBC1Format(format) && colour0 <= colour1) {
+			pColourTable[2] = (pColourTable[0] + pColourTable[1]) / 2;
+			pColourTable[3] = float3(0);
+		} else {
+			for (uint32_t i = 0; i < 2; ++i) {
+				pColourTable[i + 2] = (2 * pColourTable[i] + pColourTable[1 - i]) / 3;
+			}
+		}
+	}
+
+	static void buildDXT5AlphaTable(uint8_t alpha0, uint8_t alpha1, thread float* pAlphaTable) {
+		pAlphaTable[0] = alpha0 / 255.0f;
+		pAlphaTable[1] = alpha1 / 255.0f;
+
+		if (alpha0 > alpha1) {
+			for (uint32_t i = 0; i < 6; ++i) {
+				pAlphaTable[2 + i] = ((6 - i) * pAlphaTable[0] + (i + 1) * pAlphaTable[1]) / 7;
+			}
+		} else {
+			for (uint32_t i = 0; i < 4; ++i) {
+				pAlphaTable[2 + i] = ((4 - i) * pAlphaTable[0] + (i + 1) * pAlphaTable[1]) / 5;
+			}
+			pAlphaTable[6] = 0;
+			pAlphaTable[7] = 1;
+		}
+	}
+
+	static float3 sRGBCorrect(float3 colour) {
+		return select(pow((colour + 0.055)/1.055, float3(2.4)), colour/12.92, colour <= 0.04045);
+	}
+
+	static void decompressDXTnBlock(constant void* pSrc, thread void* pDest, VkExtent2D extent, VkDeviceSize destRowPitch, VkFormat format) {
+		constant uint32_t* pSrcBlock = (constant uint32_t *)pSrc;
+		bool isBC1Alpha = false;
+		float3 colourTable[4];
+		float alphaTable[8];
+		size_t alphaBits;
+		uint32_t colourBits;
+
+		if (isBC1Format(format)) {
+			alphaBits = 0;
+
+			uint16_t colour0 = pSrcBlock[0] & 0xffff;
+			uint16_t colour1 = pSrcBlock[0] >> 16;
+			colourBits = pSrcBlock[1];
+			buildDXTnColourTable(colour0, colour1, colourTable, format);
+			if (colour0 <= colour1) { isBC1Alpha = true; }
+		} else {
+			alphaBits = pSrcBlock[0] | ((size_t)pSrcBlock[1] << 32);
+			if (isBC3Format(format)) {
+				buildDXT5AlphaTable(alphaBits & 0xff, (alphaBits >> 8) & 0xff, alphaTable);
+				alphaBits >>= 16;
+			}
+
+			colourBits = pSrcBlock[3];
+			buildDXTnColourTable(pSrcBlock[2] & 0xffff, pSrcBlock[2] >> 16, colourTable, format);
+		}
+
+		for (uint32_t y = 0; y < extent.height; ++y) {
+			thread uint32_t* pDestRow = (thread uint32_t *)((thread uint8_t *)pDest + y * destRowPitch);
+			for (uint32_t x = 0; x < extent.width; ++x) {
+				uint8_t colourIndex = (colourBits >> (y * 8 + x * 2)) & 0x3;
+				float alpha;
+				if (isBC1Format(format)) {
+					alpha = (!isBC1Alpha || colourIndex != 3) ? 1.0f : 0.0f;
+				} else if (isBC2Format(format)) {
+					alpha = ((alphaBits >> (y * 16 + x * 4)) & 0xf) / 15.0f;
+				} else {	// Must be a BC3 format
+					alpha = alphaTable[(alphaBits >> (y * 12 + x * 3)) & 0x7];
+				}
+				float4 colour;
+				colour.rgb = colourTable[colourIndex];
+				if (isSRGBFormat(format)) {
+					// Convert sRGB back to linear.
+					colour.rgb = sRGBCorrect(colour.rgb);
+				}
+				colour.a = alpha;
+				pDestRow[x] = pack_float_to_unorm4x8(colour);
+			}
+		}
+	}
+)