Merge pull request #437 from billhollings/master
Support iOS builds for arm64e architecture.
diff --git a/Docs/Whats_New.md b/Docs/Whats_New.md
index 78395a8..db2a8e1 100644
--- a/Docs/Whats_New.md
+++ b/Docs/Whats_New.md
@@ -31,6 +31,7 @@
- Support iOS builds for arm64e architecture.
- Improvements to building external libraries.
- Print Vulkan semantics when logging converted GLSL.
+- Support uploading S3TC-compressed 3D images.
diff --git a/MoltenVK/MoltenVK.xcodeproj/project.pbxproj b/MoltenVK/MoltenVK.xcodeproj/project.pbxproj
index c3ce45b..9fc8632 100644
--- a/MoltenVK/MoltenVK.xcodeproj/project.pbxproj
+++ b/MoltenVK/MoltenVK.xcodeproj/project.pbxproj
@@ -9,6 +9,10 @@
/* Begin PBXBuildFile section */
45003E73214AD4E500E989CB /* MVKExtensions.def in Headers */ = {isa = PBXBuildFile; fileRef = 45003E6F214AD4C900E989CB /* MVKExtensions.def */; };
45003E74214AD4E600E989CB /* MVKExtensions.def in Headers */ = {isa = PBXBuildFile; fileRef = 45003E6F214AD4C900E989CB /* MVKExtensions.def */; };
+ 45557A5221C9EFF3008868BD /* MVKCodec.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 45557A4D21C9EFF3008868BD /* MVKCodec.cpp */; };
+ 45557A5321C9EFF3008868BD /* MVKCodec.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 45557A4D21C9EFF3008868BD /* MVKCodec.cpp */; };
+ 45557A5421C9EFF3008868BD /* MVKCodec.h in Headers */ = {isa = PBXBuildFile; fileRef = 45557A5121C9EFF3008868BD /* MVKCodec.h */; };
+ 45557A5521C9EFF3008868BD /* MVKCodec.h in Headers */ = {isa = PBXBuildFile; fileRef = 45557A5121C9EFF3008868BD /* MVKCodec.h */; };
83A4AD2A21BD75570006C935 /* MVKVector.h in Headers */ = {isa = PBXBuildFile; fileRef = 83A4AD2521BD75570006C935 /* MVKVector.h */; };
83A4AD2B21BD75570006C935 /* MVKVector.h in Headers */ = {isa = PBXBuildFile; fileRef = 83A4AD2521BD75570006C935 /* MVKVector.h */; };
83A4AD2C21BD75570006C935 /* MVKVectorAllocator.h in Headers */ = {isa = PBXBuildFile; fileRef = 83A4AD2921BD75570006C935 /* MVKVectorAllocator.h */; };
@@ -265,6 +269,9 @@
/* Begin PBXFileReference section */
45003E6F214AD4C900E989CB /* MVKExtensions.def */ = {isa = PBXFileReference; explicitFileType = sourcecode.cpp.h; fileEncoding = 4; path = MVKExtensions.def; sourceTree = "<group>"; };
+ 45557A4D21C9EFF3008868BD /* MVKCodec.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = MVKCodec.cpp; sourceTree = "<group>"; };
+ 45557A5121C9EFF3008868BD /* MVKCodec.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = MVKCodec.h; sourceTree = "<group>"; };
+ 45557A5721CD83C3008868BD /* MVKDXTnCodec.def */ = {isa = PBXFileReference; explicitFileType = sourcecode.cpp.h; fileEncoding = 4; path = MVKDXTnCodec.def; sourceTree = "<group>"; };
83A4AD2521BD75570006C935 /* MVKVector.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = MVKVector.h; sourceTree = "<group>"; };
83A4AD2921BD75570006C935 /* MVKVectorAllocator.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = MVKVectorAllocator.h; sourceTree = "<group>"; };
A9096E5C1F81E16300DFBEA6 /* MVKCmdDispatch.h */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.h; path = MVKCmdDispatch.h; sourceTree = "<group>"; };
@@ -497,6 +504,9 @@
A98149401FB6A3F7005F00B4 /* Utility */ = {
isa = PBXGroup;
children = (
+ 45557A5721CD83C3008868BD /* MVKDXTnCodec.def */,
+ 45557A4D21C9EFF3008868BD /* MVKCodec.cpp */,
+ 45557A5121C9EFF3008868BD /* MVKCodec.h */,
83A4AD2521BD75570006C935 /* MVKVector.h */,
83A4AD2921BD75570006C935 /* MVKVectorAllocator.h */,
A98149411FB6A3F7005F00B4 /* MVKBaseObject.cpp */,
@@ -611,6 +621,7 @@
A94FB7E01C7DFB4800632CA3 /* MVKDescriptorSet.h in Headers */,
A9E53DE12100B197002781DD /* NSString+MoltenVK.h in Headers */,
A9E53DDF2100B197002781DD /* CAMetalLayer+MoltenVK.h in Headers */,
+ 45557A5421C9EFF3008868BD /* MVKCodec.h in Headers */,
A94FB8041C7DFB4800632CA3 /* MVKRenderPass.h in Headers */,
A9F042A61FB4CF83009FCCB8 /* MVKLogging.h in Headers */,
A94FB8001C7DFB4800632CA3 /* MVKQueue.h in Headers */,
@@ -674,6 +685,7 @@
A94FB7E11C7DFB4800632CA3 /* MVKDescriptorSet.h in Headers */,
A9E53DE22100B197002781DD /* NSString+MoltenVK.h in Headers */,
A9E53DE02100B197002781DD /* CAMetalLayer+MoltenVK.h in Headers */,
+ 45557A5521C9EFF3008868BD /* MVKCodec.h in Headers */,
A94FB8051C7DFB4800632CA3 /* MVKRenderPass.h in Headers */,
A9F042A71FB4CF83009FCCB8 /* MVKLogging.h in Headers */,
A94FB8011C7DFB4800632CA3 /* MVKQueue.h in Headers */,
@@ -949,6 +961,7 @@
A94FB7CE1C7DFB4800632CA3 /* MVKCommand.mm in Sources */,
A94FB80E1C7DFB4800632CA3 /* MVKShaderModule.mm in Sources */,
A94FB81A1C7DFB4800632CA3 /* MVKSync.mm in Sources */,
+ 45557A5221C9EFF3008868BD /* MVKCodec.cpp in Sources */,
A94FB7BE1C7DFB4800632CA3 /* MVKCmdPipeline.mm in Sources */,
A94FB81E1C7DFB4800632CA3 /* MVKLayers.mm in Sources */,
A94FB7EE1C7DFB4800632CA3 /* MVKFramebuffer.mm in Sources */,
@@ -1001,6 +1014,7 @@
A94FB7CF1C7DFB4800632CA3 /* MVKCommand.mm in Sources */,
A94FB80F1C7DFB4800632CA3 /* MVKShaderModule.mm in Sources */,
A94FB81B1C7DFB4800632CA3 /* MVKSync.mm in Sources */,
+ 45557A5321C9EFF3008868BD /* MVKCodec.cpp in Sources */,
A94FB7BF1C7DFB4800632CA3 /* MVKCmdPipeline.mm in Sources */,
A94FB81F1C7DFB4800632CA3 /* MVKLayers.mm in Sources */,
A94FB7EF1C7DFB4800632CA3 /* MVKFramebuffer.mm in Sources */,
diff --git a/MoltenVK/MoltenVK/Commands/MVKCmdTransfer.mm b/MoltenVK/MoltenVK/Commands/MVKCmdTransfer.mm
index 6bb9790..6b909f3 100644
--- a/MoltenVK/MoltenVK/Commands/MVKCmdTransfer.mm
+++ b/MoltenVK/MoltenVK/Commands/MVKCmdTransfer.mm
@@ -617,6 +617,21 @@
#pragma mark -
#pragma mark MVKCmdBufferImageCopy
+// Matches shader struct.
+typedef struct {
+ uint32_t srcRowStride;
+ uint32_t srcRowStrideHigh;
+ uint32_t srcDepthStride;
+ uint32_t srcDepthStrideHigh;
+ uint32_t destRowStride;
+ uint32_t destRowStrideHigh;
+ uint32_t destDepthStride;
+ uint32_t destDepthStrideHigh;
+ VkFormat format;
+ VkOffset3D offset;
+ VkExtent3D extent;
+} MVKCmdCopyBufferToImageInfo;
+
void MVKCmdBufferImageCopy::setContent(VkBuffer buffer,
VkImage image,
VkImageLayout imageLayout,
@@ -649,9 +664,8 @@
if ( !mtlBuffer || !mtlTexture ) { return; }
NSUInteger mtlBuffOffset = _buffer->getMTLBufferOffset();
- MTLPixelFormat mtlPixFmt = mtlTexture.pixelFormat;
+ MTLPixelFormat mtlPixFmt = _image->getMTLPixelFormat();
MVKCommandUse cmdUse = _toImage ? kMVKCommandUseCopyBufferToImage : kMVKCommandUseCopyImageToBuffer;
- id<MTLBlitCommandEncoder> mtlBlitEnc = cmdEncoder->getMTLBlitEncoder(cmdUse);
for (auto& cpyRgn : _mtlBuffImgCopyRegions) {
@@ -689,11 +703,87 @@
blitOptions |= MTLBlitOptionStencilFromDepthStencil;
}
}
+#if MVK_MACOS
+ if (_toImage && mvkFormatTypeFromMTLPixelFormat(mtlPixFmt) == kMVKFormatCompressed &&
+ mtlTexture.textureType == MTLTextureType3D) {
+ // If we're copying to a compressed 3D image, the image data need to be decompressed.
+ // If we're copying to mip level 0, we can skip the copy and just decode
+ // directly into the image. Otherwise, we need to use an intermediate
+ // buffer.
+ MVKCmdCopyBufferToImageInfo info;
+ info.srcRowStride = bytesPerRow & 0xffffffff;
+ info.srcRowStrideHigh = bytesPerRow >> 32;
+ info.srcDepthStride = bytesPerImg & 0xffffffff;
+ info.srcDepthStrideHigh = bytesPerImg >> 32;
+ info.destRowStride = info.destRowStrideHigh = 0;
+ info.destDepthStride = info.destDepthStrideHigh = 0;
+ info.format = _image->getVkFormat();
+ info.offset = cpyRgn.imageOffset;
+ info.extent = cpyRgn.imageExtent;
+ bool needsTempBuff = cpyRgn.imageSubresource.mipLevel != 0;
+ id<MTLComputeCommandEncoder> mtlComputeEnc = cmdEncoder->getMTLComputeEncoder(cmdUse);
+ id<MTLComputePipelineState> mtlComputeState = getCommandEncodingPool()->getCmdCopyBufferToImage3DDecompressMTLComputePipelineState(needsTempBuff);
+ [mtlComputeEnc pushDebugGroup: @"vkCmdCopyBufferToImage"];
+ [mtlComputeEnc setComputePipelineState: mtlComputeState];
+ [mtlComputeEnc setBuffer: mtlBuffer offset: mtlBuffOffset + cpyRgn.bufferOffset atIndex: 0];
+ MVKBuffer* tempBuff;
+ if (needsTempBuff) {
+ NSUInteger bytesPerDestRow = mvkMTLPixelFormatBytesPerRow(mtlTexture.pixelFormat, info.extent.width);
+ NSUInteger bytesPerDestImg = mvkMTLPixelFormatBytesPerLayer(mtlTexture.pixelFormat, bytesPerDestRow, info.extent.height);
+ // We're going to copy from the temporary buffer now, so use the
+ // temp buffer parameters in the copy below.
+ bytesPerRow = bytesPerDestRow;
+ bytesPerImg = bytesPerDestImg;
+ MVKBufferDescriptorData tempBuffData;
+ tempBuffData.size = bytesPerDestImg * mtlTxtSize.depth;
+ tempBuffData.usage = VK_BUFFER_USAGE_TRANSFER_SRC_BIT | VK_BUFFER_USAGE_STORAGE_BUFFER_BIT;
+ tempBuff = getCommandEncodingPool()->getTransferMVKBuffer(tempBuffData);
+ mtlBuffer = tempBuff->getMTLBuffer();
+ mtlBuffOffset = tempBuff->getMTLBufferOffset();
+ info.destRowStride = bytesPerDestRow & 0xffffffff;
+ info.destRowStrideHigh = bytesPerDestRow >> 32;
+ info.destDepthStride = bytesPerDestImg & 0xffffffff;
+ info.destDepthStrideHigh = bytesPerDestImg >> 32;
+ [mtlComputeEnc setBuffer: mtlBuffer offset: mtlBuffOffset atIndex: 1];
+ } else {
+ [mtlComputeEnc setTexture: mtlTexture atIndex: 0];
+ }
+ cmdEncoder->setComputeBytes(mtlComputeEnc, &info, sizeof(info), 2);
+
+ // Now work out how big to make the grid, and from there, the size and number of threadgroups.
+ // One thread is run per block. Each block decompresses to an m x n array of texels.
+ // So the size of the grid is (ceil(width/m), ceil(height/n), depth).
+ VkExtent2D blockExtent = mvkMTLPixelFormatBlockTexelSize(mtlPixFmt);
+ MTLSize mtlGridSize = MTLSizeMake(mvkCeilingDivide(mtlTxtSize.width, blockExtent.width),
+ mvkCeilingDivide(mtlTxtSize.height, blockExtent.height),
+ mtlTxtSize.depth);
+ // Use four times the thread execution width as the threadgroup size.
+ MTLSize mtlTgrpSize = MTLSizeMake(2, 2, mtlComputeState.threadExecutionWidth);
+ // Then the number of threadgroups is (ceil(x/2), ceil(y/2), ceil(z/t)),
+ // where 't' is the thread execution width.
+ mtlGridSize.width = mvkCeilingDivide(mtlGridSize.width, 2);
+ mtlGridSize.height = mvkCeilingDivide(mtlGridSize.height, 2);
+ mtlGridSize.depth = mvkCeilingDivide(mtlGridSize.depth, mtlTgrpSize.depth);
+ // There may be extra threads, but that's OK; the shader does bounds checking to
+ // ensure it doesn't try to write out of bounds.
+ // Alternatively, we could use the newer -[MTLComputeCommandEncoder dispatchThreads:threadsPerThreadgroup:] method,
+ // but that needs Metal 2.0.
+ [mtlComputeEnc dispatchThreadgroups: mtlGridSize threadsPerThreadgroup: mtlTgrpSize];
+ [mtlComputeEnc popDebugGroup];
+
+ if (!needsTempBuff) { continue; }
+ } else {
+ mtlBuffOffset += cpyRgn.bufferOffset;
+ }
+#else
+ mtlBuffOffset += cpyRgn.bufferOffset;
+#endif
+ id<MTLBlitCommandEncoder> mtlBlitEnc = cmdEncoder->getMTLBlitEncoder(cmdUse);
for (uint32_t lyrIdx = 0; lyrIdx < cpyRgn.imageSubresource.layerCount; lyrIdx++) {
if (_toImage) {
[mtlBlitEnc copyFromBuffer: mtlBuffer
- sourceOffset: (mtlBuffOffset + cpyRgn.bufferOffset + (bytesPerImg * lyrIdx))
+ sourceOffset: (mtlBuffOffset + (bytesPerImg * lyrIdx))
sourceBytesPerRow: bytesPerRow
sourceBytesPerImage: bytesPerImg
sourceSize: mtlTxtSize
@@ -709,7 +799,7 @@
sourceOrigin: mtlTxtOrigin
sourceSize: mtlTxtSize
toBuffer: mtlBuffer
- destinationOffset: (mtlBuffOffset + cpyRgn.bufferOffset + (bytesPerImg * lyrIdx))
+ destinationOffset: (mtlBuffOffset + (bytesPerImg * lyrIdx))
destinationBytesPerRow: bytesPerRow
destinationBytesPerImage: bytesPerImg
options: blitOptions];
diff --git a/MoltenVK/MoltenVK/Commands/MVKCommandEncodingPool.h b/MoltenVK/MoltenVK/Commands/MVKCommandEncodingPool.h
index 1373cb0..ea33451 100644
--- a/MoltenVK/MoltenVK/Commands/MVKCommandEncodingPool.h
+++ b/MoltenVK/MoltenVK/Commands/MVKCommandEncodingPool.h
@@ -91,12 +91,28 @@
*/
MVKImage* getTransferMVKImage(MVKImageDescriptorData& imgData);
+ /**
+ * Returns an MVKBuffer configured from the specified MTLBuffer configuration,
+ * with content held in Private storage. The object returned can be used as a
+ * temporary buffer during buffer-image transfers.
+ *
+ * The same buffer instance will be returned for two calls to this funciton with
+ * the same buffer descriptor data. This implies that the same buffer instance could
+ * be used by two transfers within the same encoder or queue. This is acceptable
+ * becuase the content only needs to be valid during the transfer, and it can be
+ * reused by subsequent transfers in the same encoding run.
+ */
+ MVKBuffer* getTransferMVKBuffer(MVKBufferDescriptorData& buffData);
+
/** Returns a MTLComputePipelineState for copying between two buffers with byte-aligned copy regions. */
id<MTLComputePipelineState> getCmdCopyBufferBytesMTLComputePipelineState();
/** Returns a MTLComputePipelineState for filling a buffer. */
id<MTLComputePipelineState> getCmdFillBufferMTLComputePipelineState();
+ /** Returns a MTLComputePipelineState for decompressing a buffer into a 3D image. */
+ id<MTLComputePipelineState> getCmdCopyBufferToImage3DDecompressMTLComputePipelineState(bool needsTempBuff);
+
/** Deletes all the internal resources. */
void clear();
@@ -114,6 +130,8 @@
std::unordered_map<MVKRPSKeyClearAtt, id<MTLRenderPipelineState>> _cmdClearMTLRenderPipelineStates;
std::unordered_map<MVKMTLDepthStencilDescriptorData, id<MTLDepthStencilState>> _mtlDepthStencilStates;
std::unordered_map<MVKImageDescriptorData, MVKImage*> _transferImages;
+ std::unordered_map<MVKBufferDescriptorData, MVKBuffer*> _transferBuffers;
+ std::unordered_map<MVKBufferDescriptorData, MVKDeviceMemory*> _transferBufferMemory;
MVKMTLBufferAllocator _mtlBufferAllocator;
id<MTLSamplerState> _cmdBlitImageLinearMTLSamplerState = nil;
id<MTLSamplerState> _cmdBlitImageNearestMTLSamplerState = nil;
@@ -123,5 +141,6 @@
id<MTLDepthStencilState> _cmdClearDefaultDepthStencilState = nil;
id<MTLComputePipelineState> _mtlCopyBufferBytesComputePipelineState = nil;
id<MTLComputePipelineState> _mtlFillBufferComputePipelineState = nil;
+ id<MTLComputePipelineState> _mtlCopyBufferToImage3DDecompressComputePipelineState[2] = {nil, nil};
};
diff --git a/MoltenVK/MoltenVK/Commands/MVKCommandEncodingPool.mm b/MoltenVK/MoltenVK/Commands/MVKCommandEncodingPool.mm
index f3447d4..fe8d8eb 100644
--- a/MoltenVK/MoltenVK/Commands/MVKCommandEncodingPool.mm
+++ b/MoltenVK/MoltenVK/Commands/MVKCommandEncodingPool.mm
@@ -97,6 +97,10 @@
MVK_ENC_REZ_ACCESS(_transferImages[imgData], newMVKImage(imgData));
}
+MVKBuffer* MVKCommandEncodingPool::getTransferMVKBuffer(MVKBufferDescriptorData& buffData) {
+ MVK_ENC_REZ_ACCESS(_transferBuffers[buffData], newMVKBuffer(buffData, _transferBufferMemory[buffData]));
+}
+
id<MTLComputePipelineState> MVKCommandEncodingPool::getCmdCopyBufferBytesMTLComputePipelineState() {
MVK_ENC_REZ_ACCESS(_mtlCopyBufferBytesComputePipelineState, newCmdCopyBufferBytesMTLComputePipelineState());
}
@@ -105,6 +109,10 @@
MVK_ENC_REZ_ACCESS(_mtlFillBufferComputePipelineState, newCmdFillBufferMTLComputePipelineState());
}
+id<MTLComputePipelineState> MVKCommandEncodingPool::getCmdCopyBufferToImage3DDecompressMTLComputePipelineState(bool needsTempBuff) {
+ MVK_ENC_REZ_ACCESS(_mtlCopyBufferToImage3DDecompressComputePipelineState[needsTempBuff ? 1 : 0], newCmdCopyBufferToImage3DDecompressMTLComputePipelineState(needsTempBuff));
+}
+
void MVKCommandEncodingPool::clear() {
lock_guard<mutex> lock(_lock);
destroyMetalResources();
@@ -135,6 +143,12 @@
for (auto& pair : _transferImages) { _device->destroyImage(pair.second, nullptr); }
_transferImages.clear();
+ for (auto& pair : _transferBuffers) { _device->destroyBuffer(pair.second, nullptr); }
+ _transferBuffers.clear();
+
+ for (auto& pair : _transferBufferMemory) { _device->freeMemory(pair.second, nullptr); }
+ _transferBufferMemory.clear();
+
[_cmdBlitImageLinearMTLSamplerState release];
_cmdBlitImageLinearMTLSamplerState = nil;
@@ -155,5 +169,13 @@
[_mtlCopyBufferBytesComputePipelineState release];
_mtlCopyBufferBytesComputePipelineState = nil;
+
+ [_mtlFillBufferComputePipelineState release];
+ _mtlFillBufferComputePipelineState = nil;
+
+ [_mtlCopyBufferToImage3DDecompressComputePipelineState[0] release];
+ [_mtlCopyBufferToImage3DDecompressComputePipelineState[1] release];
+ _mtlCopyBufferToImage3DDecompressComputePipelineState[0] = nil;
+ _mtlCopyBufferToImage3DDecompressComputePipelineState[1] = nil;
}
diff --git a/MoltenVK/MoltenVK/Commands/MVKCommandPipelineStateFactoryShaderSource.h b/MoltenVK/MoltenVK/Commands/MVKCommandPipelineStateFactoryShaderSource.h
index 46ac6b6..1950556 100644
--- a/MoltenVK/MoltenVK/Commands/MVKCommandPipelineStateFactoryShaderSource.h
+++ b/MoltenVK/MoltenVK/Commands/MVKCommandPipelineStateFactoryShaderSource.h
@@ -37,6 +37,41 @@
float2 v_texCoord; \n\
} VaryingsPosTex; \n\
\n\
+typedef size_t VkDeviceSize; \n\
+ \n\
+typedef enum : uint32_t { \n\
+ VK_FORMAT_BC1_RGB_UNORM_BLOCK = 131, \n\
+ VK_FORMAT_BC1_RGB_SRGB_BLOCK = 132, \n\
+ VK_FORMAT_BC1_RGBA_UNORM_BLOCK = 133, \n\
+ VK_FORMAT_BC1_RGBA_SRGB_BLOCK = 134, \n\
+ VK_FORMAT_BC2_UNORM_BLOCK = 135, \n\
+ VK_FORMAT_BC2_SRGB_BLOCK = 136, \n\
+ VK_FORMAT_BC3_UNORM_BLOCK = 137, \n\
+ VK_FORMAT_BC3_SRGB_BLOCK = 138, \n\
+} VkFormat; \n\
+ \n\
+typedef struct { \n\
+ uint32_t width; \n\
+ uint32_t height; \n\
+} VkExtent2D; \n\
+ \n\
+typedef struct { \n\
+ uint32_t width; \n\
+ uint32_t height; \n\
+ uint32_t depth; \n\
+} __attribute__((packed)) VkExtent3D; \n\
+ \n\
+typedef struct { \n\
+ int32_t x; \n\
+ int32_t y; \n\
+ int32_t z; \n\
+} __attribute__((packed)) VkOffset3D; \n\
+ \n"
+#define MVK_DECOMPRESS_CODE(...) #__VA_ARGS__
+#include "MVKDXTnCodec.def"
+#undef MVK_DECOMPRESS_CODE
+"\n\
+ \n\
vertex VaryingsPosTex vtxCmdBlitImage(AttributesPosTex attributes [[stage_in]]) { \n\
VaryingsPosTex varyings; \n\
varyings.v_position = float4(attributes.a_position, 0.0, 1.0); \n\
@@ -70,5 +105,68 @@
} \n\
}; \n\
\n\
+typedef struct { \n\
+ uint32_t srcRowStride; \n\
+ uint32_t srcRowStrideHigh; \n\
+ uint32_t srcDepthStride; \n\
+ uint32_t srcDepthStrideHigh; \n\
+ uint32_t destRowStride; \n\
+ uint32_t destRowStrideHigh; \n\
+ uint32_t destDepthStride; \n\
+ uint32_t destDepthStrideHigh; \n\
+ VkFormat format; \n\
+ VkOffset3D offset; \n\
+ VkExtent3D extent; \n\
+} CmdCopyBufferToImageInfo; \n\
+ \n\
+kernel void cmdCopyBufferToImage3DDecompressDXTn(constant uint8_t* src [[buffer(0)]], \n\
+ texture3d<float, access::write> dest [[texture(0)]], \n\
+ constant CmdCopyBufferToImageInfo& info [[buffer(2)]], \n\
+ uint3 pos [[thread_position_in_grid]]) { \n\
+ uint x = pos.x * 4, y = pos.y * 4, z = pos.z; \n\
+ VkDeviceSize blockByteCount = isBC1Format(info.format) ? 8 : 16; \n\
+ \n\
+ if (x >= info.extent.width || y >= info.extent.height || z >= info.extent.depth) { return; } \n\
+ \n\
+ src += z * info.srcDepthStride + y * info.srcRowStride / 4 + x * blockByteCount / 4; \n\
+ VkExtent2D blockExtent; \n\
+ blockExtent.width = min(info.extent.width - x, 4u); \n\
+ blockExtent.height = min(info.extent.height - y, 4u); \n\
+ uint pixels[16] = {0}; \n\
+ decompressDXTnBlock(src, pixels, blockExtent, 4 * sizeof(uint), info.format); \n\
+ for (uint j = 0; j < blockExtent.height; ++j) { \n\
+ for (uint i = 0; i < blockExtent.width; ++i) { \n\
+ // The pixel components are in BGRA order, but texture::write wants them \n\
+ // in RGBA order. We can fix that (ironically) with a BGRA swizzle. \n\
+ dest.write(unpack_unorm4x8_to_float(pixels[j * 4 + i]).bgra, \n\
+ uint3(info.offset.x + x + i, info.offset.y + y + j, info.offset.z + z)); \n\
+ } \n\
+ } \n\
+} \n\
+ \n\
+kernel void cmdCopyBufferToImage3DDecompressTempBufferDXTn(constant uint8_t* src [[buffer(0)]], \n\
+ device uint8_t* dest [[buffer(1)]], \n\
+ constant CmdCopyBufferToImageInfo& info [[buffer(2)]],\n\
+ uint3 pos [[thread_position_in_grid]]) { \n\
+ uint x = pos.x * 4, y = pos.y * 4, z = pos.z; \n\
+ VkDeviceSize blockByteCount = isBC1Format(info.format) ? 8 : 16; \n\
+ \n\
+ if (x >= info.extent.width || y >= info.extent.height || z >= info.extent.depth) { return; } \n\
+ \n\
+ src += z * info.srcDepthStride + y * info.srcRowStride / 4 + x * blockByteCount / 4; \n\
+ dest += z * info.destDepthStride + y * info.destRowStride + x * sizeof(uint); \n\
+ VkExtent2D blockExtent; \n\
+ blockExtent.width = min(info.extent.width - x, 4u); \n\
+ blockExtent.height = min(info.extent.height - y, 4u); \n\
+ uint pixels[16] = {0}; \n\
+ decompressDXTnBlock(src, pixels, blockExtent, 4 * sizeof(uint), info.format); \n\
+ device uint* destPixel = (device uint*)dest; \n\
+ for (uint j = 0; j < blockExtent.height; ++j) { \n\
+ for (uint i = 0; i < blockExtent.width; ++i) { \n\
+ destPixel[j * info.destRowStride / sizeof(uint) + i] = pixels[j * 4 + i]; \n\
+ } \n\
+ } \n\
+} \n\
+ \n\
";
diff --git a/MoltenVK/MoltenVK/Commands/MVKCommandResourceFactory.h b/MoltenVK/MoltenVK/Commands/MVKCommandResourceFactory.h
index 274f2c3..28a331c 100644
--- a/MoltenVK/MoltenVK/Commands/MVKCommandResourceFactory.h
+++ b/MoltenVK/MoltenVK/Commands/MVKCommandResourceFactory.h
@@ -259,6 +259,43 @@
#pragma mark -
+#pragma mark MVKBufferDescriptorData
+
+/**
+ * Key to use for looking up cached MVKBuffer instances, and to create a new MVKBuffer when needed.
+ * The contents of this structure is a subset of the contents of the VkBufferCreateInfo structure.
+ *
+ * This structure can be used as a key in a std::map and std::unordered_map.
+ */
+typedef struct MVKBufferDescriptorData_t {
+ VkDeviceSize size;
+ VkBufferUsageFlags usage;
+
+ bool operator==(const MVKBufferDescriptorData_t& rhs) const {
+ return (memcmp(this, &rhs, sizeof(*this)) == 0);
+ }
+
+ std::size_t hash() const {
+ return mvkHash((uint64_t*)this, sizeof(*this) / sizeof(uint64_t));
+ }
+
+ MVKBufferDescriptorData_t() { memset(this, 0, sizeof(*this)); }
+
+} __attribute__((aligned(sizeof(uint64_t)))) MVKBufferDescriptorData;
+
+/**
+ * Hash structure implementation for MVKBufferDescriptorData in std namespace, so
+ * MVKBufferDescriptorData can be used as a key in a std::map and std::unordered_map.
+ */
+namespace std {
+ template <>
+ struct hash<MVKBufferDescriptorData> {
+ std::size_t operator()(const MVKBufferDescriptorData& k) const { return k.hash(); }
+ };
+}
+
+
+#pragma mark -
#pragma mark MVKCommandResourceFactory
/**
@@ -308,12 +345,23 @@
*/
MVKImage* newMVKImage(MVKImageDescriptorData& imgData);
+ /**
+ * Returns a new MVKBuffer configured with content held in Private storage.
+ * The buffer returned is bound to a new device memory, also returned, and
+ * can be used as a temporary buffer during buffer-image transfers.
+ */
+ MVKBuffer* newMVKBuffer(MVKBufferDescriptorData& buffData, MVKDeviceMemory*& buffMem);
+
/** Returns a new MTLComputePipelineState for copying between two buffers with byte-aligned copy regions. */
id<MTLComputePipelineState> newCmdCopyBufferBytesMTLComputePipelineState();
/** Returns a new MTLComputePipelineState for filling a buffer. */
id<MTLComputePipelineState> newCmdFillBufferMTLComputePipelineState();
+ /** Returns a new MTLComputePipelineState for copying between a buffer holding compressed data and a 3D image. */
+ id<MTLComputePipelineState> newCmdCopyBufferToImage3DDecompressMTLComputePipelineState(bool needTempBuf);
+
+
#pragma mark Construction
MVKCommandResourceFactory(MVKDevice* device);
diff --git a/MoltenVK/MoltenVK/Commands/MVKCommandResourceFactory.mm b/MoltenVK/MoltenVK/Commands/MVKCommandResourceFactory.mm
index 91602a2..b069977 100644
--- a/MoltenVK/MoltenVK/Commands/MVKCommandResourceFactory.mm
+++ b/MoltenVK/MoltenVK/Commands/MVKCommandResourceFactory.mm
@@ -20,6 +20,7 @@
#include "MVKCommandPipelineStateFactoryShaderSource.h"
#include "MVKPipeline.h"
#include "MVKFoundation.h"
+#include "MVKBuffer.h"
#include "NSString+MoltenVK.h"
#include "MTLRenderPipelineDescriptor+MoltenVK.h"
#include "MVKLogging.h"
@@ -342,6 +343,35 @@
return mvkImg;
}
+MVKBuffer* MVKCommandResourceFactory::newMVKBuffer(MVKBufferDescriptorData& buffData, MVKDeviceMemory*& buffMem) {
+ const VkBufferCreateInfo createInfo = {
+ .sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO,
+ .pNext = nullptr,
+ .flags = 0,
+ .size = buffData.size,
+ .usage = buffData.usage,
+ .sharingMode = VK_SHARING_MODE_EXCLUSIVE,
+ .queueFamilyIndexCount = 0,
+ .pQueueFamilyIndices = nullptr,
+ };
+ MVKBuffer* mvkBuff = _device->createBuffer(&createInfo, nullptr);
+ const VkMemoryDedicatedAllocateInfo dedicatedInfo = {
+ .sType = VK_STRUCTURE_TYPE_MEMORY_DEDICATED_ALLOCATE_INFO,
+ .pNext = nullptr,
+ .image = VK_NULL_HANDLE,
+ .buffer = (VkBuffer)mvkBuff,
+ };
+ const VkMemoryAllocateInfo allocInfo = {
+ .sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO,
+ .pNext = &dedicatedInfo,
+ .allocationSize = buffData.size,
+ .memoryTypeIndex = _device->getVulkanMemoryTypeIndex(MTLStorageModePrivate),
+ };
+ buffMem = _device->allocateMemory(&allocInfo, nullptr);
+ mvkBuff->bindDeviceMemory(buffMem, 0);
+ return mvkBuff;
+}
+
id<MTLComputePipelineState> MVKCommandResourceFactory::newCmdCopyBufferBytesMTLComputePipelineState() {
return newMTLComputePipelineState(getFunctionNamed("cmdCopyBufferBytes"));
}
@@ -350,6 +380,11 @@
return newMTLComputePipelineState(getFunctionNamed("cmdFillBuffer"));
}
+id<MTLComputePipelineState> MVKCommandResourceFactory::newCmdCopyBufferToImage3DDecompressMTLComputePipelineState(bool needTempBuf) {
+ return newMTLComputePipelineState(getFunctionNamed(needTempBuf ? "cmdCopyBufferToImage3DDecompressTempBufferDXTn" :
+ "cmdCopyBufferToImage3DDecompressDXTn"));
+}
+
#pragma mark Support methods
diff --git a/MoltenVK/MoltenVK/GPUObjects/MVKDevice.mm b/MoltenVK/MoltenVK/GPUObjects/MVKDevice.mm
index 162404b..5335a56 100644
--- a/MoltenVK/MoltenVK/GPUObjects/MVKDevice.mm
+++ b/MoltenVK/MoltenVK/GPUObjects/MVKDevice.mm
@@ -30,6 +30,7 @@
#include "MVKRenderPass.h"
#include "MVKCommandPool.h"
#include "MVKFoundation.h"
+#include "MVKCodec.h"
#include "MVKEnvironment.h"
#include "MVKOSExtensions.h"
#include <MoltenVKSPIRVToMSLConverter/SPIRVToMSLConverter.h>
@@ -261,10 +262,22 @@
return VK_ERROR_FORMAT_NOT_SUPPORTED;
}
// Metal does not allow compressed or depth/stencil formats on 3D textures
- if (mvkFormatTypeFromVkFormat(format) == kMVKFormatDepthStencil ||
- mvkFormatTypeFromVkFormat(format) == kMVKFormatCompressed) {
+ if (mvkFormatTypeFromVkFormat(format) == kMVKFormatDepthStencil
+#if MVK_IOS
+ || mvkFormatTypeFromVkFormat(format) == kMVKFormatCompressed
+#endif
+ ) {
return VK_ERROR_FORMAT_NOT_SUPPORTED;
}
+#if MVK_MACOS
+ if (mvkFormatTypeFromVkFormat(format) == kMVKFormatCompressed) {
+ // If this is a compressed format and there's no codec, it isn't
+ // supported.
+ if (!mvkCanDecodeFormat(format) ) { return VK_ERROR_FORMAT_NOT_SUPPORTED; }
+ // Compressed multisampled textures aren't supported.
+ sampleCounts = VK_SAMPLE_COUNT_1_BIT;
+ }
+#endif
maxExt.width = pLimits->maxImageDimension3D;
maxExt.height = pLimits->maxImageDimension3D;
maxExt.depth = pLimits->maxImageDimension3D;
diff --git a/MoltenVK/MoltenVK/GPUObjects/MVKImage.h b/MoltenVK/MoltenVK/GPUObjects/MVKImage.h
index 0a6b2e0..6eaa8a9 100644
--- a/MoltenVK/MoltenVK/GPUObjects/MVKImage.h
+++ b/MoltenVK/MoltenVK/GPUObjects/MVKImage.h
@@ -238,6 +238,7 @@
bool _hasExpectedTexelSize;
bool _usesTexelBuffer;
bool _isLinear;
+ bool _is3DCompressed;
};
diff --git a/MoltenVK/MoltenVK/GPUObjects/MVKImage.mm b/MoltenVK/MoltenVK/GPUObjects/MVKImage.mm
index c8d7f5b..580afd6 100644
--- a/MoltenVK/MoltenVK/GPUObjects/MVKImage.mm
+++ b/MoltenVK/MoltenVK/GPUObjects/MVKImage.mm
@@ -23,6 +23,7 @@
#include "MVKFoundation.h"
#include "MVKEnvironment.h"
#include "MVKLogging.h"
+#include "MVKCodec.h"
#import "MTLTextureDescriptor+MoltenVK.h"
#import "MTLSamplerDescriptor+MoltenVK.h"
@@ -387,13 +388,30 @@
mvkDisableFlag(usage, MTLTextureUsageRenderTarget);
}
+#if MVK_MACOS
+ // If this is a 3D compressed texture, tell Metal we might write to it.
+ if (_is3DCompressed) {
+ mvkEnableFlag(usage, MTLTextureUsageShaderWrite);
+ }
+#endif
+
return usage;
}
// Returns an autoreleased Metal texture descriptor constructed from the properties of this image.
MTLTextureDescriptor* MVKImage::getMTLTextureDescriptor() {
MTLTextureDescriptor* mtlTexDesc = [[MTLTextureDescriptor alloc] init];
+#if MVK_MACOS
+ if (_is3DCompressed) {
+ // Metal doesn't yet support 3D compressed textures, so we'll decompress
+ // the texture ourselves. This, then, is the *uncompressed* format.
+ mtlTexDesc.pixelFormat = MTLPixelFormatBGRA8Unorm;
+ } else {
+ mtlTexDesc.pixelFormat = _mtlPixelFormat;
+ }
+#else
mtlTexDesc.pixelFormat = _mtlPixelFormat;
+#endif
mtlTexDesc.textureType = _mtlTextureType;
mtlTexDesc.width = _extent.width;
mtlTexDesc.height = _extent.height;
@@ -447,6 +465,27 @@
mtlRegion.origin = MTLOriginMake(0, 0, 0);
mtlRegion.size = mvkMTLSizeFromVkExtent3D(mipExtent);
+#if MVK_MACOS
+ std::unique_ptr<char[]> decompBuffer;
+ if (_is3DCompressed) {
+ // We cannot upload the texture data directly in this case. But we
+ // can upload the decompressed image data.
+ std::unique_ptr<MVKCodec> codec = mvkCreateCodec(getVkFormat());
+ if (!codec) {
+ mvkNotifyErrorWithText(VK_ERROR_FORMAT_NOT_SUPPORTED, "A 3D texture used a compressed format that MoltenVK does not yet support.");
+ return;
+ }
+ VkSubresourceLayout destLayout;
+ destLayout.rowPitch = 4 * mipExtent.width;
+ destLayout.depthPitch = destLayout.rowPitch * mipExtent.height;
+ destLayout.size = destLayout.depthPitch * mipExtent.depth;
+ decompBuffer = std::unique_ptr<char[]>(new char[destLayout.size]);
+ codec->decompress(decompBuffer.get(), pImgBytes, destLayout, imgLayout, mipExtent);
+ pImgBytes = decompBuffer.get();
+ imgLayout = destLayout;
+ }
+#endif
+
[getMTLTexture() replaceRegion: mtlRegion
mipmapLevel: imgSubRez.mipLevel
slice: imgSubRez.arrayLayer
@@ -499,9 +538,15 @@
setConfigurationResult(mvkNotifyErrorWithText(VK_ERROR_FEATURE_NOT_PRESENT, "vkCreateImage() : Metal does not allow uncompressed views of compressed images."));
}
+#if MVK_IOS
if ( (pCreateInfo->imageType != VK_IMAGE_TYPE_2D) && (mvkFormatTypeFromVkFormat(pCreateInfo->format) == kMVKFormatCompressed) ) {
setConfigurationResult(mvkNotifyErrorWithText(VK_ERROR_FEATURE_NOT_PRESENT, "vkCreateImage() : Under Metal, compressed formats may only be used with 2D images."));
}
+#else
+ if ( (pCreateInfo->imageType != VK_IMAGE_TYPE_2D) && (mvkFormatTypeFromVkFormat(pCreateInfo->format) == kMVKFormatCompressed) && !mvkCanDecodeFormat(pCreateInfo->format) ) {
+ setConfigurationResult(mvkNotifyErrorWithText(VK_ERROR_FEATURE_NOT_PRESENT, "vkCreateImage() : Under Metal, compressed formats may only be used with 2D images."));
+ }
+#endif
if ( (pCreateInfo->imageType != VK_IMAGE_TYPE_2D) && (mvkFormatTypeFromVkFormat(pCreateInfo->format) == kMVKFormatDepthStencil) ) {
setConfigurationResult(mvkNotifyErrorWithText(VK_ERROR_FEATURE_NOT_PRESENT, "vkCreateImage() : Under Metal, depth/stencil formats may only be used with 2D images."));
}
@@ -552,6 +597,7 @@
_hasExpectedTexelSize = (mvkMTLPixelFormatBytesPerBlock(_mtlPixelFormat) == mvkVkFormatBytesPerBlock(pCreateInfo->format));
_isLinear = validateLinear(pCreateInfo);
_usesTexelBuffer = false;
+ _is3DCompressed = _mtlTextureType == MTLTextureType3D && mvkFormatTypeFromMTLPixelFormat(_mtlPixelFormat) == kMVKFormatCompressed;
_byteAlignment = _isLinear ? _device->getVkFormatTexelBufferAlignment(pCreateInfo->format) : mvkEnsurePowerOfTwo(mvkVkFormatBytesPerBlock(pCreateInfo->format));
diff --git a/MoltenVK/MoltenVK/Utility/MVKCodec.cpp b/MoltenVK/MoltenVK/Utility/MVKCodec.cpp
new file mode 100644
index 0000000..62b9b2f
--- /dev/null
+++ b/MoltenVK/MoltenVK/Utility/MVKCodec.cpp
@@ -0,0 +1,125 @@
+/*
+ * MVKCodec.cpp
+ *
+ * Copyright (c) 2018 Chip Davis for CodeWeavers
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+
+#include "MVKCodec.h"
+
+#include <algorithm>
+#include <simd/simd.h>
+
+
+using simd::float3;
+using simd::float4;
+using simd::pow;
+using simd::select;
+
+static uint32_t pack_float_to_unorm4x8(float4 x) {
+ return ((((uint8_t)(x.r * 255)) & 0x000000ff) | ((((uint8_t)(x.g * 255)) << 8) & 0x0000ff00) |
+ ((((uint8_t)(x.b * 255)) & 0x00ff0000) << 16) | ((((uint8_t)(x.a * 255)) << 24) & 0xff000000));
+}
+
+static float3 unpack_unorm565_to_float(uint16_t x) {
+ return simd::make_float3(((x >> 11) & 0x1f) / 31.0f, ((x >> 5) & 0x3f) / 63.0f, (x & 0x1f) / 31.0f);
+}
+
+
+/** Texture codec for DXTn (i.e. BC[1-3]) compressed data.
+ *
+ * This implementation is largely derived from Wine, from code originally
+ * written by Connor McAdams.
+ */
+class MVKDXTnCodec : public MVKCodec {
+
+public:
+
+ void decompress(void* pDest, const void* pSrc, const VkSubresourceLayout& destLayout, const VkSubresourceLayout& srcLayout, VkExtent3D extent) override {
+ VkDeviceSize blockByteCount;
+ const uint8_t* pSrcRow;
+ const uint8_t* pSrcSlice = (const uint8_t*)pSrc;
+ uint8_t* pDestRow;
+ uint8_t* pDestSlice = (uint8_t*)pDest;
+
+ blockByteCount = isBC1Format(_format) ? 8 : 16;
+
+ for (uint32_t z = 0; z < extent.depth; ++z) {
+ pSrcRow = pSrcSlice;
+ pDestRow = pDestSlice;
+ for (uint32_t y = 0; y < extent.height; y += 4) {
+ for (uint32_t x = 0; x < extent.width; x += 4) {
+ VkExtent2D blockExtent;
+ blockExtent.width = std::min(extent.width - x, 4u);
+ blockExtent.height = std::min(extent.height - y, 4u);
+ decompressDXTnBlock(pSrcRow + x * (blockByteCount / 4),
+ pDestRow + x * 4, blockExtent, destLayout.rowPitch, _format);
+ }
+ pSrcRow += srcLayout.rowPitch;
+ pDestRow += destLayout.rowPitch * 4;
+ }
+ pSrcSlice += srcLayout.depthPitch;
+ pDestSlice += destLayout.depthPitch;
+ }
+ }
+
+ /** Constructs an instance. */
+ MVKDXTnCodec(VkFormat format) : _format(format) {}
+
+private:
+
+#define constant const
+#define device
+#define thread
+#define MVK_DECOMPRESS_CODE(...) __VA_ARGS__
+#include "MVKDXTnCodec.def"
+#undef MVK_DECOMPRESS_CODE
+
+ VkFormat _format;
+};
+
+std::unique_ptr<MVKCodec> mvkCreateCodec(VkFormat format) {
+ switch (format) {
+ case VK_FORMAT_BC1_RGB_UNORM_BLOCK:
+ case VK_FORMAT_BC1_RGB_SRGB_BLOCK:
+ case VK_FORMAT_BC1_RGBA_UNORM_BLOCK:
+ case VK_FORMAT_BC1_RGBA_SRGB_BLOCK:
+ case VK_FORMAT_BC2_UNORM_BLOCK:
+ case VK_FORMAT_BC2_SRGB_BLOCK:
+ case VK_FORMAT_BC3_UNORM_BLOCK:
+ case VK_FORMAT_BC3_SRGB_BLOCK:
+ return std::unique_ptr<MVKCodec>(new MVKDXTnCodec(format));
+
+ default:
+ return nullptr;
+ }
+}
+
+bool mvkCanDecodeFormat(VkFormat format) {
+ switch (format) {
+ case VK_FORMAT_BC1_RGB_UNORM_BLOCK:
+ case VK_FORMAT_BC1_RGB_SRGB_BLOCK:
+ case VK_FORMAT_BC1_RGBA_UNORM_BLOCK:
+ case VK_FORMAT_BC1_RGBA_SRGB_BLOCK:
+ case VK_FORMAT_BC2_UNORM_BLOCK:
+ case VK_FORMAT_BC2_SRGB_BLOCK:
+ case VK_FORMAT_BC3_UNORM_BLOCK:
+ case VK_FORMAT_BC3_SRGB_BLOCK:
+ return true;
+
+ default:
+ return false;
+ }
+}
diff --git a/MoltenVK/MoltenVK/Utility/MVKCodec.h b/MoltenVK/MoltenVK/Utility/MVKCodec.h
new file mode 100644
index 0000000..ddff1ec
--- /dev/null
+++ b/MoltenVK/MoltenVK/Utility/MVKCodec.h
@@ -0,0 +1,50 @@
+/*
+ * MVKCodec.h
+ *
+ * Copyright (c) 2018 Chip Davis for CodeWeavers
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+
+#pragma once
+
+#include "MVKFoundation.h"
+
+#include <string>
+
+
+#pragma mark Texture data codecs
+
+/**
+ * This is the base class implemented by all codecs supported by MoltenVK.
+ * Objects of this class are used to decompress texture data for upload to a
+ * 3D texture.
+ */
+class MVKCodec {
+
+public:
+
+ /** Decompresses compressed texture data for upload. */
+ virtual void decompress(void* pDest, const void* pSrc, const VkSubresourceLayout& destLayout, const VkSubresourceLayout& srcLayout, VkExtent3D extent) = 0;
+
+ /** Destructor. */
+ virtual ~MVKCodec() = default;
+
+};
+
+/** Returns an appropriate codec for the given format, or nullptr if the format is not supported. */
+std::unique_ptr<MVKCodec> mvkCreateCodec(VkFormat format);
+
+/** Returns whether or not the given format can be decompressed. */
+bool mvkCanDecodeFormat(VkFormat format);
diff --git a/MoltenVK/MoltenVK/Utility/MVKDXTnCodec.def b/MoltenVK/MoltenVK/Utility/MVKDXTnCodec.def
new file mode 100644
index 0000000..22cf9da
--- /dev/null
+++ b/MoltenVK/MoltenVK/Utility/MVKDXTnCodec.def
@@ -0,0 +1,128 @@
+/*
+ * MVKDXTnCodec.def
+ *
+ * Copyright (c) 2018 Chip Davis for CodeWeavers
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+
+#ifndef MVK_DECOMPRESS_CODE
+#error MVK_DECOMPRESS_CODE must be defined before including this file
+#endif
+
+MVK_DECOMPRESS_CODE(
+ static bool isBC1Format(VkFormat format) {
+ return format == VK_FORMAT_BC1_RGB_UNORM_BLOCK || format == VK_FORMAT_BC1_RGB_SRGB_BLOCK ||
+ format == VK_FORMAT_BC1_RGBA_UNORM_BLOCK || format == VK_FORMAT_BC1_RGBA_SRGB_BLOCK;
+ }
+
+ static bool isBC2Format(VkFormat format) {
+ return format == VK_FORMAT_BC2_UNORM_BLOCK || format == VK_FORMAT_BC2_SRGB_BLOCK;
+ }
+
+ static bool isBC3Format(VkFormat format) {
+ return format == VK_FORMAT_BC3_UNORM_BLOCK || format == VK_FORMAT_BC3_SRGB_BLOCK;
+ }
+
+ static bool isSRGBFormat(VkFormat format) {
+ return format == VK_FORMAT_BC1_RGB_SRGB_BLOCK || format == VK_FORMAT_BC1_RGBA_SRGB_BLOCK ||
+ format == VK_FORMAT_BC2_SRGB_BLOCK || format == VK_FORMAT_BC3_SRGB_BLOCK;
+ }
+
+ static void buildDXTnColourTable(uint16_t colour0, uint16_t colour1, thread float3* pColourTable, VkFormat format) {
+ pColourTable[0] = unpack_unorm565_to_float(colour0);
+ pColourTable[1] = unpack_unorm565_to_float(colour1);
+
+ if (isBC1Format(format) && colour0 <= colour1) {
+ pColourTable[2] = (pColourTable[0] + pColourTable[1]) / 2;
+ pColourTable[3] = float3(0);
+ } else {
+ for (uint32_t i = 0; i < 2; ++i) {
+ pColourTable[i + 2] = (2 * pColourTable[i] + pColourTable[1 - i]) / 3;
+ }
+ }
+ }
+
+ static void buildDXT5AlphaTable(uint8_t alpha0, uint8_t alpha1, thread float* pAlphaTable) {
+ pAlphaTable[0] = alpha0 / 255.0f;
+ pAlphaTable[1] = alpha1 / 255.0f;
+
+ if (alpha0 > alpha1) {
+ for (uint32_t i = 0; i < 6; ++i) {
+ pAlphaTable[2 + i] = ((6 - i) * pAlphaTable[0] + (i + 1) * pAlphaTable[1]) / 7;
+ }
+ } else {
+ for (uint32_t i = 0; i < 4; ++i) {
+ pAlphaTable[2 + i] = ((4 - i) * pAlphaTable[0] + (i + 1) * pAlphaTable[1]) / 5;
+ }
+ pAlphaTable[6] = 0;
+ pAlphaTable[7] = 1;
+ }
+ }
+
+ static float3 sRGBCorrect(float3 colour) {
+ return select(pow((colour + 0.055)/1.055, float3(2.4)), colour/12.92, colour <= 0.04045);
+ }
+
+ static void decompressDXTnBlock(constant void* pSrc, thread void* pDest, VkExtent2D extent, VkDeviceSize destRowPitch, VkFormat format) {
+ constant uint32_t* pSrcBlock = (constant uint32_t *)pSrc;
+ bool isBC1Alpha = false;
+ float3 colourTable[4];
+ float alphaTable[8];
+ size_t alphaBits;
+ uint32_t colourBits;
+
+ if (isBC1Format(format)) {
+ alphaBits = 0;
+
+ uint16_t colour0 = pSrcBlock[0] & 0xffff;
+ uint16_t colour1 = pSrcBlock[0] >> 16;
+ colourBits = pSrcBlock[1];
+ buildDXTnColourTable(colour0, colour1, colourTable, format);
+ if (colour0 <= colour1) { isBC1Alpha = true; }
+ } else {
+ alphaBits = pSrcBlock[0] | ((size_t)pSrcBlock[1] << 32);
+ if (isBC3Format(format)) {
+ buildDXT5AlphaTable(alphaBits & 0xff, (alphaBits >> 8) & 0xff, alphaTable);
+ alphaBits >>= 16;
+ }
+
+ colourBits = pSrcBlock[3];
+ buildDXTnColourTable(pSrcBlock[2] & 0xffff, pSrcBlock[2] >> 16, colourTable, format);
+ }
+
+ for (uint32_t y = 0; y < extent.height; ++y) {
+ thread uint32_t* pDestRow = (thread uint32_t *)((thread uint8_t *)pDest + y * destRowPitch);
+ for (uint32_t x = 0; x < extent.width; ++x) {
+ uint8_t colourIndex = (colourBits >> (y * 8 + x * 2)) & 0x3;
+ float alpha;
+ if (isBC1Format(format)) {
+ alpha = (!isBC1Alpha || colourIndex != 3) ? 1.0f : 0.0f;
+ } else if (isBC2Format(format)) {
+ alpha = ((alphaBits >> (y * 16 + x * 4)) & 0xf) / 15.0f;
+ } else { // Must be a BC3 format
+ alpha = alphaTable[(alphaBits >> (y * 12 + x * 3)) & 0x7];
+ }
+ float4 colour;
+ colour.rgb = colourTable[colourIndex];
+ if (isSRGBFormat(format)) {
+ // Convert sRGB back to linear.
+ colour.rgb = sRGBCorrect(colour.rgb);
+ }
+ colour.a = alpha;
+ pDestRow[x] = pack_float_to_unorm4x8(colour);
+ }
+ }
+ }
+)