Merge pull request #1030 from cdavis5e/clear-linear-images-mac

MVKCmdClearColorImage: Clear linear images on Mac with a shader.
diff --git a/MoltenVK/MoltenVK/Commands/MVKCmdTransfer.mm b/MoltenVK/MoltenVK/Commands/MVKCmdTransfer.mm
index 24e840b..5714c1b 100644
--- a/MoltenVK/MoltenVK/Commands/MVKCmdTransfer.mm
+++ b/MoltenVK/MoltenVK/Commands/MVKCmdTransfer.mm
@@ -1166,8 +1166,8 @@
 
         // Validate
         MVKMTLFmtCaps mtlFmtCaps = cmdBuff->getPixelFormats()->getCapabilities(_image->getMTLPixelFormat(planeIndex));
-        if ((isDS && !mvkAreAllFlagsEnabled(mtlFmtCaps, kMVKMTLFmtCapsDSAtt)) ||
-            ( !isDS && !mvkAreAllFlagsEnabled(mtlFmtCaps, kMVKMTLFmtCapsColorAtt))) {
+		uint32_t reqCap = isDS ? kMVKMTLFmtCapsDSAtt : (_image->getIsLinear() ? kMVKMTLFmtCapsWrite : kMVKMTLFmtCapsColorAtt);
+        if (!mvkAreAllFlagsEnabled(mtlFmtCaps, reqCap)) {
             return cmdBuff->reportError(VK_ERROR_FEATURE_NOT_PRESENT, "vkCmdClear%sImage(): Format %s cannot be cleared on this device.", (isDS ? "DepthStencil" : "Color"), cmdBuff->getPixelFormats()->getName(_image->getVkFormat()));
         }
         
@@ -1196,6 +1196,31 @@
         id<MTLTexture> imgMTLTex = _image->getMTLTexture(MVKImage::getPlaneFromVkImageAspectFlags(srRange.aspectMask));
         if ( !imgMTLTex ) { continue; }
 
+#if MVK_MACOS
+        if ( _image->getIsLinear() ) {
+            // These images cannot be rendered. Instead, use a compute shader.
+            // Luckily for us, linear images only have one mip and one array layer under Metal.
+            assert( !isDS );
+            id<MTLComputePipelineState> mtlClearState = cmdEncoder->getCommandEncodingPool()->getCmdClearColorImageMTLComputePipelineState(pixFmts->getFormatType(_image->getVkFormat()));
+            id<MTLComputeCommandEncoder> mtlComputeEnc = cmdEncoder->getMTLComputeEncoder(kMVKCommandUseClearColorImage);
+            [mtlComputeEnc pushDebugGroup: @"vkCmdClearColorImage"];
+            [mtlComputeEnc setComputePipelineState: mtlClearState];
+            [mtlComputeEnc setTexture: imgMTLTex atIndex: 0];
+            cmdEncoder->setComputeBytes(mtlComputeEnc, &_clearValue, sizeof(_clearValue), 0);
+            MTLSize gridSize = mvkMTLSizeFromVkExtent3D(_image->getExtent3D());
+            MTLSize tgSize = MTLSizeMake(mtlClearState.threadExecutionWidth, 1, 1);
+            if (cmdEncoder->getDevice()->_pMetalFeatures->nonUniformThreadgroups) {
+                [mtlComputeEnc dispatchThreads: gridSize threadsPerThreadgroup: tgSize];
+            } else {
+                MTLSize tgCount = MTLSizeMake(gridSize.width / tgSize.width, gridSize.height, gridSize.depth);
+                if (gridSize.width % tgSize.width) { tgCount.width += 1; }
+                [mtlComputeEnc dispatchThreadgroups: tgCount threadsPerThreadgroup: tgSize];
+            }
+            [mtlComputeEnc popDebugGroup];
+            continue;
+        }
+#endif
+
 		MTLRenderPassDescriptor* mtlRPDesc = [MTLRenderPassDescriptor renderPassDescriptor];
 		MTLRenderPassColorAttachmentDescriptor* mtlRPCADesc = nil;
 		MTLRenderPassDepthAttachmentDescriptor* mtlRPDADesc = nil;
diff --git a/MoltenVK/MoltenVK/Commands/MVKCommandBuffer.mm b/MoltenVK/MoltenVK/Commands/MVKCommandBuffer.mm
index 24b65a4..0889577 100644
--- a/MoltenVK/MoltenVK/Commands/MVKCommandBuffer.mm
+++ b/MoltenVK/MoltenVK/Commands/MVKCommandBuffer.mm
@@ -752,6 +752,7 @@
         case kMVKCommandUseCopyBufferToImage:   return @"vkCmdCopyBufferToImage ComputeEncoder";
         case kMVKCommandUseCopyImageToBuffer:   return @"vkCmdCopyImageToBuffer ComputeEncoder";
         case kMVKCommandUseFillBuffer:          return @"vkCmdFillBuffer ComputeEncoder";
+        case kMVKCommandUseClearColorImage:     return @"vkCmdClearColorImage ComputeEncoder";
         case kMVKCommandUseTessellationVertexTessCtl: return @"vkCmdDraw (vertex and tess control stages) ComputeEncoder";
         case kMVKCommandUseMultiviewInstanceCountAdjust: return @"vkCmdDraw (multiview instance count adjustment) ComputeEncoder";
         case kMVKCommandUseCopyQueryPoolResults:return @"vkCmdCopyQueryPoolResults ComputeEncoder";
diff --git a/MoltenVK/MoltenVK/Commands/MVKCommandEncodingPool.h b/MoltenVK/MoltenVK/Commands/MVKCommandEncodingPool.h
index 52e4704..eac9f3f 100644
--- a/MoltenVK/MoltenVK/Commands/MVKCommandEncodingPool.h
+++ b/MoltenVK/MoltenVK/Commands/MVKCommandEncodingPool.h
@@ -109,6 +109,11 @@
 	/** Returns a MTLComputePipelineState for filling a buffer. */
 	id<MTLComputePipelineState> getCmdFillBufferMTLComputePipelineState();
 
+#if MVK_MACOS
+	/** Returns a MTLComputePipelineState for clearing an image. Currently only used for 2D linear images on Mac. */
+	id<MTLComputePipelineState> getCmdClearColorImageMTLComputePipelineState(MVKFormatType type);
+#endif
+
 	/** Returns a MTLComputePipelineState for decompressing a buffer into a 3D image. */
 	id<MTLComputePipelineState> getCmdCopyBufferToImage3DDecompressMTLComputePipelineState(bool needsTempBuff);
 
@@ -151,6 +156,9 @@
     id<MTLDepthStencilState> _cmdClearDefaultDepthStencilState = nil;
     id<MTLComputePipelineState> _mtlCopyBufferBytesComputePipelineState = nil;
 	id<MTLComputePipelineState> _mtlFillBufferComputePipelineState = nil;
+#if MVK_MACOS
+	id<MTLComputePipelineState> _mtlClearColorImageComputePipelineState[3] = {nil, nil, nil};
+#endif
 	id<MTLComputePipelineState> _mtlCopyBufferToImage3DDecompressComputePipelineState[2] = {nil, nil};
 	id<MTLComputePipelineState> _mtlDrawIndirectMultiviewConvertBuffersComputePipelineState[2] = {nil, nil};
 	id<MTLComputePipelineState> _mtlDrawIndirectTessConvertBuffersComputePipelineState[2] = {nil, nil};
diff --git a/MoltenVK/MoltenVK/Commands/MVKCommandEncodingPool.mm b/MoltenVK/MoltenVK/Commands/MVKCommandEncodingPool.mm
index da0e661..75c8c48 100644
--- a/MoltenVK/MoltenVK/Commands/MVKCommandEncodingPool.mm
+++ b/MoltenVK/MoltenVK/Commands/MVKCommandEncodingPool.mm
@@ -102,6 +102,30 @@
 	MVK_ENC_REZ_ACCESS(_mtlFillBufferComputePipelineState, newCmdFillBufferMTLComputePipelineState(_commandPool));
 }
 
+#if MVK_MACOS
+static inline uint32_t getClearStateIndex(MVKFormatType type) {
+	switch (type) {
+		case kMVKFormatColorHalf:
+		case kMVKFormatColorFloat:
+			return 0;
+		case kMVKFormatColorInt8:
+		case kMVKFormatColorInt16:
+		case kMVKFormatColorInt32:
+			return 1;
+		case kMVKFormatColorUInt8:
+		case kMVKFormatColorUInt16:
+		case kMVKFormatColorUInt32:
+			return 2;
+		default:
+			return 0;
+	}
+}
+
+id<MTLComputePipelineState> MVKCommandEncodingPool::getCmdClearColorImageMTLComputePipelineState(MVKFormatType type) {
+	MVK_ENC_REZ_ACCESS(_mtlClearColorImageComputePipelineState[getClearStateIndex(type)], newCmdClearColorImageMTLComputePipelineState(type, _commandPool));
+}
+#endif
+
 id<MTLComputePipelineState> MVKCommandEncodingPool::getCmdCopyBufferToImage3DDecompressMTLComputePipelineState(bool needsTempBuff) {
 	MVK_ENC_REZ_ACCESS(_mtlCopyBufferToImage3DDecompressComputePipelineState[needsTempBuff ? 1 : 0], newCmdCopyBufferToImage3DDecompressMTLComputePipelineState(needsTempBuff, _commandPool));
 }
@@ -178,6 +202,13 @@
     [_mtlFillBufferComputePipelineState release];
     _mtlFillBufferComputePipelineState = nil;
 
+    [_mtlClearColorImageComputePipelineState[0] release];
+    [_mtlClearColorImageComputePipelineState[1] release];
+    [_mtlClearColorImageComputePipelineState[2] release];
+    _mtlClearColorImageComputePipelineState[0] = nil;
+    _mtlClearColorImageComputePipelineState[1] = nil;
+    _mtlClearColorImageComputePipelineState[2] = nil;
+
     [_mtlCopyBufferToImage3DDecompressComputePipelineState[0] release];
     [_mtlCopyBufferToImage3DDecompressComputePipelineState[1] release];
     _mtlCopyBufferToImage3DDecompressComputePipelineState[0] = nil;
diff --git a/MoltenVK/MoltenVK/Commands/MVKCommandPipelineStateFactoryShaderSource.h b/MoltenVK/MoltenVK/Commands/MVKCommandPipelineStateFactoryShaderSource.h
index 124f6d9..68e60d7 100644
--- a/MoltenVK/MoltenVK/Commands/MVKCommandPipelineStateFactoryShaderSource.h
+++ b/MoltenVK/MoltenVK/Commands/MVKCommandPipelineStateFactoryShaderSource.h
@@ -91,13 +91,31 @@
     for (size_t i = 0; i < info.size; i++) {                                                                    \n\
         dst[i + info.dstOffset] = src[i + info.srcOffset];                                                      \n\
     }                                                                                                           \n\
-};                                                                                                              \n\
+}                                                                                                               \n\
                                                                                                                 \n\
 kernel void cmdFillBuffer(device uint32_t* dst [[ buffer(0) ]],                                                 \n\
                           constant uint32_t& fillValue [[ buffer(1) ]],                                         \n\
                           uint pos [[thread_position_in_grid]]) {                                               \n\
     dst[pos] = fillValue;                                                                                       \n\
-};                                                                                                              \n\
+}                                                                                                               \n\
+                                                                                                                \n\
+kernel void cmdClearColorImage2DFloat(texture2d<float, access::write> dst [[ texture(0) ]],                     \n\
+                                      constant float4& clearValue [[ buffer(0) ]],                              \n\
+                                      uint2 pos [[thread_position_in_grid]]) {                                  \n\
+    dst.write(clearValue, pos);                                                                                 \n\
+}                                                                                                               \n\
+                                                                                                                \n\
+kernel void cmdClearColorImage2DUInt(texture2d<uint, access::write> dst [[ texture(0) ]],                       \n\
+                                     constant uint4& clearValue [[ buffer(0) ]],                                \n\
+                                     uint2 pos [[thread_position_in_grid]]) {                                   \n\
+    dst.write(clearValue, pos);                                                                                 \n\
+}                                                                                                               \n\
+                                                                                                                \n\
+kernel void cmdClearColorImage2DInt(texture2d<int, access::write> dst [[ texture(0) ]],                         \n\
+                                    constant int4& clearValue [[ buffer(0) ]],                                  \n\
+                                    uint2 pos [[thread_position_in_grid]]) {                                    \n\
+    dst.write(clearValue, pos);                                                                                 \n\
+}                                                                                                               \n\
                                                                                                                 \n\
 typedef struct {                                                                                                \n\
     uint32_t srcRowStride;                                                                                      \n\
diff --git a/MoltenVK/MoltenVK/Commands/MVKCommandResourceFactory.h b/MoltenVK/MoltenVK/Commands/MVKCommandResourceFactory.h
index 25327ac..7006fe4 100644
--- a/MoltenVK/MoltenVK/Commands/MVKCommandResourceFactory.h
+++ b/MoltenVK/MoltenVK/Commands/MVKCommandResourceFactory.h
@@ -417,6 +417,12 @@
 	/** Returns a new MTLComputePipelineState for filling a buffer. */
 	id<MTLComputePipelineState> newCmdFillBufferMTLComputePipelineState(MVKVulkanAPIDeviceObject* owner);
 
+#if MVK_MACOS
+	/** Returns a new MTLComputePipelineState for clearing an image. */
+	id<MTLComputePipelineState> newCmdClearColorImageMTLComputePipelineState(MVKFormatType type,
+																			 MVKVulkanAPIDeviceObject* owner);
+#endif
+
 	/** Returns a new MTLComputePipelineState for copying between a buffer holding compressed data and a 3D image. */
 	id<MTLComputePipelineState> newCmdCopyBufferToImage3DDecompressMTLComputePipelineState(bool needTempBuf,
 																						   MVKVulkanAPIDeviceObject* owner);
diff --git a/MoltenVK/MoltenVK/Commands/MVKCommandResourceFactory.mm b/MoltenVK/MoltenVK/Commands/MVKCommandResourceFactory.mm
index a616a64..751fb18 100644
--- a/MoltenVK/MoltenVK/Commands/MVKCommandResourceFactory.mm
+++ b/MoltenVK/MoltenVK/Commands/MVKCommandResourceFactory.mm
@@ -410,6 +410,34 @@
 	return newMTLComputePipelineState("cmdFillBuffer", owner);
 }
 
+#if MVK_MACOS
+id<MTLComputePipelineState> MVKCommandResourceFactory::newCmdClearColorImageMTLComputePipelineState(MVKFormatType type,
+					MVKVulkanAPIDeviceObject* owner) {
+	const char* funcName;
+	switch (type) {
+		case kMVKFormatColorHalf:
+		case kMVKFormatColorFloat:
+			funcName = "cmdClearColorImage2DFloat";
+			break;
+		case kMVKFormatColorInt8:
+		case kMVKFormatColorInt16:
+		case kMVKFormatColorInt32:
+			funcName = "cmdClearColorImage2DInt";
+			break;
+		case kMVKFormatColorUInt8:
+		case kMVKFormatColorUInt16:
+		case kMVKFormatColorUInt32:
+			funcName = "cmdClearColorImage2DUInt";
+			break;
+		default:
+			owner->reportError(VK_ERROR_FORMAT_NOT_SUPPORTED,
+							   "Format type %u is not supported for clearing with a compute shader.", type);
+			return nil;
+	}
+	return newMTLComputePipelineState(funcName, owner);
+}
+#endif
+
 id<MTLComputePipelineState> MVKCommandResourceFactory::newCmdCopyBufferToImage3DDecompressMTLComputePipelineState(bool needTempBuf,
 																												  MVKVulkanAPIDeviceObject* owner) {
 	return newMTLComputePipelineState(needTempBuf
diff --git a/MoltenVK/MoltenVK/GPUObjects/MVKPixelFormats.mm b/MoltenVK/MoltenVK/GPUObjects/MVKPixelFormats.mm
index 65caf00..9fb584e 100644
--- a/MoltenVK/MoltenVK/GPUObjects/MVKPixelFormats.mm
+++ b/MoltenVK/MoltenVK/GPUObjects/MVKPixelFormats.mm
@@ -477,6 +477,14 @@
 
 		mvkEnableFlags(mtlUsage, MTLTextureUsageShaderWrite);
 	}
+#if MVK_MACOS
+    // Clearing a linear image may use shader writes.
+    if (mvkIsAnyFlagEnabled(vkImageUsageFlags, VK_IMAGE_USAGE_TRANSFER_DST_BIT) &&
+        mvkIsAnyFlagEnabled(mtlFmtCaps, kMVKMTLFmtCapsWrite) && isLinear) {
+
+		mvkEnableFlags(mtlUsage, MTLTextureUsageShaderWrite);
+    }
+#endif
 
 	// Render to but only if format supports rendering...
 	if (mvkIsAnyFlagEnabled(vkImageUsageFlags, (VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT |