Merge pull request #785 from billhollings/master
Fix tessellation break when control stage declares but does not use position builtin.
diff --git a/Docs/Whats_New.md b/Docs/Whats_New.md
index a7298a3..071c359 100644
--- a/Docs/Whats_New.md
+++ b/Docs/Whats_New.md
@@ -19,11 +19,14 @@
Released TBD
+- Add support for extensions:
+ - `VK_EXT_inline_uniform_block`
- Support linear filtering when using `vkCmdBlitImage()`.
- Clamp image copy extents to image extent.
- Fix crash in `fetchDependencies` on build paths containing spaces.
- Fix image subresource sizing calculations for heap-based textures.
- Fix `MTLHeap` memory leak in `MVKDeviceMemory`.
+- Fix tessellation break when control stage declares but does not use position builtin.
- Support *Xcode 11.2*.
diff --git a/ExternalRevisions/SPIRV-Cross_repo_revision b/ExternalRevisions/SPIRV-Cross_repo_revision
index 7ab941b..57d5a9f 100644
--- a/ExternalRevisions/SPIRV-Cross_repo_revision
+++ b/ExternalRevisions/SPIRV-Cross_repo_revision
@@ -1 +1 @@
-b10f5d48c959c5600c5e103a6b955b6b1b59cdff
+96a276c2ca86e81530313786e99e2b95623898ca
diff --git a/MoltenVK/MoltenVK/Commands/MVKCmdDraw.mm b/MoltenVK/MoltenVK/Commands/MVKCmdDraw.mm
index 87e1128..a1cb774 100644
--- a/MoltenVK/MoltenVK/Commands/MVKCmdDraw.mm
+++ b/MoltenVK/MoltenVK/Commands/MVKCmdDraw.mm
@@ -110,7 +110,7 @@
if (pipeline->isTessellationPipeline()) {
inControlPointCount = pipeline->getInputControlPointCount();
outControlPointCount = pipeline->getOutputControlPointCount();
- patchCount = (uint32_t)mvkCeilingDivide(_vertexCount, inControlPointCount);
+ patchCount = mvkCeilingDivide(_vertexCount, inControlPointCount);
}
for (uint32_t s : stages) {
auto stage = MVKGraphicsStage(s);
@@ -308,7 +308,7 @@
if (pipeline->isTessellationPipeline()) {
inControlPointCount = pipeline->getInputControlPointCount();
outControlPointCount = pipeline->getOutputControlPointCount();
- patchCount = (uint32_t)mvkCeilingDivide(_indexCount, inControlPointCount);
+ patchCount = mvkCeilingDivide(_indexCount, inControlPointCount);
}
for (uint32_t s : stages) {
auto stage = MVKGraphicsStage(s);
@@ -544,7 +544,7 @@
inControlPointCount = pipeline->getInputControlPointCount();
outControlPointCount = pipeline->getOutputControlPointCount();
vertexCount = kMVKDrawIndirectVertexCountUpperBound;
- patchCount = (uint32_t)mvkCeilingDivide(vertexCount, inControlPointCount);
+ patchCount = mvkCeilingDivide(vertexCount, inControlPointCount);
VkDeviceSize indirectSize = (sizeof(MTLDispatchThreadgroupsIndirectArguments) + sizeof(MTLDrawPatchIndirectArguments)) * _drawCount;
if (cmdEncoder->_pDeviceMetalFeatures->mslVersion >= 20100) {
indirectSize += sizeof(MTLStageInRegionIndirectArguments) * _drawCount;
@@ -614,7 +614,7 @@
&_drawCount,
sizeof(_drawCount),
5);
- [mtlTessCtlEncoder dispatchThreadgroups: MTLSizeMake(mvkCeilingDivide(_drawCount, mtlConvertState.threadExecutionWidth), 1, 1)
+ [mtlTessCtlEncoder dispatchThreadgroups: MTLSizeMake(mvkCeilingDivide<NSUInteger>(_drawCount, mtlConvertState.threadExecutionWidth), 1, 1)
threadsPerThreadgroup: MTLSizeMake(mtlConvertState.threadExecutionWidth, 1, 1)];
}
@@ -783,7 +783,7 @@
inControlPointCount = pipeline->getInputControlPointCount();
outControlPointCount = pipeline->getOutputControlPointCount();
vertexCount = kMVKDrawIndirectVertexCountUpperBound;
- patchCount = (uint32_t)mvkCeilingDivide(vertexCount, inControlPointCount);
+ patchCount = mvkCeilingDivide(vertexCount, inControlPointCount);
VkDeviceSize indirectSize = (sizeof(MTLDispatchThreadgroupsIndirectArguments) + sizeof(MTLDrawPatchIndirectArguments)) * _drawCount;
if (cmdEncoder->_pDeviceMetalFeatures->mslVersion >= 20100) {
indirectSize += sizeof(MTLStageInRegionIndirectArguments) * _drawCount;
@@ -842,7 +842,7 @@
&_drawCount,
sizeof(_drawCount),
5);
- [mtlTessCtlEncoder dispatchThreadgroups: MTLSizeMake(mvkCeilingDivide(_drawCount, mtlConvertState.threadExecutionWidth), 1, 1)
+ [mtlTessCtlEncoder dispatchThreadgroups: MTLSizeMake(mvkCeilingDivide<NSUInteger>(_drawCount, mtlConvertState.threadExecutionWidth), 1, 1)
threadsPerThreadgroup: MTLSizeMake(mtlConvertState.threadExecutionWidth, 1, 1)];
}
// We actually need to make a copy of the index buffer, regardless of whether
diff --git a/MoltenVK/MoltenVK/Commands/MVKCmdTransfer.mm b/MoltenVK/MoltenVK/Commands/MVKCmdTransfer.mm
index e15467f..82ced58 100644
--- a/MoltenVK/MoltenVK/Commands/MVKCmdTransfer.mm
+++ b/MoltenVK/MoltenVK/Commands/MVKCmdTransfer.mm
@@ -825,15 +825,15 @@
// One thread is run per block. Each block decompresses to an m x n array of texels.
// So the size of the grid is (ceil(width/m), ceil(height/n), depth).
VkExtent2D blockExtent = mvkMTLPixelFormatBlockTexelSize(mtlPixFmt);
- MTLSize mtlGridSize = MTLSizeMake(mvkCeilingDivide(mtlTxtSize.width, blockExtent.width),
- mvkCeilingDivide(mtlTxtSize.height, blockExtent.height),
+ MTLSize mtlGridSize = MTLSizeMake(mvkCeilingDivide<NSUInteger>(mtlTxtSize.width, blockExtent.width),
+ mvkCeilingDivide<NSUInteger>(mtlTxtSize.height, blockExtent.height),
mtlTxtSize.depth);
// Use four times the thread execution width as the threadgroup size.
MTLSize mtlTgrpSize = MTLSizeMake(2, 2, mtlComputeState.threadExecutionWidth);
// Then the number of threadgroups is (ceil(x/2), ceil(y/2), ceil(z/t)),
// where 't' is the thread execution width.
- mtlGridSize.width = mvkCeilingDivide(mtlGridSize.width, 2);
- mtlGridSize.height = mvkCeilingDivide(mtlGridSize.height, 2);
+ mtlGridSize.width = mvkCeilingDivide(mtlGridSize.width, mtlTgrpSize.width);
+ mtlGridSize.height = mvkCeilingDivide(mtlGridSize.height, mtlTgrpSize.height);
mtlGridSize.depth = mvkCeilingDivide(mtlGridSize.depth, mtlTgrpSize.depth);
// There may be extra threads, but that's OK; the shader does bounds checking to
// ensure it doesn't try to write out of bounds.
diff --git a/MoltenVK/MoltenVK/GPUObjects/MVKPipeline.mm b/MoltenVK/MoltenVK/GPUObjects/MVKPipeline.mm
index 48a3f6e..5dfa27f 100644
--- a/MoltenVK/MoltenVK/GPUObjects/MVKPipeline.mm
+++ b/MoltenVK/MoltenVK/GPUObjects/MVKPipeline.mm
@@ -507,9 +507,10 @@
}
static uint32_t sizeOfOutput(const SPIRVShaderOutput& output) {
+ if ( !output.isUsed ) { return 0; } // Unused outputs consume no buffer space.
+
uint32_t vecWidth = output.vecWidth;
- // Round up to 4 elements for 3-vectors, since that reflects how Metal lays them out.
- if (vecWidth == 3) { vecWidth = 4; }
+ if (vecWidth == 3) { vecWidth = 4; } // Metal 3-vectors consume same as 4-vectors.
switch (output.baseType) {
case SPIRType::SByte:
case SPIRType::UByte:
diff --git a/MoltenVK/MoltenVK/Utility/MVKFoundation.h b/MoltenVK/MoltenVK/Utility/MVKFoundation.h
index 0fddedd..4481344 100644
--- a/MoltenVK/MoltenVK/Utility/MVKFoundation.h
+++ b/MoltenVK/MoltenVK/Utility/MVKFoundation.h
@@ -141,12 +141,6 @@
#pragma mark -
#pragma mark Alignment functions
-/** Returns the result of an unsigned integer division, rounded up. */
-static inline size_t mvkCeilingDivide(size_t numerator, size_t denominator) {
- if (denominator == 1) { return numerator; } // Short circuit for this very common usecase.
- return (numerator + denominator - 1) / denominator;
-}
-
/** Returns whether the specified value is a power-of-two. */
static inline bool mvkIsPowerOfTwo(uintptr_t value) {
// Test POT: (x != 0) && ((x & (x - 1)) == 0)
@@ -348,6 +342,13 @@
return std::min(std::max(val, lower), upper);
}
+/** Returns the result of a division, rounded up. */
+template<typename T>
+T mvkCeilingDivide(T numerator, T denominator) {
+ // Short circuit very common usecase of dividing by one.
+ return (denominator == 1) ? numerator : (numerator + denominator - 1) / denominator;
+}
+
/**
* Returns a hash value calculated from the specified array of numeric elements,
* using the DJB2a algorithm: hash = (hash * 33) ^ value.
diff --git a/MoltenVKShaderConverter/MoltenVKSPIRVToMSLConverter/SPIRVReflection.cpp b/MoltenVKShaderConverter/MoltenVKSPIRVToMSLConverter/SPIRVReflection.cpp
index d6eefce..4a58885 100644
--- a/MoltenVKShaderConverter/MoltenVKSPIRVToMSLConverter/SPIRVReflection.cpp
+++ b/MoltenVKShaderConverter/MoltenVKSPIRVToMSLConverter/SPIRVReflection.cpp
@@ -129,6 +129,7 @@
reflect.set_entry_point(entryName, model);
}
reflect.compile();
+ reflect.update_active_builtins();
outputs.clear();
@@ -136,11 +137,13 @@
parser.get_parsed_ir().for_each_typed_id<SPIRV_CROSS_NAMESPACE::SPIRVariable>([&reflect, &outputs, model, addSat](uint32_t varID, const SPIRV_CROSS_NAMESPACE::SPIRVariable& var) {
if (var.storage != spv::StorageClassOutput) { return; }
+ bool isUsed = true;
const auto* type = &reflect.get_type(reflect.get_type_from_variable(varID).parent_type);
bool patch = reflect.has_decoration(varID, spv::DecorationPatch);
auto biType = spv::BuiltInMax;
if (reflect.has_decoration(varID, spv::DecorationBuiltIn)) {
biType = (spv::BuiltIn)reflect.get_decoration(varID, spv::DecorationBuiltIn);
+ isUsed = reflect.has_active_builtin(biType, var.storage);
}
uint32_t loc = -1;
if (reflect.has_decoration(varID, spv::DecorationLocation)) {
@@ -160,30 +163,31 @@
patch = reflect.has_member_decoration(type->self, i, spv::DecorationPatch);
if (reflect.has_member_decoration(type->self, i, spv::DecorationBuiltIn)) {
biType = (spv::BuiltIn)reflect.get_member_decoration(type->self, i, spv::DecorationBuiltIn);
+ isUsed = reflect.has_active_builtin(biType, var.storage);
}
const SPIRV_CROSS_NAMESPACE::SPIRType& memberType = reflect.get_type(type->member_types[i]);
if (memberType.columns > 1) {
for (uint32_t i = 0; i < memberType.columns; i++) {
- outputs.push_back({memberType.basetype, memberType.vecsize, addSat(memberLoc, i), patch, biType});
+ outputs.push_back({memberType.basetype, memberType.vecsize, addSat(memberLoc, i), biType, patch, isUsed});
}
} else if (!memberType.array.empty()) {
for (uint32_t i = 0; i < memberType.array[0]; i++) {
- outputs.push_back({memberType.basetype, memberType.vecsize, addSat(memberLoc, i), patch, biType});
+ outputs.push_back({memberType.basetype, memberType.vecsize, addSat(memberLoc, i), biType, patch, isUsed});
}
} else {
- outputs.push_back({memberType.basetype, memberType.vecsize, memberLoc, patch, biType});
+ outputs.push_back({memberType.basetype, memberType.vecsize, memberLoc, biType, patch, isUsed});
}
}
} else if (type->columns > 1) {
for (uint32_t i = 0; i < type->columns; i++) {
- outputs.push_back({type->basetype, type->vecsize, addSat(loc, i), patch, biType});
+ outputs.push_back({type->basetype, type->vecsize, addSat(loc, i), biType, patch, isUsed});
}
} else if (!type->array.empty()) {
for (uint32_t i = 0; i < type->array[0]; i++) {
- outputs.push_back({type->basetype, type->vecsize, addSat(loc, i), patch, biType});
+ outputs.push_back({type->basetype, type->vecsize, addSat(loc, i), biType, patch, isUsed});
}
} else {
- outputs.push_back({type->basetype, type->vecsize, loc, patch, biType});
+ outputs.push_back({type->basetype, type->vecsize, loc, biType, patch, isUsed});
}
});
// Sort outputs by ascending location.
diff --git a/MoltenVKShaderConverter/MoltenVKSPIRVToMSLConverter/SPIRVReflection.h b/MoltenVKShaderConverter/MoltenVKSPIRVToMSLConverter/SPIRVReflection.h
index f8867c6..cb85311 100644
--- a/MoltenVKShaderConverter/MoltenVKSPIRVToMSLConverter/SPIRVReflection.h
+++ b/MoltenVKShaderConverter/MoltenVKSPIRVToMSLConverter/SPIRVReflection.h
@@ -61,11 +61,14 @@
/** The location number of the output. */
uint32_t location;
+ /** If this is a builtin, the kind of builtin this is. */
+ spv::BuiltIn builtin;
+
/** Whether this is a per-patch or per-vertex output. Only meaningful for tessellation control shaders. */
bool perPatch;
- /** If this is a builtin, the kind of builtin this is. */
- spv::BuiltIn builtin;
+ /** Whether this output is actually used (populated) by the shader. */
+ bool isUsed;
};
#pragma mark -