Reduce memory footprint of retained MSL source code.
- Add MVKCompressor template class, and mvkCompress() & mvkDecompress()
functions to support general data compression.
- Add MVKConfiguration::shaderSourceCompressionAlgorithm and
env var MVK_CONFIG_SHADER_COMPRESSION_ALGORITHM to support
compressing MSL shader source code held in a pipeline cache.
- Add MVKShaderCompilationPerformance::mslCompress and mslDecompress
to allow performance of MSL compression to be tracked and queried.
- Add support for logging performance stats accumulated in a VkDevice,
when it is destroyed. Good for CTS testing.
- Change MVKConfiguration::logActivityPerformanceInline boolean to
activityPerformanceLoggingStyle enumeration value.
- Add MVK_CONFIG_ACTIVITY_PERFORMANCE_LOGGING_STYLE environment variable and
build setting to set MVKConfiguration::activityPerformanceLoggingStyle value.
diff --git a/Docs/Whats_New.md b/Docs/Whats_New.md
index 7591865..4c0d117 100644
--- a/Docs/Whats_New.md
+++ b/Docs/Whats_New.md
@@ -32,8 +32,17 @@
- Change rounding of surface size provided by Metal from truncation to rounding-with-half-to-even.
- Queue submissions retain wait semaphores until `MTLCommandBuffer` finishes.
- Use a different visibility buffer for each `MTLCommandBuffer` in a queue submit.
-- Reduce memory footprint of retained MSL source code.
- Work around problems with using explicit LoD with arrayed depth images on Apple Silicon.
+- Reduce memory footprint of MSL source code retained in pipeline cache.
+- Add `MVKConfiguration::shaderSourceCompressionAlgorithm` and
+ env var `MVK_CONFIG_SHADER_COMPRESSION_ALGORITHM` to support
+ compressing MSL shader source code held in a pipeline cache.
+- Add `MVKShaderCompilationPerformance::mslCompress` and `mslDecompress`
+ to allow performance of MSL compression to be tracked and queried.
+- Add support for logging performance stats accumulated in a `VkDevice` when it is destroyed.
+- Change `MVKConfiguration::logActivityPerformanceInline` boolean to `activityPerformanceLoggingStyle` enumeration value.
+- Add `MVK_CONFIG_ACTIVITY_PERFORMANCE_LOGGING_STYLE` environment variable and
+ build setting to set `MVKConfiguration::activityPerformanceLoggingStyle` value.
- Update `VK_MVK_MOLTENVK_SPEC_VERSION` to version `37`.
diff --git a/MoltenVK/MoltenVK.xcodeproj/project.pbxproj b/MoltenVK/MoltenVK.xcodeproj/project.pbxproj
index 8878391..2c4b87e 100644
--- a/MoltenVK/MoltenVK.xcodeproj/project.pbxproj
+++ b/MoltenVK/MoltenVK.xcodeproj/project.pbxproj
@@ -112,7 +112,7 @@
2FEA0AAA24902F9F00EEF3AD /* MVKOSExtensions.mm in Sources */ = {isa = PBXBuildFile; fileRef = A9B51BD2225E986A00AC74D2 /* MVKOSExtensions.mm */; };
2FEA0AAB24902F9F00EEF3AD /* MVKShaderModule.mm in Sources */ = {isa = PBXBuildFile; fileRef = A94FB7981C7DFB4800632CA3 /* MVKShaderModule.mm */; };
2FEA0AAC24902F9F00EEF3AD /* MVKSync.mm in Sources */ = {isa = PBXBuildFile; fileRef = A94FB79E1C7DFB4800632CA3 /* MVKSync.mm */; };
- 2FEA0AAD24902F9F00EEF3AD /* MVKCodec.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 45557A4D21C9EFF3008868BD /* MVKCodec.cpp */; };
+ 2FEA0AAD24902F9F00EEF3AD /* MVKCodec.mm in Sources */ = {isa = PBXBuildFile; fileRef = 45557A4D21C9EFF3008868BD /* MVKCodec.mm */; };
2FEA0AAE24902F9F00EEF3AD /* MVKCmdPipeline.mm in Sources */ = {isa = PBXBuildFile; fileRef = A94FB76F1C7DFB4800632CA3 /* MVKCmdPipeline.mm */; };
2FEA0AAF24902F9F00EEF3AD /* MVKLayers.mm in Sources */ = {isa = PBXBuildFile; fileRef = A94FB7A11C7DFB4800632CA3 /* MVKLayers.mm */; };
2FEA0AB024902F9F00EEF3AD /* MVKFramebuffer.mm in Sources */ = {isa = PBXBuildFile; fileRef = A94FB7881C7DFB4800632CA3 /* MVKFramebuffer.mm */; };
@@ -136,8 +136,8 @@
4553AEFC2251617100E8EBCD /* MVKBlockObserver.m in Sources */ = {isa = PBXBuildFile; fileRef = 4553AEF62251617100E8EBCD /* MVKBlockObserver.m */; };
4553AEFD2251617100E8EBCD /* MVKBlockObserver.h in Headers */ = {isa = PBXBuildFile; fileRef = 4553AEFA2251617100E8EBCD /* MVKBlockObserver.h */; };
4553AEFE2251617100E8EBCD /* MVKBlockObserver.h in Headers */ = {isa = PBXBuildFile; fileRef = 4553AEFA2251617100E8EBCD /* MVKBlockObserver.h */; };
- 45557A5221C9EFF3008868BD /* MVKCodec.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 45557A4D21C9EFF3008868BD /* MVKCodec.cpp */; };
- 45557A5321C9EFF3008868BD /* MVKCodec.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 45557A4D21C9EFF3008868BD /* MVKCodec.cpp */; };
+ 45557A5221C9EFF3008868BD /* MVKCodec.mm in Sources */ = {isa = PBXBuildFile; fileRef = 45557A4D21C9EFF3008868BD /* MVKCodec.mm */; };
+ 45557A5321C9EFF3008868BD /* MVKCodec.mm in Sources */ = {isa = PBXBuildFile; fileRef = 45557A4D21C9EFF3008868BD /* MVKCodec.mm */; };
45557A5421C9EFF3008868BD /* MVKCodec.h in Headers */ = {isa = PBXBuildFile; fileRef = 45557A5121C9EFF3008868BD /* MVKCodec.h */; };
45557A5521C9EFF3008868BD /* MVKCodec.h in Headers */ = {isa = PBXBuildFile; fileRef = 45557A5121C9EFF3008868BD /* MVKCodec.h */; };
A9096E5E1F81E16300DFBEA6 /* MVKCmdDispatch.mm in Sources */ = {isa = PBXBuildFile; fileRef = A9096E5D1F81E16300DFBEA6 /* MVKCmdDispatch.mm */; };
@@ -428,7 +428,7 @@
453638312508A4C7000EFFD3 /* MTLRenderPassDepthAttachmentDescriptor+MoltenVK.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = "MTLRenderPassDepthAttachmentDescriptor+MoltenVK.h"; sourceTree = "<group>"; };
4553AEF62251617100E8EBCD /* MVKBlockObserver.m */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.objc; path = MVKBlockObserver.m; sourceTree = "<group>"; };
4553AEFA2251617100E8EBCD /* MVKBlockObserver.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = MVKBlockObserver.h; sourceTree = "<group>"; };
- 45557A4D21C9EFF3008868BD /* MVKCodec.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = MVKCodec.cpp; sourceTree = "<group>"; };
+ 45557A4D21C9EFF3008868BD /* MVKCodec.mm */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.objcpp; path = MVKCodec.mm; sourceTree = "<group>"; };
45557A5121C9EFF3008868BD /* MVKCodec.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = MVKCodec.h; sourceTree = "<group>"; };
45557A5721CD83C3008868BD /* MVKDXTnCodec.def */ = {isa = PBXFileReference; explicitFileType = sourcecode.cpp.h; fileEncoding = 4; path = MVKDXTnCodec.def; sourceTree = "<group>"; };
A9096E5C1F81E16300DFBEA6 /* MVKCmdDispatch.h */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.h; path = MVKCmdDispatch.h; sourceTree = "<group>"; };
@@ -691,8 +691,8 @@
A9D7104E25CDE05E00E38106 /* MVKBitArray.h */,
4553AEFA2251617100E8EBCD /* MVKBlockObserver.h */,
4553AEF62251617100E8EBCD /* MVKBlockObserver.m */,
- 45557A4D21C9EFF3008868BD /* MVKCodec.cpp */,
45557A5121C9EFF3008868BD /* MVKCodec.h */,
+ 45557A4D21C9EFF3008868BD /* MVKCodec.mm */,
45557A5721CD83C3008868BD /* MVKDXTnCodec.def */,
A9A5E9C525C0822700E9085E /* MVKEnvironment.cpp */,
A98149431FB6A3F7005F00B4 /* MVKEnvironment.h */,
@@ -1368,7 +1368,7 @@
2FEA0AAA24902F9F00EEF3AD /* MVKOSExtensions.mm in Sources */,
2FEA0AAB24902F9F00EEF3AD /* MVKShaderModule.mm in Sources */,
2FEA0AAC24902F9F00EEF3AD /* MVKSync.mm in Sources */,
- 2FEA0AAD24902F9F00EEF3AD /* MVKCodec.cpp in Sources */,
+ 2FEA0AAD24902F9F00EEF3AD /* MVKCodec.mm in Sources */,
2FEA0AAE24902F9F00EEF3AD /* MVKCmdPipeline.mm in Sources */,
2FEA0AAF24902F9F00EEF3AD /* MVKLayers.mm in Sources */,
2FEA0AB024902F9F00EEF3AD /* MVKFramebuffer.mm in Sources */,
@@ -1427,7 +1427,7 @@
A9B51BD7225E986A00AC74D2 /* MVKOSExtensions.mm in Sources */,
A94FB80E1C7DFB4800632CA3 /* MVKShaderModule.mm in Sources */,
A94FB81A1C7DFB4800632CA3 /* MVKSync.mm in Sources */,
- 45557A5221C9EFF3008868BD /* MVKCodec.cpp in Sources */,
+ 45557A5221C9EFF3008868BD /* MVKCodec.mm in Sources */,
A94FB7BE1C7DFB4800632CA3 /* MVKCmdPipeline.mm in Sources */,
A94FB81E1C7DFB4800632CA3 /* MVKLayers.mm in Sources */,
A94FB7EE1C7DFB4800632CA3 /* MVKFramebuffer.mm in Sources */,
@@ -1487,7 +1487,7 @@
A9B51BD8225E986A00AC74D2 /* MVKOSExtensions.mm in Sources */,
A94FB80F1C7DFB4800632CA3 /* MVKShaderModule.mm in Sources */,
A94FB81B1C7DFB4800632CA3 /* MVKSync.mm in Sources */,
- 45557A5321C9EFF3008868BD /* MVKCodec.cpp in Sources */,
+ 45557A5321C9EFF3008868BD /* MVKCodec.mm in Sources */,
A94FB7BF1C7DFB4800632CA3 /* MVKCmdPipeline.mm in Sources */,
A94FB81F1C7DFB4800632CA3 /* MVKLayers.mm in Sources */,
A94FB7EF1C7DFB4800632CA3 /* MVKFramebuffer.mm in Sources */,
diff --git a/MoltenVK/MoltenVK/API/vk_mvk_moltenvk.h b/MoltenVK/MoltenVK/API/vk_mvk_moltenvk.h
index a6b4803..7377f05 100644
--- a/MoltenVK/MoltenVK/API/vk_mvk_moltenvk.h
+++ b/MoltenVK/MoltenVK/API/vk_mvk_moltenvk.h
@@ -130,6 +130,24 @@
MVK_CONFIG_FAST_MATH_MAX_ENUM = 0x7FFFFFFF
} MVKConfigFastMath;
+/** Identifies available system data compression algorithms. */
+typedef enum MVKConfigCompressionAlgorithm {
+ MVK_CONFIG_COMPRESSION_ALGORITHM_NONE = 0, /**< No compression. */
+ MVK_CONFIG_COMPRESSION_ALGORITHM_LZFSE = 1, /**< Apple proprietary. Good balance of high performance and small compression size, particularly for larger data content. */
+ MVK_CONFIG_COMPRESSION_ALGORITHM_ZLIB = 2, /**< Open cross-platform ZLib format. For smaller data content, has better performance and smaller size than LZFSE. */
+ MVK_CONFIG_COMPRESSION_ALGORITHM_LZ4 = 3, /**< Fastest performance. Largest compression size. */
+ MVK_CONFIG_COMPRESSION_ALGORITHM_LZMA = 4, /**< Slowest performance. Smallest compression size, particularly with larger content. */
+ MVK_CONFIG_COMPRESSION_ALGORITHM_MAX_ENUM = 0x7FFFFFFF,
+} MVKConfigCompressionAlgorithm;
+
+/** Identifies the style of activity performance logging to use. */
+typedef enum MVKConfigActivityPerformanceLoggingStyle {
+ MVK_CONFIG_ACTIVITY_PERFORMANCE_LOGGING_STYLE_FRAME_COUNT = 0, /**< Repeatedly log performance after a configured number of frames. */
+ MVK_CONFIG_ACTIVITY_PERFORMANCE_LOGGING_STYLE_IMMEDIATE = 1, /**< Log immediately after each performance measurement. */
+ MVK_CONFIG_ACTIVITY_PERFORMANCE_LOGGING_STYLE_DEVICE_LIFETIME = 2, /**< Log at the end of the VkDevice lifetime. This is useful for one-shot apps such as testing frameworks. */
+ MVK_CONFIG_ACTIVITY_PERFORMANCE_LOGGING_STYLE_MAX_ENUM = 0x7FFFFFFF,
+} MVKConfigActivityPerformanceLoggingStyle;
+
/**
* MoltenVK configuration settings.
*
@@ -361,8 +379,8 @@
* If enabled, performance statistics, as defined by the MVKPerformanceStatistics structure,
* are collected, and can be retrieved via the vkGetPerformanceStatisticsMVK() function.
*
- * You can also use the performanceLoggingFrameCount or logActivityPerformanceInline
- * parameters to automatically log the performance statistics collected by this parameter.
+ * You can also use the activityPerformanceLoggingStyle and performanceLoggingFrameCount
+ * parameters to configure when to log the performance statistics collected by this parameter.
*
* The value of this parameter must be changed before creating a VkDevice,
* for the change to take effect.
@@ -770,21 +788,20 @@
VkBool32 useMTLHeap;
/**
- * Controls whether MoltenVK should log the performance of individual activities as they happen.
- * If this setting is enabled, activity performance will be logged when each activity happens.
- * If this setting is disabled, activity performance will be logged when frame peformance is
- * logged as determined by the performanceLoggingFrameCount value.
+ * Controls when MoltenVK should log activity performance events.
*
* The value of this parameter must be changed before creating a VkDevice,
* for the change to take effect.
*
* The initial value or this parameter is set by the
- * MVK_CONFIG_PERFORMANCE_LOGGING_INLINE
+ * MVK_CONFIG_ACTIVITY_PERFORMANCE_LOGGING_STYLE
* runtime environment variable or MoltenVK compile-time build setting.
- * If neither is set, this setting is disabled by default, and activity
- * performance will be logged only when frame activity is logged.
+ * If neither is set, this setting is set to
+ * MVK_CONFIG_ACTIVITY_PERFORMANCE_LOGGING_STYLE_FRAME_COUNT by default,
+ * and activity performance will be logged when frame activity is logged.
*/
- VkBool32 logActivityPerformanceInline;
+ MVKConfigActivityPerformanceLoggingStyle activityPerformanceLoggingStyle;
+#define logActivityPerformanceInline activityPerformanceLoggingStyle
/**
* Controls the Vulkan API version that MoltenVK should advertise in vkEnumerateInstanceVersion().
@@ -877,6 +894,27 @@
*/
MVKUseMetalArgumentBuffers useMetalArgumentBuffers;
+ /**
+ * Controls the type of compression to use on the MSL source code that is stored in memory
+ * for use in a pipeline cache. After being converted from SPIR-V, or loaded directly into
+ * a VkShaderModule, and then compiled into a MTLLibrary, the MSL source code is no longer
+ * needed for operation, but it is retained so it can be written out as part of a pipeline
+ * cache export. When a large number of shaders are loaded, this can consume significant
+ * memory. In such a case, this parameter can be used to compress the MSL source code that
+ * is awaiting export as part of a pipeline cache.
+ *
+ * The value of this parameter can be changed at any time, and will affect the size of
+ * the cached MSL from subsequent shader compilations.
+ *
+ * The initial value of this parameter is set by the
+ * MVK_CONFIG_SHADER_COMPRESSION_ALGORITHM
+ * runtime environment variable or MoltenVK compile-time build setting.
+ * If neither is set, this setting is set to
+ * MVK_CONFIG_COMPRESSION_ALGORITHM_NONE by default,
+ * and MoltenVK will not compress the MSL source code after compilation into a MTLLibrary.
+ */
+ MVKConfigCompressionAlgorithm shaderSourceCompressionAlgorithm;
+
} MVKConfiguration;
/** Identifies the type of rounding Metal uses for float to integer conversions in particular calculatons. */
@@ -999,6 +1037,8 @@
MVKPerformanceTracker spirvToMSL; /** Convert SPIR-V to MSL source code. */
MVKPerformanceTracker mslCompile; /** Compile MSL source code into a MTLLibrary. */
MVKPerformanceTracker mslLoad; /** Load pre-compiled MSL code into a MTLLibrary. */
+ MVKPerformanceTracker mslCompress; /** Compress MSL source code after compiling a MTLLibrary, to hold it in a pipeline cache. */
+ MVKPerformanceTracker mslDecompress; /** Decompress MSL source code to write the MSL when serializing a pipeline cache. */
MVKPerformanceTracker shaderLibraryFromCache; /** Retrieve a shader library from the cache, lazily creating it if needed. */
MVKPerformanceTracker functionRetrieval; /** Retrieve a MTLFunction from a MTLLibrary. */
MVKPerformanceTracker functionSpecialization; /** Specialize a retrieved MTLFunction. */
diff --git a/MoltenVK/MoltenVK/GPUObjects/MVKDevice.h b/MoltenVK/MoltenVK/GPUObjects/MVKDevice.h
index f58d9ee..c9b0e97 100644
--- a/MoltenVK/MoltenVK/GPUObjects/MVKDevice.h
+++ b/MoltenVK/MoltenVK/GPUObjects/MVKDevice.h
@@ -686,7 +686,9 @@
// Log call not locked. Very minor chance that the tracker data will be updated during log call,
// resulting in an inconsistent report. Not worth taking lock perf hit for rare inline reporting.
- if (_logActivityPerformanceInline) { logActivityPerformance(activityTracker, _performanceStatistics, true); }
+ if (_activityPerformanceLoggingStyle == MVK_CONFIG_ACTIVITY_PERFORMANCE_LOGGING_STYLE_IMMEDIATE) {
+ logActivityPerformance(activityTracker, _performanceStatistics, true);
+ }
}
};
@@ -891,7 +893,7 @@
id<MTLSamplerState> _defaultMTLSamplerState = nil;
id<MTLBuffer> _dummyBlitMTLBuffer = nil;
uint32_t _globalVisibilityQueryCount = 0;
- bool _logActivityPerformanceInline = false;
+ MVKConfigActivityPerformanceLoggingStyle _activityPerformanceLoggingStyle = MVK_CONFIG_ACTIVITY_PERFORMANCE_LOGGING_STYLE_FRAME_COUNT;
bool _isPerformanceTracking = false;
bool _isCurrentlyAutoGPUCapturing = false;
bool _isUsingMetalArgumentBuffers = false;
diff --git a/MoltenVK/MoltenVK/GPUObjects/MVKDevice.mm b/MoltenVK/MoltenVK/GPUObjects/MVKDevice.mm
index 11ab944..60bb270 100644
--- a/MoltenVK/MoltenVK/GPUObjects/MVKDevice.mm
+++ b/MoltenVK/MoltenVK/GPUObjects/MVKDevice.mm
@@ -4000,7 +4000,7 @@
}
void MVKDevice::logPerformanceSummary() {
- if (_logActivityPerformanceInline) { return; }
+ if (_activityPerformanceLoggingStyle == MVK_CONFIG_ACTIVITY_PERFORMANCE_LOGGING_STYLE_IMMEDIATE) { return; }
// Get a copy to minimize time under lock
MVKPerformanceStatistics perfStats;
@@ -4014,6 +4014,8 @@
logActivityPerformance(perfStats.shaderCompilation.spirvToMSL, perfStats);
logActivityPerformance(perfStats.shaderCompilation.mslCompile, perfStats);
logActivityPerformance(perfStats.shaderCompilation.mslLoad, perfStats);
+ logActivityPerformance(perfStats.shaderCompilation.mslCompress, perfStats);
+ logActivityPerformance(perfStats.shaderCompilation.mslDecompress, perfStats);
logActivityPerformance(perfStats.shaderCompilation.shaderLibraryFromCache, perfStats);
logActivityPerformance(perfStats.shaderCompilation.functionRetrieval, perfStats);
logActivityPerformance(perfStats.shaderCompilation.functionSpecialization, perfStats);
@@ -4028,6 +4030,8 @@
if (&activity == &perfStats.shaderCompilation.spirvToMSL) { return "Convert SPIR-V to MSL source code"; }
if (&activity == &perfStats.shaderCompilation.mslCompile) { return "Compile MSL source code into a MTLLibrary"; }
if (&activity == &perfStats.shaderCompilation.mslLoad) { return "Load pre-compiled MSL code into a MTLLibrary"; }
+ if (&activity == &perfStats.shaderCompilation.mslCompress) { return "Compress MSL source code after compiling a MTLLibrary"; }
+ if (&activity == &perfStats.shaderCompilation.mslDecompress) { return "Decompress MSL source code during pipeline cache write"; }
if (&activity == &perfStats.shaderCompilation.shaderLibraryFromCache) { return "Retrieve shader library from the cache"; }
if (&activity == &perfStats.shaderCompilation.functionRetrieval) { return "Retrieve a MTLFunction from a MTLLibrary"; }
if (&activity == &perfStats.shaderCompilation.functionSpecialization) { return "Specialize a retrieved MTLFunction"; }
@@ -4377,29 +4381,25 @@
void MVKDevice::initPerformanceTracking() {
_isPerformanceTracking = mvkConfig().performanceTracking;
- _logActivityPerformanceInline = mvkConfig().logActivityPerformanceInline;
+ _activityPerformanceLoggingStyle = mvkConfig().activityPerformanceLoggingStyle;
- MVKPerformanceTracker initPerf;
- initPerf.count = 0;
- initPerf.averageDuration = 0.0;
- initPerf.minimumDuration = 0.0;
- initPerf.maximumDuration = 0.0;
-
- _performanceStatistics.shaderCompilation.hashShaderCode = initPerf;
- _performanceStatistics.shaderCompilation.spirvToMSL = initPerf;
- _performanceStatistics.shaderCompilation.mslCompile = initPerf;
- _performanceStatistics.shaderCompilation.mslLoad = initPerf;
- _performanceStatistics.shaderCompilation.shaderLibraryFromCache = initPerf;
- _performanceStatistics.shaderCompilation.functionRetrieval = initPerf;
- _performanceStatistics.shaderCompilation.functionSpecialization = initPerf;
- _performanceStatistics.shaderCompilation.pipelineCompile = initPerf;
- _performanceStatistics.pipelineCache.sizePipelineCache = initPerf;
- _performanceStatistics.pipelineCache.writePipelineCache = initPerf;
- _performanceStatistics.pipelineCache.readPipelineCache = initPerf;
- _performanceStatistics.queue.mtlQueueAccess = initPerf;
- _performanceStatistics.queue.mtlCommandBufferCompletion = initPerf;
- _performanceStatistics.queue.nextCAMetalDrawable = initPerf;
- _performanceStatistics.queue.frameInterval = initPerf;
+ _performanceStatistics.shaderCompilation.hashShaderCode = {};
+ _performanceStatistics.shaderCompilation.spirvToMSL = {};
+ _performanceStatistics.shaderCompilation.mslCompile = {};
+ _performanceStatistics.shaderCompilation.mslLoad = {};
+ _performanceStatistics.shaderCompilation.mslCompress = {};
+ _performanceStatistics.shaderCompilation.mslDecompress = {};
+ _performanceStatistics.shaderCompilation.shaderLibraryFromCache = {};
+ _performanceStatistics.shaderCompilation.functionRetrieval = {};
+ _performanceStatistics.shaderCompilation.functionSpecialization = {};
+ _performanceStatistics.shaderCompilation.pipelineCompile = {};
+ _performanceStatistics.pipelineCache.sizePipelineCache = {};
+ _performanceStatistics.pipelineCache.writePipelineCache = {};
+ _performanceStatistics.pipelineCache.readPipelineCache = {};
+ _performanceStatistics.queue.mtlQueueAccess = {};
+ _performanceStatistics.queue.mtlCommandBufferCompletion = {};
+ _performanceStatistics.queue.nextCAMetalDrawable = {};
+ _performanceStatistics.queue.frameInterval = {};
}
void MVKDevice::initPhysicalDevice(MVKPhysicalDevice* physicalDevice, const VkDeviceCreateInfo* pCreateInfo) {
@@ -4666,9 +4666,15 @@
}
MVKDevice::~MVKDevice() {
+ if (_activityPerformanceLoggingStyle == MVK_CONFIG_ACTIVITY_PERFORMANCE_LOGGING_STYLE_DEVICE_LIFETIME) {
+ MVKLogInfo("Device activity performance summary:");
+ logPerformanceSummary();
+ }
+
for (auto& queues : _queuesByQueueFamilyIndex) {
mvkDestroyContainerContents(queues);
}
+
if (_commandResourceFactory) { _commandResourceFactory->destroy(); }
[_globalVisibilityResultMTLBuffer release];
diff --git a/MoltenVK/MoltenVK/GPUObjects/MVKPipeline.mm b/MoltenVK/MoltenVK/GPUObjects/MVKPipeline.mm
index 5923054..426d118 100644
--- a/MoltenVK/MoltenVK/GPUObjects/MVKPipeline.mm
+++ b/MoltenVK/MoltenVK/GPUObjects/MVKPipeline.mm
@@ -2004,7 +2004,7 @@
bool next() { return (++_index < (_pSLCache ? _pSLCache->_shaderLibraries.size() : 0)); }
SPIRVToMSLConversionConfiguration& getShaderConversionConfig() { return _pSLCache->_shaderLibraries[_index].first; }
- std::string& getMSL() { return _pSLCache->_shaderLibraries[_index].second->_msl; }
+ MVKCompressor<std::string>& getCompressedMSL() { return _pSLCache->_shaderLibraries[_index].second->getCompressedMSL(); }
SPIRVToMSLConversionResultInfo& getShaderConversionResultInfo() { return _pSLCache->_shaderLibraries[_index].second->_shaderConversionResultInfo; }
MVKShaderCacheIterator(MVKShaderLibraryCache* pSLCache) : _pSLCache(pSLCache) {}
@@ -2087,7 +2087,7 @@
writer(smKey);
writer(cacheIter.getShaderConversionConfig());
writer(cacheIter.getShaderConversionResultInfo());
- writer(cacheIter.getMSL());
+ writer(cacheIter.getCompressedMSL());
_device->addActivityPerformance(activityTracker, startTime);
}
}
@@ -2149,14 +2149,16 @@
SPIRVToMSLConversionConfiguration shaderConversionConfig;
reader(shaderConversionConfig);
- SPIRVToMSLConversionResult shaderConversionResult;
- reader(shaderConversionResult.resultInfo);
- reader(shaderConversionResult.msl);
+ SPIRVToMSLConversionResultInfo resultInfo;
+ reader(resultInfo);
+
+ MVKCompressor<std::string> compressedMSL;
+ reader(compressedMSL);
// Add the shader library to the staging cache.
MVKShaderLibraryCache* slCache = getShaderLibraryCache(smKey);
_device->addActivityPerformance(_device->_performanceStatistics.pipelineCache.readPipelineCache, startTime);
- slCache->addShaderLibrary(&shaderConversionConfig, shaderConversionResult);
+ slCache->addShaderLibrary(&shaderConversionConfig, resultInfo, compressedMSL);
break;
}
@@ -2394,6 +2396,13 @@
k.codeHash);
}
+template<class Archive, class C>
+void serialize(Archive & archive, MVKCompressor<C>& comp) {
+ archive(comp._compressed,
+ comp._uncompressedSize,
+ comp._algorithm);
+}
+
#pragma mark Construction
diff --git a/MoltenVK/MoltenVK/GPUObjects/MVKShaderModule.h b/MoltenVK/MoltenVK/GPUObjects/MVKShaderModule.h
index 3d7e3e4..0296992 100644
--- a/MoltenVK/MoltenVK/GPUObjects/MVKShaderModule.h
+++ b/MoltenVK/MoltenVK/GPUObjects/MVKShaderModule.h
@@ -20,6 +20,7 @@
#include "MVKDevice.h"
#include "MVKSync.h"
+#include "MVKCodec.h"
#include "MVKSmallVector.h"
#include <MoltenVKShaderConverter/SPIRVToMSLConverter.h>
#include <MoltenVKShaderConverter/GLSLToSPIRVConverter.h>
@@ -84,10 +85,13 @@
*/
void setWorkgroupSize(uint32_t x, uint32_t y, uint32_t z);
- /** Constructs an instance from the specified MSL source code. */
- MVKShaderLibrary(MVKVulkanAPIDeviceObject* owner, const SPIRVToMSLConversionResult& conversionResult);
+ MVKShaderLibrary(MVKVulkanAPIDeviceObject* owner,
+ const SPIRVToMSLConversionResult& conversionResult);
- /** Constructs an instance from the specified compiled MSL code data. */
+ MVKShaderLibrary(MVKVulkanAPIDeviceObject* owner,
+ const SPIRVToMSLConversionResultInfo& resultInfo,
+ const MVKCompressor<std::string> compressedMSL);
+
MVKShaderLibrary(MVKVulkanAPIDeviceObject* owner,
const void* mslCompiledCodeData,
size_t mslCompiledCodeLength);
@@ -106,11 +110,15 @@
MVKMTLFunction getMTLFunction(const VkSpecializationInfo* pSpecializationInfo, MVKShaderModule* shaderModule);
void handleCompilationError(NSError* err, const char* opDesc);
MTLFunctionConstant* getFunctionConstant(NSArray<MTLFunctionConstant*>* mtlFCs, NSUInteger mtlFCID);
+ void compileLibrary(const std::string& msl);
+ void compressMSL(const std::string& msl);
+ void decompressMSL(std::string& msl);
+ MVKCompressor<std::string>& getCompressedMSL() { return _compressedMSL; }
MVKVulkanAPIDeviceObject* _owner;
id<MTLLibrary> _mtlLibrary;
+ MVKCompressor<std::string> _compressedMSL;
SPIRVToMSLConversionResultInfo _shaderConversionResultInfo;
- std::string _msl;
};
@@ -146,8 +154,11 @@
friend MVKShaderModule;
MVKShaderLibrary* findShaderLibrary(SPIRVToMSLConversionConfiguration* pShaderConfig);
- MVKShaderLibrary* addShaderLibrary(SPIRVToMSLConversionConfiguration* pShaderConfig,
- SPIRVToMSLConversionResult& conversionResult);
+ MVKShaderLibrary* addShaderLibrary(const SPIRVToMSLConversionConfiguration* pShaderConfig,
+ const SPIRVToMSLConversionResult& conversionResult);
+ MVKShaderLibrary* addShaderLibrary(const SPIRVToMSLConversionConfiguration* pShaderConfig,
+ const SPIRVToMSLConversionResultInfo& resultInfo,
+ const MVKCompressor<std::string> compressedMSL);
void merge(MVKShaderLibraryCache* other);
MVKVulkanAPIDeviceObject* _owner;
diff --git a/MoltenVK/MoltenVK/GPUObjects/MVKShaderModule.mm b/MoltenVK/MoltenVK/GPUObjects/MVKShaderModule.mm
index ce2e997..7d7f6db 100644
--- a/MoltenVK/MoltenVK/GPUObjects/MVKShaderModule.mm
+++ b/MoltenVK/MoltenVK/GPUObjects/MVKShaderModule.mm
@@ -19,7 +19,6 @@
#include "MVKShaderModule.h"
#include "MVKPipeline.h"
#include "MVKFoundation.h"
-#include <string>
using namespace std;
@@ -140,18 +139,45 @@
wgSize.depth.size = z;
}
+// Sets the cached MSL source code, after first compressing it.
+void MVKShaderLibrary::compressMSL(const string& msl) {
+ MVKDevice* mvkDev = _owner->getDevice();
+ uint64_t startTime = mvkDev->getPerformanceTimestamp();
+ _compressedMSL.compress(msl, mvkConfig().shaderSourceCompressionAlgorithm);
+ mvkDev->addActivityPerformance(mvkDev->_performanceStatistics.shaderCompilation.mslCompress, startTime);
+}
+
+// Decompresses the cached MSL into the string.
+void MVKShaderLibrary::decompressMSL(string& msl) {
+ MVKDevice* mvkDev = _owner->getDevice();
+ uint64_t startTime = mvkDev->getPerformanceTimestamp();
+ _compressedMSL.decompress(msl);
+ mvkDev->addActivityPerformance(mvkDev->_performanceStatistics.shaderCompilation.mslDecompress, startTime);
+}
+
MVKShaderLibrary::MVKShaderLibrary(MVKVulkanAPIDeviceObject* owner,
const SPIRVToMSLConversionResult& conversionResult) : _owner(owner) {
- MVKShaderLibraryCompiler* slc = new MVKShaderLibraryCompiler(_owner);
-
- NSString* nsSrc = [[NSString alloc] initWithUTF8String: conversionResult.msl.c_str()]; // temp retained
- _mtlLibrary = slc->newMTLLibrary(nsSrc, conversionResult.resultInfo); // retained
- [nsSrc release]; // release temp string
-
- slc->destroy();
-
_shaderConversionResultInfo = conversionResult.resultInfo;
- _msl = conversionResult.msl;
+ compressMSL(conversionResult.msl);
+ compileLibrary(conversionResult.msl);
+}
+
+MVKShaderLibrary::MVKShaderLibrary(MVKVulkanAPIDeviceObject* owner,
+ const SPIRVToMSLConversionResultInfo& resultInfo,
+ const MVKCompressor<std::string> compressedMSL) : _owner(owner) {
+ _shaderConversionResultInfo = resultInfo;
+ _compressedMSL = compressedMSL;
+ string msl;
+ decompressMSL(msl);
+ compileLibrary(msl);
+}
+
+void MVKShaderLibrary::compileLibrary(const string& msl) {
+ MVKShaderLibraryCompiler* slc = new MVKShaderLibraryCompiler(_owner);
+ NSString* nsSrc = [[NSString alloc] initWithUTF8String: msl.c_str()]; // temp retained
+ _mtlLibrary = slc->newMTLLibrary(nsSrc, _shaderConversionResultInfo); // retained
+ [nsSrc release]; // release temp string
+ slc->destroy();
}
MVKShaderLibrary::MVKShaderLibrary(MVKVulkanAPIDeviceObject* owner,
@@ -176,7 +202,7 @@
_owner = other._owner;
_mtlLibrary = [other._mtlLibrary retain];
_shaderConversionResultInfo = other._shaderConversionResultInfo;
- _msl = other._msl;
+ _compressedMSL = other._compressedMSL;
}
MVKShaderLibrary& MVKShaderLibrary::operator=(const MVKShaderLibrary& other) {
@@ -186,7 +212,7 @@
}
_owner = other._owner;
_shaderConversionResultInfo = other._shaderConversionResultInfo;
- _msl = other._msl;
+ _compressedMSL = other._compressedMSL;
return *this;
}
@@ -245,13 +271,22 @@
}
// Adds and returns a new shader library configured from the specified conversion configuration.
-MVKShaderLibrary* MVKShaderLibraryCache::addShaderLibrary(SPIRVToMSLConversionConfiguration* pShaderConfig,
- SPIRVToMSLConversionResult& conversionResult) {
+MVKShaderLibrary* MVKShaderLibraryCache::addShaderLibrary(const SPIRVToMSLConversionConfiguration* pShaderConfig,
+ const SPIRVToMSLConversionResult& conversionResult) {
MVKShaderLibrary* shLib = new MVKShaderLibrary(_owner, conversionResult);
_shaderLibraries.emplace_back(*pShaderConfig, shLib);
return shLib;
}
+// Adds and returns a new shader library configured from contents read from a pipeline cache.
+MVKShaderLibrary* MVKShaderLibraryCache::addShaderLibrary(const SPIRVToMSLConversionConfiguration* pShaderConfig,
+ const SPIRVToMSLConversionResultInfo& resultInfo,
+ const MVKCompressor<std::string> compressedMSL) {
+ MVKShaderLibrary* shLib = new MVKShaderLibrary(_owner, resultInfo, compressedMSL);
+ _shaderLibraries.emplace_back(*pShaderConfig, shLib);
+ return shLib;
+}
+
// Merge another shader library cache with this one. Handle null input.
void MVKShaderLibraryCache::merge(MVKShaderLibraryCache* other) {
if ( !other ) { return; }
diff --git a/MoltenVK/MoltenVK/Layers/MVKLayers.mm b/MoltenVK/MoltenVK/Layers/MVKLayers.mm
index 3405a56..040712b 100644
--- a/MoltenVK/MoltenVK/Layers/MVKLayers.mm
+++ b/MoltenVK/MoltenVK/Layers/MVKLayers.mm
@@ -19,7 +19,6 @@
#include "MVKLayers.h"
#include "MVKEnvironment.h"
#include "MVKFoundation.h"
-#include "vk_mvk_moltenvk.h"
#include <mutex>
using namespace std;
diff --git a/MoltenVK/MoltenVK/Utility/MVKCodec.cpp b/MoltenVK/MoltenVK/Utility/MVKCodec.cpp
deleted file mode 100644
index 09669e4..0000000
--- a/MoltenVK/MoltenVK/Utility/MVKCodec.cpp
+++ /dev/null
@@ -1,125 +0,0 @@
-/*
- * MVKCodec.cpp
- *
- * Copyright (c) 2018-2023 Chip Davis for CodeWeavers
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-
-#include "MVKCodec.h"
-
-#include <algorithm>
-#include <simd/simd.h>
-
-
-using simd::float3;
-using simd::float4;
-using simd::pow;
-using simd::select;
-
-static uint32_t pack_float_to_unorm4x8(float4 x) {
- return ((((uint8_t)(x.r * 255)) & 0x000000ff) | ((((uint8_t)(x.g * 255)) << 8) & 0x0000ff00) |
- ((((uint8_t)(x.b * 255)) & 0x00ff0000) << 16) | ((((uint8_t)(x.a * 255)) << 24) & 0xff000000));
-}
-
-static float3 unpack_unorm565_to_float(uint16_t x) {
- return simd::make_float3(((x >> 11) & 0x1f) / 31.0f, ((x >> 5) & 0x3f) / 63.0f, (x & 0x1f) / 31.0f);
-}
-
-
-/** Texture codec for DXTn (i.e. BC[1-3]) compressed data.
- *
- * This implementation is largely derived from Wine, from code originally
- * written by Connor McAdams.
- */
-class MVKDXTnCodec : public MVKCodec {
-
-public:
-
- void decompress(void* pDest, const void* pSrc, const VkSubresourceLayout& destLayout, const VkSubresourceLayout& srcLayout, VkExtent3D extent) override {
- VkDeviceSize blockByteCount;
- const uint8_t* pSrcRow;
- const uint8_t* pSrcSlice = (const uint8_t*)pSrc;
- uint8_t* pDestRow;
- uint8_t* pDestSlice = (uint8_t*)pDest;
-
- blockByteCount = isBC1Format(_format) ? 8 : 16;
-
- for (uint32_t z = 0; z < extent.depth; ++z) {
- pSrcRow = pSrcSlice;
- pDestRow = pDestSlice;
- for (uint32_t y = 0; y < extent.height; y += 4) {
- for (uint32_t x = 0; x < extent.width; x += 4) {
- VkExtent2D blockExtent;
- blockExtent.width = std::min(extent.width - x, 4u);
- blockExtent.height = std::min(extent.height - y, 4u);
- decompressDXTnBlock(pSrcRow + x * (blockByteCount / 4),
- pDestRow + x * 4, blockExtent, destLayout.rowPitch, _format);
- }
- pSrcRow += srcLayout.rowPitch;
- pDestRow += destLayout.rowPitch * 4;
- }
- pSrcSlice += srcLayout.depthPitch;
- pDestSlice += destLayout.depthPitch;
- }
- }
-
- /** Constructs an instance. */
- MVKDXTnCodec(VkFormat format) : _format(format) {}
-
-private:
-
-#define constant const
-#define device
-#define thread
-#define MVK_DECOMPRESS_CODE(...) __VA_ARGS__
-#include "MVKDXTnCodec.def"
-#undef MVK_DECOMPRESS_CODE
-
- VkFormat _format;
-};
-
-std::unique_ptr<MVKCodec> mvkCreateCodec(VkFormat format) {
- switch (format) {
- case VK_FORMAT_BC1_RGB_UNORM_BLOCK:
- case VK_FORMAT_BC1_RGB_SRGB_BLOCK:
- case VK_FORMAT_BC1_RGBA_UNORM_BLOCK:
- case VK_FORMAT_BC1_RGBA_SRGB_BLOCK:
- case VK_FORMAT_BC2_UNORM_BLOCK:
- case VK_FORMAT_BC2_SRGB_BLOCK:
- case VK_FORMAT_BC3_UNORM_BLOCK:
- case VK_FORMAT_BC3_SRGB_BLOCK:
- return std::unique_ptr<MVKCodec>(new MVKDXTnCodec(format));
-
- default:
- return nullptr;
- }
-}
-
-bool mvkCanDecodeFormat(VkFormat format) {
- switch (format) {
- case VK_FORMAT_BC1_RGB_UNORM_BLOCK:
- case VK_FORMAT_BC1_RGB_SRGB_BLOCK:
- case VK_FORMAT_BC1_RGBA_UNORM_BLOCK:
- case VK_FORMAT_BC1_RGBA_SRGB_BLOCK:
- case VK_FORMAT_BC2_UNORM_BLOCK:
- case VK_FORMAT_BC2_SRGB_BLOCK:
- case VK_FORMAT_BC3_UNORM_BLOCK:
- case VK_FORMAT_BC3_SRGB_BLOCK:
- return true;
-
- default:
- return false;
- }
-}
diff --git a/MoltenVK/MoltenVK/Utility/MVKCodec.h b/MoltenVK/MoltenVK/Utility/MVKCodec.h
index 136f420..595a50f 100644
--- a/MoltenVK/MoltenVK/Utility/MVKCodec.h
+++ b/MoltenVK/MoltenVK/Utility/MVKCodec.h
@@ -19,17 +19,18 @@
#pragma once
-#include "MVKFoundation.h"
+#include "MVKEnvironment.h"
+#include <vector>
#include <string>
+#pragma mark -
#pragma mark Texture data codecs
/**
* This is the base class implemented by all codecs supported by MoltenVK.
- * Objects of this class are used to decompress texture data for upload to a
- * 3D texture.
+ * Objects of this class are used to decompress texture data for upload to a 3D texture.
*/
class MVKCodec {
@@ -43,8 +44,89 @@
};
+
+#pragma mark -
+#pragma mark General data compressor
+
+/**
+ * Holds compressed data, along with information allowing it to be decompressed again.
+ * The template class C must support the basic data container methods data(), size() and resize().
+ *
+ * THIS CLASS IS STREAMED OUT AS PART OF THE PIPELINE CACHE.
+ * STRUCTURAL CHANGES TO THIS CLASS MUST BE CAPTURED IN THE STREAMING LOGIC OF THE PIPELINE CACHE.
+ */
+template <class C>
+class MVKCompressor {
+
+public:
+
+	/**
+	 * Compresses the content in the data container using the algorithm, and retains
+	 * the compressed content. If an error occurs, or if the compressed data is actually
+	 * larger (which can happen with some compression algorithms if the source is small),
+	 * the uncompressed content is retained. Returns true if the content was successfully
+	 * compressed, or returns false if the content was retained as uncompressed.
+	 */
+	bool compress(const C& uncompressed, MVKConfigCompressionAlgorithm algorithm) {
+		// The destination is sized to the source, so mvkCompress() rejects any result that is larger.
+		_uncompressedSize = uncompressed.size();
+		_compressed.resize(_uncompressedSize);
+		_algorithm = algorithm;
+		size_t compSize = mvkCompress((const uint8_t*)uncompressed.data(), uncompressed.size(),
+									  _compressed.data(), _compressed.size(),
+									  _algorithm);
+
+		bool wasCompressed = (compSize > 0);
+		if ( !wasCompressed ) {		// Fall back to retaining a verbatim copy of the content.
+			_algorithm = MVK_CONFIG_COMPRESSION_ALGORITHM_NONE;
+			compSize = mvkCompress((const uint8_t*)uncompressed.data(), uncompressed.size(),
+								   _compressed.data(), _compressed.size(),
+								   _algorithm);
+		}
+
+		_compressed.resize(compSize);
+		_compressed.shrink_to_fit();
+
+		return wasCompressed;
+	}
+
+	/** Decompresses the retained content into the data container, sizing it to the bytes produced (empty on failure). */
+	void decompress(C& uncompressed) const {
+		uncompressed.resize(_uncompressedSize);
+		uncompressed.resize(mvkDecompress(_compressed.data(), _compressed.size(),
+										  (uint8_t*)uncompressed.data(), uncompressed.size(),
+										  _algorithm));
+	}
+
+	std::vector<uint8_t> _compressed;		/**< Retained bytes: compressed, or a verbatim copy when _algorithm is NONE. */
+	size_t _uncompressedSize = 0;			/**< Size of the original content; used to size the container in decompress(). */
+	MVKConfigCompressionAlgorithm _algorithm = MVK_CONFIG_COMPRESSION_ALGORITHM_NONE;	/**< Algorithm that produced _compressed. */
+};
+
+
+#pragma mark -
+#pragma mark Support functions
+
/** Returns an appropriate codec for the given format, or nullptr if the format is not supported. */
std::unique_ptr<MVKCodec> mvkCreateCodec(VkFormat format);
/** Returns whether or not the given format can be decompressed. */
bool mvkCanDecodeFormat(VkFormat format);
+
+/**
+ * Compresses the source bytes into the destination bytes using a compression algorithm,
+ * and returns the number of bytes written to dstBytes. If an error occurs, or the compressed
+ * data is larger than dstSize, no data is copied to dstBytes, and zero is returned.
+ */
+size_t mvkCompress(const uint8_t* srcBytes, size_t srcSize,
+ uint8_t* dstBytes, size_t dstSize,
+ MVKConfigCompressionAlgorithm compAlgo);
+
+/**
+ * Decompresses the source bytes into the destination bytes using a compression algorithm,
+ * and returns the number of bytes written to dstBytes. If an error occurs, or the decompressed
+ * data is larger than dstSize, no data is copied to dstBytes, and zero is returned.
+ */
+size_t mvkDecompress(const uint8_t* srcBytes, size_t srcSize,
+ uint8_t* dstBytes, size_t dstSize,
+ MVKConfigCompressionAlgorithm compAlgo);
diff --git a/MoltenVK/MoltenVK/Utility/MVKCodec.mm b/MoltenVK/MoltenVK/Utility/MVKCodec.mm
new file mode 100644
index 0000000..7c07d6a
--- /dev/null
+++ b/MoltenVK/MoltenVK/Utility/MVKCodec.mm
@@ -0,0 +1,192 @@
+/*
+ * MVKCodec.mm
+ *
+ * Copyright (c) 2018-2023 Chip Davis for CodeWeavers
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+
+#include "MVKCodec.h"
+#include "MVKBaseObject.h"
+#include "MVKFoundation.h"
+
+#include <algorithm>
+#include <simd/simd.h>
+
+using namespace std;
+
+using simd::float3;
+using simd::float4;
+using simd::pow;
+using simd::select;
+
+static uint32_t pack_float_to_unorm4x8(float4 x) {	// Packs RGBA into 0xAABBGGRR; channels are widened before shifting so none is masked to zero.
+	return (((uint32_t)(uint8_t)(x.r * 255)) | (((uint32_t)(uint8_t)(x.g * 255)) << 8) |
+			(((uint32_t)(uint8_t)(x.b * 255)) << 16) | (((uint32_t)(uint8_t)(x.a * 255)) << 24));
+}
+
+static float3 unpack_unorm565_to_float(uint16_t x) { // Splits x into 5-, 6- and 5-bit fields (high bits first) and divides each by its maximum value.
+ return simd::make_float3(((x >> 11) & 0x1f) / 31.0f, ((x >> 5) & 0x3f) / 63.0f, (x & 0x1f) / 31.0f);
+}
+
+
+/** Texture codec for DXTn (i.e. BC[1-3]) compressed data.
+ *
+ * This implementation is largely derived from Wine, from code originally
+ * written by Connor McAdams.
+ */
+class MVKDXTnCodec : public MVKCodec {
+
+public:
+ /** Decompresses the blocks at pSrc into pDest, walking extent.depth slices and the 4x4 texel blocks within each slice. */
+ void decompress(void* pDest, const void* pSrc, const VkSubresourceLayout& destLayout, const VkSubresourceLayout& srcLayout, VkExtent3D extent) override {
+ VkDeviceSize blockByteCount;
+ const uint8_t* pSrcRow;
+ const uint8_t* pSrcSlice = (const uint8_t*)pSrc;
+ uint8_t* pDestRow;
+ uint8_t* pDestSlice = (uint8_t*)pDest;
+
+ blockByteCount = isBC1Format(_format) ? 8 : 16; // 8 bytes per block for BC1 formats, 16 bytes otherwise.
+
+ for (uint32_t z = 0; z < extent.depth; ++z) {
+ pSrcRow = pSrcSlice;
+ pDestRow = pDestSlice;
+ for (uint32_t y = 0; y < extent.height; y += 4) {
+ for (uint32_t x = 0; x < extent.width; x += 4) {
+ VkExtent2D blockExtent;
+ blockExtent.width = min(extent.width - x, 4u); // Clamp partial blocks at the right edge...
+ blockExtent.height = min(extent.height - y, 4u); // ...and at the bottom edge.
+ decompressDXTnBlock(pSrcRow + x * (blockByteCount / 4), // x is a multiple of 4, so this offset is (x / 4) * blockByteCount.
+ pDestRow + x * 4, blockExtent, destLayout.rowPitch, _format); // NOTE(review): x * 4 presumably means 4 bytes per output texel — confirm in MVKDXTnCodec.def.
+ }
+ pSrcRow += srcLayout.rowPitch; // One source row pitch per row of blocks...
+ pDestRow += destLayout.rowPitch * 4; // ...matches four destination row pitches.
+ }
+ pSrcSlice += srcLayout.depthPitch;
+ pDestSlice += destLayout.depthPitch;
+ }
+ }
+
+ /** Constructs an instance that decodes data of the given format. */
+ MVKDXTnCodec(VkFormat format) : _format(format) {}
+
+private:
+
+#define constant const // NOTE(review): these three defines presumably map Metal address-space qualifiers used by the .def to plain C++ — confirm.
+#define device
+#define thread
+#define MVK_DECOMPRESS_CODE(...) __VA_ARGS__
+#include "MVKDXTnCodec.def"
+#undef MVK_DECOMPRESS_CODE
+ // NOTE(review): constant, device and thread are not #undef'd here, so they stay defined for the rest of this file.
+ VkFormat _format;
+};
+
+
+#pragma mark -
+#pragma mark Support functions
+
+unique_ptr<MVKCodec> mvkCreateCodec(VkFormat format) {
+ switch (format) {
+ case VK_FORMAT_BC1_RGB_UNORM_BLOCK:
+ case VK_FORMAT_BC1_RGB_SRGB_BLOCK:
+ case VK_FORMAT_BC1_RGBA_UNORM_BLOCK:
+ case VK_FORMAT_BC1_RGBA_SRGB_BLOCK:
+ case VK_FORMAT_BC2_UNORM_BLOCK:
+ case VK_FORMAT_BC2_SRGB_BLOCK:
+ case VK_FORMAT_BC3_UNORM_BLOCK:
+ case VK_FORMAT_BC3_SRGB_BLOCK:
+ return unique_ptr<MVKCodec>(new MVKDXTnCodec(format)); // All eight BC1-BC3 variants share the DXTn codec.
+ // Any other format has no codec. NOTE(review): this case list duplicates the one in mvkCanDecodeFormat(); keep them in sync.
+ default:
+ return nullptr;
+ }
+}
+
+bool mvkCanDecodeFormat(VkFormat format) {
+ switch (format) {
+ case VK_FORMAT_BC1_RGB_UNORM_BLOCK:
+ case VK_FORMAT_BC1_RGB_SRGB_BLOCK:
+ case VK_FORMAT_BC1_RGBA_UNORM_BLOCK:
+ case VK_FORMAT_BC1_RGBA_SRGB_BLOCK:
+ case VK_FORMAT_BC2_UNORM_BLOCK:
+ case VK_FORMAT_BC2_SRGB_BLOCK:
+ case VK_FORMAT_BC3_UNORM_BLOCK:
+ case VK_FORMAT_BC3_SRGB_BLOCK:
+ return true; // The same eight formats for which mvkCreateCodec() returns a codec.
+ // NOTE(review): this case list duplicates the one in mvkCreateCodec(); keep them in sync.
+ default:
+ return false;
+ }
+}
+
+static NSDataCompressionAlgorithm getSystemCompressionAlgo(MVKConfigCompressionAlgorithm compAlgo) {
+	switch (compAlgo) {
+		case MVK_CONFIG_COMPRESSION_ALGORITHM_LZ4:		return NSDataCompressionAlgorithmLZ4;
+		case MVK_CONFIG_COMPRESSION_ALGORITHM_LZMA:		return NSDataCompressionAlgorithmLZMA;
+		case MVK_CONFIG_COMPRESSION_ALGORITHM_ZLIB:		return NSDataCompressionAlgorithmZlib;
+		case MVK_CONFIG_COMPRESSION_ALGORITHM_LZFSE:	// Falls through: LZFSE is also the fallback for NONE
+		case MVK_CONFIG_COMPRESSION_ALGORITHM_NONE:		// and for any unrecognized value.
+		default:										return NSDataCompressionAlgorithmLZFSE;
+	}
+}
+
+// Compresses or decompresses srcBytes (verbatim copy for NONE). Only copies into dstBytes if the result fits; otherwise returns zero and leaves dstBytes untouched.
+static size_t mvkCompressDecompress(const uint8_t* srcBytes, size_t srcSize,
+									uint8_t* dstBytes, size_t dstSize,
+									MVKConfigCompressionAlgorithm compAlgo,
+									bool isCompressing) {
+	size_t dstByteCount = 0;
+	if (compAlgo != MVK_CONFIG_COMPRESSION_ALGORITHM_NONE) {
+		@autoreleasepool {
+			NSDataCompressionAlgorithm sysCompAlgo = getSystemCompressionAlgo(compAlgo);
+			NSData* srcData = [NSData dataWithBytesNoCopy: (void*)srcBytes length: srcSize freeWhenDone: NO];
+
+			NSError* err = nil;
+			NSData* dstData = (isCompressing
+							   ? [srcData compressedDataUsingAlgorithm: sysCompAlgo error: &err]
+							   : [srcData decompressedDataUsingAlgorithm: sysCompAlgo error: &err]);
+			if (dstData) {		// Cocoa convention: the return value, not the NSError, indicates success.
+				size_t dataLen = dstData.length;
+				if (dstSize >= dataLen) {
+					[dstData getBytes: dstBytes length: dataLen];
+					dstByteCount = dataLen;
+				}
+			} else {
+				MVKBaseObject::reportError(nullptr, VK_ERROR_FORMAT_NOT_SUPPORTED,
+										   "Could not %scompress data (Error code %li):\n%s",
+										   (isCompressing ? "" : "de"),
+										   (long)err.code, (err ? err.localizedDescription.UTF8String : "Unknown error"));
+			}
+		}
+	} else if (dstSize >= srcSize) {
+		mvkCopy(dstBytes, srcBytes, srcSize);
+		dstByteCount = srcSize;
+	}
+	return dstByteCount;
+}
+
+size_t mvkCompress(const uint8_t* srcBytes, size_t srcSize,
+				   uint8_t* dstBytes, size_t dstSize,
+				   MVKConfigCompressionAlgorithm compAlgo) {
+	const bool isCompressing = true;	// Selects the compressing direction of the shared worker.
+	return mvkCompressDecompress(srcBytes, srcSize, dstBytes, dstSize, compAlgo, isCompressing);
+}
+
+size_t mvkDecompress(const uint8_t* srcBytes, size_t srcSize,
+					 uint8_t* dstBytes, size_t dstSize,
+					 MVKConfigCompressionAlgorithm compAlgo) {
+	const bool isCompressing = false;	// Selects the decompressing direction of the shared worker.
+	return mvkCompressDecompress(srcBytes, srcSize, dstBytes, dstSize, compAlgo, isCompressing);
+}
diff --git a/MoltenVK/MoltenVK/Utility/MVKEnvironment.cpp b/MoltenVK/MoltenVK/Utility/MVKEnvironment.cpp
index 9861a35..1381c61 100644
--- a/MoltenVK/MoltenVK/Utility/MVKEnvironment.cpp
+++ b/MoltenVK/MoltenVK/Utility/MVKEnvironment.cpp
@@ -39,7 +39,7 @@
MVK_SET_FROM_ENV_OR_BUILD_INT64 (evCfg.metalCompileTimeout, MVK_CONFIG_METAL_COMPILE_TIMEOUT);
MVK_SET_FROM_ENV_OR_BUILD_BOOL (evCfg.performanceTracking, MVK_CONFIG_PERFORMANCE_TRACKING);
MVK_SET_FROM_ENV_OR_BUILD_INT32 (evCfg.performanceLoggingFrameCount, MVK_CONFIG_PERFORMANCE_LOGGING_FRAME_COUNT);
- MVK_SET_FROM_ENV_OR_BUILD_BOOL (evCfg.logActivityPerformanceInline, MVK_CONFIG_PERFORMANCE_LOGGING_INLINE);
+ MVK_SET_FROM_ENV_OR_BUILD_INT32 (evCfg.activityPerformanceLoggingStyle, MVK_CONFIG_ACTIVITY_PERFORMANCE_LOGGING_STYLE);
MVK_SET_FROM_ENV_OR_BUILD_BOOL (evCfg.displayWatermark, MVK_CONFIG_DISPLAY_WATERMARK);
MVK_SET_FROM_ENV_OR_BUILD_BOOL (evCfg.specializedQueueFamilies, MVK_CONFIG_SPECIALIZED_QUEUE_FAMILIES);
MVK_SET_FROM_ENV_OR_BUILD_BOOL (evCfg.switchSystemGPU, MVK_CONFIG_SWITCH_SYSTEM_GPU);
@@ -62,6 +62,7 @@
MVK_SET_FROM_ENV_OR_BUILD_INT32 (evCfg.advertiseExtensions, MVK_CONFIG_ADVERTISE_EXTENSIONS);
MVK_SET_FROM_ENV_OR_BUILD_BOOL (evCfg.resumeLostDevice, MVK_CONFIG_RESUME_LOST_DEVICE);
MVK_SET_FROM_ENV_OR_BUILD_INT32 (evCfg.useMetalArgumentBuffers, MVK_CONFIG_USE_METAL_ARGUMENT_BUFFERS);
+ MVK_SET_FROM_ENV_OR_BUILD_INT32 (evCfg.shaderSourceCompressionAlgorithm, MVK_CONFIG_SHADER_COMPRESSION_ALGORITHM);
// Deprected legacy VkSemaphore MVK_ALLOW_METAL_FENCES and MVK_ALLOW_METAL_EVENTS config.
// Legacy MVK_ALLOW_METAL_EVENTS is covered by MVK_CONFIG_VK_SEMAPHORE_SUPPORT_STYLE,
@@ -75,6 +76,17 @@
evCfg.semaphoreUseMTLEvent = (MVKVkSemaphoreSupportStyle)false; // Disabled. Also semaphoreSupportStyle MVK_CONFIG_VK_SEMAPHORE_SUPPORT_STYLE_SINGLE_QUEUE.
}
+ // Deprecated legacy env var MVK_CONFIG_PERFORMANCE_LOGGING_INLINE config. If legacy
+ // MVK_CONFIG_PERFORMANCE_LOGGING_INLINE env var was used, and activityPerformanceLoggingStyle
+ // was not already set by MVK_CONFIG_ACTIVITY_PERFORMANCE_LOGGING_STYLE, set
+ // activityPerformanceLoggingStyle to MVK_CONFIG_ACTIVITY_PERFORMANCE_LOGGING_STYLE_IMMEDIATE.
+ bool logPerfInline;
+ MVK_SET_FROM_ENV_OR_BUILD_BOOL(logPerfInline, MVK_CONFIG_PERFORMANCE_LOGGING_INLINE);
+ if (logPerfInline && evCfg.activityPerformanceLoggingStyle == MVK_CONFIG_ACTIVITY_PERFORMANCE_LOGGING_STYLE_FRAME_COUNT) {
+ evCfg.activityPerformanceLoggingStyle = MVK_CONFIG_ACTIVITY_PERFORMANCE_LOGGING_STYLE_IMMEDIATE;
+ }
+
+
mvkSetConfig(evCfg);
}
diff --git a/MoltenVK/MoltenVK/Utility/MVKEnvironment.h b/MoltenVK/MoltenVK/Utility/MVKEnvironment.h
index 16833d0..cec1740 100644
--- a/MoltenVK/MoltenVK/Utility/MVKEnvironment.h
+++ b/MoltenVK/MoltenVK/Utility/MVKEnvironment.h
@@ -151,8 +151,11 @@
# define MVK_CONFIG_PERFORMANCE_LOGGING_FRAME_COUNT 0
#endif
-/** Log activity performance every time an activity occurs. Disabled by default. */
-# ifndef MVK_CONFIG_PERFORMANCE_LOGGING_INLINE
+/** Activity performance logging style. Default is to log after a configured number of frames. */
+# ifndef MVK_CONFIG_ACTIVITY_PERFORMANCE_LOGGING_STYLE
+# define MVK_CONFIG_ACTIVITY_PERFORMANCE_LOGGING_STYLE MVK_CONFIG_ACTIVITY_PERFORMANCE_LOGGING_STYLE_FRAME_COUNT
+# endif
+# ifndef MVK_CONFIG_PERFORMANCE_LOGGING_INLINE // Deprecated
# define MVK_CONFIG_PERFORMANCE_LOGGING_INLINE 0
# endif
@@ -286,3 +289,8 @@
#ifndef MVK_CONFIG_USE_METAL_ARGUMENT_BUFFERS
# define MVK_CONFIG_USE_METAL_ARGUMENT_BUFFERS MVK_CONFIG_USE_METAL_ARGUMENT_BUFFERS_NEVER
#endif
+
+/** Compress MSL shader source code in a pipeline cache. Defaults to no compression. */
+#ifndef MVK_CONFIG_SHADER_COMPRESSION_ALGORITHM
+# define MVK_CONFIG_SHADER_COMPRESSION_ALGORITHM MVK_CONFIG_COMPRESSION_ALGORITHM_NONE
+#endif
diff --git a/MoltenVKShaderConverter/MoltenVKShaderConverter/SPIRVToMSLConverter.h b/MoltenVKShaderConverter/MoltenVKShaderConverter/SPIRVToMSLConverter.h
index 34c02b2..765e110 100644
--- a/MoltenVKShaderConverter/MoltenVKShaderConverter/SPIRVToMSLConverter.h
+++ b/MoltenVKShaderConverter/MoltenVKShaderConverter/SPIRVToMSLConverter.h
@@ -279,7 +279,7 @@
bool hasSPIRV() { return !_spirv.empty(); }
/**
- * Converts SPIR-V code, set using setSPIRV() to MSL code, which can be retrieved using getMSL().
+ * Converts SPIR-V code, set using setSPIRV() to MSL code.
*
* The boolean flags indicate whether the original SPIR-V code, the resulting MSL code,
* and optionally, the original GLSL (as converted from the SPIR_V), should be logged
diff --git a/Scripts/runcts b/Scripts/runcts
index 4cc33e7..2349e75 100755
--- a/Scripts/runcts
+++ b/Scripts/runcts
@@ -94,22 +94,24 @@
export MVK_CONFIG_ADVERTISE_EXTENSIONS=0xA
fi
+# ----- Metal validation settings ------
export METAL_DEVICE_WRAPPER_TYPE=1
export METAL_ERROR_MODE=3
export METAL_DEBUG_ERROR_MODE=3
# ----- MoltenVK config settings ------
-export MVK_CONFIG_LOG_LEVEL=1
+export MVK_CONFIG_LOG_LEVEL=1 #(1 = Errors only, 3 = Info)
export MVK_DEBUG=0
-# Additional MoltenVK configuration can be set here by
-# editing below, or can be set before calling this script.
+# Additional MoltenVK configuration can be set here by editing below.
export MVK_CONFIG_RESUME_LOST_DEVICE=1
export MVK_CONFIG_FAST_MATH_ENABLED=1
-export MVK_CONFIG_USE_METAL_ARGUMENT_BUFFERS=0 #(2 = VK_EXT_descriptor_indexing enabled)
export MVK_CONFIG_FORCE_LOW_POWER_GPU=0
-export MVK_CONFIG_VK_SEMAPHORE_SUPPORT_STYLE=2 #(2 = MTLEvents always)
-
+export MVK_CONFIG_USE_METAL_ARGUMENT_BUFFERS=0 #(2 = VK_EXT_descriptor_indexing enabled)
+export MVK_CONFIG_VK_SEMAPHORE_SUPPORT_STYLE=2 #(2 = MTLEvents always)
+export MVK_CONFIG_SHADER_COMPRESSION_ALGORITHM=0 #(2 = ZLIB, 3 = LZ4)
+export MVK_CONFIG_PERFORMANCE_TRACKING=0
+export MVK_CONFIG_ACTIVITY_PERFORMANCE_LOGGING_STYLE=2 #(2 = Device lifetime)
# -------------- Operation --------------------