Merge pull request #55 from billhollings/master

Version 1.0.0.
diff --git a/Docs/Whats_New.md b/Docs/Whats_New.md
index e1d6748..c44c485 100644
--- a/Docs/Whats_New.md
+++ b/Docs/Whats_New.md
@@ -13,10 +13,10 @@
 
 
 
-MoltenVK 0.20.0
+MoltenVK 1.0.0
 ---------------
 
-Released 2017/11/17
+Released 2018/02/26
 
 #### Initial open-source release!
 
diff --git a/External/SPIRV-Cross b/External/SPIRV-Cross
index 3925fe8..50ef6cd 160000
--- a/External/SPIRV-Cross
+++ b/External/SPIRV-Cross
@@ -1 +1 @@
-Subproject commit 3925fe88e91e32747db84c2bd3caa5e1f3ec70c7
+Subproject commit 50ef6cd95fa2d22f31dc8cb5e22a7cf1094c017e
diff --git a/MoltenVK/MoltenVK/API/vk_mvk_moltenvk.h b/MoltenVK/MoltenVK/API/vk_mvk_moltenvk.h
index 0e5a804..234a51d 100644
--- a/MoltenVK/MoltenVK/API/vk_mvk_moltenvk.h
+++ b/MoltenVK/MoltenVK/API/vk_mvk_moltenvk.h
@@ -46,8 +46,8 @@
  *   - 030104    (version 3.1.4)
  *   - 401215    (version 4.12.15)
  */
-#define MVK_VERSION_MAJOR   0
-#define MVK_VERSION_MINOR   20
+#define MVK_VERSION_MAJOR   1
+#define MVK_VERSION_MINOR   0
 #define MVK_VERSION_PATCH   0
 
 #define MVK_MAKE_VERSION(major, minor, patch)    (((major) * 10000) + ((minor) * 100) + (patch))
@@ -62,7 +62,8 @@
     VkBool32 debugMode;                     /**< If enabled, several debugging capabilities will be enabled. Shader code will be logged during Runtime Shader Conversion. Adjusts settings that might trigger Metal validation but are otherwise acceptable to Metal runtime. Improves support for Xcode GPU Frame Capture. Default value is determined at build time by the presence of a DEBUG build setting. By default the DEBUG build setting is defined when MoltenVK is compiled in Debug mode, and not defined when compiled in Release mode. */
     VkBool32 shaderConversionFlipVertexY;   /**< If enabled, MSL vertex shader code created during Runtime Shader Conversion will flip the Y-axis of each vertex, as Vulkan coordinate system is inverse of OpenGL. Default is true. */
     VkBool32 supportLargeQueryPools;        /**< Metal allows only 8192 occlusion queries per MTLBuffer. If enabled, MoltenVK allocates a MTLBuffer for each query pool, allowing each query pool to support 8192 queries, which may slow performance or cause unexpected behaviour if the query pool is not established prior to a Metal renderpass, or if the query pool is changed within a Metal renderpass. If disabled, one MTLBuffer will be shared by all query pools, which improves performance, but limits the total device queries to 8192. Default is false. */
-    VkBool32 displayWatermark;              /**< If enabled, a MoltenVK logo watermark will be rendered on top of the scene. This can be enabled for publicity during demos. Default value is determined at build time by the presence of a MVK_WATERMARK build setting. By default the MVK_WATERMARK build setting is defined when MoltenVK is compiled in Debug mode, and not defined when compiled in Release mode. */
+	VkBool32 presentWithCommandBuffer;      /**< If enabled, each surface presentation is scheduled using a command buffer. Enabling this may improve rendering frame synchronization, but may result in reduced frame rates. Default value is true if the MVK_PRESENT_WITH_COMMAND_BUFFER build setting is defined when MoltenVK is compiled, and false otherwise. By default the MVK_PRESENT_WITH_COMMAND_BUFFER build setting is not defined. */
+    VkBool32 displayWatermark;              /**< If enabled, a MoltenVK logo watermark will be rendered on top of the scene. This can be enabled for publicity during demos. Default value is true if the MVK_DISPLAY_WATERMARK build setting is defined when MoltenVK is compiled, and false otherwise. By default the MVK_DISPLAY_WATERMARK build setting is not defined. */
     VkBool32 performanceTracking;           /**< If enabled, per-frame performance statistics are tracked, optionally logged, and can be retrieved via the vkGetSwapchainPerformanceMVK() function, and various shader compilation performance statistics are tracked, logged, and can be retrieved via the vkGetShaderCompilationPerformanceMVK() function. Default is false. */
     uint32_t performanceLoggingFrameCount;  /**< If non-zero, performance statistics will be periodically logged to the console, on a repeating cycle of this many frames per swapchain. The performanceTracking capability must also be enabled. Default is zero, indicating no logging. */
 } MVKDeviceConfiguration;
@@ -88,17 +89,17 @@
 
 /** MoltenVK swapchain performance statistics. */
 typedef struct {
-    double lastFrameInterval;           /**< The time interval between this frame and the immediately previous frame, in seconds. */
-    double averageFrameInterval;        /**< The rolling average time interval between frames, in seconds. This value has less volatility than the lastFrameInterval value. The inverse of this value is the rolling average frames per second. */
-    double averageFramesPerSecond;      /**< The rolling average number of frames per second. This is simply the inverse of the averageFrameInterval value. */
+    double lastFrameInterval;           /**< The time interval between this frame and the immediately previous frame, in milliseconds. */
+    double averageFrameInterval;        /**< The rolling average time interval between frames, in milliseconds. This value has less volatility than the lastFrameInterval value. */
+    double averageFramesPerSecond;      /**< The rolling average number of frames per second. This is simply 1000 divided by the averageFrameInterval value. */
 } MVKSwapchainPerformance;
 
 /** MoltenVK performance of a particular type of shader compilation event. */
 typedef struct {
     uint32_t count;             /**< The number of compilation events of this type. */
-    double averageInterval;     /**< The average time interval consumed by the compilation event, in seconds. */
-    double minimumInterval;     /**< The minimum time interval consumed by the compilation event, in seconds. */
-    double maximumInterval;     /**< The maximum time interval consumed by the compilation event, in seconds. */
+    double averageDuration;     /**< The average duration of the compilation event, in milliseconds. */
+    double minimumDuration;     /**< The minimum duration of the compilation event, in milliseconds. */
+    double maximumDuration;     /**< The maximum duration of the compilation event, in milliseconds. */
 } MVKShaderCompilationEventPerformance;
 
 /** MoltenVK performance of shader compilation events for a VkDevice. */
diff --git a/MoltenVK/MoltenVK/Commands/MVKCommandResourceFactory.mm b/MoltenVK/MoltenVK/Commands/MVKCommandResourceFactory.mm
index 09019c4..285394f 100644
--- a/MoltenVK/MoltenVK/Commands/MVKCommandResourceFactory.mm
+++ b/MoltenVK/MoltenVK/Commands/MVKCommandResourceFactory.mm
@@ -216,14 +216,14 @@
 }
 
 id<MTLFunction> MVKCommandResourceFactory::getFunctionNamed(const char* funcName) {
-    NSTimeInterval startTime = _device->getPerformanceTimestamp();
+    uint64_t startTime = _device->getPerformanceTimestamp();
     id<MTLFunction> mtlFunc = [[_mtlLibrary newFunctionWithName: @(funcName)] autorelease];
     _device->addShaderCompilationEventPerformance(_device->_shaderCompilationPerformance.functionRetrieval, startTime);
     return mtlFunc;
 }
 
 id<MTLRenderPipelineState> MVKCommandResourceFactory::newMTLRenderPipelineState(MTLRenderPipelineDescriptor* plDesc) {
-    NSTimeInterval startTime = _device->getPerformanceTimestamp();
+    uint64_t startTime = _device->getPerformanceTimestamp();
     NSError* err = nil;
     id<MTLRenderPipelineState> rps = [getMTLDevice() newRenderPipelineStateWithDescriptor: plDesc error: &err];    // retained
     MVKAssert( !err, "Could not create %s pipeline state: %s (code %li) %s", plDesc.label.UTF8String, err.localizedDescription.UTF8String, (long)err.code, err.localizedFailureReason.UTF8String);
@@ -239,7 +239,7 @@
 
 /** Initializes the Metal shaders used for command activity. */
 void MVKCommandResourceFactory::initMTLLibrary() {
-    NSTimeInterval startTime = _device->getPerformanceTimestamp();
+    uint64_t startTime = _device->getPerformanceTimestamp();
     @autoreleasepool {
         MTLCompileOptions* shdrOpts = [[MTLCompileOptions new] autorelease];
         NSError* err = nil;
diff --git a/MoltenVK/MoltenVK/GPUObjects/MVKDevice.h b/MoltenVK/MoltenVK/GPUObjects/MVKDevice.h
index efb2c3e..4376f39 100644
--- a/MoltenVK/MoltenVK/GPUObjects/MVKDevice.h
+++ b/MoltenVK/MoltenVK/GPUObjects/MVKDevice.h
@@ -389,24 +389,31 @@
                             MVKCommandUse cmdUse);
 
     /**
-     * If performance is being tracked, returns a marker indicating the current system time,
-     * otherwise returns zero.
-     *
-     * This marker is not guaranteed to be a meaningful value, but the difference between
-     * two calls to this function will indicate a time interval value measure in seconds.
+	 * If performance is being tracked, returns a monotonic timestamp value for use in performance timestamping, otherwise returns zero.
+	 *
+	 * The returned value corresponds to the number of CPU "ticks" since the app was initialized.
+	 *
+	 * Calling this function twice, subtracting the first value from the second, and then multiplying
+	 * the result by the value returned by mvkGetTimestampPeriod() will provide an indication of the
+	 * number of nanoseconds between the two calls. The convenience function mvkGetElapsedMilliseconds()
+	 * can be used to perform this calculation.
      */
-    inline NSTimeInterval getPerformanceTimestamp() {
-        return _mvkConfig.performanceTracking ? [NSDate timeIntervalSinceReferenceDate] : 0.0;
-    }
+    inline uint64_t getPerformanceTimestamp() {
+		return _mvkConfig.performanceTracking ? getPerformanceTimestampImpl() : 0;
+	}
 
     /**
      * If performance is being tracked, adds a shader compilation event with a duration
      * interval between the start and end times, to the given performance statistics.
      *
-     * If endTime is zero, the current time is used.
+     * If endTime is zero or not supplied, the current time is used.
      */
-    void addShaderCompilationEventPerformance(MVKShaderCompilationEventPerformance& shaderCompilationEvent,
-                                              NSTimeInterval startTime, NSTimeInterval endTime = 0.0);
+    inline void addShaderCompilationEventPerformance(MVKShaderCompilationEventPerformance& shaderCompilationEvent,
+													 uint64_t startTime, uint64_t endTime = 0) {
+		if (_mvkConfig.performanceTracking) {
+			addShaderCompilationEventPerformanceImpl(shaderCompilationEvent, startTime, endTime);
+		}
+	};
 
     /** Populates the specified statistics structure from the current shader performance statistics. */
     void getShaderCompilationPerformanceStatistics(MVKShaderCompilationPerformance* pShaderCompPerf);
@@ -503,6 +510,9 @@
 	MVKResource* removeResource(MVKResource* rez);
     void initPerformanceTracking();
     const char* getShaderCompilationEventName(MVKShaderCompilationEventPerformance& shaderCompilationEvent);
+	uint64_t getPerformanceTimestampImpl();
+	void addShaderCompilationEventPerformanceImpl(MVKShaderCompilationEventPerformance& shaderCompilationEvent,
+												  uint64_t startTime, uint64_t endTime);
 
 	MVKPhysicalDevice* _physicalDevice;
     MVKCommandResourceFactory* _commandResourceFactory;
@@ -573,27 +583,3 @@
 };
 
 
-#pragma mark -
-#pragma mark Functions
-
-/** 
- * Returns a monotonic timestamp value for use in Vulkan timestamping.
- *
- * The returned value corresponds to the number of CPU "ticks" since the app was initialized.
- *
- * Calling this value twice, subtracting the first value from the second, and then multiplying
- * the result by the VkPhysicalDeviceProperties.VkPhysicalDeviceLimits.timestampPeriod value 
- * will provide an indication of the number of nanoseconds between the two calls.
- */
-uint64_t mvkGetTimestamp();
-
-/** 
- * Returns the number of milliseconds since the app was initialized.
- *
- * This is a convenience function for tracking the time required to perform operations.
- * Accuracy may be improved by using the mvkGetTimestamp() function and following the
- * method provided in the notes for that function.
- */
-double mvkGetElapsedMilliseconds();
-
-
diff --git a/MoltenVK/MoltenVK/GPUObjects/MVKDevice.mm b/MoltenVK/MoltenVK/GPUObjects/MVKDevice.mm
index 0587d3a..77dd83d 100644
--- a/MoltenVK/MoltenVK/GPUObjects/MVKDevice.mm
+++ b/MoltenVK/MoltenVK/GPUObjects/MVKDevice.mm
@@ -35,7 +35,6 @@
 #include <MoltenVKSPIRVToMSLConverter/SPIRVToMSLConverter.h>
 #include "mvk_datatypes.h"
 #include "vk_mvk_moltenvk.h"
-#include <mach/mach_time.h>
 
 using namespace std;
 
@@ -49,15 +48,19 @@
 #	define MVKViewClass		NSView
 #endif
 
-// To display the MoltenVK logo watermark by default, define the MVK_WATERMARK build setting.
-#ifdef MVK_WATERMARK
-#   define MVK_DISPLAY_WATERMARK    1
+// To present surfaces using a command buffer, define the MVK_PRESENT_WITH_COMMAND_BUFFER build setting.
+#ifdef MVK_PRESENT_WITH_COMMAND_BUFFER
+#   define MVK_PRESENT_WITH_COMMAND_BUFFER_BOOL    1
 #else
-#   define MVK_DISPLAY_WATERMARK    0
+#   define MVK_PRESENT_WITH_COMMAND_BUFFER_BOOL    0
 #endif
 
-static uint64_t _mvkTimestampBase;
-static double _mvkTimestampPeriod;
+// To display the MoltenVK logo watermark by default, define the MVK_DISPLAY_WATERMARK build setting.
+#ifdef MVK_DISPLAY_WATERMARK
+#   define MVK_DISPLAY_WATERMARK_BOOL    1
+#else
+#   define MVK_DISPLAY_WATERMARK_BOOL    0
+#endif
 
 
 #pragma mark -
@@ -580,7 +583,7 @@
     _properties.limits.optimalBufferCopyRowPitchAlignment = 1;
 
 	_properties.limits.timestampComputeAndGraphics = VK_TRUE;
-	_properties.limits.timestampPeriod = _mvkTimestampPeriod;
+	_properties.limits.timestampPeriod = mvkGetTimestampPeriod();
 
     _properties.limits.pointSizeRange[0] = 1;
     _properties.limits.pointSizeRange[1] = 511;
@@ -1241,26 +1244,24 @@
 	}
 }
 
-#define asMS(V)     ((V) * 1000.0)
-void MVKDevice::addShaderCompilationEventPerformance(MVKShaderCompilationEventPerformance& shaderCompilationEvent,
-                                                     NSTimeInterval startTime, NSTimeInterval endTime) {
+uint64_t MVKDevice::getPerformanceTimestampImpl() { return mvkGetTimestamp(); }
 
-    if ( !_mvkConfig.performanceTracking ) { return; }
-
+void MVKDevice::addShaderCompilationEventPerformanceImpl(MVKShaderCompilationEventPerformance& shaderCompilationEvent,
+														 uint64_t startTime, uint64_t endTime) {
     lock_guard<mutex> lock(_shaderCompPerfLock);
 
-    NSTimeInterval currInterval = (endTime ? endTime : getPerformanceTimestamp()) - startTime;
-    shaderCompilationEvent.minimumInterval = min(currInterval, shaderCompilationEvent.minimumInterval);
-    shaderCompilationEvent.maximumInterval = max(currInterval, shaderCompilationEvent.maximumInterval);
-    double totalInverval = (shaderCompilationEvent.averageInterval * shaderCompilationEvent.count++) + currInterval;
-    shaderCompilationEvent.averageInterval = totalInverval / shaderCompilationEvent.count;
+	double currInterval = mvkGetElapsedMilliseconds(startTime, endTime);
+    shaderCompilationEvent.minimumDuration = min(currInterval, shaderCompilationEvent.minimumDuration);
+    shaderCompilationEvent.maximumDuration = max(currInterval, shaderCompilationEvent.maximumDuration);
+    double totalInverval = (shaderCompilationEvent.averageDuration * shaderCompilationEvent.count++) + currInterval;
+    shaderCompilationEvent.averageDuration = totalInverval / shaderCompilationEvent.count;
 
     MVKLogInfo("%s performance curr: %.3f ms, avg: %.3f ms, min: %.3f ms, max: %.3f ms, count: %d",
                getShaderCompilationEventName(shaderCompilationEvent),
-               asMS(currInterval),
-               asMS(shaderCompilationEvent.averageInterval),
-               asMS(shaderCompilationEvent.minimumInterval),
-               asMS(shaderCompilationEvent.maximumInterval),
+               currInterval,
+               shaderCompilationEvent.averageDuration,
+               shaderCompilationEvent.minimumDuration,
+               shaderCompilationEvent.maximumDuration,
                shaderCompilationEvent.count);
 }
 
@@ -1342,7 +1343,8 @@
     pCfg->debugMode = MVK_DEBUG;
     pCfg->supportLargeQueryPools = false;
     pCfg->shaderConversionFlipVertexY = true;
-    pCfg->displayWatermark = MVK_DISPLAY_WATERMARK;
+	pCfg->presentWithCommandBuffer = MVK_PRESENT_WITH_COMMAND_BUFFER_BOOL;
+    pCfg->displayWatermark = MVK_DISPLAY_WATERMARK_BOOL;
     pCfg->performanceTracking = MVK_DEBUG;
     pCfg->performanceLoggingFrameCount = MVK_DEBUG ? 300 : 0;
 
@@ -1386,9 +1388,9 @@
 void MVKDevice::initPerformanceTracking() {
     MVKShaderCompilationEventPerformance initPerf;
     initPerf.count = 0;
-    initPerf.averageInterval = 0.0;
-    initPerf.minimumInterval = numeric_limits<double>::max();
-    initPerf.maximumInterval = 0.0;
+    initPerf.averageDuration = 0.0;
+    initPerf.minimumDuration = numeric_limits<double>::max();
+    initPerf.maximumDuration = 0.0;
 
     _shaderCompilationPerformance.spirvToMSL = initPerf;
     _shaderCompilationPerformance.mslCompile = initPerf;
@@ -1405,34 +1407,3 @@
 }
 
 
-#pragma mark -
-#pragma mark Functions
-
-/** Initializes the timestamping functionality. */
-static void initTimestamps() {
-	_mvkTimestampBase = mach_absolute_time();
-	mach_timebase_info_data_t timebase;
-	mach_timebase_info(&timebase);
-	_mvkTimestampPeriod = (double)timebase.numer / (double)timebase.denom;
-//	MVKLogDebug("Initializing MoltenVK timestamping. Mach time: %llu. Time period: %d / %d = %.6f.", _mvkTimestampBase, timebase.numer, timebase.denom, _mvkTimestampPeriod);
-}
-
-uint64_t mvkGetTimestamp() { return mach_absolute_time() - _mvkTimestampBase; }
-
-double mvkGetElapsedMilliseconds() { return (double)mvkGetTimestamp() * _mvkTimestampPeriod / 1e6; }
-
-
-#pragma mark Library initialization
-
-/**
- * Called automatically when the framework is loaded and initialized.
- *
- * Initialize various device content.
- */
-static bool _mvkDevicesInitialized = false;
-__attribute__((constructor)) static void MVKInitDataTypes() {
-	if (_mvkDevicesInitialized ) { return; }
-	initTimestamps();
-	_mvkDevicesInitialized = true;
-}
-
diff --git a/MoltenVK/MoltenVK/GPUObjects/MVKPipeline.mm b/MoltenVK/MoltenVK/GPUObjects/MVKPipeline.mm
index 6826b12..dc3740d 100644
--- a/MoltenVK/MoltenVK/GPUObjects/MVKPipeline.mm
+++ b/MoltenVK/MoltenVK/GPUObjects/MVKPipeline.mm
@@ -239,7 +239,7 @@
 
         MTLRenderPipelineDescriptor* plDesc = getMTLRenderPipelineDescriptor(pCreateInfo);
         if (plDesc) {
-            NSTimeInterval startTime = _device->getPerformanceTimestamp();
+            uint64_t startTime = _device->getPerformanceTimestamp();
             NSError* psError = nil;
             _mtlPipelineState = [getMTLDevice() newRenderPipelineStateWithDescriptor: plDesc error: &psError];  // retained
             if (psError) {
@@ -411,7 +411,7 @@
         _mtlPipelineState = nil;
 
         NSError* psError = nil;
-        NSTimeInterval startTime = _device->getPerformanceTimestamp();
+        uint64_t startTime = _device->getPerformanceTimestamp();
         _mtlPipelineState = [getMTLDevice() newComputePipelineStateWithFunction: shaderFunc.mtlFunction error: &psError];  // retained
         if (psError) {
             setConfigurationResult(mvkNotifyErrorWithText(VK_ERROR_INITIALIZATION_FAILED, "Could not create compute pipeline:\n%s.", psError.description.UTF8String));
diff --git a/MoltenVK/MoltenVK/GPUObjects/MVKQueryPool.mm b/MoltenVK/MoltenVK/GPUObjects/MVKQueryPool.mm
index 73f3278..6e40c1c 100644
--- a/MoltenVK/MoltenVK/GPUObjects/MVKQueryPool.mm
+++ b/MoltenVK/MoltenVK/GPUObjects/MVKQueryPool.mm
@@ -19,6 +19,7 @@
 #include "MVKQueryPool.h"
 #include "MVKBuffer.h"
 #include "MVKCommandBuffer.h"
+#include "MVKOSExtensions.h"
 #include "MVKFoundation.h"
 #include "MVKLogging.h"
 
diff --git a/MoltenVK/MoltenVK/GPUObjects/MVKQueue.h b/MoltenVK/MoltenVK/GPUObjects/MVKQueue.h
index 0563520..a73c207 100644
--- a/MoltenVK/MoltenVK/GPUObjects/MVKQueue.h
+++ b/MoltenVK/MoltenVK/GPUObjects/MVKQueue.h
@@ -159,35 +159,6 @@
 
 
 #pragma mark -
-#pragma mark MVKQueueSubmission
-
-/** This is an abstract class for an operation that can be submitted to an MVKQueue. */
-class MVKQueueSubmission : public MVKBaseDeviceObject {
-
-public:
-
-	/** 
-	 * Executes this action on the queue and then disposes of this instance.
-	 *
-	 * Upon completion of this function, no further calls should be made to this instance.
-	 */
-	virtual void execute() = 0;
-
-	MVKQueueSubmission(MVKDevice* device, MVKQueue* queue);
-
-protected:
-	friend class MVKQueue;
-
-   void recordResult(VkResult vkResult);
-
-	MVKQueue* _queue;
-	MVKQueueSubmission* _prev;
-	MVKQueueSubmission* _next;
-	VkResult _submissionResult;
-};
-
-
-#pragma mark -
 #pragma mark MVKQueueCommandBufferSubmissionCountdown
 
 /** Counts down MTLCommandBuffers on behalf of an MVKQueueCommandBufferSubmission instance. */
@@ -208,6 +179,40 @@
 
 
 #pragma mark -
+#pragma mark MVKQueueSubmission
+
+/** This is an abstract class for an operation that can be submitted to an MVKQueue. */
+class MVKQueueSubmission : public MVKBaseDeviceObject {
+
+public:
+
+	/** 
+	 * Executes this action on the queue and then disposes of this instance.
+	 *
+	 * Upon completion of this function, no further calls should be made to this instance.
+	 */
+	virtual void execute() = 0;
+
+	MVKQueueSubmission(MVKDevice* device,
+					   MVKQueue* queue,
+					   uint32_t waitSemaphoreCount,
+					   const VkSemaphore* pWaitSemaphores);
+
+protected:
+	friend class MVKQueue;
+
+   void recordResult(VkResult vkResult);
+
+	MVKQueue* _queue;
+	MVKQueueSubmission* _prev;
+	MVKQueueSubmission* _next;
+	VkResult _submissionResult;
+	std::vector<MVKSemaphore*> _waitSemaphores;
+	bool _isAwaitingSemaphores;
+};
+
+
+#pragma mark -
 #pragma mark MVKQueueCommandBufferSubmission
 
 /** Submits the commands in a set of command buffers to the queue. */
@@ -251,7 +256,6 @@
 
 	MVKQueueCommandBufferSubmissionCountdown _cmdBuffCountdown;
 	std::vector<MVKCommandBuffer*> _cmdBuffers;
-	std::vector<MVKSemaphore*> _waitSemaphores;
 	std::vector<MVKSemaphore*> _signalSemaphores;
 	MVKFence* _fence;
     MVKCommandUse _cmdBuffUse;
@@ -280,7 +284,6 @@
 									 const VkPresentInfoKHR* pPresentInfo);
 
 protected:
-	std::vector<MVKSemaphore*> _waitSemaphores;
 	std::vector<MVKSwapchainImage*> _surfaceImages;
 };
 
diff --git a/MoltenVK/MoltenVK/GPUObjects/MVKQueue.mm b/MoltenVK/MoltenVK/GPUObjects/MVKQueue.mm
index ad43843..c1b4f5f 100644
--- a/MoltenVK/MoltenVK/GPUObjects/MVKQueue.mm
+++ b/MoltenVK/MoltenVK/GPUObjects/MVKQueue.mm
@@ -20,6 +20,7 @@
 #include "MVKSwapchain.h"
 #include "MVKSync.h"
 #include "MVKFoundation.h"
+#include "MVKOSExtensions.h"
 #include "MVKLogging.h"
 
 using namespace std;
@@ -231,21 +232,6 @@
 
 
 #pragma mark -
-#pragma mark MVKQueueSubmission
-
-MVKQueueSubmission::MVKQueueSubmission(MVKDevice* device, MVKQueue* queue) : MVKBaseDeviceObject(device) {
-	_queue = queue;
-	_prev = VK_NULL_HANDLE;
-	_next = VK_NULL_HANDLE;
-	_submissionResult = VK_SUCCESS;
-}
-
-void MVKQueueSubmission::recordResult(VkResult vkResult) {
-    if (_submissionResult == VK_SUCCESS) { _submissionResult = vkResult; }
-}
-
-
-#pragma mark -
 #pragma mark MVKQueueCommandBufferSubmissionCountdown
 
 MVKQueueCommandBufferSubmissionCountdown::MVKQueueCommandBufferSubmissionCountdown(MVKQueueCommandBufferSubmission* qSub) {
@@ -256,13 +242,34 @@
 
 
 #pragma mark -
+#pragma mark MVKQueueSubmission
+
+MVKQueueSubmission::MVKQueueSubmission(MVKDevice* device,
+									   MVKQueue* queue,
+									   uint32_t waitSemaphoreCount,
+									   const VkSemaphore* pWaitSemaphores) : MVKBaseDeviceObject(device) {
+	_queue = queue;
+	_prev = VK_NULL_HANDLE;
+	_next = VK_NULL_HANDLE;
+	_submissionResult = VK_SUCCESS;
+
+	_isAwaitingSemaphores = waitSemaphoreCount > 0;
+	_waitSemaphores.reserve(waitSemaphoreCount);
+	for (uint32_t i = 0; i < waitSemaphoreCount; i++) {
+		_waitSemaphores.push_back((MVKSemaphore*)pWaitSemaphores[i]);
+	}
+}
+
+void MVKQueueSubmission::recordResult(VkResult vkResult) {
+    if (_submissionResult == VK_SUCCESS) { _submissionResult = vkResult; }
+}
+
+
+#pragma mark -
 #pragma mark MVKQueueCommandBufferSubmission
 
 void MVKQueueCommandBufferSubmission::execute() {
 
-    // Wait on each wait semaphore in turn. It doesn't matter which order they are signalled.
-    for (auto& ws : _waitSemaphores) { ws->wait(); }
-
     // Execute each command buffer, or if no command buffers, but a fence or semaphores,
     // create an empty MTLCommandBuffer to trigger the semaphores and fence.
     if ( !_cmdBuffers.empty() ) {
@@ -292,6 +299,15 @@
 }
 
 void MVKQueueCommandBufferSubmission::commitActiveMTLCommandBuffer() {
+
+	// Wait on each wait semaphore in turn. It doesn't matter which order they are signalled.
+	// We have delayed this as long as possible to allow as much filling of the MTLCommandBuffer
+	// as possible before forcing a wait. We only wait for each semaphore once per submission.
+	if (_isAwaitingSemaphores) {
+		_isAwaitingSemaphores = false;
+		for (auto& ws : _waitSemaphores) { ws->wait(); }
+	}
+
 	[_activeMTLCommandBuffer commit];
 	_activeMTLCommandBuffer = nil;			// not retained
 }
@@ -322,7 +338,10 @@
 																 const VkSubmitInfo* pSubmit,
                                                                  VkFence fence,
                                                                  MVKCommandUse cmdBuffUse)
-        : MVKQueueSubmission(device, queue), _cmdBuffCountdown(this) {
+        : MVKQueueSubmission(device,
+							 queue,
+							 (pSubmit ? pSubmit->waitSemaphoreCount : 0),
+							 (pSubmit ? pSubmit->pWaitSemaphores : nullptr)), _cmdBuffCountdown(this) {
 
     // pSubmit can be null if just tracking the fence alone
     if (pSubmit) {
@@ -334,12 +353,6 @@
             recordResult(cb->getRecordingResult());
         }
 
-        uint32_t wsCnt = pSubmit->waitSemaphoreCount;
-        _waitSemaphores.reserve(wsCnt);
-        for (uint32_t i = 0; i < wsCnt; i++) {
-            _waitSemaphores.push_back((MVKSemaphore*)pSubmit->pWaitSemaphores[i]);
-        }
-
         uint32_t ssCnt = pSubmit->signalSemaphoreCount;
         _signalSemaphores.reserve(ssCnt);
         for (uint32_t i = 0; i < ssCnt; i++) {
@@ -359,22 +372,25 @@
 #define MVK_PRESENT_VIA_CMD_BUFFER		0
 
 void MVKQueuePresentSurfaceSubmission::execute() {
-
-    // Wait on each of the wait semaphores in turn. It doesn't matter which order they are signalled.
-    for (auto& ws : _waitSemaphores) { ws->wait(); }
-
     id<MTLCommandQueue> mtlQ = _queue->getMTLCommandQueue();
-	id<MTLCommandBuffer> mtlCmdBuff = nil;
 
-	if (_device->_mvkConfig.displayWatermark || MVK_PRESENT_VIA_CMD_BUFFER) {
-		mtlCmdBuff = [mtlQ commandBufferWithUnretainedReferences];
+	if (_device->_mvkConfig.presentWithCommandBuffer || _device->_mvkConfig.displayWatermark) {
+		// Create a command buffer, present surfaces via the command buffer,
+		// then wait on the semaphores before committing.
+		id<MTLCommandBuffer> mtlCmdBuff = [mtlQ commandBufferWithUnretainedReferences];
 		mtlCmdBuff.label = mvkMTLCommandBufferLabel(kMVKCommandUseQueuePresent);
+		[mtlCmdBuff enqueue];
+
+		for (auto& si : _surfaceImages) { si->presentCAMetalDrawable(mtlCmdBuff); }
+		for (auto& ws : _waitSemaphores) { ws->wait(); }
+
+		[mtlCmdBuff commit];
+	} else {
+		// Wait on semaphores, then present directly.
+		for (auto& ws : _waitSemaphores) { ws->wait(); }
+		for (auto& si : _surfaceImages) { si->presentCAMetalDrawable(nil); }
 	}
 
-    for (auto& si : _surfaceImages) { si->presentCAMetalDrawable(mtlCmdBuff); }
-
-	[mtlCmdBuff commit];
-
     // Let Xcode know the frame is done, in case command buffer is not used
     if (_device->_mvkConfig.debugMode) { [mtlQ insertDebugCaptureBoundary]; }
 
@@ -383,12 +399,11 @@
 
 MVKQueuePresentSurfaceSubmission::MVKQueuePresentSurfaceSubmission(MVKDevice* device,
 																   MVKQueue* queue,
-																   const VkPresentInfoKHR* pPresentInfo) : MVKQueueSubmission(device, queue) {
-	uint32_t wsCnt = pPresentInfo->waitSemaphoreCount;
-	_waitSemaphores.reserve(wsCnt);
-	for (uint32_t i = 0; i < wsCnt; i++) {
-		_waitSemaphores.push_back((MVKSemaphore*)pPresentInfo->pWaitSemaphores[i]);
-	}
+																   const VkPresentInfoKHR* pPresentInfo)
+		: MVKQueueSubmission(device,
+							 queue,
+							 pPresentInfo->waitSemaphoreCount,
+							 pPresentInfo->pWaitSemaphores) {
 
 	// Populate the array of swapchain images, testing each one for a change in surface size
 	_surfaceImages.reserve(pPresentInfo->swapchainCount);
diff --git a/MoltenVK/MoltenVK/GPUObjects/MVKShaderModule.mm b/MoltenVK/MoltenVK/GPUObjects/MVKShaderModule.mm
index e072415..fe2f522 100644
--- a/MoltenVK/MoltenVK/GPUObjects/MVKShaderModule.mm
+++ b/MoltenVK/MoltenVK/GPUObjects/MVKShaderModule.mm
@@ -48,7 +48,7 @@
     SPIRVEntryPoint& ep = _entryPoints[pShaderStage->pName];
     NSString* mtlFuncName = @(ep.mtlFunctionName.c_str());
 
-    NSTimeInterval startTime = _device->getPerformanceTimestamp();
+    uint64_t startTime = _device->getPerformanceTimestamp();
     id<MTLFunction> mtlFunc = [[_mtlLibrary newFunctionWithName: mtlFuncName] autorelease];
     _device->addShaderCompilationEventPerformance(_device->_shaderCompilationPerformance.functionRetrieval, startTime);
 
@@ -59,7 +59,7 @@
         if (_device->_pMetalFeatures->shaderSpecialization) {
             NSArray<MTLFunctionConstant*>* mtlFCs = mtlFunc.functionConstantsDictionary.allValues;
             if (mtlFCs.count) {
-                NSTimeInterval startTimeSpec = _device->getPerformanceTimestamp();
+                uint64_t startTimeSpec = _device->getPerformanceTimestamp();
 
                 // The Metal shader contains function constants and expects to be specialized
                 // Populate the Metal function constant values from the Vulkan specialization info.
@@ -123,7 +123,7 @@
 }
 
 MVKShaderLibrary::MVKShaderLibrary(MVKDevice* device, SPIRVToMSLConverter& mslConverter) : MVKBaseDeviceObject(device) {
-    NSTimeInterval startTime = _device->getPerformanceTimestamp();
+    uint64_t startTime = _device->getPerformanceTimestamp();
     @autoreleasepool {
         MTLCompileOptions* options = [[MTLCompileOptions new] autorelease]; // TODO: what compile options apply?
         NSError* err = nil;
@@ -140,7 +140,7 @@
 MVKShaderLibrary::MVKShaderLibrary(MVKDevice* device,
                                    const void* mslCompiledCodeData,
                                    size_t mslCompiledCodeLength) : MVKBaseDeviceObject(device) {
-    NSTimeInterval startTime = _device->getPerformanceTimestamp();
+    uint64_t startTime = _device->getPerformanceTimestamp();
     @autoreleasepool {
         dispatch_data_t shdrData = dispatch_data_create(mslCompiledCodeData,
                                                         mslCompiledCodeLength,
@@ -213,7 +213,7 @@
     MVKShaderLibrary* shLib = nullptr;
     bool shouldLogCode = _device->_mvkConfig.debugMode;
 
-    NSTimeInterval startTime = _device->getPerformanceTimestamp();
+    uint64_t startTime = _device->getPerformanceTimestamp();
     bool wasConverted = _converter.convert(*pContext, shouldLogCode, shouldLogCode, shouldLogCode);
     _device->addShaderCompilationEventPerformance(_device->_shaderCompilationPerformance.spirvToMSL, startTime);
 
diff --git a/MoltenVK/MoltenVK/GPUObjects/MVKSwapchain.h b/MoltenVK/MoltenVK/GPUObjects/MVKSwapchain.h
index c3f6efd..ceaa852 100644
--- a/MoltenVK/MoltenVK/GPUObjects/MVKSwapchain.h
+++ b/MoltenVK/MoltenVK/GPUObjects/MVKSwapchain.h
@@ -101,7 +101,7 @@
 	std::atomic<uint64_t> _currentAcquisitionID;
     CGSize _mtlLayerOrigDrawSize;
     MVKSwapchainPerformance _performanceStatistics;
-    double _lastFrameTime;
+    uint64_t _lastFrameTime;
     double _averageFrameIntervalFilterAlpha;
     uint32_t _currentPerfLogFrameCount;
 };
diff --git a/MoltenVK/MoltenVK/GPUObjects/MVKSwapchain.mm b/MoltenVK/MoltenVK/GPUObjects/MVKSwapchain.mm
index 171c969..ef309f3 100644
--- a/MoltenVK/MoltenVK/GPUObjects/MVKSwapchain.mm
+++ b/MoltenVK/MoltenVK/GPUObjects/MVKSwapchain.mm
@@ -147,21 +147,21 @@
 void MVKSwapchain::markFrameInterval() {
     if ( !(_device->_mvkConfig.performanceTracking || _licenseWatermark) ) { return; }
 
-    NSTimeInterval prevFrameTime = _lastFrameTime;
-    _lastFrameTime = [NSDate timeIntervalSinceReferenceDate];
-    _performanceStatistics.lastFrameInterval = _lastFrameTime - prevFrameTime;
+    uint64_t prevFrameTime = _lastFrameTime;
+    _lastFrameTime = mvkGetTimestamp();
+    _performanceStatistics.lastFrameInterval = mvkGetElapsedMilliseconds(prevFrameTime, _lastFrameTime);
 
     // Low pass filter.
     // y[i] := α * x[i] + (1-α) * y[i-1]  OR
     // y[i] := y[i-1] + α * (x[i] - y[i-1])
     _performanceStatistics.averageFrameInterval += _averageFrameIntervalFilterAlpha * (_performanceStatistics.lastFrameInterval - _performanceStatistics.averageFrameInterval);
-    _performanceStatistics.averageFramesPerSecond = 1.0 / _performanceStatistics.averageFrameInterval;
+    _performanceStatistics.averageFramesPerSecond = 1000.0 / _performanceStatistics.averageFrameInterval;
 
     uint32_t perfLogCntLimit = _device->_mvkConfig.performanceLoggingFrameCount;
     if (perfLogCntLimit > 0) {
         _currentPerfLogFrameCount++;
         if (_currentPerfLogFrameCount >= perfLogCntLimit) {
-            MVKLogInfo("Frame interval: %.3f. Avg frame interval: %.3f. FPS: %.3f.",
+            MVKLogInfo("Frame interval: %.2f ms. Avg frame interval: %.2f ms. FPS: %.2f.",
                        _performanceStatistics.lastFrameInterval,
                        _performanceStatistics.averageFrameInterval,
                        _performanceStatistics.averageFramesPerSecond);
@@ -254,7 +254,7 @@
     _performanceStatistics.averageFramesPerSecond = 0;
     _currentPerfLogFrameCount = 0;
 
-    _lastFrameTime = [NSDate timeIntervalSinceReferenceDate];
+	_lastFrameTime = mvkGetTimestamp();
 
     // Establish the alpha parameter of a low-pass filter for averaging frame intervals.
     double RC_over_dt = 10;
diff --git a/MoltenVK/MoltenVK/Utility/MVKOSExtensions.h b/MoltenVK/MoltenVK/Utility/MVKOSExtensions.h
index 2cd1d0e..f21cbe4 100644
--- a/MoltenVK/MoltenVK/Utility/MVKOSExtensions.h
+++ b/MoltenVK/MoltenVK/Utility/MVKOSExtensions.h
@@ -39,6 +39,29 @@
  */
 MVKOSVersion mvkOSVersion(void);
 
+/**
+ * Returns a monotonic timestamp value for use in Vulkan and performance timestamping.
+ *
+ * The returned value corresponds to the number of CPU "ticks" since the app was initialized.
+ *
+ * Calling this function twice, subtracting the first value from the second, and then multiplying
+ * the result by the value returned by mvkGetTimestampPeriod() will provide an indication of the
+ * number of nanoseconds between the two calls. The convenience function mvkGetElapsedMilliseconds()
+ * can be used to perform this calculation.
+ */
+uint64_t mvkGetTimestamp();
+
+/** Returns the number of nanoseconds between each increment of the value returned by mvkGetTimestamp(). */
+double mvkGetTimestampPeriod();
+
+/**
+ * Returns the number of milliseconds elapsed between startTimestamp and endTimestamp,
+ * each of which should be a value returned by mvkGetTimestamp().
+ * If endTimestamp is zero or not supplied, it is taken to be the current time.
+ * If startTimestamp is zero or not supplied, it is taken to be the time the app was initialized.
+ */
+double mvkGetElapsedMilliseconds(uint64_t startTimestamp = 0, uint64_t endTimestamp = 0);
+
 
 #pragma mark -
 #pragma mark MTLTextureDescriptor
diff --git a/MoltenVK/MoltenVK/Utility/MVKOSExtensions.mm b/MoltenVK/MoltenVK/Utility/MVKOSExtensions.mm
index d855860..719c031 100644
--- a/MoltenVK/MoltenVK/Utility/MVKOSExtensions.mm
+++ b/MoltenVK/MoltenVK/Utility/MVKOSExtensions.mm
@@ -23,6 +23,7 @@
 #include <vector>
 #include <mach/mach.h>
 #include <mach/mach_host.h>
+#include <mach/mach_time.h>
 #include <uuid/uuid.h>
 
 #if MVK_MACOS
@@ -50,6 +51,38 @@
     return _mvkOSVersion;
 }
 
+static uint64_t _mvkTimestampBase;		// mach_absolute_time() value captured by MVKInitTimestamps()
+static double _mvkTimestampPeriod;		// timebase numer / denom, computed by MVKInitTimestamps()
+
+uint64_t mvkGetTimestamp() { return mach_absolute_time() - _mvkTimestampBase; }	// ticks elapsed since the captured base
+
+double mvkGetTimestampPeriod() { return _mvkTimestampPeriod; }	// multiplier applied to tick deltas by mvkGetElapsedMilliseconds()
+
+double mvkGetElapsedMilliseconds(uint64_t startTimestamp, uint64_t endTimestamp) {
+	uint64_t endTS = (endTimestamp != 0) ? endTimestamp : mvkGetTimestamp();	// a zero end timestamp means "now"
+	return (double)(endTS - startTimestamp) * _mvkTimestampPeriod / 1e6;		// tick delta * period, then divided by 1e6
+}
+
+
+#pragma mark Library initialization
+
+/**
+ * Initialize timestamping capabilities on app startup: records the base Mach time and the tick period.
+ * Called automatically when the framework is loaded and initialized. Repeat invocations are ignored.
+ */
+static bool _mvkTimestampsInitialized = false;
+__attribute__((constructor)) static void MVKInitTimestamps() {
+	if (_mvkTimestampsInitialized) { return; }		// already initialized, keep the original base
+	_mvkTimestampsInitialized = true;
+
+	_mvkTimestampBase = mach_absolute_time();
+	mach_timebase_info_data_t timebase;
+	mach_timebase_info(&timebase);
+	_mvkTimestampPeriod = (double)timebase.numer / (double)timebase.denom;
+	MVKLogDebug("Initializing MoltenVK timestamping. Mach time: %llu. Time period: %u / %u = %.6f.", _mvkTimestampBase, timebase.numer, timebase.denom, _mvkTimestampPeriod);
+
+}
+
 
 #pragma mark -
 #pragma mark MTLTextureDescriptor
diff --git a/MoltenVK/MoltenVK/Utility/MVKWatermark.h b/MoltenVK/MoltenVK/Utility/MVKWatermark.h
index 6bb9da9..03ec750 100644
--- a/MoltenVK/MoltenVK/Utility/MVKWatermark.h
+++ b/MoltenVK/MoltenVK/Utility/MVKWatermark.h
@@ -166,6 +166,5 @@
     float _maxPosition;
     MVKWatermarkPosition _positionVelocity;
     MVKWatermarkPositionMode _positionMode;
-    NSTimeInterval _lastRenderTime;
 };
 
diff --git a/MoltenVK/MoltenVK/Vulkan/mvk_datatypes.mm b/MoltenVK/MoltenVK/Vulkan/mvk_datatypes.mm
index 22020b1..bb66511 100644
--- a/MoltenVK/MoltenVK/Vulkan/mvk_datatypes.mm
+++ b/MoltenVK/MoltenVK/Vulkan/mvk_datatypes.mm
@@ -28,19 +28,6 @@
 using namespace std;
 
 
-// Pixel normalization divisors
-#define kMax1	0x1				// = (2^1 - 1) = 1
-#define kMax2	0x3				// = (2^2 - 1) = 3
-#define kMax4	0xF				// = (2^4 - 1) = 15
-#define kMax5	0x1F			// = (2^5 - 1) = 31
-#define kMax6	0x3F			// = (2^6 - 1) = 63
-#define kMax8	0xFF			// = (2^8 - 1) = 255
-#define kMax10	0x3FF			// = (2^10 - 1) = 1023
-#define kMax16	0xFFFF			// = (2^16 - 1) = 32767
-#define kMax24	0xFFFFFF		// = (2^24 - 1) = 16777215
-#define kMax32	0xFFFFFFFF		// = (2^32 - 1) = 4294967295
-
-
 #pragma mark -
 #pragma mark Image properties
 
@@ -583,8 +570,15 @@
 }
 
 MVK_PUBLIC_SYMBOL VkFormatProperties mvkVkFormatProperties(VkFormat vkFormat) {
-    const MVKFormatDesc& fmtDesc = formatDescForVkFormat(vkFormat);
-    return fmtDesc.isSupported() ? fmtDesc.properties : (VkFormatProperties)MVK_FMT_NO_FEATS;
+	const MVKFormatDesc& fmtDesc = formatDescForVkFormat(vkFormat);
+	if (fmtDesc.isSupported()) { return fmtDesc.properties; }
+
+	// The format is not supported as a texture format, but it may still be
+	// usable as a vertex buffer format. Start from MVK_FMT_NO_FEATS and carry
+	// over only the vertex-buffer bit from the format description.
+	VkFormatProperties vtxOnlyProps = MVK_FMT_NO_FEATS;
+	vtxOnlyProps.bufferFeatures |= fmtDesc.properties.bufferFeatures & VK_FORMAT_FEATURE_VERTEX_BUFFER_BIT;
+	return vtxOnlyProps;
 }
 
 MVK_PUBLIC_SYMBOL const char* mvkVkFormatName(VkFormat vkFormat) {
diff --git a/MoltenVKShaderConverter/MoltenVKShaderConverter.xcodeproj/xcshareddata/xcschemes/MoltenVKShaderConverter.xcscheme b/MoltenVKShaderConverter/MoltenVKShaderConverter.xcodeproj/xcshareddata/xcschemes/MoltenVKShaderConverter.xcscheme
index d9a7a70..de178f0 100644
--- a/MoltenVKShaderConverter/MoltenVKShaderConverter.xcodeproj/xcshareddata/xcschemes/MoltenVKShaderConverter.xcscheme
+++ b/MoltenVKShaderConverter/MoltenVKShaderConverter.xcodeproj/xcshareddata/xcschemes/MoltenVKShaderConverter.xcscheme
@@ -74,19 +74,19 @@
          </CommandLineArgument>
          <CommandLineArgument
             argument = "-gi"
-            isEnabled = "NO">
+            isEnabled = "YES">
          </CommandLineArgument>
          <CommandLineArgument
-            argument = "/Users/bill/Documents/Dev/iOSProjects/Molten/MoltenVK-bh/External/SPIRV-Cross/shaders-msl/vert/pointsize.vert"
-            isEnabled = "NO">
+            argument = "/Users/bill/Documents/Dev/iOSProjects/Molten/MoltenVK-bh/External/SPIRV-Cross/shaders-msl/comp/struct-nested.comp"
+            isEnabled = "YES">
          </CommandLineArgument>
          <CommandLineArgument
             argument = "-si"
-            isEnabled = "YES">
+            isEnabled = "NO">
          </CommandLineArgument>
          <CommandLineArgument
             argument = "/Users/bill/Documents/Dev/iOSProjects/Molten/Support/Valve/Dota2/shader-issues/sample_mask/sample_mask.spv"
-            isEnabled = "YES">
+            isEnabled = "NO">
          </CommandLineArgument>
          <CommandLineArgument
             argument = "-mo"