Optimize MVKQueueCommandBufferSubmission command buffer vector pre-allocation.

Add MVKQueueFullCommandBufferSubmission template class to support MVKSmallVector
allocations of varying size based on command buffer count and keep allocs on the stack.
In MVKQueueCommandBufferSubmission, optimize wait and signal vector preallocations.
Clean up vector preallocation in MVKDescriptorSet and MVKDescriptorTypePreallocation.
diff --git a/MoltenVK/MoltenVK/GPUObjects/MVKDescriptorSet.h b/MoltenVK/MoltenVK/GPUObjects/MVKDescriptorSet.h
index 17b39e6..1ed6b15 100644
--- a/MoltenVK/MoltenVK/GPUObjects/MVKDescriptorSet.h
+++ b/MoltenVK/MoltenVK/GPUObjects/MVKDescriptorSet.h
@@ -138,7 +138,7 @@
 
 	MVKDescriptorSetLayout* _layout;
 	MVKDescriptorPool* _pool;
-	MVKSmallVector<MVKDescriptor*, 1> _descriptors;		// same overhead as count
+	MVKSmallVector<MVKDescriptor*> _descriptors;
 };
 
 
@@ -166,7 +166,7 @@
 	void reset();
 
 	MVKSmallVector<DescriptorClass> _descriptors;
-	MVKSmallVector<bool, 8> _availability;		// same overhead as count
+	MVKSmallVector<bool> _availability;
 	uint32_t _nextAvailableIndex;
 	bool _supportAvailability;
 };
diff --git a/MoltenVK/MoltenVK/GPUObjects/MVKQueue.h b/MoltenVK/MoltenVK/GPUObjects/MVKQueue.h
index ca87224..7788cf6 100644
--- a/MoltenVK/MoltenVK/GPUObjects/MVKQueue.h
+++ b/MoltenVK/MoltenVK/GPUObjects/MVKQueue.h
@@ -179,7 +179,7 @@
 	friend class MVKQueue;
 
 	MVKQueue* _queue;
-	MVKSmallVector<MVKSemaphore*, 8> _waitSemaphores;
+	MVKSmallVector<MVKSemaphore*> _waitSemaphores;
 	bool _trackPerformance;
 };
 
@@ -193,10 +193,7 @@
 public:
 	void execute() override;
 
-	/** Constructs an instance for the queue. */
-	MVKQueueCommandBufferSubmission(MVKQueue* queue,
-									const VkSubmitInfo* pSubmit,
-									VkFence fence);
+	MVKQueueCommandBufferSubmission(MVKQueue* queue, const VkSubmitInfo* pSubmit, VkFence fence);
 
 protected:
 	friend MVKCommandBuffer;
@@ -205,14 +202,44 @@
 	void setActiveMTLCommandBuffer(id<MTLCommandBuffer> mtlCmdBuff);
 	void commitActiveMTLCommandBuffer(bool signalCompletion = false);
 	void finish();
+	virtual void submitCommandBuffers() {}
 
-	MVKSmallVector<MVKCommandBuffer*, 32> _cmdBuffers;
-	MVKSmallVector<MVKSemaphore*, 8> _signalSemaphores;
+	MVKSmallVector<MVKSemaphore*> _signalSemaphores;
 	MVKFence* _fence;
 	id<MTLCommandBuffer> _activeMTLCommandBuffer;
 };
 
 
+/**
+ * Submits the commands in a set of command buffers to the queue.
+ * Template class to balance vector pre-allocations between very common low counts and fewer larger counts: N is passed as the size argument of the MVKSmallVector that holds the command buffers.
+ */
+template <size_t N>
+class MVKQueueFullCommandBufferSubmission : public MVKQueueCommandBufferSubmission {
+
+public:
+	MVKQueueFullCommandBufferSubmission(MVKQueue* queue, const VkSubmitInfo* pSubmit, VkFence fence) :
+		MVKQueueCommandBufferSubmission(queue, pSubmit, fence) {
+
+			// pSubmit can be null if just tracking the fence alone; in that case no command buffers are collected
+			if (pSubmit) {
+				uint32_t cbCnt = pSubmit->commandBufferCount;
+				_cmdBuffers.reserve(cbCnt);	// request room for cbCnt entries up front, before the loop appends them
+				for (uint32_t i = 0; i < cbCnt; i++) {
+					MVKCommandBuffer* cb = MVKCommandBuffer::getMVKCommandBuffer(pSubmit->pCommandBuffers[i]);
+					_cmdBuffers.push_back(cb);
+					setConfigurationResult(cb->getConfigurationResult());	// pass each command buffer's configuration result on to this submission
+				}
+			}
+		}
+
+protected:
+	void submitCommandBuffers() override { for (auto& cb : _cmdBuffers) { cb->submit(this); } }	// overrides the base-class hook: submits each held command buffer, in insertion order
+
+	MVKSmallVector<MVKCommandBuffer*, N> _cmdBuffers;	// command buffers taken from pSubmit->pCommandBuffers by the constructor
+};
+
+
 #pragma mark -
 #pragma mark MVKQueuePresentSurfaceSubmission
 
diff --git a/MoltenVK/MoltenVK/GPUObjects/MVKQueue.mm b/MoltenVK/MoltenVK/GPUObjects/MVKQueue.mm
index b6f0183..0307d85 100644
--- a/MoltenVK/MoltenVK/GPUObjects/MVKQueue.mm
+++ b/MoltenVK/MoltenVK/GPUObjects/MVKQueue.mm
@@ -97,7 +97,27 @@
     VkResult rslt = VK_SUCCESS;
     for (uint32_t sIdx = 0; sIdx < submitCount; sIdx++) {
         VkFence fenceOrNil = (sIdx == (submitCount - 1)) ? fence : VK_NULL_HANDLE; // last one gets the fence
-        VkResult subRslt = submit(new MVKQueueCommandBufferSubmission(this, &pSubmits[sIdx], fenceOrNil));
+
+		const VkSubmitInfo* pVkSub = &pSubmits[sIdx];
+		MVKQueueCommandBufferSubmission* mvkSub;
+		uint32_t cbCnt = pVkSub->commandBufferCount;
+		if (cbCnt <= 1) {
+			mvkSub = new MVKQueueFullCommandBufferSubmission<1>(this, pVkSub, fenceOrNil);
+		} else if (cbCnt <= 16) {
+			mvkSub = new MVKQueueFullCommandBufferSubmission<16>(this, pVkSub, fenceOrNil);
+		} else if (cbCnt <= 32) {
+			mvkSub = new MVKQueueFullCommandBufferSubmission<32>(this, pVkSub, fenceOrNil);
+		} else if (cbCnt <= 64) {
+			mvkSub = new MVKQueueFullCommandBufferSubmission<64>(this, pVkSub, fenceOrNil);
+		} else if (cbCnt <= 128) {
+			mvkSub = new MVKQueueFullCommandBufferSubmission<128>(this, pVkSub, fenceOrNil);
+		} else if (cbCnt <= 256) {
+			mvkSub = new MVKQueueFullCommandBufferSubmission<256>(this, pVkSub, fenceOrNil);
+		} else {
+			mvkSub = new MVKQueueFullCommandBufferSubmission<512>(this, pVkSub, fenceOrNil);
+		}
+
+        VkResult subRslt = submit(mvkSub);
         if (rslt == VK_SUCCESS) { rslt = subRslt; }
     }
     return rslt;
@@ -226,7 +246,7 @@
 	for (auto* ws : _waitSemaphores) { ws->encodeWait(getActiveMTLCommandBuffer()); }
 
 	// Submit each command buffer.
-	for (auto& cb : _cmdBuffers) { cb->submit(this); }
+	submitCommandBuffers();
 
 	// If using encoded semaphore signaling, do so now.
 	for (auto* ss : _signalSemaphores) { ss->encodeSignal(getActiveMTLCommandBuffer()); }
@@ -307,14 +327,6 @@
 
     // pSubmit can be null if just tracking the fence alone
     if (pSubmit) {
-        uint32_t cbCnt = pSubmit->commandBufferCount;
-        _cmdBuffers.reserve(cbCnt);
-        for (uint32_t i = 0; i < cbCnt; i++) {
-            MVKCommandBuffer* cb = MVKCommandBuffer::getMVKCommandBuffer(pSubmit->pCommandBuffers[i]);
-            _cmdBuffers.push_back(cb);
-            setConfigurationResult(cb->getConfigurationResult());
-        }
-
         uint32_t ssCnt = pSubmit->signalSemaphoreCount;
         _signalSemaphores.reserve(ssCnt);
         for (uint32_t i = 0; i < ssCnt; i++) {