diff --git a/src/d3d11/d3d11_context.cpp b/src/d3d11/d3d11_context.cpp
index 16b396aa9..f9aacc317 100644
--- a/src/d3d11/d3d11_context.cpp
+++ b/src/d3d11/d3d11_context.cpp
@@ -3578,12 +3578,7 @@ namespace dxvk {
 
     EmitCsCmd<VkDrawIndirectCommand>(D3D11CmdType::Draw, 1u,
       [] (DxvkContext* ctx, const VkDrawIndirectCommand* draws, size_t count) {
-        for (size_t i = 0; i < count; i++) {
-          ctx->draw(draws[i].vertexCount,
-                    draws[i].instanceCount,
-                    draws[i].firstVertex,
-                    draws[i].firstInstance);
-        }
+        ctx->draw(count, draws);
       });
 
     new (m_csData->first()) VkDrawIndirectCommand(draw);
@@ -3608,13 +3603,7 @@ namespace dxvk {
 
     EmitCsCmd<VkDrawIndexedIndirectCommand>(D3D11CmdType::DrawIndexed, 1u,
       [] (DxvkContext* ctx, const VkDrawIndexedIndirectCommand* draws, size_t count) {
-        for (size_t i = 0; i < count; i++) {
-          ctx->drawIndexed(draws[i].indexCount,
-                           draws[i].instanceCount,
-                           draws[i].firstIndex,
-                           draws[i].vertexOffset,
-                           draws[i].firstInstance);
-        }
+        ctx->drawIndexed(count, draws);
       });
 
     new (m_csData->first()) VkDrawIndexedIndirectCommand(draw);
diff --git a/src/d3d11/d3d11_video.cpp b/src/d3d11/d3d11_video.cpp
index a5b20351d..bdcd80daf 100644
--- a/src/d3d11/d3d11_video.cpp
+++ b/src/d3d11/d3d11_video.cpp
@@ -1312,7 +1312,11 @@ namespace dxvk {
       for (uint32_t i = 0; i < cViews.size(); i++)
         ctx->bindResourceImageView(VK_SHADER_STAGE_FRAGMENT_BIT, 1 + i, Rc<DxvkImageView>(cViews[i]));
 
-      ctx->draw(3, 1, 0, 0);
+      VkDrawIndirectCommand draw = { };
+      draw.vertexCount   = 3u;
+      draw.instanceCount = 1u;
+
+      ctx->draw(1, &draw);
 
       for (uint32_t i = 0; i < cViews.size(); i++)
         ctx->bindResourceImageView(VK_SHADER_STAGE_FRAGMENT_BIT, 1 + i, nullptr);
diff --git a/src/d3d9/d3d9_device.cpp b/src/d3d9/d3d9_device.cpp
index f704723db..51e190405 100644
--- a/src/d3d9/d3d9_device.cpp
+++ b/src/d3d9/d3d9_device.cpp
@@ -2890,9 +2890,12 @@ namespace dxvk {
 
       // Tests on Windows show that D3D9 does not do non-indexed instanced draws.
 
-      ctx->draw(
-        vertexCount, 1,
-        cStartVertex, 0);
+      VkDrawIndirectCommand draw = { };
+      draw.vertexCount   = vertexCount;
+      draw.instanceCount = 1u;
+      draw.firstVertex   = cStartVertex;
+
+      ctx->draw(1u, &draw);
     });
 
     return D3D_OK;
@@ -2939,10 +2942,13 @@ namespace dxvk {
 
       ApplyPrimitiveType(ctx, cPrimType);
 
-      ctx->drawIndexed(
-        drawInfo.vertexCount, drawInfo.instanceCount,
-        cStartIndex,
-        cBaseVertexIndex, 0);
+      VkDrawIndexedIndirectCommand draw = { };
+      draw.indexCount    = drawInfo.vertexCount;
+      draw.instanceCount = drawInfo.instanceCount;
+      draw.firstIndex    = cStartIndex;
+      draw.vertexOffset  = cBaseVertexIndex;
+
+      ctx->drawIndexed(1u, &draw);
     });
 
     return D3D_OK;
@@ -2981,11 +2987,12 @@ namespace dxvk {
       ApplyPrimitiveType(ctx, cPrimType);
 
       // Tests on Windows show that D3D9 does not do non-indexed instanced draws.
+      VkDrawIndirectCommand draw = { };
+      draw.vertexCount = cVertexCount;
+      draw.instanceCount = 1u;
 
       ctx->bindVertexBuffer(0, std::move(cBufferSlice), cStride);
-      ctx->draw(
-        cVertexCount, 1,
-        0, 0);
+      ctx->draw(1u, &draw);
       ctx->bindVertexBuffer(0, DxvkBufferSlice(), 0);
     });
 
@@ -3045,12 +3052,13 @@ namespace dxvk {
 
       ApplyPrimitiveType(ctx, cPrimType);
 
+      VkDrawIndexedIndirectCommand draw = { };
+      draw.indexCount    = drawInfo.vertexCount;
+      draw.instanceCount = drawInfo.instanceCount;
+
       ctx->bindVertexBuffer(0, cBufferSlice.subSlice(0, cVertexSize), cStride);
       ctx->bindIndexBuffer(cBufferSlice.subSlice(cVertexSize, cBufferSlice.length() - cVertexSize), cIndexType);
-      ctx->drawIndexed(
-        drawInfo.vertexCount, drawInfo.instanceCount,
-        0,
-        0, 0);
+      ctx->drawIndexed(1u, &draw);
       ctx->bindVertexBuffer(0, DxvkBufferSlice(), 0);
       ctx->bindIndexBuffer(DxvkBufferSlice(), VK_INDEX_TYPE_UINT32);
     });
@@ -3162,11 +3170,14 @@ namespace dxvk {
       // to avoid val errors / UB.
       ctx->bindShader<VK_SHADER_STAGE_FRAGMENT_BIT>(nullptr);
 
+      VkDrawIndirectCommand draw = { };
+      draw.vertexCount   = drawInfo.vertexCount;
+      draw.instanceCount = drawInfo.instanceCount;
+      draw.firstVertex   = cStartIndex;
+
       ctx->bindShader<VK_SHADER_STAGE_GEOMETRY_BIT>(std::move(shader));
       ctx->bindUniformBuffer(VK_SHADER_STAGE_GEOMETRY_BIT, getSWVPBufferSlot(), std::move(cBufferSlice));
-      ctx->draw(
-        drawInfo.vertexCount, drawInfo.instanceCount,
-        cStartIndex, 0);
+      ctx->draw(1u, &draw);
       ctx->bindUniformBuffer(VK_SHADER_STAGE_GEOMETRY_BIT, getSWVPBufferSlot(), DxvkBufferSlice());
       ctx->bindShader<VK_SHADER_STAGE_GEOMETRY_BIT>(nullptr);
     });
diff --git a/src/dxvk/dxvk_context.cpp b/src/dxvk/dxvk_context.cpp
index e14720e2f..d4ad98b09 100644
--- a/src/dxvk/dxvk_context.cpp
+++ b/src/dxvk/dxvk_context.cpp
@@ -49,6 +49,11 @@ namespace dxvk {
     if (m_device->features().khrMaintenance5.maintenance5)
       m_features.set(DxvkContextFeature::IndexBufferRobustness);
 
+    // Check whether we can batch direct draws
+    if (m_device->features().extMultiDraw.multiDraw
+     && m_device->properties().extMultiDraw.maxMultiDrawCount >= DirectMultiDrawBatchSize)
+      m_features.set(DxvkContextFeature::DirectMultiDraw);
+
     // Add a fast path to query debug utils support
     if (m_device->isDebugEnabled())
       m_features.set(DxvkContextFeature::DebugUtils);
@@ -922,15 +927,9 @@ namespace dxvk {
   
   
   void DxvkContext::draw(
-          uint32_t vertexCount,
-          uint32_t instanceCount,
-          uint32_t firstVertex,
-          uint32_t firstInstance) {
-    if (this->commitGraphicsState<false, false>()) {
-      m_cmd->cmdDraw(
-        vertexCount, instanceCount,
-        firstVertex, firstInstance);
-    }
+          uint32_t          count,
+    const VkDrawIndirectCommand* draws) {
+    drawGeneric<false>(count, draws);
   }
   
   
@@ -953,20 +952,12 @@ namespace dxvk {
   
   
   void DxvkContext::drawIndexed(
-          uint32_t indexCount,
-          uint32_t instanceCount,
-          uint32_t firstIndex,
-          int32_t  vertexOffset,
-          uint32_t firstInstance) {
-    if (this->commitGraphicsState<true, false>()) {
-      m_cmd->cmdDrawIndexed(
-        indexCount, instanceCount,
-        firstIndex, vertexOffset,
-        firstInstance);
-    }
+          uint32_t          count,
+    const VkDrawIndexedIndirectCommand* draws) {
+    drawGeneric<true>(count, draws);
   }
-  
-  
+
+
   void DxvkContext::drawIndexedIndirect(
           VkDeviceSize      offset,
           uint32_t          count,
@@ -1689,6 +1680,116 @@ namespace dxvk {
   }
 
 
+  template<bool Indexed, typename T>
+  void DxvkContext::drawGeneric( // Emits `count` direct draws read from `draws`
+          uint32_t                  count,
+    const T*                        draws) {
+    if (this->commitGraphicsState<Indexed, false>()) { // nothing is drawn if this returns false
+      if (count == 1u) {
+        // Most common case: emit one plain draw and skip all batching logic
+        if constexpr (Indexed) {
+          m_cmd->cmdDrawIndexed(draws->indexCount, draws->instanceCount,
+            draws->firstIndex, draws->vertexOffset, draws->firstInstance);
+        } else {
+          m_cmd->cmdDraw(draws->vertexCount, draws->instanceCount,
+            draws->firstVertex, draws->firstInstance);
+        }
+      } else if (unlikely(needsDrawBarriers())) {
+        // If the current pipeline has storage resource hazards, unroll the draws
+        // and re-commit graphics state between them to insert a barrier after each one.
+        for (uint32_t i = 0; i < count; i++) {
+          if (i) // state for the first draw was already committed above
+            this->commitGraphicsState<Indexed, false>(); // NOTE(review): result is ignored here - confirm this cannot fail
+
+          if constexpr (Indexed) {
+            m_cmd->cmdDrawIndexed(draws[i].indexCount, draws[i].instanceCount,
+              draws[i].firstIndex, draws[i].vertexOffset, draws[i].firstInstance);
+          } else {
+            m_cmd->cmdDraw(draws[i].vertexCount, draws[i].instanceCount,
+              draws[i].firstVertex, draws[i].firstInstance);
+          }
+        }
+      } else {
+        using MultiDrawInfo = std::conditional_t<Indexed,
+          VkMultiDrawIndexedInfoEXT, VkMultiDrawInfoEXT>;
+
+        // Intentionally don't initialize this; we'll probably not use
+        // the full batch size anyway, so doing so would be wasteful.
+        std::array<MultiDrawInfo, DirectMultiDrawBatchSize> batch;
+
+        uint32_t instanceCount = 0u; // instance count shared by every draw in the current batch
+        uint32_t instanceIndex = 0u; // first instance shared by every draw in the current batch
+
+        uint32_t batchSize = 0u; // number of valid entries currently stored in `batch`
+
+        for (uint32_t i = 0; i < count; i++) {
+          if (!batchSize) { // the first draw of a new batch defines its instance parameters
+            instanceCount = draws[i].instanceCount;
+            instanceIndex = draws[i].firstInstance;
+          }
+
+          if constexpr (Indexed) {
+            auto& drawInfo = batch[batchSize++];
+            drawInfo.firstIndex = draws[i].firstIndex;
+            drawInfo.indexCount = draws[i].indexCount;
+            drawInfo.vertexOffset = draws[i].vertexOffset;
+          } else {
+            auto& drawInfo = batch[batchSize++];
+            drawInfo.firstVertex = draws[i].firstVertex;
+            drawInfo.vertexCount = draws[i].vertexCount;
+          }
+
+          bool emitDraw = i + 1u == count || batchSize == DirectMultiDrawBatchSize;
+
+          if (!emitDraw) { // otherwise flush early when the next draw uses different instance parameters
+            const auto& next = draws[i + 1u];
+
+            emitDraw = instanceCount != next.instanceCount
+                    || instanceIndex != next.firstInstance;
+          }
+
+          if (emitDraw) {
+            if (m_features.test(DxvkContextFeature::DirectMultiDraw)) {
+              if constexpr (Indexed) {
+                m_cmd->cmdDrawMultiIndexed(batchSize, batch.data(),
+                  instanceCount, instanceIndex);
+              } else {
+                m_cmd->cmdDrawMulti(batchSize, batch.data(),
+                  instanceCount, instanceIndex);
+              }
+            } else {
+              // This path only really exists for consistency reasons; all drivers
+              // we care about support MultiDraw natively, but debug tools may not.
+              if (unlikely(m_features.test(DxvkContextFeature::DebugUtils))) {
+                const char* procName = Indexed ? "vkCmdDrawMultiIndexedEXT" : "vkCmdDrawMultiEXT";
+                m_cmd->cmdBeginDebugUtilsLabel(DxvkCmdBuffer::ExecBuffer,
+                  vk::makeLabel(0u, str::format(procName, "(", batchSize, ")").c_str()));
+              }
+
+              for (uint32_t i = 0; i < batchSize; i++) { // NOTE(review): this `i` shadows the outer loop index
+                const auto& entry = batch[i];
+
+                if constexpr (Indexed) {
+                  m_cmd->cmdDrawIndexed(entry.indexCount, instanceCount,
+                    entry.firstIndex, entry.vertexOffset, instanceIndex);
+                } else {
+                  m_cmd->cmdDraw(entry.vertexCount, instanceCount,
+                    entry.firstVertex, instanceIndex);
+                }
+              }
+
+              if (unlikely(m_features.test(DxvkContextFeature::DebugUtils)))
+                m_cmd->cmdEndDebugUtilsLabel(DxvkCmdBuffer::ExecBuffer);
+            }
+
+            batchSize = 0u; // batch was submitted; the next draw starts a fresh one
+          }
+        }
+      }
+    }
+  }
+
+
   template<bool Indexed>
   void DxvkContext::drawIndirectGeneric(
           VkDeviceSize              offset,
@@ -1698,11 +1799,8 @@ namespace dxvk {
     if (this->commitGraphicsState<Indexed, true>()) {
       auto descriptor = m_state.id.argBuffer.getDescriptor();
 
-      if (unroll) {
-        // Need to do this check after initially setting up the pipeline
-        unroll = m_state.gp.flags.test(DxvkGraphicsPipelineFlag::UnrollMergedDraws)
-              && !m_barrierControl.test(DxvkBarrierControl::GraphicsAllowReadWriteOverlap);
-      }
+      if (unroll)
+        unroll = needsDrawBarriers();
 
       // If draws are merged but the pipeline has order-dependent stores, submit
       // one draw at a time as well as barriers in between. Otherwise, keep the
@@ -2854,6 +2952,12 @@ namespace dxvk {
   }
 
 
+  bool DxvkContext::needsDrawBarriers() { // Tells draw paths whether multiple draws must be unrolled
+    return m_state.gp.flags.test(DxvkGraphicsPipelineFlag::UnrollMergedDraws) // true only if the pipeline sets this flag
+      && !m_barrierControl.test(DxvkBarrierControl::GraphicsAllowReadWriteOverlap); // and read/write overlap is not allowed
+  }
+
+
   void DxvkContext::beginRenderPassDebugRegion() {
     bool hasColorAttachments = false;
     bool hasDepthAttachment = m_state.om.renderTargets.depth.view != nullptr;
diff --git a/src/dxvk/dxvk_context.h b/src/dxvk/dxvk_context.h
index 921405472..b32a12b50 100644
--- a/src/dxvk/dxvk_context.h
+++ b/src/dxvk/dxvk_context.h
@@ -32,6 +32,8 @@ namespace dxvk {
   class DxvkContext : public RcObject {
     constexpr static VkDeviceSize MaxDiscardSizeInRp = 256u << 10u;
     constexpr static VkDeviceSize MaxDiscardSize     =  16u << 10u;
+
+    constexpr static uint32_t DirectMultiDrawBatchSize = 256u;
   public:
     
     DxvkContext(const Rc<DxvkDevice>& device);
@@ -744,17 +746,13 @@ namespace dxvk {
     /**
      * \brief Draws primitive without using an index buffer
      * 
-     * \param [in] vertexCount Number of vertices to draw
-     * \param [in] instanceCount Number of instances to render
-     * \param [in] firstVertex First vertex in vertex buffer
-     * \param [in] firstInstance First instance ID
+     * \param [in] count Number of draws
+     * \param [in] draws Draw parameters
      */
     void draw(
-            uint32_t          vertexCount,
-            uint32_t          instanceCount,
-            uint32_t          firstVertex,
-            uint32_t          firstInstance);
-    
+            uint32_t          count,
+      const VkDrawIndirectCommand* draws);
+
     /**
      * \brief Indirect draw call
      * 
@@ -791,19 +789,13 @@ namespace dxvk {
     /**
      * \brief Draws primitives using an index buffer
      * 
-     * \param [in] indexCount Number of indices to draw
-     * \param [in] instanceCount Number of instances to render
-     * \param [in] firstIndex First index within the index buffer
-     * \param [in] vertexOffset Vertex ID that corresponds to index 0
-     * \param [in] firstInstance First instance ID
+     * \param [in] count Number of draws
+     * \param [in] draws Draw parameters
      */
     void drawIndexed(
-            uint32_t indexCount,
-            uint32_t instanceCount,
-            uint32_t firstIndex,
-            int32_t  vertexOffset,
-            uint32_t firstInstance);
-    
+            uint32_t          count,
+      const VkDrawIndexedIndirectCommand* draws);
+
     /**
      * \brief Indirect indexed draw call
      * 
@@ -1595,6 +1587,11 @@ namespace dxvk {
       const Rc<DxvkBuffer>&       buffer,
             VkDeviceSize          offset);
 
+    template<bool Indexed, typename T>
+    void drawGeneric(
+            uint32_t              count,
+      const T*                    draws);
+
     template<bool Indexed>
     void drawIndirectGeneric(
             VkDeviceSize          offset,
@@ -2103,7 +2100,7 @@ namespace dxvk {
       return pred(DxvkAccess::Read);
     }
 
-    void invalidateWriteAfterWriteTracking();
+    bool needsDrawBarriers();
 
     void beginRenderPassDebugRegion();
 
diff --git a/src/dxvk/dxvk_context_state.h b/src/dxvk/dxvk_context_state.h
index f91357e1e..564828860 100644
--- a/src/dxvk/dxvk_context_state.h
+++ b/src/dxvk/dxvk_context_state.h
@@ -75,6 +75,7 @@ namespace dxvk {
     VariableMultisampleRate,
     IndexBufferRobustness,
     DebugUtils,
+    DirectMultiDraw,
     FeatureCount
   };
 
diff --git a/src/vulkan/vulkan_util.h b/src/vulkan/vulkan_util.h
index f872d5801..1c2556525 100644
--- a/src/vulkan/vulkan_util.h
+++ b/src/vulkan/vulkan_util.h
@@ -244,7 +244,7 @@ namespace dxvk::vk {
     label.color[0] = ((color >> 16u) & 0xffu) / 255.0f;
     label.color[1] = ((color >> 8u)  & 0xffu) / 255.0f;
     label.color[2] = ((color >> 0u)  & 0xffu) / 255.0f;
-    label.color[3] = 1.0f;
+    label.color[3] = color ? 1.0f : 0.0f;
     label.pLabelName = text;
     return label;
   }