diff --git a/src/dxbc/dxbc_compiler.cpp b/src/dxbc/dxbc_compiler.cpp
index c76753c56..d9d177109 100644
--- a/src/dxbc/dxbc_compiler.cpp
+++ b/src/dxbc/dxbc_compiler.cpp
@@ -241,8 +241,8 @@ namespace dxvk {
     info.bindings = m_bindings.data();
     info.inputMask = m_inputMask;
     info.outputMask = m_outputMask;
-    info.uniformSize = m_icbData.size();
-    info.uniformData = m_icbData.data();
+    info.uniformSize = m_icbData.size() * sizeof(uint32_t);
+    info.uniformData = reinterpret_cast<const char*>(m_icbData.data());
     info.pushConstStages = VK_SHADER_STAGE_FRAGMENT_BIT;
     info.pushConstSize = sizeof(DxbcPushConstants);
     info.outputTopology = m_outputTopology;
@@ -817,7 +817,7 @@ namespace dxvk {
     if (ins.controls.accessType() == DxbcConstantBufferAccessType::DynamicallyIndexed)
       elementCount = 4096;
 
-    this->emitDclConstantBufferVar(bufferId, elementCount,
+    this->emitDclConstantBufferVar(bufferId, elementCount, 4u,
       str::format("cb", bufferId).c_str());
   }
   
@@ -825,13 +825,14 @@ namespace dxvk {
   void DxbcCompiler::emitDclConstantBufferVar(
           uint32_t                regIdx,
           uint32_t                numConstants,
+          uint32_t                numComponents,
     const char*                   name) {
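-    // Uniform buffer data is stored as a fixed-size array
-    // of 4x32-bit vectors. SPIR-V requires explicit strides.
+    // Uniform buffer data is a fixed-size array of numComponents x
+    // 32-bit vectors. SPIR-V requires explicit strides.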
     const uint32_t arrayType = m_module.defArrayTypeUnique(
-      getVectorTypeId({ DxbcScalarType::Float32, 4 }),
+      getVectorTypeId({ DxbcScalarType::Float32, numComponents }),
       m_module.constu32(numConstants));
-    m_module.decorateArrayStride(arrayType, 16);
+    m_module.decorateArrayStride(arrayType, sizeof(uint32_t) * numComponents);
     
     // SPIR-V requires us to put that array into a
     // struct and decorate that struct as a block.
@@ -1586,13 +1587,29 @@ namespace dxvk {
           uint32_t                dwordCount,
     const uint32_t*               dwordArray,
           uint32_t                componentCount) {
-    this->emitDclConstantBufferVar(Icb_BindingSlotId, dwordCount / 4, "icb");
+    uint32_t vectorCount = dwordCount / 4u;
 
-    m_icbData.resize(dwordCount * sizeof(uint32_t));
-    std::memcpy(m_icbData.data(), dwordArray, m_icbData.size());
+    // Tightly pack vec2 or scalar arrays if possible. Don't bother with
+    // vec3 since we'd rather have properly vectorized loads in that case.
+    if (m_moduleInfo.options.supportsTightIcbPacking && componentCount <= 2u)
+      m_icbComponents = componentCount;
+    else
+      m_icbComponents = 4u;
 
-    m_icbComponents = 4u;
-    m_icbSize = dwordCount / 4u;
+    // Immediate constant buffer can be read out of bounds, declare
+    // it with the maximum possible size and rely on robustness.
+    this->emitDclConstantBufferVar(Icb_BindingSlotId, 4096u, m_icbComponents, "icb");
+
+    // Keep only the first m_icbComponents dwords of each four-dword
+    // source vector. Reserve exactly what the loop below appends.
+    m_icbData.reserve(vectorCount * m_icbComponents);
+
+    for (uint32_t i = 0; i < dwordCount; i += 4u) {
+      for (uint32_t c = 0; c < m_icbComponents; c++)
+        m_icbData.push_back(dwordArray[i + c]);
+    }
+
+    m_icbSize = vectorCount;
   }
 
 
diff --git a/src/dxbc/dxbc_compiler.h b/src/dxbc/dxbc_compiler.h
index ed63f4ba7..e47e31c5f 100644
--- a/src/dxbc/dxbc_compiler.h
+++ b/src/dxbc/dxbc_compiler.h
@@ -504,7 +504,7 @@ namespace dxvk {
     // Immediate constant buffer. If defined, this is
     // an array of four-component uint32 vectors.
     uint32_t          m_icbArray = 0;
-    std::vector<char> m_icbData;
+    std::vector<uint32_t> m_icbData;
 
     uint32_t          m_icbComponents = 0u;
     uint32_t          m_icbSize = 0u;
@@ -593,6 +593,7 @@ namespace dxvk {
     void emitDclConstantBufferVar(
             uint32_t                regIdx,
             uint32_t                numConstants,
+            uint32_t                numComponents,
       const char*                   name);
     
     void emitDclSampler(
diff --git a/src/dxbc/dxbc_options.cpp b/src/dxbc/dxbc_options.cpp
index e55a0f945..7c269f792 100644
--- a/src/dxbc/dxbc_options.cpp
+++ b/src/dxbc/dxbc_options.cpp
@@ -38,6 +38,7 @@ namespace dxvk {
     disableMsaa              = options.disableMsaa;
     forceSampleRateShading   = options.forceSampleRateShading;
     enableSampleShadingInterlock = device->features().extFragmentShaderInterlock.fragmentShaderSampleInterlock;
+    supportsTightIcbPacking  = device->features().vk12.uniformBufferStandardLayout;
 
     // Figure out float control flags to match D3D11 rules
     if (options.floatControls) {
@@ -55,4 +56,4 @@ namespace dxvk {
     }
   }
   
-}
\ No newline at end of file
+}
diff --git a/src/dxbc/dxbc_options.h b/src/dxbc/dxbc_options.h
index 27ecca1ff..986004924 100644
--- a/src/dxbc/dxbc_options.h
+++ b/src/dxbc/dxbc_options.h
@@ -49,6 +49,10 @@ namespace dxvk {
     // Enable per-sample interlock if supported
     bool enableSampleShadingInterlock = false;
 
+    /// Whether immediate constant buffers may be tightly packed,
+    /// i.e. the device supports uniformBufferStandardLayout
+    bool supportsTightIcbPacking = false;
+
     /// Float control flags
     DxbcFloatControlFlags floatControl;
 
@@ -56,4 +60,4 @@ namespace dxvk {
     VkDeviceSize minSsboAlignment = 0;
   };
   
-}
\ No newline at end of file
+}