[d3d11] Lazy-bind constant buffers

This commit is contained in:
Philip Rebohle 2025-02-19 01:49:45 +01:00 committed by Philip Rebohle
parent a61c114519
commit 4fdbfffdcc
3 changed files with 201 additions and 56 deletions

View file

@ -1009,6 +1009,9 @@ namespace dxvk {
if (!ctrBuf.defined()) if (!ctrBuf.defined())
return; return;
if (unlikely(HasDirtyGraphicsBindings()))
ApplyDirtyGraphicsBindings();
// We bind the SO counter as an indirect count buffer, // We bind the SO counter as an indirect count buffer,
// so reset any tracking we may have been doing here. // so reset any tracking we may have been doing here.
m_state.id.reset(); m_state.id.reset();
@ -1035,6 +1038,9 @@ namespace dxvk {
UINT StartVertexLocation) { UINT StartVertexLocation) {
D3D10DeviceLock lock = LockContext(); D3D10DeviceLock lock = LockContext();
if (unlikely(HasDirtyGraphicsBindings()))
ApplyDirtyGraphicsBindings();
EmitCs([=] (DxvkContext* ctx) { EmitCs([=] (DxvkContext* ctx) {
ctx->draw( ctx->draw(
VertexCount, 1, VertexCount, 1,
@ -1050,6 +1056,9 @@ namespace dxvk {
INT BaseVertexLocation) { INT BaseVertexLocation) {
D3D10DeviceLock lock = LockContext(); D3D10DeviceLock lock = LockContext();
if (unlikely(HasDirtyGraphicsBindings()))
ApplyDirtyGraphicsBindings();
EmitCs([=] (DxvkContext* ctx) { EmitCs([=] (DxvkContext* ctx) {
ctx->drawIndexed( ctx->drawIndexed(
IndexCount, 1, IndexCount, 1,
@ -1067,6 +1076,9 @@ namespace dxvk {
UINT StartInstanceLocation) { UINT StartInstanceLocation) {
D3D10DeviceLock lock = LockContext(); D3D10DeviceLock lock = LockContext();
if (unlikely(HasDirtyGraphicsBindings()))
ApplyDirtyGraphicsBindings();
EmitCs([=] (DxvkContext* ctx) { EmitCs([=] (DxvkContext* ctx) {
ctx->draw( ctx->draw(
VertexCountPerInstance, VertexCountPerInstance,
@ -1086,6 +1098,9 @@ namespace dxvk {
UINT StartInstanceLocation) { UINT StartInstanceLocation) {
D3D10DeviceLock lock = LockContext(); D3D10DeviceLock lock = LockContext();
if (unlikely(HasDirtyGraphicsBindings()))
ApplyDirtyGraphicsBindings();
EmitCs([=] (DxvkContext* ctx) { EmitCs([=] (DxvkContext* ctx) {
ctx->drawIndexed( ctx->drawIndexed(
IndexCountPerInstance, IndexCountPerInstance,
@ -1107,6 +1122,9 @@ namespace dxvk {
if (!ValidateDrawBufferSize(pBufferForArgs, AlignedByteOffsetForArgs, sizeof(VkDrawIndexedIndirectCommand))) if (!ValidateDrawBufferSize(pBufferForArgs, AlignedByteOffsetForArgs, sizeof(VkDrawIndexedIndirectCommand)))
return; return;
if (unlikely(HasDirtyGraphicsBindings()))
ApplyDirtyGraphicsBindings();
// If possible, batch up multiple indirect draw calls of // If possible, batch up multiple indirect draw calls of
// the same type into one single multiDrawIndirect call // the same type into one single multiDrawIndirect call
auto cmdData = static_cast<D3D11CmdDrawIndirectData*>(m_cmdData); auto cmdData = static_cast<D3D11CmdDrawIndirectData*>(m_cmdData);
@ -1142,6 +1160,9 @@ namespace dxvk {
if (!ValidateDrawBufferSize(pBufferForArgs, AlignedByteOffsetForArgs, sizeof(VkDrawIndirectCommand))) if (!ValidateDrawBufferSize(pBufferForArgs, AlignedByteOffsetForArgs, sizeof(VkDrawIndirectCommand)))
return; return;
if (unlikely(HasDirtyGraphicsBindings()))
ApplyDirtyGraphicsBindings();
// If possible, batch up multiple indirect draw calls of // If possible, batch up multiple indirect draw calls of
// the same type into one single multiDrawIndirect call // the same type into one single multiDrawIndirect call
auto cmdData = static_cast<D3D11CmdDrawIndirectData*>(m_cmdData); auto cmdData = static_cast<D3D11CmdDrawIndirectData*>(m_cmdData);
@ -1174,6 +1195,9 @@ namespace dxvk {
UINT ThreadGroupCountZ) { UINT ThreadGroupCountZ) {
D3D10DeviceLock lock = LockContext(); D3D10DeviceLock lock = LockContext();
if (unlikely(HasDirtyComputeBindings()))
ApplyDirtyComputeBindings();
EmitCs([=] (DxvkContext* ctx) { EmitCs([=] (DxvkContext* ctx) {
ctx->dispatch( ctx->dispatch(
ThreadGroupCountX, ThreadGroupCountX,
@ -1193,6 +1217,9 @@ namespace dxvk {
if (!ValidateDrawBufferSize(pBufferForArgs, AlignedByteOffsetForArgs, sizeof(VkDispatchIndirectCommand))) if (!ValidateDrawBufferSize(pBufferForArgs, AlignedByteOffsetForArgs, sizeof(VkDispatchIndirectCommand)))
return; return;
if (unlikely(HasDirtyComputeBindings()))
ApplyDirtyComputeBindings();
EmitCs([cOffset = AlignedByteOffsetForArgs] EmitCs([cOffset = AlignedByteOffsetForArgs]
(DxvkContext* ctx) { (DxvkContext* ctx) {
ctx->dispatchIndirect(cOffset); ctx->dispatchIndirect(cOffset);
@ -3157,6 +3184,59 @@ namespace dxvk {
} }
template<typename ContextType>
void D3D11CommonContext<ContextType>::ApplyDirtyConstantBuffers(
        DxbcProgramType     Stage,
  const DxbcBindingMask&    BoundMask,
        DxbcBindingMask&    DirtyMask) {
  // Only slots flagged in both masks have a pending binding to emit.
  uint32_t pendingSlots = BoundMask.cbvMask & DirtyMask.cbvMask;

  if (pendingSlots) {
    const auto& stageBuffers = m_state.cbv[Stage];

    // pendingSlots is a subset of the dirty bits, so the subtraction
    // clears exactly the slots that are about to be re-bound.
    DirtyMask.cbvMask -= pendingSlots;

    for (uint32_t index : bit::BitMask(pendingSlots)) {
      const auto& entry = stageBuffers.buffers[index];

      BindConstantBuffer(Stage, index,
        entry.buffer.ptr(),
        entry.constantOffset,
        entry.constantBound);
    }
  }
}
template<typename ContextType>
void D3D11CommonContext<ContextType>::ApplyDirtyGraphicsBindings() {
  // Collect the stages that are both flagged dirty and currently in use.
  // The compute stage is excluded here; ApplyDirtyComputeBindings covers it.
  auto dirtyStages = m_state.lazy.shadersDirty & m_state.lazy.shadersUsed;
  dirtyStages.clr(DxbcProgramType::ComputeShader);

  for (uint32_t stageIndex : bit::BitMask(uint32_t(dirtyStages.raw()))) {
    DxbcProgramType stage = DxbcProgramType(stageIndex);

    // Named distinctly from the stage set above so that the per-stage
    // binding mask does not shadow the set driving this loop.
    auto& boundBindings = m_state.lazy.bindingsUsed[stage];
    auto& dirtyBindings = m_state.lazy.bindingsDirty[stage];

    ApplyDirtyConstantBuffers(stage, boundBindings, dirtyBindings);

    // All pending bindings for this stage have been emitted
    m_state.lazy.shadersDirty.clr(stage);
  }
}
template<typename ContextType>
void D3D11CommonContext<ContextType>::ApplyDirtyComputeBindings() {
  // Compute counterpart of the graphics path: only one stage to process.
  const DxbcProgramType computeStage = DxbcProgramType::ComputeShader;
  auto& lazyState = m_state.lazy;

  ApplyDirtyConstantBuffers(computeStage,
    lazyState.bindingsUsed[computeStage],
    lazyState.bindingsDirty[computeStage]);

  // Pending compute bindings have been emitted, drop the stage's dirty flag
  lazyState.shadersDirty.clr(computeStage);
}
template<typename ContextType> template<typename ContextType>
void D3D11CommonContext<ContextType>::ApplyInputLayout() { void D3D11CommonContext<ContextType>::ApplyInputLayout() {
auto inputLayout = m_state.ia.inputLayout.prvRef(); auto inputLayout = m_state.ia.inputLayout.prvRef();
@ -3686,45 +3766,49 @@ namespace dxvk {
template<typename ContextType> template<typename ContextType>
template<DxbcProgramType ShaderStage>
void D3D11CommonContext<ContextType>::BindConstantBuffer( void D3D11CommonContext<ContextType>::BindConstantBuffer(
DxbcProgramType ShaderStage,
UINT Slot, UINT Slot,
D3D11Buffer* pBuffer, D3D11Buffer* pBuffer,
UINT Offset, UINT Offset,
UINT Length) { UINT Length) {
uint32_t slotId = computeConstantBufferBinding(ShaderStage, Slot);
if (pBuffer) { if (pBuffer) {
EmitCs([ EmitCs([
cSlotId = Slot, cSlotId = slotId,
cStage = GetShaderStage(ShaderStage),
cBufferSlice = pBuffer->GetBufferSlice(16 * Offset, 16 * Length) cBufferSlice = pBuffer->GetBufferSlice(16 * Offset, 16 * Length)
] (DxvkContext* ctx) mutable { ] (DxvkContext* ctx) mutable {
VkShaderStageFlagBits stage = GetShaderStage(ShaderStage); ctx->bindUniformBuffer(cStage, cSlotId,
ctx->bindUniformBuffer(stage, cSlotId,
Forwarder::move(cBufferSlice)); Forwarder::move(cBufferSlice));
}); });
} else { } else {
EmitCs([ EmitCs([
cSlotId = Slot cSlotId = slotId,
cStage = GetShaderStage(ShaderStage)
] (DxvkContext* ctx) { ] (DxvkContext* ctx) {
VkShaderStageFlagBits stage = GetShaderStage(ShaderStage); ctx->bindUniformBuffer(cStage, cSlotId, DxvkBufferSlice());
ctx->bindUniformBuffer(stage, cSlotId, DxvkBufferSlice());
}); });
} }
} }
template<typename ContextType> template<typename ContextType>
template<DxbcProgramType ShaderStage>
void D3D11CommonContext<ContextType>::BindConstantBufferRange( void D3D11CommonContext<ContextType>::BindConstantBufferRange(
DxbcProgramType ShaderStage,
UINT Slot, UINT Slot,
UINT Offset, UINT Offset,
UINT Length) { UINT Length) {
uint32_t slotId = computeConstantBufferBinding(ShaderStage, Slot);
EmitCs([ EmitCs([
cSlotId = Slot, cSlotId = slotId,
cOffset = 16 * Offset, cStage = GetShaderStage(ShaderStage),
cLength = 16 * Length cOffset = 16u * Offset,
cLength = 16u * Length
] (DxvkContext* ctx) { ] (DxvkContext* ctx) {
VkShaderStageFlagBits stage = GetShaderStage(ShaderStage); ctx->bindUniformBufferRange(cStage, cSlotId, cOffset, cLength);
ctx->bindUniformBufferRange(stage, cSlotId, cOffset, cLength);
}); });
} }
@ -4236,6 +4320,48 @@ namespace dxvk {
} }
template<typename ContextType>
template<typename T>
bool D3D11CommonContext<ContextType>::DirtyBindingGeneric(
        DxbcProgramType     ShaderStage,
        T                   BoundMask,
        T&                  DirtyMask,
        T                   DirtyBit,
        bool                IsNull) {
  // True if the slot is set in BoundMask and has not been marked dirty yet
  const bool activeAndClean = bool((BoundMask & ~DirtyMask) & DirtyBit);

  // Most common case by far: a non-null resource is bound to an active slot
  // that is not pending lazy binding. Report that the caller should forward
  // the binding right away so that no tracking overhead is incurred.
  if (activeAndClean && likely(!IsNull))
    return false;

  // A null resource bound to an active slot is likely to be replaced soon,
  // either by another resource or by a shader that does not use the slot.
  // Defer the binding to the next draw to avoid likely redundant CS traffic.
  if (activeAndClean)
    m_state.lazy.shadersDirty.set(ShaderStage);

  // Remaining cases: the slot is inactive or already dirty. An inactive slot
  // does not require the stage to be flagged, since binding a shader that
  // activates the slot will implicitly take care of that.
  DirtyMask |= DirtyBit;
  return true;
}
template<typename ContextType>
bool D3D11CommonContext<ContextType>::DirtyConstantBuffer(
        DxbcProgramType     ShaderStage,
        uint32_t            Slot,
        bool                IsNull) {
  // Constant buffer flavour of the generic dirty tracking: operates on
  // the cbv masks of the given stage, with one bit per buffer slot.
  auto& lazyState = m_state.lazy;

  auto& usedBindings = lazyState.bindingsUsed[ShaderStage];
  auto& dirtyBindings = lazyState.bindingsDirty[ShaderStage];

  return DirtyBindingGeneric(ShaderStage,
    usedBindings.cbvMask,
    dirtyBindings.cbvMask,
    1u << Slot, IsNull);
}
template<typename ContextType> template<typename ContextType>
void D3D11CommonContext<ContextType>::DiscardBuffer( void D3D11CommonContext<ContextType>::DiscardBuffer(
ID3D11Resource* pResource) { ID3D11Resource* pResource) {
@ -4398,6 +4524,21 @@ namespace dxvk {
} }
template<typename ContextType>
bool D3D11CommonContext<ContextType>::HasDirtyComputeBindings() {
  // Compute only has a single stage, so checking its dirty flag suffices
  auto& dirtyStages = m_state.lazy.shadersDirty;
  return dirtyStages.test(DxbcProgramType::ComputeShader);
}
template<typename ContextType>
bool D3D11CommonContext<ContextType>::HasDirtyGraphicsBindings() {
  // A stage only counts if it is flagged dirty and is also in use
  auto pendingStages = m_state.lazy.shadersDirty & m_state.lazy.shadersUsed;

  return pendingStages.any(
    DxbcProgramType::VertexShader,
    DxbcProgramType::GeometryShader,
    DxbcProgramType::HullShader,
    DxbcProgramType::DomainShader,
    DxbcProgramType::PixelShader);
}
template<typename ContextType> template<typename ContextType>
void D3D11CommonContext<ContextType>::ResetCommandListState() { void D3D11CommonContext<ContextType>::ResetCommandListState() {
EmitCs([ EmitCs([
@ -4646,36 +4787,6 @@ namespace dxvk {
} }
template<typename ContextType>
void D3D11CommonContext<ContextType>::RestoreUsedBindings() {
  // Flag every binding used since the last reset as dirty, so that the next
  // draws and dispatches reapply them as needed. Null bindings get flagged
  // as well, which may cause some redundant CS thread traffic but is
  // otherwise harmless.
  auto usedCounts = GetMaxUsedBindings();

  for (uint32_t stageIndex = 0; stageIndex < uint32_t(DxbcProgramType::Count); stageIndex++) {
    auto stage = DxbcProgramType(stageIndex);
    auto counts = usedCounts.stages[stageIndex];
    auto& dirty = m_state.lazy.bindingsDirty[stage];

    // Set the low N bits of each mask, N being the number of used slots
    dirty.cbvMask |= (1u << counts.cbvCount) - 1u;
    dirty.samplerMask |= (1u << counts.samplerCount) - 1u;

    // A count of zero is skipped since shifting a 64-bit value by 64 is undefined
    if (counts.uavCount)
      dirty.uavMask |= uint64_t(-1) >> (64u - counts.uavCount);

    // The SRV mask is split across two 64-bit words
    if (counts.srvCount > 64u) {
      dirty.srvMask[0] |= uint64_t(-1);
      dirty.srvMask[1] |= uint64_t(-1) >> (128u - counts.srvCount);
    } else if (counts.srvCount) {
      dirty.srvMask[0] |= uint64_t(-1) >> (64u - counts.srvCount);
    }

    // Only stages in use with at least one dirty binding are marked dirty
    if (m_state.lazy.shadersUsed.test(stage)) {
      if (!dirty.empty())
        m_state.lazy.shadersDirty.set(stage);
    }
  }
}
template<typename ContextType> template<typename ContextType>
void D3D11CommonContext<ContextType>::RestoreCommandListState() { void D3D11CommonContext<ContextType>::RestoreCommandListState() {
BindFramebuffer(); BindFramebuffer();
@ -4747,10 +4858,8 @@ namespace dxvk {
template<DxbcProgramType Stage> template<DxbcProgramType Stage>
void D3D11CommonContext<ContextType>::RestoreConstantBuffers() { void D3D11CommonContext<ContextType>::RestoreConstantBuffers() {
const auto& bindings = m_state.cbv[Stage]; const auto& bindings = m_state.cbv[Stage];
uint32_t slotId = computeConstantBufferBinding(Stage, 0);
for (uint32_t i = 0; i < bindings.maxCount; i++) { for (uint32_t i = 0; i < bindings.maxCount; i++) {
BindConstantBuffer<Stage>(slotId + i, bindings.buffers[i].buffer.ptr(), BindConstantBuffer(Stage, i, bindings.buffers[i].buffer.ptr(),
bindings.buffers[i].constantOffset, bindings.buffers[i].constantBound); bindings.buffers[i].constantOffset, bindings.buffers[i].constantBound);
} }
} }
@ -4807,7 +4916,6 @@ namespace dxvk {
UINT NumBuffers, UINT NumBuffers,
ID3D11Buffer* const* ppConstantBuffers) { ID3D11Buffer* const* ppConstantBuffers) {
auto& bindings = m_state.cbv[ShaderStage]; auto& bindings = m_state.cbv[ShaderStage];
uint32_t slotId = computeConstantBufferBinding(ShaderStage, StartSlot);
for (uint32_t i = 0; i < NumBuffers; i++) { for (uint32_t i = 0; i < NumBuffers; i++) {
auto newBuffer = static_cast<D3D11Buffer*>(ppConstantBuffers[i]); auto newBuffer = static_cast<D3D11Buffer*>(ppConstantBuffers[i]);
@ -4824,7 +4932,8 @@ namespace dxvk {
bindings.buffers[StartSlot + i].constantCount = constantCount; bindings.buffers[StartSlot + i].constantCount = constantCount;
bindings.buffers[StartSlot + i].constantBound = constantCount; bindings.buffers[StartSlot + i].constantBound = constantCount;
BindConstantBuffer<ShaderStage>(slotId + i, newBuffer, 0, constantCount); if (!DirtyConstantBuffer(ShaderStage, StartSlot + i, !newBuffer))
BindConstantBuffer(ShaderStage, StartSlot + i, newBuffer, 0, constantCount);
} }
} }
@ -4843,8 +4952,6 @@ namespace dxvk {
const UINT* pNumConstants) { const UINT* pNumConstants) {
auto& bindings = m_state.cbv[ShaderStage]; auto& bindings = m_state.cbv[ShaderStage];
uint32_t slotId = computeConstantBufferBinding(ShaderStage, StartSlot);
for (uint32_t i = 0; i < NumBuffers; i++) { for (uint32_t i = 0; i < NumBuffers; i++) {
auto newBuffer = static_cast<D3D11Buffer*>(ppConstantBuffers[i]); auto newBuffer = static_cast<D3D11Buffer*>(ppConstantBuffers[i]);
@ -4883,14 +4990,16 @@ namespace dxvk {
bindings.buffers[StartSlot + i].constantCount = constantCount; bindings.buffers[StartSlot + i].constantCount = constantCount;
bindings.buffers[StartSlot + i].constantBound = constantBound; bindings.buffers[StartSlot + i].constantBound = constantBound;
BindConstantBuffer<ShaderStage>(slotId + i, newBuffer, constantOffset, constantBound); if (!DirtyConstantBuffer(ShaderStage, StartSlot + i, !newBuffer))
BindConstantBuffer(ShaderStage, StartSlot + i, newBuffer, constantOffset, constantBound);
} else if (bindings.buffers[StartSlot + i].constantOffset != constantOffset } else if (bindings.buffers[StartSlot + i].constantOffset != constantOffset
|| bindings.buffers[StartSlot + i].constantCount != constantCount) { || bindings.buffers[StartSlot + i].constantCount != constantCount) {
bindings.buffers[StartSlot + i].constantOffset = constantOffset; bindings.buffers[StartSlot + i].constantOffset = constantOffset;
bindings.buffers[StartSlot + i].constantCount = constantCount; bindings.buffers[StartSlot + i].constantCount = constantCount;
bindings.buffers[StartSlot + i].constantBound = constantBound; bindings.buffers[StartSlot + i].constantBound = constantBound;
BindConstantBufferRange<ShaderStage>(slotId + i, constantOffset, constantBound); if (!DirtyConstantBuffer(ShaderStage, StartSlot + i, !newBuffer))
BindConstantBufferRange(ShaderStage, StartSlot + i, constantOffset, constantBound);
} }
} }

View file

@ -799,6 +799,15 @@ namespace dxvk {
DxvkBufferSlice AllocStagingBuffer( DxvkBufferSlice AllocStagingBuffer(
VkDeviceSize Size); VkDeviceSize Size);
// Emits constant buffer bindings for every slot of the given stage whose
// bit is set in both BoundMask and DirtyMask, using the buffer, offset and
// bound size stored in m_state.cbv. The processed bits are removed from
// DirtyMask.cbvMask.
void ApplyDirtyConstantBuffers(
DxbcProgramType Stage,
const DxbcBindingMask& BoundMask,
DxbcBindingMask& DirtyMask);
// Applies pending constant buffer bindings for every non-compute shader
// stage that is both dirty and in use, then clears that stage's dirty flag.
void ApplyDirtyGraphicsBindings();
// Applies pending constant buffer bindings for the compute shader stage
// and clears the stage's dirty flag.
void ApplyDirtyComputeBindings();
void ApplyInputLayout(); void ApplyInputLayout();
void ApplyPrimitiveTopology(); void ApplyPrimitiveTopology();
@ -854,15 +863,15 @@ namespace dxvk {
D3D11Buffer* pBuffer, D3D11Buffer* pBuffer,
UINT Offset); UINT Offset);
template<DxbcProgramType ShaderStage>
void BindConstantBuffer( void BindConstantBuffer(
DxbcProgramType ShaderStage,
UINT Slot, UINT Slot,
D3D11Buffer* pBuffer, D3D11Buffer* pBuffer,
UINT Offset, UINT Offset,
UINT Length); UINT Length);
template<DxbcProgramType ShaderStage>
void BindConstantBufferRange( void BindConstantBufferRange(
DxbcProgramType ShaderStage,
UINT Slot, UINT Slot,
UINT Offset, UINT Offset,
UINT Length); UINT Length);
@ -911,6 +920,19 @@ namespace dxvk {
DxvkBufferSlice BufferSlice, DxvkBufferSlice BufferSlice,
UINT Flags); UINT Flags);
// Decides whether a binding change is deferred or must be forwarded now.
// Returns false if DirtyBit is set in BoundMask, not yet set in DirtyMask,
// and IsNull is false; the caller is then expected to emit the binding
// immediately. Otherwise sets DirtyBit in DirtyMask and returns true. The
// shader stage itself is only flagged dirty when a null resource is bound
// to a slot that is set in BoundMask and was not dirty before.
template<typename T>
bool DirtyBindingGeneric(
DxbcProgramType ShaderStage,
T BoundMask,
T& DirtyMask,
T DirtyBit,
bool IsNull);
// Runs DirtyBindingGeneric on the constant buffer masks of the given
// stage, with bit (1u << Slot) identifying the buffer slot. A return
// value of false means the binding has to be emitted by the caller.
bool DirtyConstantBuffer(
DxbcProgramType ShaderStage,
uint32_t Slot,
bool IsNull);
void DiscardBuffer( void DiscardBuffer(
ID3D11Resource* pResource); ID3D11Resource* pResource);
@ -943,6 +965,10 @@ namespace dxvk {
D3D11MaxUsedBindings GetMaxUsedBindings(); D3D11MaxUsedBindings GetMaxUsedBindings();
// Returns true if the compute shader stage is flagged as dirty.
bool HasDirtyComputeBindings();
// Returns true if any of the vertex, geometry, hull, domain or pixel
// shader stages is both flagged as dirty and in use.
bool HasDirtyGraphicsBindings();
void ResetCommandListState(); void ResetCommandListState();
void ResetContextState(); void ResetContextState();
@ -967,8 +993,6 @@ namespace dxvk {
void ResolveOmUavHazards( void ResolveOmUavHazards(
D3D11RenderTargetView* pView); D3D11RenderTargetView* pView);
void RestoreUsedBindings();
void RestoreCommandListState(); void RestoreCommandListState();
template<DxbcProgramType Stage> template<DxbcProgramType Stage>

View file

@ -48,6 +48,9 @@ namespace dxvk {
D3D10DeviceLock lock = m_ctx->LockContext(); D3D10DeviceLock lock = m_ctx->LockContext();
m_ctx->SetDrawBuffers(pBufferForArgs, nullptr); m_ctx->SetDrawBuffers(pBufferForArgs, nullptr);
if (unlikely(m_ctx->HasDirtyGraphicsBindings()))
m_ctx->ApplyDirtyGraphicsBindings();
m_ctx->EmitCs([ m_ctx->EmitCs([
cCount = DrawCount, cCount = DrawCount,
cOffset = ByteOffsetForArgs, cOffset = ByteOffsetForArgs,
@ -67,6 +70,9 @@ namespace dxvk {
D3D10DeviceLock lock = m_ctx->LockContext(); D3D10DeviceLock lock = m_ctx->LockContext();
m_ctx->SetDrawBuffers(pBufferForArgs, nullptr); m_ctx->SetDrawBuffers(pBufferForArgs, nullptr);
if (unlikely(m_ctx->HasDirtyGraphicsBindings()))
m_ctx->ApplyDirtyGraphicsBindings();
m_ctx->EmitCs([ m_ctx->EmitCs([
cCount = DrawCount, cCount = DrawCount,
cOffset = ByteOffsetForArgs, cOffset = ByteOffsetForArgs,
@ -88,6 +94,9 @@ namespace dxvk {
D3D10DeviceLock lock = m_ctx->LockContext(); D3D10DeviceLock lock = m_ctx->LockContext();
m_ctx->SetDrawBuffers(pBufferForArgs, pBufferForCount); m_ctx->SetDrawBuffers(pBufferForArgs, pBufferForCount);
if (unlikely(m_ctx->HasDirtyGraphicsBindings()))
m_ctx->ApplyDirtyGraphicsBindings();
m_ctx->EmitCs([ m_ctx->EmitCs([
cMaxCount = MaxDrawCount, cMaxCount = MaxDrawCount,
cArgOffset = ByteOffsetForArgs, cArgOffset = ByteOffsetForArgs,
@ -110,6 +119,9 @@ namespace dxvk {
D3D10DeviceLock lock = m_ctx->LockContext(); D3D10DeviceLock lock = m_ctx->LockContext();
m_ctx->SetDrawBuffers(pBufferForArgs, pBufferForCount); m_ctx->SetDrawBuffers(pBufferForArgs, pBufferForCount);
if (unlikely(m_ctx->HasDirtyGraphicsBindings()))
m_ctx->ApplyDirtyGraphicsBindings();
m_ctx->EmitCs([ m_ctx->EmitCs([
cMaxCount = MaxDrawCount, cMaxCount = MaxDrawCount,
cArgOffset = ByteOffsetForArgs, cArgOffset = ByteOffsetForArgs,