diff --git a/src/d3d11/d3d11_context.cpp b/src/d3d11/d3d11_context.cpp index bdbbf25a6..16529a217 100644 --- a/src/d3d11/d3d11_context.cpp +++ b/src/d3d11/d3d11_context.cpp @@ -1009,10 +1009,22 @@ namespace dxvk { if (!ctrBuf.defined()) return; - EmitCs([=] (DxvkContext* ctx) { - ctx->drawIndirectXfb(ctrBuf, + // We bind the SO counter as an indirect count buffer, + // so reset any tracking we may have been doing here. + m_state.id.reset(); + + EmitCs([=] (DxvkContext* ctx) mutable { + ctx->bindDrawBuffers(DxvkBufferSlice(), + Forwarder::move(ctrBuf)); + + ctx->drawIndirectXfb(0u, vtxBuf.buffer()->getXfbVertexStride(), vtxBuf.offset()); + + // Reset draw buffer right away so we don't + // keep the SO counter alive indefinitely + ctx->bindDrawBuffers(DxvkBufferSlice(), + DxvkBufferSlice()); }); } @@ -1109,7 +1121,7 @@ namespace dxvk { } else { cmdData = EmitCsCmd( [] (DxvkContext* ctx, const D3D11CmdDrawIndirectData* data) { - ctx->drawIndexedIndirect(data->offset, data->count, data->stride); + ctx->drawIndexedIndirect(data->offset, data->count, data->stride, true); }); cmdData->type = D3D11CmdType::DrawIndirectIndexed; @@ -1144,7 +1156,7 @@ namespace dxvk { } else { cmdData = EmitCsCmd( [] (DxvkContext* ctx, const D3D11CmdDrawIndirectData* data) { - ctx->drawIndirect(data->offset, data->count, data->stride); + ctx->drawIndirect(data->offset, data->count, data->stride, true); }); cmdData->type = D3D11CmdType::DrawIndirect; @@ -4632,10 +4644,6 @@ namespace dxvk { ApplyRasterizerSampleCount(); ApplyViewportState(); - BindDrawBuffers( - m_state.id.argBuffer.ptr(), - m_state.id.cntBuffer.ptr()); - BindIndexBuffer( m_state.ia.indexBuffer.buffer.ptr(), m_state.ia.indexBuffer.offset, @@ -4674,6 +4682,11 @@ namespace dxvk { RestoreSamplers(); RestoreSamplers(); RestoreSamplers(); + + // Draw buffer bindings aren't persistent at the API level, and + // we can't meaningfully track them. Just reset this state here + // and reapply on the next indirect draw. 
+ SetDrawBuffers(nullptr, nullptr); } @@ -5000,10 +5013,13 @@ namespace dxvk { auto argBuffer = static_cast(pBufferForArgs); auto cntBuffer = static_cast(pBufferForCount); - if (m_state.id.argBuffer != argBuffer - || m_state.id.cntBuffer != cntBuffer) { - m_state.id.argBuffer = argBuffer; - m_state.id.cntBuffer = cntBuffer; + auto argBufferCookie = argBuffer ? argBuffer->GetCookie() : 0u; + auto cntBufferCookie = cntBuffer ? cntBuffer->GetCookie() : 0u; + + if (m_state.id.argBufferCookie != argBufferCookie + || m_state.id.cntBufferCookie != cntBufferCookie) { + m_state.id.argBufferCookie = argBufferCookie; + m_state.id.cntBufferCookie = cntBufferCookie; BindDrawBuffers(argBuffer, cntBuffer); } diff --git a/src/d3d11/d3d11_context_ext.cpp b/src/d3d11/d3d11_context_ext.cpp index e8c81e1dc..5254f480f 100644 --- a/src/d3d11/d3d11_context_ext.cpp +++ b/src/d3d11/d3d11_context_ext.cpp @@ -53,7 +53,7 @@ namespace dxvk { cOffset = ByteOffsetForArgs, cStride = ByteStrideForArgs ] (DxvkContext* ctx) { - ctx->drawIndirect(cOffset, cCount, cStride); + ctx->drawIndirect(cOffset, cCount, cStride, false); }); } @@ -72,7 +72,7 @@ namespace dxvk { cOffset = ByteOffsetForArgs, cStride = ByteStrideForArgs ] (DxvkContext* ctx) { - ctx->drawIndexedIndirect(cOffset, cCount, cStride); + ctx->drawIndexedIndirect(cOffset, cCount, cStride, false); }); } @@ -146,11 +146,10 @@ namespace dxvk { D3D11Device* parent = static_cast(m_ctx->GetParentInterface()); DxvkBarrierControlFlags flags = parent->GetOptionsBarrierControlFlags(); - if (ControlFlags & D3D11_VK_BARRIER_CONTROL_IGNORE_WRITE_AFTER_WRITE) - flags.set(DxvkBarrierControl::IgnoreWriteAfterWrite); - - if (ControlFlags & D3D11_VK_BARRIER_CONTROL_IGNORE_GRAPHICS_UAV) - flags.set(DxvkBarrierControl::IgnoreGraphicsBarriers); + if (ControlFlags & D3D11_VK_BARRIER_CONTROL_IGNORE_WRITE_AFTER_WRITE) { + flags.set(DxvkBarrierControl::ComputeAllowReadWriteOverlap, + DxvkBarrierControl::GraphicsAllowReadWriteOverlap); + } m_ctx->EmitCs([cFlags = 
flags] (DxvkContext* ctx) { ctx->setBarrierControl(cFlags); diff --git a/src/d3d11/d3d11_context_imm.cpp b/src/d3d11/d3d11_context_imm.cpp index e762dd9eb..e22201afa 100644 --- a/src/d3d11/d3d11_context_imm.cpp +++ b/src/d3d11/d3d11_context_imm.cpp @@ -865,6 +865,11 @@ namespace dxvk { Rc LatencyTracker) { D3D10DeviceLock lock = LockContext(); + // Don't keep draw buffers alive indefinitely. This cannot be + // done in ExecuteFlush because command recording itself might + // flush, so no state changes are allowed to happen there. + SetDrawBuffers(nullptr, nullptr); + EmitCs([ cTracker = std::move(LatencyTracker) ] (DxvkContext* ctx) { diff --git a/src/d3d11/d3d11_context_state.h b/src/d3d11/d3d11_context_state.h index fd48d8ee4..9dea34937 100644 --- a/src/d3d11/d3d11_context_state.h +++ b/src/d3d11/d3d11_context_state.h @@ -232,12 +232,12 @@ namespace dxvk { * argument and draw count buffer. */ struct D3D11ContextStateID { - Com argBuffer = nullptr; - Com cntBuffer = nullptr; + uint64_t argBufferCookie = 0u; + uint64_t cntBufferCookie = 0u; void reset() { - argBuffer = nullptr; - cntBuffer = nullptr; + argBufferCookie = 0u; + cntBufferCookie = 0u; } }; @@ -347,4 +347,4 @@ namespace dxvk { uint32_t soCount; }; -} \ No newline at end of file +} diff --git a/src/d3d11/d3d11_device.h b/src/d3d11/d3d11_device.h index 48356f8b4..041f758a3 100644 --- a/src/d3d11/d3d11_device.h +++ b/src/d3d11/d3d11_device.h @@ -472,13 +472,13 @@ namespace dxvk { const Rc& Adapter); DxvkBarrierControlFlags GetOptionsBarrierControlFlags() { - DxvkBarrierControlFlags barrierControl; + DxvkBarrierControlFlags barrierControl = 0u; if (m_d3d11Options.relaxedBarriers) - barrierControl.set(DxvkBarrierControl::IgnoreWriteAfterWrite); + barrierControl.set(DxvkBarrierControl::ComputeAllowWriteOnlyOverlap); - if (m_d3d11Options.ignoreGraphicsBarriers) - barrierControl.set(DxvkBarrierControl::IgnoreGraphicsBarriers); + if (m_d3d11Options.relaxedBarriers || m_d3d11Options.relaxedGraphicsBarriers) + 
barrierControl.set(DxvkBarrierControl::GraphicsAllowReadWriteOverlap); return barrierControl; } diff --git a/src/d3d11/d3d11_interfaces.h b/src/d3d11/d3d11_interfaces.h index c2288bd04..7c6b3a479 100644 --- a/src/d3d11/d3d11_interfaces.h +++ b/src/d3d11/d3d11_interfaces.h @@ -24,7 +24,9 @@ enum D3D11_VK_EXTENSION : uint32_t { */ enum D3D11_VK_BARRIER_CONTROL : uint32_t { D3D11_VK_BARRIER_CONTROL_IGNORE_WRITE_AFTER_WRITE = 1 << 0, - D3D11_VK_BARRIER_CONTROL_IGNORE_GRAPHICS_UAV = 1 << 1, + + // Removed: + // D3D11_VK_BARRIER_CONTROL_IGNORE_GRAPHICS_UAV = 1 << 1, }; diff --git a/src/d3d11/d3d11_options.cpp b/src/d3d11/d3d11_options.cpp index da8e2ca55..6b0705c14 100644 --- a/src/d3d11/d3d11_options.cpp +++ b/src/d3d11/d3d11_options.cpp @@ -17,7 +17,7 @@ namespace dxvk { this->zeroInitWorkgroupMemory = config.getOption("d3d11.zeroInitWorkgroupMemory", false); this->forceVolatileTgsmAccess = config.getOption("d3d11.forceVolatileTgsmAccess", false); this->relaxedBarriers = config.getOption("d3d11.relaxedBarriers", false); - this->ignoreGraphicsBarriers = config.getOption("d3d11.ignoreGraphicsBarriers", false); + this->relaxedGraphicsBarriers = config.getOption("d3d11.relaxedGraphicsBarriers", false); this->maxTessFactor = config.getOption("d3d11.maxTessFactor", 0); this->samplerAnisotropy = config.getOption("d3d11.samplerAnisotropy", -1); this->samplerLodBias = config.getOption("d3d11.samplerLodBias", 0.0f); @@ -61,4 +61,4 @@ namespace dxvk { this->shaderDumpPath = env::getEnvVar("DXVK_SHADER_DUMP_PATH"); } -} \ No newline at end of file +} diff --git a/src/d3d11/d3d11_options.h b/src/d3d11/d3d11_options.h index f41979d56..b1fe1e7af 100644 --- a/src/d3d11/d3d11_options.h +++ b/src/d3d11/d3d11_options.h @@ -43,7 +43,7 @@ namespace dxvk { /// /// May improve performance in some games, /// but might also cause rendering issues. - bool ignoreGraphicsBarriers = false; + bool relaxedGraphicsBarriers = false; /// Maximum tessellation factor. 
/// @@ -114,4 +114,4 @@ namespace dxvk { std::string shaderDumpPath; }; -} \ No newline at end of file +} diff --git a/src/d3d8/d3d8_device_child.h b/src/d3d8/d3d8_device_child.h index c45d965ad..598ce0b19 100644 --- a/src/d3d8/d3d8_device_child.h +++ b/src/d3d8/d3d8_device_child.h @@ -31,16 +31,28 @@ namespace dxvk { } ULONG STDMETHODCALLTYPE Release() { - // ignore Release calls on objects with 0 refCount - if(unlikely(!this->m_refCount)) - return this->m_refCount; + uint32_t oldRefCount, refCount; + + do { + oldRefCount = this->m_refCount.load(std::memory_order_acquire); + + // clamp value to 0 to prevent underruns + if (unlikely(!oldRefCount)) + return 0; + + refCount = oldRefCount - 1; + + } while (!this->m_refCount.compare_exchange_weak(oldRefCount, + refCount, + std::memory_order_release, + std::memory_order_acquire)); - uint32_t refCount = --this->m_refCount; if (unlikely(!refCount)) { auto* pDevice = GetDevice(); this->ReleasePrivate(); pDevice->Release(); } + return refCount; } diff --git a/src/d3d8/d3d8_options.cpp b/src/d3d8/d3d8_options.cpp index 549fcd3cf..ea8dbc65d 100644 --- a/src/d3d8/d3d8_options.cpp +++ b/src/d3d8/d3d8_options.cpp @@ -9,7 +9,7 @@ namespace dxvk { static inline uint32_t parseDword(std::string_view str) { - uint32_t value = UINT32_MAX; + uint32_t value = std::numeric_limits::max(); std::from_chars(str.data(), str.data() + str.size(), value); return value; } diff --git a/src/d3d9/d3d9_adapter.cpp b/src/d3d9/d3d9_adapter.cpp index 713a83650..212cc737a 100644 --- a/src/d3d9/d3d9_adapter.cpp +++ b/src/d3d9/d3d9_adapter.cpp @@ -588,7 +588,7 @@ namespace dxvk { // Max Vertex Shader Const pCaps->MaxVertexShaderConst = MaxFloatConstantsVS; // Max PS1 Value - pCaps->PixelShader1xMaxValue = options.shaderModel > 0 ? FLT_MAX : 0.0f; + pCaps->PixelShader1xMaxValue = options.shaderModel > 0 ? 
std::numeric_limits::max() : 0.0f; // Dev Caps 2 pCaps->DevCaps2 = D3DDEVCAPS2_STREAMOFFSET /* | D3DDEVCAPS2_DMAPNPATCH */ diff --git a/src/d3d9/d3d9_common_texture.h b/src/d3d9/d3d9_common_texture.h index bba49d65a..378a6ee2d 100644 --- a/src/d3d9/d3d9_common_texture.h +++ b/src/d3d9/d3d9_common_texture.h @@ -75,7 +75,7 @@ namespace dxvk { public: - static constexpr UINT AllLayers = UINT32_MAX; + static constexpr UINT AllLayers = std::numeric_limits::max(); D3D9CommonTexture( D3D9DeviceEx* pDevice, diff --git a/src/d3d9/d3d9_device.cpp b/src/d3d9/d3d9_device.cpp index d2128512b..f704723db 100644 --- a/src/d3d9/d3d9_device.cpp +++ b/src/d3d9/d3d9_device.cpp @@ -1760,7 +1760,7 @@ namespace dxvk { m_state.depthStencil = ds; - UpdateActiveHazardsDS(UINT32_MAX); + UpdateActiveHazardsDS(std::numeric_limits::max()); return D3D_OK; } @@ -2157,7 +2157,7 @@ namespace dxvk { if (m_state.IsLightEnabled(Index) == !!Enable) return D3D_OK; - uint32_t searchIndex = UINT32_MAX; + uint32_t searchIndex = std::numeric_limits::max(); uint32_t setIndex = Index; if (!Enable) @@ -2384,7 +2384,7 @@ namespace dxvk { case D3DRS_ZWRITEENABLE: if (likely(!old != !Value)) - UpdateActiveHazardsDS(UINT32_MAX); + UpdateActiveHazardsDS(std::numeric_limits::max()); [[fallthrough]]; case D3DRS_STENCILENABLE: case D3DRS_ZENABLE: @@ -3387,7 +3387,7 @@ namespace dxvk { BindShader(GetCommonShader(shader)); m_vsShaderMasks = newShader->GetShaderMask(); - UpdateTextureTypeMismatchesForShader(newShader, m_vsShaderMasks.samplerMask, caps::MaxTexturesPS + 1); + UpdateTextureTypeMismatchesForShader(newShader, m_vsShaderMasks.samplerMask, FirstVSSamplerSlot); } else { m_vsShaderMasks = D3D9ShaderMasks(); @@ -3795,8 +3795,8 @@ namespace dxvk { if (m_psShaderMasks.samplerMask != newShaderMasks.samplerMask || m_psShaderMasks.rtMask != newShaderMasks.rtMask) { m_psShaderMasks = newShaderMasks; - UpdateActiveHazardsRT(UINT32_MAX); - UpdateActiveHazardsDS(UINT32_MAX); + 
UpdateActiveHazardsRT(std::numeric_limits::max()); + UpdateActiveHazardsDS(std::numeric_limits::max()); } return D3D_OK; @@ -6415,12 +6415,15 @@ namespace dxvk { void D3D9DeviceEx::UpdateTextureTypeMismatchesForTexture(uint32_t stateSampler) { uint32_t shaderTextureIndex; const D3D9CommonShader* shader; - if (unlikely(stateSampler > caps::MaxTexturesPS + 1)) { + if (likely(IsPSSampler(stateSampler))) { + shader = GetCommonShader(m_state.pixelShader); + shaderTextureIndex = stateSampler; + } else if (unlikely(IsVSSampler(stateSampler))) { shader = GetCommonShader(m_state.vertexShader); shaderTextureIndex = stateSampler - caps::MaxTexturesPS - 1; } else { - shader = GetCommonShader(m_state.pixelShader); - shaderTextureIndex = stateSampler; + // Do not type check the fixed function displacement map texture. + return; } if (unlikely(shader == nullptr || shader->GetInfo().majorVersion() < 2 || m_d3d9Options.forceSamplerTypeSpecConstants)) { @@ -7815,7 +7818,7 @@ namespace dxvk { if (key.Data.Contents.UseLighting) { for (uint32_t i = 0; i < caps::MaxEnabledLights; i++) { - if (m_state.enabledLightIndices[i] != UINT32_MAX) + if (m_state.enabledLightIndices[i] != std::numeric_limits::max()) lightCount++; } } @@ -7912,7 +7915,7 @@ namespace dxvk { uint32_t lightIdx = 0; for (uint32_t i = 0; i < caps::MaxEnabledLights; i++) { auto idx = m_state.enabledLightIndices[i]; - if (idx == UINT32_MAX) + if (idx == std::numeric_limits::max()) continue; data->Lights[lightIdx++] = D3D9Light(m_state.lights[idx].value(), m_state.transforms[GetTransformIndex(D3DTS_VIEW)]); diff --git a/src/d3d9/d3d9_device.h b/src/d3d9/d3d9_device.h index cec35f3d4..3a2a4184d 100644 --- a/src/d3d9/d3d9_device.h +++ b/src/d3d9/d3d9_device.h @@ -653,15 +653,41 @@ namespace dxvk { const D3DDISPLAYMODEEX* pFullscreenDisplayMode, IDirect3DSwapChain9** ppSwapChain); + /** + * @brief Sets the given sampler state + * + * @param StateSampler Sampler index (according to our internal way of storing samplers) + * 
@param Type Sampler state type to change + * @param Value State value + */ HRESULT SetStateSamplerState( DWORD StateSampler, D3DSAMPLERSTATETYPE Type, DWORD Value); + /** + * @brief Sets the given sampler texture + * + * @param StateSampler Sampler index (according to our internal way of storing samplers) + * @param pTexture Texture to use + */ HRESULT SetStateTexture(DWORD StateSampler, IDirect3DBaseTexture9* pTexture); + /** + * @brief Sets the transform for the given sampler + * + * @param idx Sampler index (according to our internal way of storing samplers) + * @param pMatrix Transform matrix + */ HRESULT SetStateTransform(uint32_t idx, const D3DMATRIX* pMatrix); + /** + * @brief Sets the fixed function texture processing state + * + * @param Stage Sampler index (according to our internal way of storing samplers) + * @param Type Fixed function texture stage type + * @param Value Value for the state + */ HRESULT SetStateTextureStageState( DWORD Stage, D3D9TextureStageStateTypes Type, @@ -818,8 +844,40 @@ namespace dxvk { void UpdateActiveFetch4(uint32_t stateSampler); + /** + * @brief Sets the mismatching texture type bits for all samplers if necessary. + * + * This function will check all samplers the shader uses and set the mismatching texture type bit for the given sampler if it does not + * match the texture type expected by the respective shader. + * + * It will *not* unset the bit if the texture type does match. + * + * @param stateSampler Sampler index (according to our internal way of storing samplers) + */ + + /** + * @brief Sets the mismatching texture type bits for all samplers if necessary. + * + * This function will check all samplers the shader uses and set the mismatching texture type bit for the given sampler if it does not + * match the texture type expected by the shader. 
+ * + * @param shader The shader + * @param shaderSamplerMask Mask of all samplers that the shader uses (according to our internal way of storing samplers) + * @param shaderSamplerOffset First index of the shader's samplers according to our internal way of storing samplers. + * Used to transform the sampler indices that are relative to the entire pipeline to ones relative to the shader. + */ void UpdateTextureTypeMismatchesForShader(const D3D9CommonShader* shader, uint32_t shaderSamplerMask, uint32_t shaderSamplerOffset); + /** + * @brief Sets the mismatching texture type bit for the given sampler. + * + * This function will set the mismatching texture type bit for the given sampler if it does not + * match the texture type expected by the respective shader. + * + * It will *not* unset the bit if the texture type does match. + * + * @param stateSampler Sampler index (according to our internal way of storing samplers) + */ void UpdateTextureTypeMismatchesForTexture(uint32_t stateSampler); void UploadManagedTexture(D3D9CommonTexture* pResource); diff --git a/src/d3d9/d3d9_device_child.h b/src/d3d9/d3d9_device_child.h index 433a269ac..0866c7d81 100644 --- a/src/d3d9/d3d9_device_child.h +++ b/src/d3d9/d3d9_device_child.h @@ -25,12 +25,28 @@ namespace dxvk { } ULONG STDMETHODCALLTYPE Release() { - uint32_t refCount = --this->m_refCount; + uint32_t oldRefCount, refCount; + + do { + oldRefCount = this->m_refCount.load(std::memory_order_acquire); + + // clamp value to 0 to prevent underruns + if (unlikely(!oldRefCount)) + return 0; + + refCount = oldRefCount - 1; + + } while (!this->m_refCount.compare_exchange_weak(oldRefCount, + refCount, + std::memory_order_release, + std::memory_order_acquire)); + if (unlikely(!refCount)) { auto* pDevice = GetDevice(); this->ReleasePrivate(); pDevice->Release(); } + return refCount; } diff --git a/src/d3d9/d3d9_fixed_function.cpp b/src/d3d9/d3d9_fixed_function.cpp index a47f89cad..0d92b7431 100644 --- a/src/d3d9/d3d9_fixed_function.cpp 
+++ b/src/d3d9/d3d9_fixed_function.cpp @@ -1297,7 +1297,7 @@ namespace dxvk { uint32_t atten = m_module.opFFma (m_floatType, d, atten2, atten1); atten = m_module.opFFma (m_floatType, d, atten, atten0); atten = m_module.opFDiv (m_floatType, m_module.constf32(1.0f), atten); - atten = m_module.opNMin (m_floatType, atten, m_module.constf32(FLT_MAX)); + atten = m_module.opNMin (m_floatType, atten, m_module.constf32(std::numeric_limits::max())); atten = m_module.opSelect(m_floatType, m_module.opFOrdGreaterThan(bool_t, d, range), m_module.constf32(0.0f), atten); atten = m_module.opSelect(m_floatType, isDirectional, m_module.constf32(1.0f), atten); diff --git a/src/d3d9/d3d9_shader.cpp b/src/d3d9/d3d9_shader.cpp index 6ab5df3d7..f8807e0cb 100644 --- a/src/d3d9/d3d9_shader.cpp +++ b/src/d3d9/d3d9_shader.cpp @@ -63,7 +63,7 @@ namespace dxvk { // do an or per-draw in the device. // We shift by 17 because 16 ps samplers + 1 dmap (tess) if (ShaderStage == VK_SHADER_STAGE_VERTEX_BIT) - m_usedSamplers <<= caps::MaxTexturesPS + 1; + m_usedSamplers <<= FirstVSSamplerSlot; m_usedRTs = pModule->usedRTs(); diff --git a/src/d3d9/d3d9_state.cpp b/src/d3d9/d3d9_state.cpp index af749cf76..6479d2589 100644 --- a/src/d3d9/d3d9_state.cpp +++ b/src/d3d9/d3d9_state.cpp @@ -10,7 +10,7 @@ namespace dxvk { streamFreq[i] = 1; for (uint32_t i = 0; i < enabledLightIndices.size(); i++) - enabledLightIndices[i] = UINT32_MAX; + enabledLightIndices[i] = std::numeric_limits::max(); } diff --git a/src/d3d9/d3d9_stateblock.cpp b/src/d3d9/d3d9_stateblock.cpp index 9998fa94d..ef8f76bb6 100644 --- a/src/d3d9/d3d9_stateblock.cpp +++ b/src/d3d9/d3d9_stateblock.cpp @@ -204,7 +204,7 @@ namespace dxvk { if (m_state.IsLightEnabled(Index) == !!Enable) return D3D_OK; - uint32_t searchIndex = UINT32_MAX; + uint32_t searchIndex = std::numeric_limits::max(); uint32_t setIndex = Index; if (!Enable) @@ -436,7 +436,7 @@ namespace dxvk { void D3D9StateBlock::CapturePixelSamplerStates() { 
m_captures.flags.set(D3D9CapturedStateFlag::SamplerStates); - for (uint32_t i = 0; i < caps::MaxTexturesPS + 1; i++) { + for (uint32_t i = 0; i < FirstVSSamplerSlot; i++) { m_captures.samplers.set(i, true); m_captures.samplerStates[i].set(D3DSAMP_ADDRESSU, true); @@ -519,7 +519,7 @@ namespace dxvk { void D3D9StateBlock::CaptureVertexSamplerStates() { m_captures.flags.set(D3D9CapturedStateFlag::SamplerStates); - for (uint32_t i = caps::MaxTexturesPS + 1; i < SamplerCount; i++) { + for (uint32_t i = FirstVSSamplerSlot; i < SamplerCount; i++) { m_captures.samplers.set(i, true); m_captures.samplerStates[i].set(D3DSAMP_DMAPOFFSET, true); } diff --git a/src/d3d9/d3d9_swapchain.cpp b/src/d3d9/d3d9_swapchain.cpp index 539b0815c..4ab616ed7 100644 --- a/src/d3d9/d3d9_swapchain.cpp +++ b/src/d3d9/d3d9_swapchain.cpp @@ -652,17 +652,17 @@ namespace dxvk { static bool validateGammaRamp(const WORD (&ramp)[256]) { if (ramp[0] >= ramp[std::size(ramp) - 1]) { - Logger::err("validateGammaRamp: ramp inverted or flat"); + Logger::warn("validateGammaRamp: ramp inverted or flat"); return false; } for (size_t i = 1; i < std::size(ramp); i++) { if (ramp[i] < ramp[i - 1]) { - Logger::err("validateGammaRamp: ramp not monotonically increasing"); + Logger::warn("validateGammaRamp: ramp not monotonically increasing"); return false; } if (ramp[i] - ramp[i - 1] >= UINT16_MAX / 2) { - Logger::err("validateGammaRamp: huuuge jump"); + Logger::warn("validateGammaRamp: huuuge jump"); return false; } } diff --git a/src/d3d9/d3d9_util.h b/src/d3d9/d3d9_util.h index 76cd31a7c..4e9202282 100644 --- a/src/d3d9/d3d9_util.h +++ b/src/d3d9/d3d9_util.h @@ -44,6 +44,11 @@ namespace dxvk { } } + /** + * @brief Returns whether or not the sampler index is valid + * + * @param Sampler Sampler index (according to the API) + */ inline bool InvalidSampler(DWORD Sampler) { if (Sampler >= caps::MaxTexturesPS && Sampler < D3DDMAPSAMPLER) return true; @@ -54,6 +59,19 @@ namespace dxvk { return false; } + /** + * @brief 
The first sampler that belongs to the vertex shader according to our internal way of storing samplers + */ + constexpr uint32_t FirstVSSamplerSlot = caps::MaxTexturesPS + 1; + + /** + * @brief Remaps a sampler index by the API to an internal one + * + * Remaps the sampler index according to the way the API counts them to how we count and store them internally. + * + * @param Sampler Sampler index (according to API) + * @return DWORD Sampler index (according to our internal way of storing samplers) + */ inline DWORD RemapSamplerState(DWORD Sampler) { if (Sampler >= D3DDMAPSAMPLER) Sampler = caps::MaxTexturesPS + (Sampler - D3DDMAPSAMPLER); @@ -61,13 +79,62 @@ namespace dxvk { return Sampler; } + /** + * @brief Remaps the sampler from an index applying to the entire pipeline to one relative to the shader stage and returns the shader type + * + * The displacement map sampler will be treated as a 17th pixel shader sampler. + * + * @param Sampler Sampler index (according to our internal way of storing samplers) + * @return std::pair Shader stage that it belongs to and the relative sampler index + */ inline std::pair RemapStateSamplerShader(DWORD Sampler) { - if (Sampler >= caps::MaxTexturesPS + 1) - return std::make_pair(DxsoProgramTypes::VertexShader, Sampler - caps::MaxTexturesPS - 1); + if (Sampler >= FirstVSSamplerSlot) + return std::make_pair(DxsoProgramTypes::VertexShader, Sampler - FirstVSSamplerSlot); return std::make_pair(DxsoProgramTypes::PixelShader, Sampler); } + /** + * @brief Returns whether the sampler belongs to the vertex shader. + * + * The displacement map sampler is part of a fixed function feature, + * so it does not belong to the vertex shader. + * Use IsDMAPSampler to check for that. + * + * @param Sampler Sampler index (according to our internal way of storing samplers) + */ + inline bool IsVSSampler(uint32_t Sampler) { + return Sampler >= FirstVSSamplerSlot; + } + + /** + * @brief Returns whether the sampler belongs to the pixel shader. 
+ * + * The displacement map sampler is part of a fixed function feature, + * so (unlike in RemapStateSamplerShader) it does not belong to the pixel shader. + * Use IsDMAPSampler to check for that. + * + * @param Sampler Sampler index (according to our internal way of storing samplers) + */ + inline bool IsPSSampler(uint32_t Sampler) { + return Sampler < caps::MaxTexturesPS; + } + + /** + * @brief Returns whether the sampler is the displacement map sampler + * + * @param Sampler Sampler index (according to our internal way of storing samplers) + */ + inline bool IsDMAPSampler(uint32_t Sampler) { + return Sampler == caps::MaxTexturesPS; + } + + /** + * @brief Remaps the sampler from an index (counted according to the API) to one relative to the shader stage and returns the shader type + * + * @param Sampler Sampler index (according to the API) + * @return std::pair Shader stage that it belongs to and the relative sampler index + */ inline std::pair RemapSamplerShader(DWORD Sampler) { Sampler = RemapSamplerState(Sampler); @@ -243,6 +310,9 @@ namespace dxvk { uint32_t(offsets[1].y) > extent.height; } + /** + * @brief Mirrors D3DTEXTURESTAGESTATETYPE but starts at 0 + */ enum D3D9TextureStageStateTypes : uint32_t { DXVK_TSS_COLOROP = 0, @@ -272,6 +342,12 @@ namespace dxvk { constexpr uint32_t DXVK_TSS_TCI_CAMERASPACEREFLECTIONVECTOR = 0x00030000; constexpr uint32_t DXVK_TSS_TCI_SPHEREMAP = 0x00040000; + /** + * @brief Remaps a texture stage type by the API to an internal one + * + * @param Type Texture stage type according to the API + * @return D3D9TextureStageStateTypes Texture stage type according to our internal way of storing them + */ inline D3D9TextureStageStateTypes RemapTextureStageStateType(D3DTEXTURESTAGESTATETYPE Type) { return D3D9TextureStageStateTypes(Type - 1); } diff --git a/src/dxbc/dxbc_analysis.cpp b/src/dxbc/dxbc_analysis.cpp index dd4a51324..a5bf4fc19 100644 --- a/src/dxbc/dxbc_analysis.cpp +++ b/src/dxbc/dxbc_analysis.cpp @@ -30,26 +30,48 @@ 
namespace dxvk { switch (ins.opClass) { case DxbcInstClass::Atomic: { const uint32_t operandId = ins.dstCount - 1; - + if (ins.dst[operandId].type == DxbcOperandType::UnorderedAccessView) { const uint32_t registerId = ins.dst[operandId].idx[0].offset; m_analysis->uavInfos[registerId].accessAtomicOp = true; m_analysis->uavInfos[registerId].accessFlags |= VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_SHADER_WRITE_BIT; + + // Check whether the atomic operation is order-invariant + DxvkAccessOp store = DxvkAccessOp::None; + + switch (ins.op) { + case DxbcOpcode::AtomicAnd: store = DxvkAccessOp::And; break; + case DxbcOpcode::AtomicOr: store = DxvkAccessOp::Or; break; + case DxbcOpcode::AtomicXor: store = DxvkAccessOp::Xor; break; + case DxbcOpcode::AtomicIAdd: store = DxvkAccessOp::Add; break; + case DxbcOpcode::AtomicIMax: store = DxvkAccessOp::IMax; break; + case DxbcOpcode::AtomicIMin: store = DxvkAccessOp::IMin; break; + case DxbcOpcode::AtomicUMax: store = DxvkAccessOp::UMax; break; + case DxbcOpcode::AtomicUMin: store = DxvkAccessOp::UMin; break; + default: break; + } + + if (m_analysis->uavInfos[registerId].atomicStore == DxvkAccessOp::None) + m_analysis->uavInfos[registerId].atomicStore = store; + + // Maintain ordering if the UAV is accessed via other operations as well + if (store == DxvkAccessOp::None || m_analysis->uavInfos[registerId].atomicStore != store) + m_analysis->uavInfos[registerId].nonInvariantAccess = true; } } break; - + case DxbcInstClass::TextureSample: case DxbcInstClass::TextureGather: case DxbcInstClass::TextureQueryLod: case DxbcInstClass::VectorDeriv: { m_analysis->usesDerivatives = true; } break; - + case DxbcInstClass::ControlFlow: { if (ins.op == DxbcOpcode::Discard) m_analysis->usesKill = true; } break; - + case DxbcInstClass::BufferLoad: { uint32_t operandId = ins.op == DxbcOpcode::LdStructured ? 
2 : 1; bool sparseFeedback = ins.dstCount == 2; @@ -58,16 +80,18 @@ namespace dxvk { const uint32_t registerId = ins.src[operandId].idx[0].offset; m_analysis->uavInfos[registerId].accessFlags |= VK_ACCESS_SHADER_READ_BIT; m_analysis->uavInfos[registerId].sparseFeedback |= sparseFeedback; + m_analysis->uavInfos[registerId].nonInvariantAccess = true; } else if (ins.src[operandId].type == DxbcOperandType::Resource) { const uint32_t registerId = ins.src[operandId].idx[0].offset; m_analysis->srvInfos[registerId].sparseFeedback |= sparseFeedback; } } break; - + case DxbcInstClass::BufferStore: { if (ins.dst[0].type == DxbcOperandType::UnorderedAccessView) { const uint32_t registerId = ins.dst[0].idx[0].offset; m_analysis->uavInfos[registerId].accessFlags |= VK_ACCESS_SHADER_WRITE_BIT; + m_analysis->uavInfos[registerId].nonInvariantAccess = true; } } break; @@ -75,13 +99,15 @@ namespace dxvk { const uint32_t registerId = ins.src[1].idx[0].offset; m_analysis->uavInfos[registerId].accessTypedLoad = true; m_analysis->uavInfos[registerId].accessFlags |= VK_ACCESS_SHADER_READ_BIT; + m_analysis->uavInfos[registerId].nonInvariantAccess = true; } break; case DxbcInstClass::TypedUavStore: { const uint32_t registerId = ins.dst[0].idx[0].offset; m_analysis->uavInfos[registerId].accessFlags |= VK_ACCESS_SHADER_WRITE_BIT; + m_analysis->uavInfos[registerId].nonInvariantAccess = true; } break; - + default: break; } diff --git a/src/dxbc/dxbc_analysis.h b/src/dxbc/dxbc_analysis.h index fcbc1ddad..fa589f4fd 100644 --- a/src/dxbc/dxbc_analysis.h +++ b/src/dxbc/dxbc_analysis.h @@ -17,9 +17,11 @@ namespace dxvk { * will be used to generate image types. 
*/ struct DxbcUavInfo { - bool accessTypedLoad = false; - bool accessAtomicOp = false; - bool sparseFeedback = false; + bool accessTypedLoad = false; + bool accessAtomicOp = false; + bool sparseFeedback = false; + bool nonInvariantAccess = false; + DxvkAccessOp atomicStore = DxvkAccessOp::None; VkAccessFlags accessFlags = 0; }; diff --git a/src/dxbc/dxbc_compiler.cpp b/src/dxbc/dxbc_compiler.cpp index 6ab3e9163..f0f362334 100644 --- a/src/dxbc/dxbc_compiler.cpp +++ b/src/dxbc/dxbc_compiler.cpp @@ -1098,6 +1098,9 @@ namespace dxvk { : VK_DESCRIPTOR_TYPE_STORAGE_IMAGE; binding.access = m_analysis->uavInfos[registerId].accessFlags; + if (!m_analysis->uavInfos[registerId].nonInvariantAccess) + binding.accessOp = m_analysis->uavInfos[registerId].atomicStore; + if (!(binding.access & VK_ACCESS_SHADER_WRITE_BIT)) m_module.decorate(varId, spv::DecorationNonWritable); if (!(binding.access & VK_ACCESS_SHADER_READ_BIT)) @@ -1234,9 +1237,14 @@ namespace dxvk { : (isUav ? VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER : VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER); binding.viewType = VK_IMAGE_VIEW_TYPE_MAX_ENUM; binding.resourceBinding = bindingId; - binding.access = isUav - ? 
m_analysis->uavInfos[registerId].accessFlags - : VkAccessFlags(VK_ACCESS_SHADER_READ_BIT); + binding.access = VK_ACCESS_SHADER_READ_BIT; + + if (isUav) { + binding.access = m_analysis->uavInfos[registerId].accessFlags; + + if (!m_analysis->uavInfos[registerId].nonInvariantAccess) + binding.accessOp = m_analysis->uavInfos[registerId].atomicStore; + } if (useRawSsbo || isUav) { if (!(binding.access & VK_ACCESS_SHADER_WRITE_BIT)) diff --git a/src/dxso/dxso_compiler.cpp b/src/dxso/dxso_compiler.cpp index d7cdc672f..1d7e1bf1d 100644 --- a/src/dxso/dxso_compiler.cpp +++ b/src/dxso/dxso_compiler.cpp @@ -1957,7 +1957,7 @@ namespace dxvk { if (m_moduleInfo.options.d3d9FloatEmulation == D3D9FloatEmulation::Enabled) { result.id = m_module.opNMin(typeId, result.id, - m_module.constfReplicant(FLT_MAX, result.type.ccount)); + m_module.constfReplicant(std::numeric_limits::max(), result.type.ccount)); } break; case DxsoOpcode::Rsq: @@ -1969,7 +1969,7 @@ namespace dxvk { if (m_moduleInfo.options.d3d9FloatEmulation == D3D9FloatEmulation::Enabled) { result.id = m_module.opNMin(typeId, result.id, - m_module.constfReplicant(FLT_MAX, result.type.ccount)); + m_module.constfReplicant(std::numeric_limits::max(), result.type.ccount)); } break; case DxsoOpcode::Dp3: { @@ -2029,7 +2029,7 @@ namespace dxvk { if (m_moduleInfo.options.d3d9FloatEmulation == D3D9FloatEmulation::Enabled) { result.id = m_module.opNMin(typeId, result.id, - m_module.constfReplicant(FLT_MAX, result.type.ccount)); + m_module.constfReplicant(std::numeric_limits::max(), result.type.ccount)); } break; } @@ -2040,7 +2040,7 @@ namespace dxvk { if (m_moduleInfo.options.d3d9FloatEmulation == D3D9FloatEmulation::Enabled) { result.id = m_module.opNMin(typeId, result.id, - m_module.constfReplicant(FLT_MAX, result.type.ccount)); + m_module.constfReplicant(std::numeric_limits::max(), result.type.ccount)); } break; case DxsoOpcode::Pow: { @@ -2102,7 +2102,7 @@ namespace dxvk { rcpLength.type = scalarType; rcpLength.id = 
m_module.opInverseSqrt(scalarTypeId, dot.id); if (m_moduleInfo.options.d3d9FloatEmulation == D3D9FloatEmulation::Enabled) { - rcpLength.id = m_module.opNMin(scalarTypeId, rcpLength.id, m_module.constf32(FLT_MAX)); + rcpLength.id = m_module.opNMin(scalarTypeId, rcpLength.id, m_module.constf32(std::numeric_limits::max())); } // r * rsq(r . r) @@ -2216,7 +2216,7 @@ namespace dxvk { result.id = m_module.opLog2(typeId, result.id); if (m_moduleInfo.options.d3d9FloatEmulation == D3D9FloatEmulation::Enabled) { result.id = m_module.opNMax(typeId, result.id, - m_module.constfReplicant(-FLT_MAX, result.type.ccount)); + m_module.constfReplicant(-std::numeric_limits::max(), result.type.ccount)); } break; case DxsoOpcode::Lrp: @@ -2965,7 +2965,7 @@ void DxsoCompiler::emitControlFlowGenericLoop( uint32_t lOffset = m_module.opAccessChain(m_module.defPointerType(float_t, spv::StorageClassUniform), m_ps.sharedState, 1, &index); lOffset = m_module.opLoad(float_t, lOffset); - + uint32_t zIndex = 2; uint32_t scale = m_module.opCompositeExtract(float_t, result.id, 1, &zIndex); scale = m_module.opFMul(float_t, scale, lScale); @@ -2980,7 +2980,7 @@ void DxsoCompiler::emitControlFlowGenericLoop( auto SampleType = [&](DxsoSamplerType samplerType) { uint32_t bitOffset = m_programInfo.type() == DxsoProgramTypes::VertexShader - ? samplerIdx + caps::MaxTexturesPS + 1 + ? 
samplerIdx + FirstVSSamplerSlot : samplerIdx; uint32_t isNull = m_spec.get(m_module, m_specUbo, SpecSamplerNull, bitOffset, 1); diff --git a/src/dxso/dxso_tables.cpp b/src/dxso/dxso_tables.cpp index 5b8ab91f0..df79a970a 100644 --- a/src/dxso/dxso_tables.cpp +++ b/src/dxso/dxso_tables.cpp @@ -86,7 +86,7 @@ namespace dxvk { case DxsoOpcode::SetP: return 3; case DxsoOpcode::TexLdl: return 3; case DxsoOpcode::BreakP: return 2; - default: Logger::warn("DxsoGetDefaultOpcodeLength: unknown opcode to get default length for."); return UINT32_MAX; + default: Logger::warn("DxsoGetDefaultOpcodeLength: unknown opcode to get default length for."); return std::numeric_limits::max(); } } diff --git a/src/dxso/dxso_tables.h b/src/dxso/dxso_tables.h index 73e3801d3..19b94f63b 100644 --- a/src/dxso/dxso_tables.h +++ b/src/dxso/dxso_tables.h @@ -4,7 +4,7 @@ namespace dxvk { - constexpr uint32_t InvalidOpcodeLength = UINT32_MAX; + constexpr uint32_t InvalidOpcodeLength = std::numeric_limits::max(); uint32_t DxsoGetDefaultOpcodeLength(DxsoOpcode opcode); diff --git a/src/dxvk/dxvk_barrier.cpp b/src/dxvk/dxvk_barrier.cpp index 287a0ff2f..9d9feb191 100644 --- a/src/dxvk/dxvk_barrier.cpp +++ b/src/dxvk/dxvk_barrier.cpp @@ -20,20 +20,37 @@ namespace dxvk { bool DxvkBarrierTracker::findRange( const DxvkAddressRange& range, - DxvkAccess accessType) const { + DxvkAccess accessType, + DxvkAccessOp accessOp) const { uint32_t rootIndex = computeRootIndex(range, accessType); - return findNode(range, rootIndex); + uint32_t nodeIndex = findNode(range, rootIndex); + + if (likely(!nodeIndex || accessOp == DxvkAccessOp::None)) + return nodeIndex; + + // If we are checking for a specific order-invariant store + // op, the op must have been the only op used to access the + // resource, and the tracked range must cover the requested + // range in its entirety so we can rule out that other parts + // of the resource have been accessed in a different way. 
+ auto& node = m_nodes[nodeIndex]; + + return node.payload.accessOps != DxvkAccessOps(accessOp) + || !node.addressRange.contains(range); } void DxvkBarrierTracker::insertRange( const DxvkAddressRange& range, - DxvkAccess accessType) { - uint32_t rootIndex = computeRootIndex(range, accessType); + DxvkAccess accessType, + DxvkAccessOp accessOp) { + DxvkBarrierPayload payload = { }; + payload.accessOps.set(accessOp); // If we can just insert the node with no conflicts, // we don't have to do anything. - uint32_t nodeIndex = insertNode(range, rootIndex); + uint32_t rootIndex = computeRootIndex(range, accessType); + uint32_t nodeIndex = insertNode(range, rootIndex, payload); if (likely(!nodeIndex)) return; @@ -41,6 +58,7 @@ namespace dxvk { // If there's an existing node and it contains the entire // range we want to add already, also don't do anything. auto& node = m_nodes[nodeIndex]; + node.payload.accessOps.set(payload.accessOps); if (node.addressRange.contains(range)) return; @@ -82,12 +100,14 @@ namespace dxvk { mergedRange.rangeStart = std::min(mergedRange.rangeStart, node.addressRange.rangeStart); mergedRange.rangeEnd = std::max(mergedRange.rangeEnd, node.addressRange.rangeEnd); + payload.accessOps.set(node.payload.accessOps); + removeNode(nodeIndex, rootIndex); nodeIndex = findNode(range, rootIndex); } - insertNode(mergedRange, rootIndex); + insertNode(mergedRange, rootIndex, payload); } @@ -166,7 +186,8 @@ namespace dxvk { uint32_t DxvkBarrierTracker::insertNode( const DxvkAddressRange& range, - uint32_t rootIndex) { + uint32_t rootIndex, + DxvkBarrierPayload payload) { // Check if the given root is valid at all uint64_t rootBit = uint64_t(1u) << (rootIndex - 1u); @@ -178,6 +199,7 @@ namespace dxvk { auto& node = m_nodes[rootIndex]; node.header = 0; node.addressRange = range; + node.payload = payload; return 0; } else { // Traverse tree and abort if we find any range @@ -209,6 +231,7 @@ namespace dxvk { node.setRed(true); node.setParent(parentIndex); 
node.addressRange = range; + node.payload = payload; // Only do the fixup to maintain red-black properties if // we haven't marked the root node as red in a deletion. @@ -238,6 +261,7 @@ namespace dxvk { childIndex = m_nodes[childIndex].child(0); node.addressRange = m_nodes[childIndex].addressRange; + node.payload = m_nodes[childIndex].payload; removeNode(childIndex, rootIndex); } else { // Deletion is expected to be exceptionally rare, to the point of @@ -268,6 +292,7 @@ namespace dxvk { node.setRed(child.isRed()); node.addressRange = child.addressRange; + node.payload = child.payload; if (cl) m_nodes[cl].setParent(nodeIndex); if (cr) m_nodes[cr].setParent(nodeIndex); @@ -378,6 +403,7 @@ namespace dxvk { node.setChild(1, rr); std::swap(node.addressRange, m_nodes[r].addressRange); + std::swap(node.payload, m_nodes[r].payload); } @@ -406,6 +432,7 @@ namespace dxvk { node.setChild(1, l); std::swap(node.addressRange, m_nodes[l].addressRange); + std::swap(node.payload, m_nodes[l].payload); } @@ -498,4 +525,4 @@ namespace dxvk { flush(list); } -} \ No newline at end of file +} diff --git a/src/dxvk/dxvk_barrier.h b/src/dxvk/dxvk_barrier.h index 3b4cdf9c5..fb0d0726f 100644 --- a/src/dxvk/dxvk_barrier.h +++ b/src/dxvk/dxvk_barrier.h @@ -42,6 +42,14 @@ namespace dxvk { }; + /** + * \brief Barrier node payload + */ + struct DxvkBarrierPayload { + DxvkAccessOps accessOps = 0u; + }; + + /** * \brief Barrier tree node * @@ -62,6 +70,9 @@ namespace dxvk { // Address range of the node DxvkAddressRange addressRange = { }; + // Node payload + DxvkBarrierPayload payload = { }; + void setRed(bool red) { header &= ~uint64_t(1u); header |= uint64_t(red); @@ -117,21 +128,25 @@ namespace dxvk { * * \param [in] range Resource range * \param [in] accessType Access type + * \param [in] accessOp Access operation * \returns \c true if the range has a pending access */ bool findRange( const DxvkAddressRange& range, - DxvkAccess accessType) const; + DxvkAccess accessType, + DxvkAccessOp 
accessOp) const; /** * \brief Inserts address range for a given access type * * \param [in] range Resource range * \param [in] accessType Access type + * \param [in] accessOp Access operation */ void insertRange( const DxvkAddressRange& range, - DxvkAccess accessType); + DxvkAccess accessType, + DxvkAccessOp accessOp); /** * \brief Clears the entire structure @@ -166,7 +181,8 @@ namespace dxvk { uint32_t insertNode( const DxvkAddressRange& range, - uint32_t rootIndex); + uint32_t rootIndex, + DxvkBarrierPayload payload); void removeNode( uint32_t nodeIndex, @@ -285,4 +301,4 @@ namespace dxvk { }; -} \ No newline at end of file +} diff --git a/src/dxvk/dxvk_context.cpp b/src/dxvk/dxvk_context.cpp index d33938dda..d99cfd4d6 100644 --- a/src/dxvk/dxvk_context.cpp +++ b/src/dxvk/dxvk_context.cpp @@ -21,32 +21,17 @@ namespace dxvk { m_state.om.framebufferInfo = makeFramebufferInfo(m_state.om.renderTargets); m_descriptorManager = new DxvkDescriptorManager(device.ptr()); - // Default destination barriers for graphics pipelines - m_globalRoGraphicsBarrier.stages = m_device->getShaderPipelineStages() - | VK_PIPELINE_STAGE_TRANSFER_BIT - | VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT - | VK_PIPELINE_STAGE_EARLY_FRAGMENT_TESTS_BIT - | VK_PIPELINE_STAGE_LATE_FRAGMENT_TESTS_BIT; - m_globalRoGraphicsBarrier.access = 0; + // Global barrier for graphics pipelines. This is only used to + // avoid write-after-read hazards after a render pass, so the + // access mask here can be zero. 
+ m_renderPassBarrierDst.stages = m_device->getShaderPipelineStages() + | VK_PIPELINE_STAGE_TRANSFER_BIT + | VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT + | VK_PIPELINE_STAGE_EARLY_FRAGMENT_TESTS_BIT + | VK_PIPELINE_STAGE_LATE_FRAGMENT_TESTS_BIT; if (m_device->features().extTransformFeedback.transformFeedback) - m_globalRoGraphicsBarrier.stages |= VK_PIPELINE_STAGE_TRANSFORM_FEEDBACK_BIT_EXT; - - m_globalRwGraphicsBarrier = m_globalRoGraphicsBarrier; - m_globalRwGraphicsBarrier.stages |= VK_PIPELINE_STAGE_DRAW_INDIRECT_BIT - | VK_PIPELINE_STAGE_VERTEX_INPUT_BIT; - - m_globalRwGraphicsBarrier.access |= VK_ACCESS_INDIRECT_COMMAND_READ_BIT - | VK_ACCESS_INDEX_READ_BIT - | VK_ACCESS_VERTEX_ATTRIBUTE_READ_BIT - | VK_ACCESS_UNIFORM_READ_BIT - | VK_ACCESS_SHADER_READ_BIT - | VK_ACCESS_COLOR_ATTACHMENT_READ_BIT - | VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_READ_BIT - | VK_ACCESS_TRANSFER_READ_BIT; - - if (m_device->features().extTransformFeedback.transformFeedback) - m_globalRwGraphicsBarrier.access |= VK_ACCESS_TRANSFORM_FEEDBACK_COUNTER_READ_BIT_EXT; + m_renderPassBarrierDst.stages |= VK_PIPELINE_STAGE_TRANSFORM_FEEDBACK_BIT_EXT; // Store the lifetime tracking bit as a context feature so // that we don't have to scan device features at draw time @@ -236,7 +221,7 @@ namespace dxvk { accessImage(DxvkCmdBuffer::ExecBuffer, *image, subresources, image->info().layout, image->info().stages, 0, - layout, image->info().stages, image->info().access); + layout, image->info().stages, image->info().access, DxvkAccessOp::None); image->setLayout(layout); @@ -288,10 +273,9 @@ namespace dxvk { &value); } - accessBuffer(cmdBuffer, - *buffer, offset, length, - VK_PIPELINE_STAGE_2_TRANSFER_BIT, - VK_ACCESS_2_TRANSFER_WRITE_BIT); + accessBuffer(cmdBuffer, *buffer, offset, length, + VK_PIPELINE_STAGE_2_TRANSFER_BIT, VK_ACCESS_2_TRANSFER_WRITE_BIT, + DxvkAccessOp::None); m_cmd->track(buffer, DxvkAccess::Write); } @@ -361,7 +345,7 @@ namespace dxvk { accessBuffer(cmdBuffer, *bufferView, 
VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT, - VK_ACCESS_2_SHADER_WRITE_BIT); + VK_ACCESS_2_SHADER_WRITE_BIT, DxvkAccessOp::None); if (unlikely(m_features.test(DxvkContextFeature::DebugUtils))) m_cmd->cmdEndDebugUtilsLabel(cmdBuffer); @@ -478,15 +462,13 @@ namespace dxvk { m_cmd->cmdCopyBuffer(cmdBuffer, ©Info); - accessBuffer(cmdBuffer, - *srcBuffer, srcOffset, numBytes, - VK_PIPELINE_STAGE_2_TRANSFER_BIT, - VK_ACCESS_2_TRANSFER_READ_BIT); + accessBuffer(cmdBuffer, *srcBuffer, srcOffset, numBytes, + VK_PIPELINE_STAGE_2_TRANSFER_BIT, VK_ACCESS_2_TRANSFER_READ_BIT, + DxvkAccessOp::None); - accessBuffer(cmdBuffer, - *dstBuffer, dstOffset, numBytes, - VK_PIPELINE_STAGE_2_TRANSFER_BIT, - VK_ACCESS_2_TRANSFER_WRITE_BIT); + accessBuffer(cmdBuffer, *dstBuffer, dstOffset, numBytes, + VK_PIPELINE_STAGE_2_TRANSFER_BIT, VK_ACCESS_2_TRANSFER_WRITE_BIT, + DxvkAccessOp::None); m_cmd->track(dstBuffer, DxvkAccess::Write); m_cmd->track(srcBuffer, DxvkAccess::Read); @@ -818,10 +800,12 @@ namespace dxvk { extent.depth); accessBuffer(DxvkCmdBuffer::ExecBuffer, *dstView, - VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT, VK_ACCESS_2_SHADER_WRITE_BIT); + VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT, VK_ACCESS_2_SHADER_WRITE_BIT, + DxvkAccessOp::None); accessBuffer(DxvkCmdBuffer::ExecBuffer, *srcView, - VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT, VK_ACCESS_2_SHADER_READ_BIT); + VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT, VK_ACCESS_2_SHADER_READ_BIT, + DxvkAccessOp::None); // Track all involved resources m_cmd->track(dstBuffer, DxvkAccess::Write); @@ -899,9 +883,6 @@ namespace dxvk { uint32_t y, uint32_t z) { if (this->commitComputeState()) { - this->commitComputeBarriers(); - this->commitComputeBarriers(); - m_queryManager.beginQueries(m_cmd, VK_QUERY_TYPE_PIPELINE_STATISTICS); @@ -924,9 +905,6 @@ namespace dxvk { sizeof(VkDispatchIndirectCommand), DxvkAccess::Read); if (this->commitComputeState()) { - this->commitComputeBarriers(); - this->commitComputeBarriers(); - m_queryManager.beginQueries(m_cmd, 
VK_QUERY_TYPE_PIPELINE_STATISTICS); @@ -936,13 +914,8 @@ namespace dxvk { m_queryManager.endQueries(m_cmd, VK_QUERY_TYPE_PIPELINE_STATISTICS); - accessBuffer(DxvkCmdBuffer::ExecBuffer, - *m_state.id.argBuffer.buffer(), - m_state.id.argBuffer.offset() + offset, - sizeof(VkDispatchIndirectCommand), - VK_PIPELINE_STAGE_2_DRAW_INDIRECT_BIT, - VK_ACCESS_2_INDIRECT_COMMAND_READ_BIT); - + accessDrawBuffer(offset, 1, 0, sizeof(VkDispatchIndirectCommand)); + this->trackDrawBuffer(); } } @@ -964,15 +937,9 @@ namespace dxvk { void DxvkContext::drawIndirect( VkDeviceSize offset, uint32_t count, - uint32_t stride) { - if (this->commitGraphicsState()) { - auto descriptor = m_state.id.argBuffer.getDescriptor(); - - m_cmd->cmdDrawIndirect( - descriptor.buffer.buffer, - descriptor.buffer.offset + offset, - count, stride); - } + uint32_t stride, + bool unroll) { + drawIndirectGeneric(offset, count, stride, unroll); } @@ -981,17 +948,7 @@ namespace dxvk { VkDeviceSize countOffset, uint32_t maxCount, uint32_t stride) { - if (this->commitGraphicsState()) { - auto argDescriptor = m_state.id.argBuffer.getDescriptor(); - auto cntDescriptor = m_state.id.cntBuffer.getDescriptor(); - - m_cmd->cmdDrawIndirectCount( - argDescriptor.buffer.buffer, - argDescriptor.buffer.offset + offset, - cntDescriptor.buffer.buffer, - cntDescriptor.buffer.offset + countOffset, - maxCount, stride); - } + drawIndirectCountGeneric(offset, countOffset, maxCount, stride); } @@ -1013,15 +970,9 @@ namespace dxvk { void DxvkContext::drawIndexedIndirect( VkDeviceSize offset, uint32_t count, - uint32_t stride) { - if (this->commitGraphicsState()) { - auto descriptor = m_state.id.argBuffer.getDescriptor(); - - m_cmd->cmdDrawIndexedIndirect( - descriptor.buffer.buffer, - descriptor.buffer.offset + offset, - count, stride); - } + uint32_t stride, + bool unroll) { + drawIndirectGeneric(offset, count, stride, unroll); } @@ -1030,32 +981,24 @@ namespace dxvk { VkDeviceSize countOffset, uint32_t maxCount, uint32_t stride) { - 
if (this->commitGraphicsState()) { - auto argDescriptor = m_state.id.argBuffer.getDescriptor(); - auto cntDescriptor = m_state.id.cntBuffer.getDescriptor(); - - m_cmd->cmdDrawIndexedIndirectCount( - argDescriptor.buffer.buffer, - argDescriptor.buffer.offset + offset, - cntDescriptor.buffer.buffer, - cntDescriptor.buffer.offset + countOffset, - maxCount, stride); - } + drawIndirectCountGeneric(offset, countOffset, maxCount, stride); } - - + + void DxvkContext::drawIndirectXfb( - const DxvkBufferSlice& counterBuffer, + VkDeviceSize counterOffset, uint32_t counterDivisor, uint32_t counterBias) { - if (this->commitGraphicsState()) { - auto physSlice = counterBuffer.getSliceHandle(); + if (this->commitGraphicsState()) { + auto physSlice = m_state.id.cntBuffer.getSliceHandle(); m_cmd->cmdDrawIndirectVertexCount(1, 0, - physSlice.handle, - physSlice.offset, - counterBias, - counterDivisor); + physSlice.handle, physSlice.offset + counterOffset, + counterBias, counterDivisor); + + // The count will generally be written from streamout + if (likely(m_state.id.cntBuffer.buffer()->hasGfxStores())) + accessDrawCountBuffer(counterOffset); } } @@ -1114,7 +1057,7 @@ namespace dxvk { if (initialLayout == VK_IMAGE_LAYOUT_PREINITIALIZED) { accessImage(DxvkCmdBuffer::InitBuffer, *image, subresources, initialLayout, - VK_PIPELINE_STAGE_2_NONE, 0); + VK_PIPELINE_STAGE_2_NONE, 0, DxvkAccessOp::None); m_cmd->track(image, DxvkAccess::None); } else { @@ -1193,10 +1136,8 @@ namespace dxvk { } } - accessImage(DxvkCmdBuffer::InitBuffer, - *image, subresources, clearLayout, - VK_PIPELINE_STAGE_2_TRANSFER_BIT, - VK_ACCESS_2_TRANSFER_WRITE_BIT); + accessImage(DxvkCmdBuffer::InitBuffer, *image, subresources, clearLayout, + VK_PIPELINE_STAGE_2_TRANSFER_BIT, VK_ACCESS_2_TRANSFER_WRITE_BIT, DxvkAccessOp::None); m_cmd->track(image, DxvkAccess::Write); } @@ -1244,9 +1185,8 @@ namespace dxvk { } // Perform initial layout transition - accessImage(DxvkCmdBuffer::InitBuffer, - *image, 
image->getAvailableSubresources(), - VK_IMAGE_LAYOUT_UNDEFINED, VK_PIPELINE_STAGE_2_NONE, 0); + accessImage(DxvkCmdBuffer::InitBuffer, *image, image->getAvailableSubresources(), + VK_IMAGE_LAYOUT_UNDEFINED, VK_PIPELINE_STAGE_2_NONE, 0, DxvkAccessOp::None); m_cmd->track(image, DxvkAccess::Write); } @@ -1284,7 +1224,8 @@ namespace dxvk { accessBuffer(DxvkCmdBuffer::ExecBuffer, *resource, 0, resource->info().size, - srcStages, srcAccess, dstStages, dstAccess); + srcStages, srcAccess, dstStages, dstAccess, + DxvkAccessOp::None); m_cmd->track(resource, DxvkAccess::Write); } @@ -1306,7 +1247,8 @@ namespace dxvk { accessImage(DxvkCmdBuffer::ExecBuffer, *resource, resource->getAvailableSubresources(), srcLayout, srcStages, srcAccess, - dstLayout, dstStages, dstAccess); + dstLayout, dstStages, dstAccess, + DxvkAccessOp::None); m_cmd->track(resource, DxvkAccess::Write); } @@ -1463,19 +1405,22 @@ namespace dxvk { VK_PIPELINE_STAGE_2_COLOR_ATTACHMENT_OUTPUT_BIT | VK_PIPELINE_STAGE_2_FRAGMENT_SHADER_BIT, VK_ACCESS_2_COLOR_ATTACHMENT_WRITE_BIT | - VK_ACCESS_2_SHADER_READ_BIT); + VK_ACCESS_2_SHADER_READ_BIT, + DxvkAccessOp::None); } else { accessImage(DxvkCmdBuffer::ExecBuffer, *imageView->image(), mipGenerator.getAllSourceSubresources(), srcLayout, VK_PIPELINE_STAGE_2_COLOR_ATTACHMENT_OUTPUT_BIT | VK_PIPELINE_STAGE_2_FRAGMENT_SHADER_BIT, VK_ACCESS_2_COLOR_ATTACHMENT_WRITE_BIT | - VK_ACCESS_2_SHADER_READ_BIT); + VK_ACCESS_2_SHADER_READ_BIT, + DxvkAccessOp::None); accessImage(DxvkCmdBuffer::ExecBuffer, *imageView->image(), mipGenerator.getBottomSubresource(), dstLayout, VK_PIPELINE_STAGE_2_COLOR_ATTACHMENT_OUTPUT_BIT, - VK_ACCESS_2_COLOR_ATTACHMENT_WRITE_BIT); + VK_ACCESS_2_COLOR_ATTACHMENT_WRITE_BIT, + DxvkAccessOp::None); } if (unlikely(m_features.test(DxvkContextFeature::DebugUtils))) @@ -1575,7 +1520,7 @@ namespace dxvk { // If the image has any pending layout transitions, flush them accordingly. // There might be false positives here, but those do not affect correctness. 
- if (resourceHasAccess(*image, image->getAvailableSubresources(), DxvkAccess::Write)) { + if (resourceHasAccess(*image, image->getAvailableSubresources(), DxvkAccess::Write, DxvkAccessOp::None)) { spillRenderPass(true); flushBarriers(); @@ -1643,7 +1588,8 @@ namespace dxvk { accessImage(DxvkCmdBuffer::ExecBuffer, *image, image->getAvailableSubresources(), oldLayout, image->info().stages, image->info().access, - newLayout, image->info().stages, image->info().access); + newLayout, image->info().stages, image->info().access, + DxvkAccessOp::None); m_cmd->track(image, DxvkAccess::Write); return true; @@ -1743,6 +1689,90 @@ namespace dxvk { } + template + void DxvkContext::drawIndirectGeneric( + VkDeviceSize offset, + uint32_t count, + uint32_t stride, + bool unroll) { + if (this->commitGraphicsState()) { + auto descriptor = m_state.id.argBuffer.getDescriptor(); + + if (unroll) { + // Need to do this check after initially setting up the pipeline + unroll = m_state.gp.flags.test(DxvkGraphicsPipelineFlag::UnrollMergedDraws) + && !m_barrierControl.test(DxvkBarrierControl::GraphicsAllowReadWriteOverlap); + } + + // If draws are merged but the pipeline has order-dependent stores, submit + // one draw at a time as well as barriers in between. Otherwise, keep the + // draws merged. + uint32_t step = unroll ? 1u : count; + + for (uint32_t i = 0; i < count; i += step) { + if (unlikely(i)) { + // Insert barrier after the first iteration + this->commitGraphicsState(); + } + + if (Indexed) { + m_cmd->cmdDrawIndexedIndirect(descriptor.buffer.buffer, + descriptor.buffer.offset + offset, step, stride); + } else { + m_cmd->cmdDrawIndirect(descriptor.buffer.buffer, + descriptor.buffer.offset + offset, step, stride); + } + + if (unlikely(m_state.id.argBuffer.buffer()->hasGfxStores())) { + accessDrawBuffer(offset, step, stride, Indexed + ? 
sizeof(VkDrawIndexedIndirectCommand) + : sizeof(VkDrawIndirectCommand)); + } + + offset += step * stride; + } + } + } + + + template + void DxvkContext::drawIndirectCountGeneric( + VkDeviceSize offset, + VkDeviceSize countOffset, + uint32_t maxCount, + uint32_t stride) { + if (this->commitGraphicsState()) { + auto argDescriptor = m_state.id.argBuffer.getDescriptor(); + auto cntDescriptor = m_state.id.cntBuffer.getDescriptor(); + + if (Indexed) { + m_cmd->cmdDrawIndexedIndirectCount( + argDescriptor.buffer.buffer, + argDescriptor.buffer.offset + offset, + cntDescriptor.buffer.buffer, + cntDescriptor.buffer.offset + countOffset, + maxCount, stride); + } else { + m_cmd->cmdDrawIndirectCount( + argDescriptor.buffer.buffer, + argDescriptor.buffer.offset + offset, + cntDescriptor.buffer.buffer, + cntDescriptor.buffer.offset + countOffset, + maxCount, stride); + } + + if (unlikely(m_state.id.argBuffer.buffer()->hasGfxStores())) { + accessDrawBuffer(offset, maxCount, stride, Indexed + ? sizeof(VkDrawIndexedIndirectCommand) + : sizeof(VkDrawIndirectCommand)); + } + + if (unlikely(m_state.id.cntBuffer.buffer()->hasGfxStores())) + accessDrawCountBuffer(countOffset); + } + } + + void DxvkContext::resolveImage( const Rc& dstImage, const Rc& srcImage, @@ -1854,7 +1884,8 @@ namespace dxvk { accessImage(DxvkCmdBuffer::ExecBuffer, *dstImage, dstSubresources, srcLayout, dstImage->info().stages, dstImage->info().access, - dstLayout, dstImage->info().stages, dstImage->info().access); + dstLayout, dstImage->info().stages, dstImage->info().access, + DxvkAccessOp::None); m_cmd->track(dstImage, DxvkAccess::Write); } @@ -2020,7 +2051,7 @@ namespace dxvk { *imageView->image(), imageView->imageSubresources(), imageLayout, clearStages, clearAccess, storeLayout, imageView->image()->info().stages, - imageView->image()->info().access); + imageView->image()->info().access, DxvkAccessOp::None); if (unlikely(m_features.test(DxvkContextFeature::DebugUtils))) 
m_cmd->cmdEndDebugUtilsLabel(DxvkCmdBuffer::ExecBuffer); @@ -2223,7 +2254,7 @@ namespace dxvk { // Record layout transition from attachment layout back to default // layout. This will be flushed after the render pass has ended. accessImage(DxvkCmdBuffer::ExecBuffer, *dstImage, - dstSubresource, newLayout, stages, access); + dstSubresource, newLayout, stages, access, DxvkAccessOp::None); if (!isDepthStencil) { uint32_t index = m_state.om.framebufferInfo.getColorAttachmentIndex(i); @@ -2334,7 +2365,7 @@ namespace dxvk { accessBuffer(cmdBuffer, *buffer, offset, size, VK_PIPELINE_STAGE_2_TRANSFER_BIT, - VK_ACCESS_2_TRANSFER_WRITE_BIT); + VK_ACCESS_2_TRANSFER_WRITE_BIT, DxvkAccessOp::None); m_cmd->track(buffer, DxvkAccess::Write); } @@ -2614,10 +2645,28 @@ namespace dxvk { void DxvkContext::setBarrierControl(DxvkBarrierControlFlags control) { + // If any currently relevant control flags change, play it safe and force + // a barrier the next time we encounter a write-after-write hazard, even + // if the same set of flags is restored by that time. Only check graphics + // flags inside a render pass to avoid performance regressions when an + // application uses this feature but we already have an app profile. + // Barriers get flushed when beginning or ending a render pass anyway. + DxvkBarrierControlFlags mask = m_flags.test(DxvkContextFlag::GpRenderPassBound) + ? 
DxvkBarrierControlFlags(DxvkBarrierControl::GraphicsAllowReadWriteOverlap) + : DxvkBarrierControlFlags(DxvkBarrierControl::ComputeAllowReadWriteOverlap, + DxvkBarrierControl::ComputeAllowWriteOnlyOverlap); + + if (!((m_barrierControl ^ control) & mask).isClear()) { + m_flags.set(DxvkContextFlag::ForceWriteAfterWriteSync); + + if (unlikely(m_features.test(DxvkContextFeature::DebugUtils))) + popDebugRegion(util::DxvkDebugLabelType::InternalBarrierControl); + } + m_barrierControl = control; } - - + + void DxvkContext::updatePageTable( const DxvkSparseBindInfo& bindInfo, DxvkSparseBindFlags flags) { @@ -2767,7 +2816,8 @@ namespace dxvk { accessBuffer(DxvkCmdBuffer::ExecBuffer, *r.first, 0, r.first->info().size, - VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT, accessFlags); + VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT, + accessFlags, DxvkAccessOp::None); m_cmd->track(r.first, r.second.test(DxvkAccess::Write) ? DxvkAccess::Write : DxvkAccess::Read); } @@ -2777,7 +2827,7 @@ namespace dxvk { | (r.second.test(DxvkAccess::Write) * VK_ACCESS_SHADER_WRITE_BIT); accessImage(DxvkCmdBuffer::ExecBuffer, *r.first, r.first->getAvailableSubresources(), r.first->info().layout, - VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT, accessFlags); + VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT, accessFlags, DxvkAccessOp::None); m_cmd->track(r.first, r.second.test(DxvkAccess::Write) ? 
DxvkAccess::Write : DxvkAccess::Read); } @@ -2846,27 +2896,44 @@ namespace dxvk { label << ")"; - beginInternalDebugRegion(vk::makeLabel(0xf0e6dc, label.str().c_str())); + pushDebugRegion(vk::makeLabel(0xf0e6dc, label.str().c_str()), + util::DxvkDebugLabelType::InternalRenderPass); + } + + + template + void DxvkContext::beginBarrierControlDebugRegion() { + if (hasDebugRegion(util::DxvkDebugLabelType::InternalBarrierControl)) + return; + + const char* label = nullptr; + + if (BindPoint == VK_PIPELINE_BIND_POINT_COMPUTE) { + if (m_barrierControl.test(DxvkBarrierControl::ComputeAllowReadWriteOverlap)) + label = "Relaxed sync"; + else if (m_barrierControl.test(DxvkBarrierControl::ComputeAllowWriteOnlyOverlap)) + label = "Relaxed sync (write-only)"; + } else { + if (m_barrierControl.test(DxvkBarrierControl::GraphicsAllowReadWriteOverlap)) + label = "Relaxed sync"; + } + + if (label) { + pushDebugRegion(vk::makeLabel(0x9bded9, label), + util::DxvkDebugLabelType::InternalBarrierControl); + } } void DxvkContext::beginDebugLabel(const VkDebugUtilsLabelEXT& label) { - if (m_features.test(DxvkContextFeature::DebugUtils)) { - endInternalDebugRegion(); - - m_cmd->cmdBeginDebugUtilsLabel(DxvkCmdBuffer::ExecBuffer, label); - m_debugLabelStack.emplace_back(label); - } + if (m_features.test(DxvkContextFeature::DebugUtils)) + pushDebugRegion(label, util::DxvkDebugLabelType::External); } void DxvkContext::endDebugLabel() { - if (m_features.test(DxvkContextFeature::DebugUtils)) { - if (!m_debugLabelStack.empty()) { - m_cmd->cmdEndDebugUtilsLabel(DxvkCmdBuffer::ExecBuffer); - m_debugLabelStack.pop_back(); - } - } + if (m_features.test(DxvkContextFeature::DebugUtils)) + popDebugRegion(util::DxvkDebugLabelType::External); } @@ -3045,12 +3112,12 @@ namespace dxvk { accessImage(DxvkCmdBuffer::ExecBuffer, *dstView->image(), dstView->imageSubresources(), dstLayout, VK_PIPELINE_STAGE_2_COLOR_ATTACHMENT_OUTPUT_BIT, - VK_ACCESS_2_COLOR_ATTACHMENT_WRITE_BIT); + 
VK_ACCESS_2_COLOR_ATTACHMENT_WRITE_BIT, DxvkAccessOp::None); accessImage(DxvkCmdBuffer::ExecBuffer, *srcView->image(), srcView->imageSubresources(), srcLayout, VK_PIPELINE_STAGE_2_FRAGMENT_SHADER_BIT, - VK_ACCESS_2_SHADER_READ_BIT); + VK_ACCESS_2_SHADER_READ_BIT, DxvkAccessOp::None); if (unlikely(m_features.test(DxvkContextFeature::DebugUtils))) m_cmd->cmdEndDebugUtilsLabel(DxvkCmdBuffer::ExecBuffer); @@ -3100,14 +3167,12 @@ namespace dxvk { blitInfo.filter = filter; m_cmd->cmdBlitImage(&blitInfo); - - accessImage(DxvkCmdBuffer::ExecBuffer, - *dstView->image(), dstView->imageSubresources(), dstLayout, - VK_PIPELINE_STAGE_2_TRANSFER_BIT, VK_ACCESS_2_TRANSFER_WRITE_BIT); - - accessImage(DxvkCmdBuffer::ExecBuffer, - *srcView->image(), srcView->imageSubresources(), srcLayout, - VK_PIPELINE_STAGE_2_TRANSFER_BIT, VK_ACCESS_2_TRANSFER_READ_BIT); + + accessImage(DxvkCmdBuffer::ExecBuffer, *dstView->image(), dstView->imageSubresources(), dstLayout, + VK_PIPELINE_STAGE_2_TRANSFER_BIT, VK_ACCESS_2_TRANSFER_WRITE_BIT, DxvkAccessOp::None); + + accessImage(DxvkCmdBuffer::ExecBuffer, *srcView->image(), srcView->imageSubresources(), srcLayout, + VK_PIPELINE_STAGE_2_TRANSFER_BIT, VK_ACCESS_2_TRANSFER_READ_BIT, DxvkAccessOp::None); m_cmd->track(dstView->image(), DxvkAccess::Write); m_cmd->track(srcView->image(), DxvkAccess::Read); @@ -3258,10 +3323,10 @@ namespace dxvk { bufferSlice, bufferRowAlignment, bufferSliceAlignment); accessImage(cmdBuffer, *image, dstSubresourceRange, dstImageLayoutTransfer, - VK_PIPELINE_STAGE_2_TRANSFER_BIT, VK_ACCESS_2_TRANSFER_WRITE_BIT); + VK_PIPELINE_STAGE_2_TRANSFER_BIT, VK_ACCESS_2_TRANSFER_WRITE_BIT, DxvkAccessOp::None); accessBuffer(cmdBuffer, *buffer, bufferOffset, dataSize, - VK_PIPELINE_STAGE_2_TRANSFER_BIT, VK_ACCESS_2_TRANSFER_READ_BIT); + VK_PIPELINE_STAGE_2_TRANSFER_BIT, VK_ACCESS_2_TRANSFER_READ_BIT, DxvkAccessOp::None); m_cmd->track(image, DxvkAccess::Write); m_cmd->track(buffer, DxvkAccess::Read); @@ -3497,11 +3562,11 @@ namespace dxvk { 
accessImage(DxvkCmdBuffer::ExecBuffer, *image, vk::makeSubresourceRange(imageSubresource), - imageLayout, stages, access); + imageLayout, stages, access, DxvkAccessOp::None); accessBuffer(DxvkCmdBuffer::ExecBuffer, *bufferView, VK_PIPELINE_STAGE_2_FRAGMENT_SHADER_BIT, - VK_ACCESS_2_SHADER_READ_BIT); + VK_ACCESS_2_SHADER_READ_BIT, DxvkAccessOp::None); if (unlikely(m_features.test(DxvkContextFeature::DebugUtils))) m_cmd->cmdEndDebugUtilsLabel(DxvkCmdBuffer::ExecBuffer); @@ -3549,12 +3614,11 @@ namespace dxvk { image, imageSubresource, imageOffset, imageExtent, srcImageLayoutTransfer, bufferSlice, bufferRowAlignment, bufferSliceAlignment); - accessImage(DxvkCmdBuffer::ExecBuffer, - *image, srcSubresourceRange, srcImageLayoutTransfer, - VK_PIPELINE_STAGE_2_TRANSFER_BIT, VK_ACCESS_2_TRANSFER_READ_BIT); + accessImage(DxvkCmdBuffer::ExecBuffer, *image, srcSubresourceRange, srcImageLayoutTransfer, + VK_PIPELINE_STAGE_2_TRANSFER_BIT, VK_ACCESS_2_TRANSFER_READ_BIT, DxvkAccessOp::None); accessBuffer(DxvkCmdBuffer::ExecBuffer, *buffer, bufferOffset, dataSize, - VK_PIPELINE_STAGE_2_TRANSFER_BIT, VK_ACCESS_2_TRANSFER_WRITE_BIT); + VK_PIPELINE_STAGE_2_TRANSFER_BIT, VK_ACCESS_2_TRANSFER_WRITE_BIT, DxvkAccessOp::None); m_cmd->track(buffer, DxvkAccess::Write); m_cmd->track(image, DxvkAccess::Read); @@ -3725,18 +3789,19 @@ namespace dxvk { workgroupCount.depth); accessBuffer(DxvkCmdBuffer::ExecBuffer, *bufferView, - VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT, VK_ACCESS_2_SHADER_WRITE_BIT); + VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT, VK_ACCESS_2_SHADER_WRITE_BIT, DxvkAccessOp::None); - accessImage(DxvkCmdBuffer::ExecBuffer, *image, - vk::makeSubresourceRange(imageSubresource), imageLayout, - VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT, VK_ACCESS_2_SHADER_READ_BIT); + accessImage(DxvkCmdBuffer::ExecBuffer, *image, vk::makeSubresourceRange(imageSubresource), imageLayout, + VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT, VK_ACCESS_2_SHADER_READ_BIT, DxvkAccessOp::None); + + 
m_flags.set(DxvkContextFlag::ForceWriteAfterWriteSync); if (unlikely(m_features.test(DxvkContextFeature::DebugUtils))) m_cmd->cmdEndDebugUtilsLabel(DxvkCmdBuffer::ExecBuffer); m_cmd->track(buffer, DxvkAccess::Write); m_cmd->track(image, DxvkAccess::Read); - } +} void DxvkContext::clearImageViewFb( @@ -3848,7 +3913,7 @@ namespace dxvk { accessImage(DxvkCmdBuffer::ExecBuffer, *imageView->image(), imageView->imageSubresources(), - clearLayout, clearStages, clearAccess); + clearLayout, clearStages, clearAccess, DxvkAccessOp::None); if (unlikely(m_features.test(DxvkContextFeature::DebugUtils))) m_cmd->cmdEndDebugUtilsLabel(DxvkCmdBuffer::ExecBuffer); @@ -3937,10 +4002,12 @@ namespace dxvk { m_cmd->cmdDispatch(cmdBuffer, workgroups.width, workgroups.height, workgroups.depth); - accessImage(cmdBuffer, - *imageView->image(), imageView->imageSubresources(), + accessImage(cmdBuffer, *imageView->image(), imageView->imageSubresources(), VK_IMAGE_LAYOUT_GENERAL, VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT, - VK_ACCESS_2_SHADER_WRITE_BIT); + VK_ACCESS_2_SHADER_WRITE_BIT, DxvkAccessOp::None); + + if (cmdBuffer == DxvkCmdBuffer::ExecBuffer) + m_flags.set(DxvkContextFlag::ForceWriteAfterWriteSync); if (unlikely(m_features.test(DxvkContextFeature::DebugUtils))) m_cmd->cmdEndDebugUtilsLabel(cmdBuffer); @@ -4008,13 +4075,11 @@ namespace dxvk { m_cmd->cmdCopyImage(DxvkCmdBuffer::ExecBuffer, ©Info); } - accessImage(DxvkCmdBuffer::ExecBuffer, - *dstImage, dstSubresourceRange, dstImageLayout, - VK_PIPELINE_STAGE_2_TRANSFER_BIT, VK_ACCESS_2_TRANSFER_WRITE_BIT); + accessImage(DxvkCmdBuffer::ExecBuffer, *dstImage, dstSubresourceRange, dstImageLayout, + VK_PIPELINE_STAGE_2_TRANSFER_BIT, VK_ACCESS_2_TRANSFER_WRITE_BIT, DxvkAccessOp::None); - accessImage(DxvkCmdBuffer::ExecBuffer, - *srcImage, srcSubresourceRange, srcImageLayout, - VK_PIPELINE_STAGE_2_TRANSFER_BIT, VK_ACCESS_2_TRANSFER_READ_BIT); + accessImage(DxvkCmdBuffer::ExecBuffer, *srcImage, srcSubresourceRange, srcImageLayout, + 
VK_PIPELINE_STAGE_2_TRANSFER_BIT, VK_ACCESS_2_TRANSFER_READ_BIT, DxvkAccessOp::None); m_cmd->track(dstImage, DxvkAccess::Write); m_cmd->track(srcImage, DxvkAccess::Read); @@ -4210,14 +4275,12 @@ namespace dxvk { m_cmd->cmdDraw(3, dstSubresource.layerCount, 0, 0); m_cmd->cmdEndRendering(); - accessImage(DxvkCmdBuffer::ExecBuffer, - *srcImage, srcSubresourceRange, srcLayout, - VK_PIPELINE_STAGE_2_FRAGMENT_SHADER_BIT, - VK_ACCESS_2_SHADER_READ_BIT); + accessImage(DxvkCmdBuffer::ExecBuffer, *srcImage, srcSubresourceRange, + srcLayout, VK_PIPELINE_STAGE_2_FRAGMENT_SHADER_BIT, + VK_ACCESS_2_SHADER_READ_BIT, DxvkAccessOp::None); - accessImage(DxvkCmdBuffer::ExecBuffer, - *dstImage, dstSubresourceRange, - dstLayout, dstStages, dstAccess); + accessImage(DxvkCmdBuffer::ExecBuffer, *dstImage, dstSubresourceRange, + dstLayout, dstStages, dstAccess, DxvkAccessOp::None); if (unlikely(m_features.test(DxvkContextFeature::DebugUtils))) m_cmd->cmdEndDebugUtilsLabel(DxvkCmdBuffer::ExecBuffer); @@ -4308,7 +4371,8 @@ namespace dxvk { accessBuffer(DxvkCmdBuffer::ExecBuffer, *buffer, offset, SparseMemoryPageSize * pageCount, VK_PIPELINE_STAGE_2_TRANSFER_BIT, - ToBuffer ? VK_ACCESS_2_TRANSFER_WRITE_BIT : VK_ACCESS_2_TRANSFER_READ_BIT); + ToBuffer ? VK_ACCESS_2_TRANSFER_WRITE_BIT : VK_ACCESS_2_TRANSFER_READ_BIT, + DxvkAccessOp::None); } @@ -4357,7 +4421,8 @@ namespace dxvk { accessBuffer(DxvkCmdBuffer::ExecBuffer, *sparse, 0, sparse->info().size, VK_PIPELINE_STAGE_2_TRANSFER_BIT, - ToBuffer ? VK_ACCESS_2_TRANSFER_READ_BIT : VK_ACCESS_2_TRANSFER_WRITE_BIT); + ToBuffer ? VK_ACCESS_2_TRANSFER_READ_BIT : VK_ACCESS_2_TRANSFER_WRITE_BIT, + DxvkAccessOp::None); m_cmd->track(sparse, ToBuffer ? DxvkAccess::Read : DxvkAccess::Write); m_cmd->track(buffer, ToBuffer ? 
DxvkAccess::Write : DxvkAccess::Read); @@ -4435,7 +4500,7 @@ namespace dxvk { accessImage(DxvkCmdBuffer::ExecBuffer, *sparse, sparseSubresources, transferLayout, - VK_PIPELINE_STAGE_2_TRANSFER_BIT, transferAccess); + VK_PIPELINE_STAGE_2_TRANSFER_BIT, transferAccess, DxvkAccessOp::None); m_cmd->track(sparse, ToBuffer ? DxvkAccess::Read : DxvkAccess::Write); m_cmd->track(buffer, ToBuffer ? DxvkAccess::Write : DxvkAccess::Read); @@ -4484,12 +4549,12 @@ namespace dxvk { accessImage(DxvkCmdBuffer::ExecBuffer, *dstImage, dstSubresourceRange, dstLayout, VK_PIPELINE_STAGE_2_TRANSFER_BIT, - VK_ACCESS_2_TRANSFER_WRITE_BIT); + VK_ACCESS_2_TRANSFER_WRITE_BIT, DxvkAccessOp::None); accessImage(DxvkCmdBuffer::ExecBuffer, *srcImage, srcSubresourceRange, srcLayout, VK_PIPELINE_STAGE_2_TRANSFER_BIT, - VK_ACCESS_2_TRANSFER_READ_BIT); + VK_ACCESS_2_TRANSFER_READ_BIT, DxvkAccessOp::None); m_cmd->track(dstImage, DxvkAccess::Write); m_cmd->track(srcImage, DxvkAccess::Read); @@ -4582,13 +4647,15 @@ namespace dxvk { *srcImage, srcSubresourceRange, srcLayout, VK_PIPELINE_STAGE_2_EARLY_FRAGMENT_TESTS_BIT | VK_PIPELINE_STAGE_2_LATE_FRAGMENT_TESTS_BIT, - VK_ACCESS_2_DEPTH_STENCIL_ATTACHMENT_READ_BIT); + VK_ACCESS_2_DEPTH_STENCIL_ATTACHMENT_READ_BIT, + DxvkAccessOp::None); accessImage(DxvkCmdBuffer::ExecBuffer, *dstImage, dstSubresourceRange, dstLayout, VK_PIPELINE_STAGE_2_EARLY_FRAGMENT_TESTS_BIT | VK_PIPELINE_STAGE_2_LATE_FRAGMENT_TESTS_BIT, - VK_ACCESS_2_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT); + VK_ACCESS_2_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT, + DxvkAccessOp::None); if (unlikely(m_features.test(DxvkContextFeature::DebugUtils))) m_cmd->cmdEndDebugUtilsLabel(DxvkCmdBuffer::ExecBuffer); @@ -4795,14 +4862,12 @@ namespace dxvk { m_cmd->cmdDraw(3, region.dstSubresource.layerCount, 0, 0); m_cmd->cmdEndRendering(); - accessImage(DxvkCmdBuffer::ExecBuffer, - *srcImage, srcSubresourceRange, srcLayout, - VK_PIPELINE_STAGE_2_FRAGMENT_SHADER_BIT, - VK_ACCESS_2_SHADER_READ_BIT); + 
accessImage(DxvkCmdBuffer::ExecBuffer, *srcImage, srcSubresourceRange, + srcLayout, VK_PIPELINE_STAGE_2_FRAGMENT_SHADER_BIT, + VK_ACCESS_2_SHADER_READ_BIT, DxvkAccessOp::None); - accessImage(DxvkCmdBuffer::ExecBuffer, - *dstImage, dstSubresourceRange, - dstLayout, dstStages, dstAccess); + accessImage(DxvkCmdBuffer::ExecBuffer, *dstImage, dstSubresourceRange, + dstLayout, dstStages, dstAccess, DxvkAccessOp::None); if (unlikely(m_features.test(DxvkContextFeature::DebugUtils))) m_cmd->cmdEndDebugUtilsLabel(DxvkCmdBuffer::ExecBuffer); @@ -5099,9 +5164,9 @@ namespace dxvk { m_initBarriers.addImageBarrier(barrier); } else { - accessImage(DxvkCmdBuffer::SdmaBuffer, - *image, image->getAvailableSubresources(), transferLayout, - VK_PIPELINE_STAGE_2_TRANSFER_BIT, VK_ACCESS_2_TRANSFER_WRITE_BIT); + accessImage(DxvkCmdBuffer::SdmaBuffer, *image, image->getAvailableSubresources(), + transferLayout, VK_PIPELINE_STAGE_2_TRANSFER_BIT, VK_ACCESS_2_TRANSFER_WRITE_BIT, + DxvkAccessOp::None); } m_cmd->track(source, DxvkAccess::Read); @@ -5111,6 +5176,9 @@ namespace dxvk { void DxvkContext::startRenderPass() { if (!m_flags.test(DxvkContextFlag::GpRenderPassBound)) { + if (unlikely(m_features.test(DxvkContextFeature::DebugUtils))) + popDebugRegion(util::DxvkDebugLabelType::InternalBarrierControl); + this->applyRenderTargetLoadLayouts(); this->flushClears(true); @@ -5163,13 +5231,17 @@ namespace dxvk { void DxvkContext::spillRenderPass(bool suspend) { if (m_flags.test(DxvkContextFlag::GpRenderPassBound)) { - m_flags.clr(DxvkContextFlag::GpRenderPassBound); + m_flags.clr(DxvkContextFlag::GpRenderPassBound, + DxvkContextFlag::GpRenderPassSideEffects); this->pauseTransformFeedback(); m_queryManager.endQueries(m_cmd, VK_QUERY_TYPE_OCCLUSION); m_queryManager.endQueries(m_cmd, VK_QUERY_TYPE_PIPELINE_STATISTICS); + if (unlikely(m_features.test(DxvkContextFeature::DebugUtils))) + popDebugRegion(util::DxvkDebugLabelType::InternalBarrierControl); + this->renderPassUnbindFramebuffer(); if (suspend) 
@@ -5177,9 +5249,19 @@ namespace dxvk { else this->transitionRenderTargetLayouts(false); + if (m_renderPassBarrierSrc.stages) { + accessMemory(DxvkCmdBuffer::ExecBuffer, + m_renderPassBarrierSrc.stages, m_renderPassBarrierSrc.access, + m_renderPassBarrierDst.stages, m_renderPassBarrierDst.access); + + m_renderPassBarrierSrc = DxvkGlobalPipelineBarrier(); + } + flushBarriers(); flushResolves(); - endInternalDebugRegion(); + + if (unlikely(m_features.test(DxvkContextFeature::DebugUtils))) + popDebugRegion(util::DxvkDebugLabelType::InternalRenderPass); } else if (!suspend) { // We may end a previously suspended render pass if (m_flags.test(DxvkContextFlag::GpRenderPassSuspended)) { @@ -5238,7 +5320,7 @@ namespace dxvk { ops.depthOps.loadLayout, depthStages, 0, depthAttachment.layout, - depthStages, depthAccess); + depthStages, depthAccess, DxvkAccessOp::None); } for (uint32_t i = 0; i < MaxNumRenderTargets; i++) { @@ -5258,7 +5340,7 @@ namespace dxvk { VK_PIPELINE_STAGE_2_COLOR_ATTACHMENT_OUTPUT_BIT, 0, colorAttachment.layout, VK_PIPELINE_STAGE_2_COLOR_ATTACHMENT_OUTPUT_BIT, - colorAccess); + colorAccess, DxvkAccessOp::None); } } @@ -5288,7 +5370,8 @@ namespace dxvk { VK_PIPELINE_STAGE_2_LATE_FRAGMENT_TESTS_BIT, srcAccess, ops.depthOps.storeLayout, depthAttachment.view->image()->info().stages, - depthAttachment.view->image()->info().access); + depthAttachment.view->image()->info().access, + DxvkAccessOp::None); } for (uint32_t i = 0; i < MaxNumRenderTargets; i++) { @@ -5304,7 +5387,8 @@ namespace dxvk { VK_ACCESS_2_COLOR_ATTACHMENT_WRITE_BIT, ops.colorOps[i].storeLayout, colorAttachment.view->image()->info().stages, - colorAttachment.view->image()->info().access); + colorAttachment.view->image()->info().access, + DxvkAccessOp::None); } } @@ -5517,8 +5601,19 @@ namespace dxvk { ctrBuffers[i] = physSlice.handle; ctrOffsets[i] = physSlice.offset; - if (physSlice.handle != VK_NULL_HANDLE) - m_cmd->track(m_state.xfb.activeCounters[i].buffer(), DxvkAccess::Read); + if 
(physSlice.handle) { + // Just in case someone is mad enough to write to a + // transform feedback buffer from a shader as well + m_flags.set(DxvkContextFlag::ForceWriteAfterWriteSync); + + accessBuffer(DxvkCmdBuffer::ExecBuffer, m_state.xfb.activeCounters[i], + VK_PIPELINE_STAGE_2_TRANSFORM_FEEDBACK_BIT_EXT, + VK_ACCESS_2_TRANSFORM_FEEDBACK_COUNTER_READ_BIT_EXT | + VK_ACCESS_2_TRANSFORM_FEEDBACK_COUNTER_WRITE_BIT_EXT, + DxvkAccessOp::None); + + m_cmd->track(m_state.xfb.activeCounters[i].buffer(), DxvkAccess::Write); + } } m_cmd->cmdBeginTransformFeedback( @@ -5543,9 +5638,6 @@ namespace dxvk { ctrBuffers[i] = physSlice.handle; ctrOffsets[i] = physSlice.offset; - if (physSlice.handle != VK_NULL_HANDLE) - m_cmd->track(m_state.xfb.activeCounters[i].buffer(), DxvkAccess::Write); - m_state.xfb.activeCounters[i] = DxvkBufferSlice(); } @@ -5649,24 +5741,11 @@ namespace dxvk { DxvkGraphicsPipelineFlags newFlags = newPipeline->flags(); DxvkGraphicsPipelineFlags diffFlags = oldFlags ^ newFlags; - DxvkGraphicsPipelineFlags hazardMask( - DxvkGraphicsPipelineFlag::HasTransformFeedback, - DxvkGraphicsPipelineFlag::HasStorageDescriptors); - m_state.gp.flags = newFlags; - if ((diffFlags & hazardMask) != 0) { - // Force-update vertex/index buffers for hazard checks - m_flags.set(DxvkContextFlag::GpDirtyIndexBuffer, - DxvkContextFlag::GpDirtyVertexBuffers, - DxvkContextFlag::GpDirtyXfbBuffers, - DxvkContextFlag::DirtyDrawBuffer); - - // This is necessary because we'll only do hazard - // tracking if the active pipeline has side effects - if (!m_barrierControl.test(DxvkBarrierControl::IgnoreGraphicsBarriers)) - this->spillRenderPass(true); - } + if (newFlags.any(DxvkGraphicsPipelineFlag::HasTransformFeedback, + DxvkGraphicsPipelineFlag::HasStorageDescriptors)) + m_flags.set(DxvkContextFlag::GpRenderPassSideEffects); if (diffFlags.test(DxvkGraphicsPipelineFlag::HasSampleMaskExport)) m_flags.set(DxvkContextFlag::GpDirtyMultisampleState); @@ -5681,7 +5760,7 @@ namespace dxvk { } - bool 
DxvkContext::updateGraphicsPipelineState(DxvkGlobalPipelineBarrier srcBarrier) { + bool DxvkContext::updateGraphicsPipelineState() { bool oldIndependentSets = m_flags.test(DxvkContextFlag::GpIndependentSets); // Check which dynamic states need to be active. States that @@ -5756,23 +5835,15 @@ namespace dxvk { // Emit barrier based on pipeline properties, in order to avoid // accidental write-after-read hazards after the render pass. - DxvkGlobalPipelineBarrier pipelineBarrier = m_state.gp.pipeline->getGlobalBarrier(m_state.gp.state); - srcBarrier.stages |= pipelineBarrier.stages; - srcBarrier.access |= pipelineBarrier.access; - - if (srcBarrier.stages) { - DxvkGlobalPipelineBarrier dstBarrier = (srcBarrier.access & vk::AccessWriteMask) - ? m_globalRwGraphicsBarrier - : m_globalRoGraphicsBarrier; - - accessMemory(DxvkCmdBuffer::ExecBuffer, - srcBarrier.stages, srcBarrier.access, - dstBarrier.stages, dstBarrier.access); - } + DxvkGlobalPipelineBarrier srcBarrier = m_state.gp.pipeline->getGlobalBarrier(m_state.gp.state); + m_renderPassBarrierSrc.stages |= srcBarrier.stages; + m_renderPassBarrierSrc.access |= srcBarrier.access; if (unlikely(m_features.test(DxvkContextFeature::DebugUtils))) { + uint32_t color = getGraphicsPipelineDebugColor(); + m_cmd->cmdInsertDebugUtilsLabel(DxvkCmdBuffer::ExecBuffer, - vk::makeLabel(0xa2dcf0, m_state.gp.pipeline->debugName())); + vk::makeLabel(color, m_state.gp.pipeline->debugName())); } m_flags.clr(DxvkContextFlag::GpDirtyPipelineState); @@ -5780,6 +5851,17 @@ namespace dxvk { } + uint32_t DxvkContext::getGraphicsPipelineDebugColor() const { + if (m_state.gp.flags.test(DxvkGraphicsPipelineFlag::HasStorageDescriptors)) + return 0xf0a2dc; + + if (m_state.gp.flags.test(DxvkGraphicsPipelineFlag::HasTransformFeedback)) + return 0xa2f0dc; + + return 0xa2dcf0; + } + + template void DxvkContext::resetSpecConstants( uint32_t newMask) { @@ -5902,6 +5984,9 @@ namespace dxvk { descriptorInfo.image.imageView = viewHandle; 
descriptorInfo.image.imageLayout = res.imageView->image()->info().layout; + if (BindPoint == VK_PIPELINE_BIND_POINT_COMPUTE || unlikely(res.imageView->image()->hasGfxStores())) + accessImage(DxvkCmdBuffer::ExecBuffer, *res.imageView, util::pipelineStages(binding.stage), binding.access, DxvkAccessOp::None); + m_cmd->track(res.imageView->image(), DxvkAccess::Read); } else { descriptorInfo.image.sampler = VK_NULL_HANDLE; @@ -5923,6 +6008,9 @@ namespace dxvk { descriptorInfo.image.imageView = viewHandle; descriptorInfo.image.imageLayout = res.imageView->image()->info().layout; + if (BindPoint == VK_PIPELINE_BIND_POINT_COMPUTE || res.imageView->image()->hasGfxStores()) + accessImage(DxvkCmdBuffer::ExecBuffer, *res.imageView, util::pipelineStages(binding.stage), binding.access, binding.accessOp); + m_cmd->track(res.imageView->image(), (binding.access & vk::AccessWriteMask) ? DxvkAccess::Write : DxvkAccess::Read); } else { @@ -5945,6 +6033,9 @@ namespace dxvk { descriptorInfo.image.imageView = viewHandle; descriptorInfo.image.imageLayout = res.imageView->image()->info().layout; + if (BindPoint == VK_PIPELINE_BIND_POINT_COMPUTE || unlikely(res.imageView->image()->hasGfxStores())) + accessImage(DxvkCmdBuffer::ExecBuffer, *res.imageView, util::pipelineStages(binding.stage), binding.access, DxvkAccessOp::None); + m_cmd->track(res.sampler); m_cmd->track(res.imageView->image(), DxvkAccess::Read); } else { @@ -5960,6 +6051,9 @@ namespace dxvk { if (res.bufferView != nullptr) { descriptorInfo.texelBuffer = res.bufferView->handle(); + if (BindPoint == VK_PIPELINE_BIND_POINT_COMPUTE || unlikely(res.bufferView->buffer()->hasGfxStores())) + accessBuffer(DxvkCmdBuffer::ExecBuffer, *res.bufferView, util::pipelineStages(binding.stage), binding.access, DxvkAccessOp::None); + m_cmd->track(res.bufferView->buffer(), DxvkAccess::Read); } else { descriptorInfo.texelBuffer = VK_NULL_HANDLE; @@ -5972,6 +6066,9 @@ namespace dxvk { if (res.bufferView != nullptr) { descriptorInfo.texelBuffer = 
res.bufferView->handle(); + if (BindPoint == VK_PIPELINE_BIND_POINT_COMPUTE || res.bufferView->buffer()->hasGfxStores()) + accessBuffer(DxvkCmdBuffer::ExecBuffer, *res.bufferView, util::pipelineStages(binding.stage), binding.access, binding.accessOp); + m_cmd->track(res.bufferView->buffer(), (binding.access & vk::AccessWriteMask) ? DxvkAccess::Write : DxvkAccess::Read); } else { @@ -5985,6 +6082,9 @@ namespace dxvk { if (res.bufferSlice.length()) { descriptorInfo = res.bufferSlice.getDescriptor(); + if (BindPoint == VK_PIPELINE_BIND_POINT_COMPUTE || unlikely(res.bufferSlice.buffer()->hasGfxStores())) + accessBuffer(DxvkCmdBuffer::ExecBuffer, res.bufferSlice, util::pipelineStages(binding.stage), binding.access, DxvkAccessOp::None); + m_cmd->track(res.bufferSlice.buffer(), DxvkAccess::Read); } else { descriptorInfo.buffer.buffer = VK_NULL_HANDLE; @@ -5999,6 +6099,9 @@ namespace dxvk { if (res.bufferSlice.length()) { descriptorInfo = res.bufferSlice.getDescriptor(); + if (BindPoint == VK_PIPELINE_BIND_POINT_COMPUTE || unlikely(res.bufferSlice.buffer()->hasGfxStores())) + accessBuffer(DxvkCmdBuffer::ExecBuffer, res.bufferSlice, util::pipelineStages(binding.stage), binding.access, binding.accessOp); + m_cmd->track(res.bufferSlice.buffer(), (binding.access & vk::AccessWriteMask) ? 
DxvkAccess::Write : DxvkAccess::Read); } else { @@ -6136,7 +6239,8 @@ namespace dxvk { *attachment.view->image(), attachment.view->imageSubresources(), oldLayout, VK_PIPELINE_STAGE_2_COLOR_ATTACHMENT_OUTPUT_BIT, - VK_ACCESS_2_COLOR_ATTACHMENT_WRITE_BIT); + VK_ACCESS_2_COLOR_ATTACHMENT_WRITE_BIT | + VK_ACCESS_2_COLOR_ATTACHMENT_READ_BIT, DxvkAccessOp::None); m_cmd->track(attachment.view->image(), DxvkAccess::Write); } @@ -6147,14 +6251,17 @@ namespace dxvk { const DxvkAttachment& attachment, VkImageLayout oldLayout) { if (oldLayout != attachment.view->image()->info().layout) { + VkAccessFlags2 access = VK_ACCESS_2_DEPTH_STENCIL_ATTACHMENT_READ_BIT; + + if (oldLayout != VK_IMAGE_LAYOUT_DEPTH_STENCIL_READ_ONLY_OPTIMAL) + access |= VK_ACCESS_2_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT; + accessImage(DxvkCmdBuffer::ExecBuffer, *attachment.view->image(), attachment.view->imageSubresources(), oldLayout, VK_PIPELINE_STAGE_2_EARLY_FRAGMENT_TESTS_BIT | VK_PIPELINE_STAGE_2_LATE_FRAGMENT_TESTS_BIT, - oldLayout != VK_IMAGE_LAYOUT_DEPTH_STENCIL_READ_ONLY_OPTIMAL - ? 
VK_ACCESS_2_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT - : VK_ACCESS_2_NONE); + access, DxvkAccessOp::None); m_cmd->track(attachment.view->image(), DxvkAccess::Write); } @@ -6297,6 +6404,14 @@ namespace dxvk { m_state.vi.indexType); } + if (unlikely(m_state.vi.indexBuffer.buffer()->hasGfxStores())) { + accessBuffer(DxvkCmdBuffer::ExecBuffer, m_state.vi.indexBuffer, + VK_PIPELINE_STAGE_VERTEX_INPUT_BIT, VK_ACCESS_INDEX_READ_BIT, DxvkAccessOp::None); + } + + m_renderPassBarrierSrc.stages |= VK_PIPELINE_STAGE_VERTEX_INPUT_BIT; + m_renderPassBarrierSrc.access |= VK_ACCESS_INDEX_READ_BIT; + m_cmd->track(m_state.vi.indexBuffer.buffer(), DxvkAccess::Read); return true; } @@ -6334,6 +6449,11 @@ namespace dxvk { newDynamicStrides &= strides[i] >= m_state.vi.vertexExtents[i]; } + if (unlikely(m_state.vi.vertexBuffers[binding].buffer()->hasGfxStores())) { + accessBuffer(DxvkCmdBuffer::ExecBuffer, m_state.vi.vertexBuffers[binding], + VK_PIPELINE_STAGE_VERTEX_INPUT_BIT, VK_ACCESS_VERTEX_ATTRIBUTE_READ_BIT, DxvkAccessOp::None); + } + m_cmd->track(m_state.vi.vertexBuffers[binding].buffer(), DxvkAccess::Read); } else { buffers[i] = VK_NULL_HANDLE; @@ -6384,14 +6504,18 @@ namespace dxvk { xfbOffsets[i] = physSlice.offset; xfbLengths[i] = physSlice.length; - if (physSlice.handle == VK_NULL_HANDLE) + if (!physSlice.handle) xfbBuffers[i] = m_common->dummyResources().bufferHandle(); - - if (physSlice.handle != VK_NULL_HANDLE) { - const Rc& buffer = m_state.xfb.buffers[i].buffer(); + + if (physSlice.handle) { + Rc buffer = m_state.xfb.buffers[i].buffer(); buffer->setXfbVertexStride(gsInfo.xfbStrides[i]); - - m_cmd->track(buffer, DxvkAccess::Write); + + accessBuffer(DxvkCmdBuffer::ExecBuffer, m_state.xfb.buffers[i], + VK_PIPELINE_STAGE_2_TRANSFORM_FEEDBACK_BIT_EXT, + VK_ACCESS_2_TRANSFORM_FEEDBACK_WRITE_BIT_EXT, DxvkAccessOp::None); + + m_cmd->track(std::move(buffer), DxvkAccess::Write); } } @@ -6576,7 +6700,18 @@ namespace dxvk { if (unlikely(!this->updateComputePipelineState())) return false; 
} - + + if (this->checkComputeHazards()) { + this->flushBarriers(); + + // Dirty descriptors if this hasn't happened yet for + // whatever reason in order to re-emit barriers + m_descriptorState.dirtyStages(VK_SHADER_STAGE_COMPUTE_BIT); + } + + if (unlikely(m_features.test(DxvkContextFeature::DebugUtils))) + this->beginBarrierControlDebugRegion(); + if (m_descriptorState.hasDirtyComputeSets()) this->updateComputeShaderResources(); @@ -6597,30 +6732,38 @@ namespace dxvk { if (m_flags.test(DxvkContextFlag::GpDirtyFramebuffer)) this->updateFramebuffer(); - if (!m_flags.test(DxvkContextFlag::GpRenderPassBound)) - this->startRenderPass(); - - if (m_state.gp.flags.any( - DxvkGraphicsPipelineFlag::HasStorageDescriptors, - DxvkGraphicsPipelineFlag::HasTransformFeedback)) { - this->commitGraphicsBarriers(); - + if (m_flags.test(DxvkContextFlag::GpXfbActive)) { // If transform feedback is active and there is a chance that we might // need to rebind the pipeline, we need to end transform feedback and // issue a barrier. End the render pass to do that. Ignore dirty vertex // buffers here since non-dynamic vertex strides are such an extreme // edge case that it's likely irrelevant in practice. - if (m_flags.test(DxvkContextFlag::GpXfbActive) - && m_flags.any(DxvkContextFlag::GpDirtyPipelineState, - DxvkContextFlag::GpDirtySpecConstants)) + if (m_flags.any(DxvkContextFlag::GpDirtyPipelineState, + DxvkContextFlag::GpDirtySpecConstants, + DxvkContextFlag::GpDirtyXfbBuffers)) this->spillRenderPass(true); + } - // This can only happen if the render pass was active before, - // so we'll never begin the render pass twice in one draw - if (!m_flags.test(DxvkContextFlag::GpRenderPassBound)) - this->startRenderPass(); + if (m_flags.test(DxvkContextFlag::GpRenderPassSideEffects)) { + // If either the current pipeline has side effects or if there are pending + // writes from previous draws, check for hazards. 
This also tracks any + // resources written for the first time, but does not emit any barriers + // on its own so calling this outside a render pass is safe. This also + // implicitly dirties all state for which we need to track resource access. + if (this->checkGraphicsHazards()) + this->spillRenderPass(true); + } - this->commitGraphicsBarriers(); + // Start the render pass. This must happen before any render state + // is set up so that we can safely use secondary command buffers. + if (!m_flags.test(DxvkContextFlag::GpRenderPassBound)) + this->startRenderPass(); + + if (m_flags.test(DxvkContextFlag::GpRenderPassSideEffects)) { + // Make sure that the debug label for barrier control + // always starts within an active render pass + if (unlikely(m_features.test(DxvkContextFeature::DebugUtils))) + this->beginBarrierControlDebugRegion(); } if (m_flags.test(DxvkContextFlag::GpDirtyIndexBuffer) && Indexed) { @@ -6635,19 +6778,7 @@ namespace dxvk { this->updateSpecConstants(); if (m_flags.test(DxvkContextFlag::GpDirtyPipelineState)) { - DxvkGlobalPipelineBarrier barrier = { }; - - if (Indexed) { - barrier.stages |= VK_PIPELINE_STAGE_VERTEX_INPUT_BIT; - barrier.access |= VK_ACCESS_INDEX_READ_BIT; - } - - if (Indirect) { - barrier.stages |= VK_PIPELINE_STAGE_DRAW_INDIRECT_BIT; - barrier.access |= VK_ACCESS_INDIRECT_COMMAND_READ_BIT; - } - - if (unlikely(!this->updateGraphicsPipelineState(barrier))) + if (unlikely(!this->updateGraphicsPipelineState())) return false; } @@ -6669,72 +6800,139 @@ namespace dxvk { } - template - void DxvkContext::commitComputeBarriers() { - const auto& layout = m_state.cp.pipeline->getBindings()->layout(); + template + bool DxvkContext::checkResourceHazards( + const DxvkBindingLayout& layout, + uint32_t setMask) { + constexpr bool IsGraphics = BindPoint == VK_PIPELINE_BIND_POINT_GRAPHICS; - // Exit early if we're only checking for hazards and - // if the barrier set is empty, to avoid some overhead. 
- if (!DoEmit && m_barrierTracker.empty()) - return; + // For graphics, if we are not currently inside a render pass, we'll issue + // a barrier anyway so checking hazards is not meaningful. Avoid some overhead + // and only track written resources in that case. + bool requiresBarrier = IsGraphics && !m_flags.test(DxvkContextFlag::GpRenderPassBound); - for (uint32_t i = 0; i < DxvkDescriptorSets::CsSetCount; i++) { - uint32_t bindingCount = layout.getBindingCount(i); + for (auto setIndex : bit::BitMask(setMask)) { + uint32_t bindingCount = layout.getBindingCount(setIndex); for (uint32_t j = 0; j < bindingCount; j++) { - const DxvkBindingInfo& binding = layout.getBinding(i, j); + const DxvkBindingInfo& binding = layout.getBinding(setIndex, j); const DxvkShaderResourceSlot& slot = m_rc[binding.resourceBinding]; - bool requiresBarrier = false; + // Skip read-only bindings if we already know that we need a barrier + if (requiresBarrier && !(binding.access & vk::AccessWriteMask)) + continue; switch (binding.descriptorType) { - case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER: - case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER: - if (likely(slot.bufferSlice.length())) { - requiresBarrier = this->checkBufferBarrier(slot.bufferSlice, - VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, binding.access); + case VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER: { + if (slot.bufferView) { + if (!IsGraphics || slot.bufferView->buffer()->hasGfxStores()) + requiresBarrier |= checkBufferViewBarrier(slot.bufferView, binding.access, binding.accessOp); + else if (binding.access & vk::AccessWriteMask) + requiresBarrier |= !slot.bufferView->buffer()->trackGfxStores(); } - break; + } break; - case VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER: - case VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER: - if (likely(slot.bufferView != nullptr)) { - requiresBarrier = this->checkBufferViewBarrier(slot.bufferView, - VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, binding.access); + case VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER: { + if (slot.bufferView && 
(!IsGraphics || slot.bufferView->buffer()->hasGfxStores())) + requiresBarrier |= checkBufferViewBarrier(slot.bufferView, binding.access, DxvkAccessOp::None); + } break; + + case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER: { + if (slot.bufferSlice.length() && (!IsGraphics || slot.bufferSlice.buffer()->hasGfxStores())) + requiresBarrier |= checkBufferBarrier(slot.bufferSlice, binding.access, DxvkAccessOp::None); + } break; + + case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER: { + if (slot.bufferSlice.length()) { + if (!IsGraphics || slot.bufferSlice.buffer()->hasGfxStores()) + requiresBarrier |= checkBufferBarrier(slot.bufferSlice, binding.access, binding.accessOp); + else if (binding.access & vk::AccessWriteMask) + requiresBarrier |= !slot.bufferSlice.buffer()->trackGfxStores(); } - break; + } break; + + case VK_DESCRIPTOR_TYPE_STORAGE_IMAGE: { + if (slot.imageView) { + if (!IsGraphics || slot.imageView->image()->hasGfxStores()) + requiresBarrier |= checkImageViewBarrier(slot.imageView, binding.access, binding.accessOp); + else if (binding.access & vk::AccessWriteMask) + requiresBarrier |= !slot.imageView->image()->trackGfxStores(); + } + } break; - case VK_DESCRIPTOR_TYPE_STORAGE_IMAGE: case VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE: - case VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER: - if (likely(slot.imageView != nullptr)) { - requiresBarrier = this->checkImageViewBarrier(slot.imageView, - VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, binding.access); - } - break; + case VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER: { + if (slot.imageView && (!IsGraphics || slot.imageView->image()->hasGfxStores())) + requiresBarrier |= checkImageViewBarrier(slot.imageView, binding.access, DxvkAccessOp::None); + } break; default: /* nothing to do */; } - if (requiresBarrier) { - flushBarriers(); - return; - } + // We don't need to do any extra tracking for compute here, exit early + if (requiresBarrier && !IsGraphics) + return true; } } + + return requiresBarrier; } - template - void DxvkContext::commitGraphicsBarriers() 
{ - if (m_barrierControl.test(DxvkBarrierControl::IgnoreGraphicsBarriers)) - return; + bool DxvkContext::checkComputeHazards() { + // Exit early if we know that there cannot be any hazards to avoid + // some overhead after barriers are flushed. This is common. + if (m_barrierTracker.empty()) + return false; - constexpr auto storageBufferAccess = VK_ACCESS_SHADER_WRITE_BIT | VK_ACCESS_TRANSFORM_FEEDBACK_WRITE_BIT_EXT; - constexpr auto storageImageAccess = VK_ACCESS_SHADER_WRITE_BIT; + const auto& layout = m_state.cp.pipeline->getBindings()->layout(); + return checkResourceHazards(layout, layout.getSetMask()); + } - bool requiresBarrier = false; + + template + bool DxvkContext::checkGraphicsHazards() { + // Check shader resources on every draw to handle WAW hazards, and to make + // sure that writes are handled properly. If the pipeline does not have any + // storage descriptors, we only need to check dirty resources. + const auto& layout = m_state.gp.pipeline->getBindings()->layout(); + + uint32_t setMask = layout.getSetMask(); + + if (!m_state.gp.flags.test(DxvkGraphicsPipelineFlag::HasStorageDescriptors)) + setMask &= m_descriptorState.getDirtyGraphicsSets(); + + bool requiresBarrier = checkResourceHazards(layout, setMask); + + // Transform feedback buffer writes won't overlap, so we also only need to + // check those if dirty. 
+ if (m_flags.test(DxvkContextFlag::GpDirtyXfbBuffers) + && m_state.gp.flags.test(DxvkGraphicsPipelineFlag::HasTransformFeedback)) { + for (uint32_t i = 0; i < MaxNumXfbBuffers; i++) { + const auto& xfbBufferSlice = m_state.xfb.buffers[i]; + const auto& xfbCounterSlice = m_state.xfb.activeCounters[i]; + + if (xfbBufferSlice.length()) { + requiresBarrier |= !xfbBufferSlice.buffer()->trackGfxStores(); + requiresBarrier |= checkBufferBarrier( + xfbBufferSlice, VK_ACCESS_TRANSFORM_FEEDBACK_WRITE_BIT_EXT, DxvkAccessOp::None); + + if (xfbCounterSlice.length()) { + requiresBarrier |= !xfbCounterSlice.buffer()->trackGfxStores(); + requiresBarrier |= checkBufferBarrier(xfbCounterSlice, + VK_ACCESS_TRANSFORM_FEEDBACK_COUNTER_READ_BIT_EXT | + VK_ACCESS_TRANSFORM_FEEDBACK_COUNTER_WRITE_BIT_EXT, + DxvkAccessOp::None); + } + } + } + } + + // From now on, we only have read-only resources to check and can + // exit early if we find a hazard. + if (requiresBarrier) + return true; // Check the draw buffer for indirect draw calls if (m_flags.test(DxvkContextFlag::DirtyDrawBuffer) && Indirect) { @@ -6743,26 +6941,24 @@ namespace dxvk { &m_state.id.cntBuffer, }}; - for (uint32_t i = 0; i < slices.size() && !requiresBarrier; i++) { - if ((slices[i]->length()) - && (slices[i]->buffer()->info().access & storageBufferAccess)) { - requiresBarrier = this->checkBufferBarrier(*slices[i], - VK_PIPELINE_STAGE_DRAW_INDIRECT_BIT, - VK_ACCESS_INDIRECT_COMMAND_READ_BIT); + for (uint32_t i = 0; i < slices.size(); i++) { + if (slices[i]->length() && slices[i]->buffer()->hasGfxStores()) { + if (checkBufferBarrier(*slices[i], + VK_ACCESS_INDIRECT_COMMAND_READ_BIT, DxvkAccessOp::None)) + return true; } } } // Read-only stage, so we only have to check this if // the bindngs have actually changed between draws - if (m_flags.test(DxvkContextFlag::GpDirtyIndexBuffer) && !requiresBarrier && Indexed) { + if (m_flags.test(DxvkContextFlag::GpDirtyIndexBuffer) && Indexed) { const auto& indexBufferSlice = 
m_state.vi.indexBuffer; - if ((indexBufferSlice.length()) - && (indexBufferSlice.bufferInfo().access & storageBufferAccess)) { - requiresBarrier = this->checkBufferBarrier(indexBufferSlice, - VK_PIPELINE_STAGE_VERTEX_INPUT_BIT, - VK_ACCESS_INDEX_READ_BIT); + if (indexBufferSlice.length() && indexBufferSlice.buffer()->hasGfxStores()) { + if (checkBufferBarrier(indexBufferSlice, + VK_ACCESS_INDEX_READ_BIT, DxvkAccessOp::None)) + return true; } } @@ -6770,163 +6966,53 @@ namespace dxvk { if (m_flags.test(DxvkContextFlag::GpDirtyVertexBuffers)) { uint32_t bindingCount = m_state.gp.state.il.bindingCount(); - for (uint32_t i = 0; i < bindingCount && !requiresBarrier; i++) { + for (uint32_t i = 0; i < bindingCount; i++) { uint32_t binding = m_state.gp.state.ilBindings[i].binding(); const auto& vertexBufferSlice = m_state.vi.vertexBuffers[binding]; - if ((vertexBufferSlice.length()) - && (vertexBufferSlice.bufferInfo().access & storageBufferAccess)) { - requiresBarrier = this->checkBufferBarrier(vertexBufferSlice, - VK_PIPELINE_STAGE_VERTEX_INPUT_BIT, - VK_ACCESS_VERTEX_ATTRIBUTE_READ_BIT); + if (vertexBufferSlice.length() && vertexBufferSlice.buffer()->hasGfxStores()) { + if (checkBufferBarrier(vertexBufferSlice, + VK_ACCESS_VERTEX_ATTRIBUTE_READ_BIT, DxvkAccessOp::None)) + return true; } } } - // Transform feedback buffer writes won't overlap, so we - // also only need to check those when they are rebound - if (m_flags.test(DxvkContextFlag::GpDirtyXfbBuffers) - && m_state.gp.flags.test(DxvkGraphicsPipelineFlag::HasTransformFeedback)) { - for (uint32_t i = 0; i < MaxNumXfbBuffers && !requiresBarrier; i++) { - const auto& xfbBufferSlice = m_state.xfb.buffers[i]; - const auto& xfbCounterSlice = m_state.xfb.activeCounters[i]; - - if (xfbBufferSlice.length()) { - requiresBarrier = this->checkBufferBarrier(xfbBufferSlice, - VK_PIPELINE_STAGE_TRANSFORM_FEEDBACK_BIT_EXT, - VK_ACCESS_TRANSFORM_FEEDBACK_WRITE_BIT_EXT); - - if (xfbCounterSlice.length()) { - requiresBarrier |= 
this->checkBufferBarrier(xfbCounterSlice, - VK_PIPELINE_STAGE_DRAW_INDIRECT_BIT | - VK_PIPELINE_STAGE_TRANSFORM_FEEDBACK_BIT_EXT, - VK_ACCESS_TRANSFORM_FEEDBACK_COUNTER_READ_BIT_EXT | - VK_ACCESS_TRANSFORM_FEEDBACK_COUNTER_WRITE_BIT_EXT); - } - } - } - } - - // Check shader resources on every draw to handle WAW hazards - auto layout = m_state.gp.pipeline->getBindings()->layout(); - - for (uint32_t i = 0; i < DxvkDescriptorSets::SetCount && !requiresBarrier; i++) { - uint32_t bindingCount = layout.getBindingCount(i); - - for (uint32_t j = 0; j < bindingCount && !requiresBarrier; j++) { - const DxvkBindingInfo& binding = layout.getBinding(i, j); - const DxvkShaderResourceSlot& slot = m_rc[binding.resourceBinding]; - - switch (binding.descriptorType) { - case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER: - case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER: - if ((slot.bufferSlice.length()) - && (slot.bufferSlice.bufferInfo().access & storageBufferAccess)) { - requiresBarrier = this->checkBufferBarrier(slot.bufferSlice, - util::pipelineStages(binding.stage), binding.access); - } - break; - - case VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER: - case VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER: - if ((slot.bufferView != nullptr) - && (slot.bufferView->buffer()->info().access & storageBufferAccess)) { - requiresBarrier = this->checkBufferViewBarrier(slot.bufferView, - util::pipelineStages(binding.stage), binding.access); - } - break; - - case VK_DESCRIPTOR_TYPE_STORAGE_IMAGE: - case VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE: - case VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER: - if ((slot.imageView != nullptr) - && (slot.imageView->image()->info().access & storageImageAccess)) { - requiresBarrier = this->checkImageViewBarrier(slot.imageView, - util::pipelineStages(binding.stage), binding.access); - } - break; - - default: - /* nothing to do */; - } - } - } - - // External subpass dependencies serve as full memory - // and execution barriers, so we can use this to allow - // inter-stage synchronization. 
- if (requiresBarrier) - this->spillRenderPass(true); + return false; } - template + template bool DxvkContext::checkBufferBarrier( const DxvkBufferSlice& bufferSlice, - VkPipelineStageFlags stages, - VkAccessFlags access) { - if constexpr (DoEmit) { - accessBuffer(DxvkCmdBuffer::ExecBuffer, - *bufferSlice.buffer(), bufferSlice.offset(), - bufferSlice.length(), stages, access); - return false; - } else { - return checkResourceBarrier([this, &bufferSlice] (DxvkAccess access) { - return resourceHasAccess(*bufferSlice.buffer(), - bufferSlice.offset(), bufferSlice.length(), access); - }, stages, access); - } + VkAccessFlags access, + DxvkAccessOp accessOp) { + return checkResourceBarrier([this, &bufferSlice, accessOp] (DxvkAccess access) { + return resourceHasAccess(*bufferSlice.buffer(), + bufferSlice.offset(), bufferSlice.length(), access, accessOp); + }, access); } - template + template bool DxvkContext::checkBufferViewBarrier( const Rc& bufferView, - VkPipelineStageFlags stages, - VkAccessFlags access) { - if constexpr (DoEmit) { - accessBuffer(DxvkCmdBuffer::ExecBuffer, - *bufferView, stages, access); - return false; - } else { - return checkResourceBarrier([this, &bufferView] (DxvkAccess access) { - return resourceHasAccess(*bufferView, access); - }, stages, access); - } + VkAccessFlags access, + DxvkAccessOp accessOp) { + return checkResourceBarrier([this, &bufferView, accessOp] (DxvkAccess access) { + return resourceHasAccess(*bufferView, access, accessOp); + }, access); } - template + template bool DxvkContext::checkImageViewBarrier( const Rc& imageView, - VkPipelineStageFlags stages, - VkAccessFlags access) { - if constexpr (DoEmit) { - accessImage(DxvkCmdBuffer::ExecBuffer, - *imageView->image(), - imageView->imageSubresources(), - imageView->image()->info().layout, - stages, access); - return false; - } else { - return checkResourceBarrier([this, &imageView] (DxvkAccess access) { - return resourceHasAccess(*imageView, access); - }, stages, access); - } - } 
- - - bool DxvkContext::canIgnoreWawHazards(VkPipelineStageFlags stages) { - if (!m_barrierControl.test(DxvkBarrierControl::IgnoreWriteAfterWrite)) - return false; - - if (stages & VK_SHADER_STAGE_COMPUTE_BIT) { - VkPipelineStageFlags2 stageMask = VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT - | VK_PIPELINE_STAGE_2_DRAW_INDIRECT_BIT; - return !m_execBarriers.hasPendingStages(~stageMask); - } - - return true; + VkAccessFlags access, + DxvkAccessOp accessOp) { + return checkResourceBarrier([this, &imageView, accessOp] (DxvkAccess access) { + return resourceHasAccess(*imageView, access, accessOp); + }, access); } @@ -6953,6 +7039,9 @@ namespace dxvk { if (m_flags.test(DxvkContextFlag::DirtyDrawBuffer)) { m_flags.clr(DxvkContextFlag::DirtyDrawBuffer); + m_renderPassBarrierSrc.stages |= VK_PIPELINE_STAGE_DRAW_INDIRECT_BIT; + m_renderPassBarrierSrc.access |= VK_ACCESS_INDIRECT_COMMAND_READ_BIT; + if (m_state.id.argBuffer.length()) m_cmd->track(m_state.id.argBuffer.buffer(), DxvkAccess::Read); @@ -7650,12 +7739,27 @@ namespace dxvk { const VkImageSubresourceRange& subresources, VkImageLayout srcLayout, VkPipelineStageFlags2 srcStages, - VkAccessFlags2 srcAccess) { + VkAccessFlags2 srcAccess, + DxvkAccessOp accessOp) { accessImage(cmdBuffer, image, subresources, srcLayout, srcStages, srcAccess, image.info().layout, image.info().stages, - image.info().access); + image.info().access, + accessOp); + } + + + void DxvkContext::accessImage( + DxvkCmdBuffer cmdBuffer, + const DxvkImageView& imageView, + VkPipelineStageFlags2 srcStages, + VkAccessFlags2 srcAccess, + DxvkAccessOp accessOp) { + accessImage(cmdBuffer, *imageView.image(), + imageView.imageSubresources(), + imageView.image()->info().layout, + srcStages, srcAccess, accessOp); } @@ -7668,7 +7772,8 @@ namespace dxvk { VkAccessFlags2 srcAccess, VkImageLayout dstLayout, VkPipelineStageFlags2 dstStages, - VkAccessFlags2 dstAccess) { + VkAccessFlags2 dstAccess, + DxvkAccessOp accessOp) { auto& batch = getBarrierBatch(cmdBuffer); if 
(srcLayout == VK_IMAGE_LAYOUT_UNDEFINED || srcLayout == VK_IMAGE_LAYOUT_PREINITIALIZED) @@ -7702,9 +7807,9 @@ namespace dxvk { + (subresources.baseArrayLayer + subresources.layerCount - 1u); if (hasWrite) - m_barrierTracker.insertRange(range, DxvkAccess::Write); + m_barrierTracker.insertRange(range, DxvkAccess::Write, accessOp); if (hasRead) - m_barrierTracker.insertRange(range, DxvkAccess::Read); + m_barrierTracker.insertRange(range, DxvkAccess::Read, accessOp); } else { DxvkAddressRange range; range.resource = image.getResourceId(); @@ -7714,9 +7819,9 @@ namespace dxvk { range.rangeEnd = range.rangeStart + subresources.layerCount - 1u; if (hasWrite) - m_barrierTracker.insertRange(range, DxvkAccess::Write); + m_barrierTracker.insertRange(range, DxvkAccess::Write, accessOp); if (hasRead) - m_barrierTracker.insertRange(range, DxvkAccess::Read); + m_barrierTracker.insertRange(range, DxvkAccess::Read, accessOp); } } } @@ -7729,11 +7834,13 @@ namespace dxvk { VkDeviceSize offset, VkDeviceSize size, VkPipelineStageFlags2 srcStages, - VkAccessFlags2 srcAccess) { + VkAccessFlags2 srcAccess, + DxvkAccessOp accessOp) { accessBuffer(cmdBuffer, buffer, offset, size, srcStages, srcAccess, buffer.info().stages, - buffer.info().access); + buffer.info().access, + accessOp); } @@ -7745,7 +7852,8 @@ namespace dxvk { VkPipelineStageFlags2 srcStages, VkAccessFlags2 srcAccess, VkPipelineStageFlags2 dstStages, - VkAccessFlags2 dstAccess) { + VkAccessFlags2 dstAccess, + DxvkAccessOp accessOp) { if (unlikely(!size)) return; @@ -7766,23 +7874,57 @@ namespace dxvk { range.rangeEnd = offset + size - 1; if (srcAccess & vk::AccessWriteMask) - m_barrierTracker.insertRange(range, DxvkAccess::Write); + m_barrierTracker.insertRange(range, DxvkAccess::Write, accessOp); if (srcAccess & vk::AccessReadMask) - m_barrierTracker.insertRange(range, DxvkAccess::Read); + m_barrierTracker.insertRange(range, DxvkAccess::Read, accessOp); } } + void DxvkContext::accessBuffer( + DxvkCmdBuffer cmdBuffer, + const 
DxvkBufferSlice& bufferSlice, + VkPipelineStageFlags2 srcStages, + VkAccessFlags2 srcAccess, + DxvkAccessOp accessOp) { + accessBuffer(cmdBuffer, + *bufferSlice.buffer(), + bufferSlice.offset(), + bufferSlice.length(), + srcStages, srcAccess, + accessOp); + } + + + void DxvkContext::accessBuffer( + DxvkCmdBuffer cmdBuffer, + const DxvkBufferSlice& bufferSlice, + VkPipelineStageFlags2 srcStages, + VkAccessFlags2 srcAccess, + VkPipelineStageFlags2 dstStages, + VkAccessFlags2 dstAccess, + DxvkAccessOp accessOp) { + accessBuffer(cmdBuffer, + *bufferSlice.buffer(), + bufferSlice.offset(), + bufferSlice.length(), + srcStages, srcAccess, + dstStages, dstAccess, + accessOp); + } + + void DxvkContext::accessBuffer( DxvkCmdBuffer cmdBuffer, DxvkBufferView& bufferView, VkPipelineStageFlags2 srcStages, - VkAccessFlags2 srcAccess) { + VkAccessFlags2 srcAccess, + DxvkAccessOp accessOp) { accessBuffer(cmdBuffer, *bufferView.buffer(), bufferView.info().offset, bufferView.info().size, - srcStages, srcAccess); + srcStages, srcAccess, accessOp); } @@ -7792,13 +7934,42 @@ namespace dxvk { VkPipelineStageFlags2 srcStages, VkAccessFlags2 srcAccess, VkPipelineStageFlags2 dstStages, - VkAccessFlags2 dstAccess) { + VkAccessFlags2 dstAccess, + DxvkAccessOp accessOp) { accessBuffer(cmdBuffer, *bufferView.buffer(), bufferView.info().offset, bufferView.info().size, srcStages, srcAccess, - dstStages, dstAccess); + dstStages, dstAccess, + accessOp); + } + + + void DxvkContext::accessDrawBuffer( + VkDeviceSize offset, + uint32_t count, + uint32_t stride, + uint32_t size) { + uint32_t dataSize = count ? 
(count - 1u) * stride + size : 0u; + + accessBuffer(DxvkCmdBuffer::ExecBuffer, + *m_state.id.argBuffer.buffer(), + m_state.id.argBuffer.offset() + offset, dataSize, + VK_PIPELINE_STAGE_2_DRAW_INDIRECT_BIT, + VK_ACCESS_2_INDIRECT_COMMAND_READ_BIT_KHR, + DxvkAccessOp::None); + } + + + void DxvkContext::accessDrawCountBuffer( + VkDeviceSize offset) { + accessBuffer(DxvkCmdBuffer::ExecBuffer, + *m_state.id.cntBuffer.buffer(), + m_state.id.cntBuffer.offset() + offset, sizeof(uint32_t), + VK_PIPELINE_STAGE_2_DRAW_INDIRECT_BIT, + VK_ACCESS_2_INDIRECT_COMMAND_READ_BIT_KHR, + DxvkAccessOp::None); } @@ -7807,10 +7978,13 @@ namespace dxvk { VkDeviceSize offset, VkDeviceSize size, DxvkAccess access) { - bool flush = resourceHasAccess(buffer, offset, size, DxvkAccess::Write); + bool flush = resourceHasAccess(buffer, offset, size, + DxvkAccess::Write, DxvkAccessOp::None); - if (access == DxvkAccess::Write && !flush) - flush = resourceHasAccess(buffer, offset, size, DxvkAccess::Read); + if (access == DxvkAccess::Write && !flush) { + flush = resourceHasAccess(buffer, offset, size, + DxvkAccess::Read, DxvkAccessOp::None); + } if (flush) flushBarriers(); @@ -7831,10 +8005,13 @@ namespace dxvk { DxvkImage& image, const VkImageSubresourceRange& subresources, DxvkAccess access) { - bool flush = resourceHasAccess(image, subresources, DxvkAccess::Write); + bool flush = resourceHasAccess(image, subresources, + DxvkAccess::Write, DxvkAccessOp::None); - if (access == DxvkAccess::Write && !flush) - flush = resourceHasAccess(image, subresources, DxvkAccess::Read); + if (access == DxvkAccess::Write && !flush) { + flush = resourceHasAccess(image, subresources, + DxvkAccess::Read, DxvkAccessOp::None); + } if (flush) flushBarriers(); @@ -7852,6 +8029,8 @@ namespace dxvk { void DxvkContext::flushBarriers() { m_execBarriers.flush(m_cmd); m_barrierTracker.clear(); + + m_flags.clr(DxvkContextFlag::ForceWriteAfterWriteSync); } @@ -7859,7 +8038,8 @@ namespace dxvk { DxvkBuffer& buffer, VkDeviceSize 
offset, VkDeviceSize size, - DxvkAccess access) { + DxvkAccess access, + DxvkAccessOp accessOp) { if (unlikely(!size)) return false; @@ -7868,23 +8048,25 @@ namespace dxvk { range.rangeStart = offset; range.rangeEnd = offset + size - 1; - return m_barrierTracker.findRange(range, access); + return m_barrierTracker.findRange(range, access, accessOp); } bool DxvkContext::resourceHasAccess( DxvkBufferView& bufferView, - DxvkAccess access) { + DxvkAccess access, + DxvkAccessOp accessOp) { return resourceHasAccess(*bufferView.buffer(), bufferView.info().offset, - bufferView.info().size, access); + bufferView.info().size, access, accessOp); } bool DxvkContext::resourceHasAccess( DxvkImage& image, const VkImageSubresourceRange& subresources, - DxvkAccess access) { + DxvkAccess access, + DxvkAccessOp accessOp) { uint32_t layerCount = image.info().numLayers; // Subresources are enumerated in such a way that array layers of @@ -7900,7 +8082,7 @@ namespace dxvk { // Probe all subresources first, only check individual mip levels // if there are overlaps and if we are checking a subset of array // layers of multiple mips. 
- bool dirty = m_barrierTracker.findRange(range, access); + bool dirty = m_barrierTracker.findRange(range, access, accessOp); if (!dirty || subresources.levelCount == 1u || subresources.layerCount == layerCount) return dirty; @@ -7909,7 +8091,7 @@ namespace dxvk { range.rangeStart = i * layerCount + subresources.baseArrayLayer; range.rangeEnd = range.rangeStart + subresources.layerCount - 1u; - dirty = m_barrierTracker.findRange(range, access); + dirty = m_barrierTracker.findRange(range, access, accessOp); } return dirty; @@ -7918,8 +8100,9 @@ namespace dxvk { bool DxvkContext::resourceHasAccess( DxvkImageView& imageView, - DxvkAccess access) { - return resourceHasAccess(*imageView.image(), imageView.imageSubresources(), access); + DxvkAccess access, + DxvkAccessOp accessOp) { + return resourceHasAccess(*imageView.image(), imageView.imageSubresources(), access, accessOp); } @@ -8074,23 +8257,42 @@ namespace dxvk { } - void DxvkContext::beginInternalDebugRegion(const VkDebugUtilsLabelEXT& label) { - if (m_features.test(DxvkContextFeature::DebugUtils)) { - // If the app provides us with debug regions, don't add any - // internal ones to avoid potential issues with scoping. 
- if (m_debugLabelStack.empty()) { - m_cmd->cmdBeginDebugUtilsLabel(DxvkCmdBuffer::ExecBuffer, label); - m_debugLabelInternalActive = true; - } - } + void DxvkContext::pushDebugRegion(const VkDebugUtilsLabelEXT& label, util::DxvkDebugLabelType type) { + m_cmd->cmdBeginDebugUtilsLabel(DxvkCmdBuffer::ExecBuffer, label); + m_debugLabelStack.emplace_back(label, type); } - void DxvkContext::endInternalDebugRegion() { - if (m_debugLabelInternalActive) { - m_debugLabelInternalActive = false; + void DxvkContext::popDebugRegion(util::DxvkDebugLabelType type) { + // Find last active region of the given type + size_t index = m_debugLabelStack.size(); + + while (index && m_debugLabelStack[index - 1u].type() != type) + index -= 1u; + + if (!index) + return; + + // End all debug regions inside the scope we want to end, as + // well as the debug region of the requested type itself + for (size_t i = index; i <= m_debugLabelStack.size(); i++) m_cmd->cmdEndDebugUtilsLabel(DxvkCmdBuffer::ExecBuffer); + + // Re-emit nested debug regions and erase the region we ended + for (size_t i = index; i < m_debugLabelStack.size(); i++) { + m_cmd->cmdBeginDebugUtilsLabel(DxvkCmdBuffer::ExecBuffer, m_debugLabelStack[i].get()); + m_debugLabelStack[i - 1u] = m_debugLabelStack[i]; } + + m_debugLabelStack.pop_back(); + } + + + bool DxvkContext::hasDebugRegion( + util::DxvkDebugLabelType type) { + auto e = std::find_if(m_debugLabelStack.crbegin(), m_debugLabelStack.crend(), + [type] (const util::DxvkDebugLabel& label) { return label.type() == type; }); + return e != m_debugLabelStack.crend(); } diff --git a/src/dxvk/dxvk_context.h b/src/dxvk/dxvk_context.h index b841549b5..921405472 100644 --- a/src/dxvk/dxvk_context.h +++ b/src/dxvk/dxvk_context.h @@ -763,11 +763,14 @@ namespace dxvk { * \param [in] offset Draw buffer offset * \param [in] count Number of draws * \param [in] stride Stride between dispatch calls + * \param [in] unroll Whether to unroll multiple draws if + * there are any potential data 
dependencies between them. */ void drawIndirect( VkDeviceSize offset, uint32_t count, - uint32_t stride); + uint32_t stride, + bool unroll); /** * \brief Indirect draw call @@ -809,12 +812,15 @@ namespace dxvk { * \param [in] offset Draw buffer offset * \param [in] count Number of draws * \param [in] stride Stride between dispatch calls + * \param [in] unroll Whether to unroll multiple draws if + * there are any potential data dependencies between them. */ void drawIndexedIndirect( VkDeviceSize offset, uint32_t count, - uint32_t stride); - + uint32_t stride, + bool unroll); + /** * \brief Indirect indexed draw call * @@ -832,14 +838,14 @@ namespace dxvk { uint32_t stride); /** - * \brief Transform feddback draw call - - * \param [in] counterBuffer Xfb counter buffer + * \brief Transform feedback draw call + * + * \param [in] counterOffset Draw count offset * \param [in] counterDivisor Vertex stride * \param [in] counterBias Counter bias */ void drawIndirectXfb( - const DxvkBufferSlice& counterBuffer, + VkDeviceSize counterOffset, uint32_t counterDivisor, uint32_t counterBias); @@ -1434,9 +1440,9 @@ namespace dxvk { DxvkBarrierControlFlags m_barrierControl; DxvkGpuQueryManager m_queryManager; - - DxvkGlobalPipelineBarrier m_globalRoGraphicsBarrier; - DxvkGlobalPipelineBarrier m_globalRwGraphicsBarrier; + + DxvkGlobalPipelineBarrier m_renderPassBarrierSrc = { }; + DxvkGlobalPipelineBarrier m_renderPassBarrierDst = { }; DxvkRenderTargetLayouts m_rtLayouts = { }; @@ -1453,7 +1459,6 @@ namespace dxvk { std::vector m_imageLayoutTransitions; std::vector m_debugLabelStack; - bool m_debugLabelInternalActive = false; Rc m_latencyTracker; uint64_t m_latencyFrameId = 0u; @@ -1590,6 +1595,20 @@ namespace dxvk { const Rc& buffer, VkDeviceSize offset); + template + void drawIndirectGeneric( + VkDeviceSize offset, + uint32_t count, + uint32_t stride, + bool unroll); + + template + void drawIndirectCountGeneric( + VkDeviceSize offset, + VkDeviceSize countOffset, + uint32_t 
maxCount, + uint32_t stride); + void resolveImageHw( const Rc& dstImage, const Rc& srcImage, @@ -1691,7 +1710,9 @@ namespace dxvk { void unbindGraphicsPipeline(); bool updateGraphicsPipeline(); - bool updateGraphicsPipelineState(DxvkGlobalPipelineBarrier srcBarrier); + bool updateGraphicsPipelineState(); + + uint32_t getGraphicsPipelineDebugColor() const; template void resetSpecConstants( @@ -1757,34 +1778,61 @@ namespace dxvk { template bool commitGraphicsState(); - template - void commitComputeBarriers(); + template + bool checkResourceHazards( + const DxvkBindingLayout& layout, + uint32_t setMask); - void commitComputePostBarriers(); - - template - void commitGraphicsBarriers(); + bool checkComputeHazards(); - template + template + bool checkGraphicsHazards(); + + template bool checkBufferBarrier( const DxvkBufferSlice& bufferSlice, - VkPipelineStageFlags stages, - VkAccessFlags access); + VkAccessFlags access, + DxvkAccessOp accessOp); - template + template bool checkBufferViewBarrier( const Rc& bufferView, - VkPipelineStageFlags stages, - VkAccessFlags access); + VkAccessFlags access, + DxvkAccessOp accessOp); - template + template bool checkImageViewBarrier( const Rc& imageView, - VkPipelineStageFlags stages, - VkAccessFlags access); + VkAccessFlags access, + DxvkAccessOp accessOp); + + template + DxvkAccessFlags getAllowedStorageHazards() { + if (m_barrierControl.isClear() || m_flags.test(DxvkContextFlag::ForceWriteAfterWriteSync)) + return DxvkAccessFlags(); + + if constexpr (BindPoint == VK_PIPELINE_BIND_POINT_COMPUTE) { + // If there are any pending accesses that are not directly related + // to shader dispatches, always insert a barrier if there is a hazard. 
+ VkPipelineStageFlags2 stageMask = VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT + | VK_PIPELINE_STAGE_2_DRAW_INDIRECT_BIT; + + if (!m_execBarriers.hasPendingStages(~stageMask)) { + if (m_barrierControl.test(DxvkBarrierControl::ComputeAllowReadWriteOverlap)) + return DxvkAccessFlags(DxvkAccess::Write, DxvkAccess::Read); + else if (m_barrierControl.test(DxvkBarrierControl::ComputeAllowWriteOnlyOverlap)) + return DxvkAccessFlags(DxvkAccess::Write); + } + } else { + // For graphics, the only type of unrelated access we have to worry about + // is transform feedback writes, in which case inserting a barrier is fine. + if (m_barrierControl.test(DxvkBarrierControl::GraphicsAllowReadWriteOverlap)) + return DxvkAccessFlags(DxvkAccess::Write, DxvkAccess::Read); + } + + return DxvkAccessFlags(); + } - bool canIgnoreWawHazards( - VkPipelineStageFlags stages); void emitMemoryBarrier( VkPipelineStageFlags srcStages, @@ -1876,7 +1924,15 @@ namespace dxvk { const VkImageSubresourceRange& subresources, VkImageLayout srcLayout, VkPipelineStageFlags2 srcStages, - VkAccessFlags2 srcAccess); + VkAccessFlags2 srcAccess, + DxvkAccessOp accessOp); + + void accessImage( + DxvkCmdBuffer cmdBuffer, + const DxvkImageView& imageView, + VkPipelineStageFlags2 srcStages, + VkAccessFlags2 srcAccess, + DxvkAccessOp accessOp); void accessImage( DxvkCmdBuffer cmdBuffer, @@ -1887,7 +1943,8 @@ namespace dxvk { VkAccessFlags2 srcAccess, VkImageLayout dstLayout, VkPipelineStageFlags2 dstStages, - VkAccessFlags2 dstAccess); + VkAccessFlags2 dstAccess, + DxvkAccessOp accessOp); void accessBuffer( DxvkCmdBuffer cmdBuffer, @@ -1895,7 +1952,8 @@ namespace dxvk { VkDeviceSize offset, VkDeviceSize size, VkPipelineStageFlags2 srcStages, - VkAccessFlags2 srcAccess); + VkAccessFlags2 srcAccess, + DxvkAccessOp accessOp); void accessBuffer( DxvkCmdBuffer cmdBuffer, @@ -1905,13 +1963,31 @@ namespace dxvk { VkPipelineStageFlags2 srcStages, VkAccessFlags2 srcAccess, VkPipelineStageFlags2 dstStages, - VkAccessFlags2 
dstAccess); + VkAccessFlags2 dstAccess, + DxvkAccessOp accessOp); + + void accessBuffer( + DxvkCmdBuffer cmdBuffer, + const DxvkBufferSlice& bufferSlice, + VkPipelineStageFlags2 srcStages, + VkAccessFlags2 srcAccess, + DxvkAccessOp accessOp); + + void accessBuffer( + DxvkCmdBuffer cmdBuffer, + const DxvkBufferSlice& bufferSlice, + VkPipelineStageFlags2 srcStages, + VkAccessFlags2 srcAccess, + VkPipelineStageFlags2 dstStages, + VkAccessFlags2 dstAccess, + DxvkAccessOp accessOp); void accessBuffer( DxvkCmdBuffer cmdBuffer, DxvkBufferView& bufferView, VkPipelineStageFlags2 srcStages, - VkAccessFlags2 srcAccess); + VkAccessFlags2 srcAccess, + DxvkAccessOp accessOp); void accessBuffer( DxvkCmdBuffer cmdBuffer, @@ -1919,7 +1995,17 @@ namespace dxvk { VkPipelineStageFlags2 srcStages, VkAccessFlags2 srcAccess, VkPipelineStageFlags2 dstStages, - VkAccessFlags2 dstAccess); + VkAccessFlags2 dstAccess, + DxvkAccessOp accessOp); + + void accessDrawBuffer( + VkDeviceSize offset, + uint32_t count, + uint32_t stride, + uint32_t size); + + void accessDrawCountBuffer( + VkDeviceSize offset); void flushPendingAccesses( DxvkBuffer& buffer, @@ -1946,20 +2032,24 @@ namespace dxvk { DxvkBuffer& buffer, VkDeviceSize offset, VkDeviceSize size, - DxvkAccess access); + DxvkAccess access, + DxvkAccessOp accessOp); bool resourceHasAccess( DxvkBufferView& bufferView, - DxvkAccess access); + DxvkAccess access, + DxvkAccessOp accessOp); bool resourceHasAccess( DxvkImage& image, const VkImageSubresourceRange& subresources, - DxvkAccess access); + DxvkAccess access, + DxvkAccessOp accessOp); bool resourceHasAccess( DxvkImageView& imageView, - DxvkAccess access); + DxvkAccess access, + DxvkAccessOp accessOp); DxvkBarrierBatch& getBarrierBatch( DxvkCmdBuffer cmdBuffer); @@ -1980,34 +2070,55 @@ namespace dxvk { const Rc& image, DxvkAccess access); - template + template bool checkResourceBarrier( const Pred& pred, - VkPipelineStageFlags stages, VkAccessFlags access) { - // Check for read-after-write 
first, this is common + // If we're only reading the resource, only pending + // writes matter for synchronization purposes. bool hasPendingWrite = pred(DxvkAccess::Write); - if (access & vk::AccessReadMask) + if (!(access & vk::AccessWriteMask)) return hasPendingWrite; - // Check for a write-after-write hazard, but - // ignore it if there are no reads involved. - bool ignoreWaW = canIgnoreWawHazards(stages); + if (hasPendingWrite) { + // If there is a write-after-write hazard and synchronization + // for those is not explicitly disabled, insert a barrier. + DxvkAccessFlags allowedHazards = getAllowedStorageHazards(); - if (hasPendingWrite && !ignoreWaW) - return true; + if (!allowedHazards.test(DxvkAccess::Write)) + return true; - // Check whether there are any pending reads. + // Skip barrier if overlapping read-modify-write ops are allowed. + // This includes shader atomics, but also non-atomic load-stores. + if (allowedHazards.test(DxvkAccess::Read)) + return false; + + // Otherwise, check if there is a read-after-write hazard. + if (access & vk::AccessReadMask) + return true; + } + + // Check if there are any pending reads to avoid write-after-read issues. return pred(DxvkAccess::Read); } + void invalidateWriteAfterWriteTracking(); + void beginRenderPassDebugRegion(); - void beginInternalDebugRegion( - const VkDebugUtilsLabelEXT& label); + template + void beginBarrierControlDebugRegion(); - void endInternalDebugRegion(); + void pushDebugRegion( + const VkDebugUtilsLabelEXT& label, + util::DxvkDebugLabelType type); + + void popDebugRegion( + util::DxvkDebugLabelType type); + + bool hasDebugRegion( + util::DxvkDebugLabelType type); void beginActiveDebugRegions(); diff --git a/src/dxvk/dxvk_context_state.h b/src/dxvk/dxvk_context_state.h index 2a753fee3..f91357e1e 100644 --- a/src/dxvk/dxvk_context_state.h +++ b/src/dxvk/dxvk_context_state.h @@ -20,10 +20,11 @@ namespace dxvk { * of the graphics and compute pipelines * has changed and/or needs to be updated. 
*/ - enum class DxvkContextFlag : uint32_t { + enum class DxvkContextFlag : uint64_t { GpRenderPassBound, ///< Render pass is currently bound GpRenderPassSuspended, ///< Render pass is currently suspended GpRenderPassSecondaryCmd, ///< Render pass uses secondary command buffer + GpRenderPassSideEffects, ///< Render pass has side effects GpXfbActive, ///< Transform feedback is enabled GpDirtyFramebuffer, ///< Framebuffer binding is out of date GpDirtyPipeline, ///< Graphics pipeline binding is out of date @@ -56,10 +57,12 @@ namespace dxvk { DirtyDrawBuffer, ///< Indirect argument buffer is dirty DirtyPushConstants, ///< Push constant data has changed + ForceWriteAfterWriteSync, ///< Ignores barrier control flags for write-after-write hazards + Count }; - static_assert(uint32_t(DxvkContextFlag::Count) <= 32u); + static_assert(uint32_t(DxvkContextFlag::Count) <= 64u); using DxvkContextFlags = Flags; @@ -85,8 +88,11 @@ namespace dxvk { * synchronize implicitly. */ enum class DxvkBarrierControl : uint32_t { - IgnoreWriteAfterWrite = 1, - IgnoreGraphicsBarriers = 2, + // Ignores write-after-write hazard + ComputeAllowWriteOnlyOverlap = 0, + ComputeAllowReadWriteOverlap = 1, + + GraphicsAllowReadWriteOverlap = 2, }; using DxvkBarrierControlFlags = Flags; diff --git a/src/dxvk/dxvk_graphics.cpp b/src/dxvk/dxvk_graphics.cpp index 7a8ed26a0..9c12ae1f5 100644 --- a/src/dxvk/dxvk_graphics.cpp +++ b/src/dxvk/dxvk_graphics.cpp @@ -955,8 +955,7 @@ namespace dxvk { if (m_shaders.gs->flags().test(DxvkShaderFlag::HasTransformFeedback)) { m_flags.set(DxvkGraphicsPipelineFlag::HasTransformFeedback); - m_barrier.stages |= VK_PIPELINE_STAGE_DRAW_INDIRECT_BIT - | VK_PIPELINE_STAGE_TRANSFORM_FEEDBACK_BIT_EXT; + m_barrier.stages |= VK_PIPELINE_STAGE_TRANSFORM_FEEDBACK_BIT_EXT; m_barrier.access |= VK_ACCESS_TRANSFORM_FEEDBACK_COUNTER_READ_BIT_EXT | VK_ACCESS_TRANSFORM_FEEDBACK_COUNTER_WRITE_BIT_EXT | VK_ACCESS_TRANSFORM_FEEDBACK_WRITE_BIT_EXT; @@ -966,9 +965,13 @@ namespace dxvk { 
m_flags.set(DxvkGraphicsPipelineFlag::HasRasterizerDiscard); } - if (m_barrier.access & VK_ACCESS_SHADER_WRITE_BIT) + if (m_barrier.access & VK_ACCESS_SHADER_WRITE_BIT) { m_flags.set(DxvkGraphicsPipelineFlag::HasStorageDescriptors); + if (layout->layout().getHazardousSetMask()) + m_flags.set(DxvkGraphicsPipelineFlag::UnrollMergedDraws); + } + if (m_shaders.fs != nullptr) { if (m_shaders.fs->flags().test(DxvkShaderFlag::HasSampleRateShading)) m_flags.set(DxvkGraphicsPipelineFlag::HasSampleRateShading); diff --git a/src/dxvk/dxvk_graphics.h b/src/dxvk/dxvk_graphics.h index 3503fd896..2dde3e162 100644 --- a/src/dxvk/dxvk_graphics.h +++ b/src/dxvk/dxvk_graphics.h @@ -31,6 +31,7 @@ namespace dxvk { HasStorageDescriptors, HasSampleRateShading, HasSampleMaskExport, + UnrollMergedDraws, }; using DxvkGraphicsPipelineFlags = Flags; @@ -660,4 +661,4 @@ namespace dxvk { }; -} \ No newline at end of file +} diff --git a/src/dxvk/dxvk_pipelayout.cpp b/src/dxvk/dxvk_pipelayout.cpp index 9132d025c..cf3f69553 100644 --- a/src/dxvk/dxvk_pipelayout.cpp +++ b/src/dxvk/dxvk_pipelayout.cpp @@ -205,7 +205,7 @@ namespace dxvk { DxvkBindingLayout::DxvkBindingLayout(VkShaderStageFlags stages) - : m_pushConst { 0, 0, 0 }, m_pushConstStages(0), m_stages(stages) { + : m_pushConst { 0, 0, 0 }, m_pushConstStages(0), m_stages(stages), m_hazards(0u) { } @@ -236,6 +236,9 @@ namespace dxvk { void DxvkBindingLayout::addBinding(const DxvkBindingInfo& binding) { uint32_t set = binding.computeSetIndex(); m_bindings[set].addBinding(binding); + + if ((binding.access & VK_ACCESS_2_SHADER_WRITE_BIT) && binding.accessOp == DxvkAccessOp::None) + m_hazards |= 1u << set; } @@ -260,6 +263,8 @@ namespace dxvk { addPushConstantRange(layout.m_pushConst); m_pushConstStages |= layout.m_pushConstStages; + + m_hazards |= layout.m_hazards; } @@ -400,4 +405,4 @@ namespace dxvk { return barrier; } -} \ No newline at end of file +} diff --git a/src/dxvk/dxvk_pipelayout.h b/src/dxvk/dxvk_pipelayout.h index 
cccfb336e..4f43b60ad 100644 --- a/src/dxvk/dxvk_pipelayout.h +++ b/src/dxvk/dxvk_pipelayout.h @@ -11,6 +11,27 @@ namespace dxvk { class DxvkDevice; + /** + * \brief Order-invariant atomic access operation + * + * Information used to optimize barriers when a resource + * is accessed exclusively via order-invariant stores. + */ + enum class DxvkAccessOp : uint32_t { + None = 0, + Or = 1, + And = 2, + Xor = 3, + Add = 4, + IMin = 5, + IMax = 6, + UMin = 7, + UMax = 8, + }; + + using DxvkAccessOps = Flags; + + /** * \brief Descriptor set indices */ @@ -37,6 +58,7 @@ namespace dxvk { VkShaderStageFlagBits stage; ///< Shader stage VkAccessFlags access; ///< Access mask for the resource VkBool32 uboSet; ///< Whether to include this in the UBO set + DxvkAccessOp accessOp; ///< Order-invariant store type, if any /** * \brief Computes descriptor set index for the given binding @@ -315,6 +337,16 @@ namespace dxvk { return m_stages; } + /** + * \brief Queries hazardous sets + * + * \returns Mask of sets with storage descriptors + * that are not accessed in an order-invariant way. + */ + uint32_t getHazardousSetMask() const { + return m_hazards; + } + /** * \brief Queries defined descriptor set layouts * @@ -372,6 +404,7 @@ namespace dxvk { VkPushConstantRange m_pushConst; VkShaderStageFlags m_pushConstStages; VkShaderStageFlags m_stages; + uint32_t m_hazards; }; @@ -590,4 +623,4 @@ namespace dxvk { }; -} \ No newline at end of file +} diff --git a/src/dxvk/dxvk_sparse.h b/src/dxvk/dxvk_sparse.h index 7eb9385a9..d6e71ca17 100644 --- a/src/dxvk/dxvk_sparse.h +++ b/src/dxvk/dxvk_sparse.h @@ -580,6 +580,27 @@ namespace dxvk { m_trackId = 0u; } + /** + * \brief Checks whether the buffer has been used for gfx stores + * + * \returns \c true if any graphics pipeline has written this + * resource via transform feedback or a storage descriptor. 
+ */ + bool hasGfxStores() const { + return m_hasGfxStores; + } + + /** + * \brief Tracks graphics pipeline side effects + * + * Must be called whenever the resource is written via graphics + * pipeline storage descriptors or transform feedback. + * \returns \c true if side effects were already tracked. + */ + bool trackGfxStores() { + return std::exchange(m_hasGfxStores, true); + } + /** * \brief Queries sparse page table * @@ -622,6 +643,8 @@ namespace dxvk { uint64_t m_trackId = { 0u }; uint64_t m_cookie = { 0u }; + bool m_hasGfxStores = false; + static constexpr uint64_t getIncrement(DxvkAccess access) { return uint64_t(1u) << (uint32_t(access) * 20u); } diff --git a/src/dxvk/dxvk_util.h b/src/dxvk/dxvk_util.h index 863647f24..49a63da1c 100644 --- a/src/dxvk/dxvk_util.h +++ b/src/dxvk/dxvk_util.h @@ -4,6 +4,15 @@ namespace dxvk::util { + /** + * \brief Debug utils label type + */ + enum class DxvkDebugLabelType : uint32_t { + External, ///< App-provided scope + InternalRenderPass, ///< Internal render pass markers + InternalBarrierControl, ///< Barrier control markers + }; + /** * \brief Debug label wrapper * @@ -16,12 +25,16 @@ namespace dxvk::util { DxvkDebugLabel() = default; - DxvkDebugLabel(const VkDebugUtilsLabelEXT& label) - : m_text(label.pLabelName ? label.pLabelName : "") { + DxvkDebugLabel(const VkDebugUtilsLabelEXT& label, DxvkDebugLabelType type) + : m_text(label.pLabelName ? 
label.pLabelName : ""), m_type(type) { for (uint32_t i = 0; i < m_color.size(); i++) m_color[i] = label.color[i]; } + DxvkDebugLabelType type() const { + return m_type; + } + VkDebugUtilsLabelEXT get() const { VkDebugUtilsLabelEXT label = { VK_STRUCTURE_TYPE_DEBUG_UTILS_LABEL_EXT }; label.pLabelName = m_text.c_str(); @@ -34,6 +47,7 @@ namespace dxvk::util { std::string m_text; std::array m_color = { }; + DxvkDebugLabelType m_type; }; diff --git a/src/util/com/com_private_data.h b/src/util/com/com_private_data.h index 0673f8e2d..8a21d4153 100644 --- a/src/util/com/com_private_data.h +++ b/src/util/com/com_private_data.h @@ -1,5 +1,7 @@ #pragma once +#include +#include #include #include "com_include.h" @@ -9,7 +11,7 @@ namespace dxvk { /** * \brief COM private data entry type */ - enum ComPrivateDataType { + enum class ComPrivateDataType : uint32_t { None, Data, Iface, diff --git a/src/util/config/config.cpp b/src/util/config/config.cpp index 45e3f8c8d..9a65d478c 100644 --- a/src/util/config/config.cpp +++ b/src/util/config/config.cpp @@ -291,12 +291,11 @@ namespace dxvk { /* Final Fantasy XV: VXAO does thousands of * * draw calls with the same UAV bound */ { R"(\\ffxv_s\.exe$)", {{ - { "d3d11.ignoreGraphicsBarriers", "True" }, + { "d3d11.relaxedGraphicsBarriers", "True" }, }} }, /* God of War - relies on NVAPI/AMDAGS for * * barrier stuff, needs nvapi for DLSS */ { R"(\\GoW\.exe$)", {{ - { "d3d11.ignoreGraphicsBarriers", "True" }, { "d3d11.relaxedBarriers", "True" }, { "dxgi.hideNvidiaGpu", "False" }, { "dxgi.maxFrameLatency", "1" }, @@ -334,7 +333,7 @@ namespace dxvk { * presumably for culling, which doesn't play * * nicely with D3D11 without vendor libraries */ { R"(\\Stray-Win64-Shipping\.exe$)", {{ - { "d3d11.ignoreGraphicsBarriers", "True" }, + { "d3d11.relaxedGraphicsBarriers", "True" }, }} }, /* Metal Gear Solid V: Ground Zeroes * * Texture quality can break at high vram */ @@ -433,7 +432,7 @@ namespace dxvk { * and assumes that AMD GPUs do not expose * * 
native command lists for AGS usage */ { R"(\\granblue_fantasy_relink\.exe$)", {{ - { "d3d11.ignoreGraphicsBarriers", "True" }, + { "d3d11.relaxedGraphicsBarriers", "True" }, { "d3d11.exposeDriverCommandLists", "False" }, { "dxgi.hideNvidiaGpu", "False" }, }} }, diff --git a/src/vulkan/vulkan_util.h b/src/vulkan/vulkan_util.h index 2995c4f7d..f872d5801 100644 --- a/src/vulkan/vulkan_util.h +++ b/src/vulkan/vulkan_util.h @@ -40,6 +40,11 @@ namespace dxvk::vk { = VK_ACCESS_HOST_READ_BIT | VK_ACCESS_HOST_WRITE_BIT; + constexpr static VkAccessFlags AccessGfxSideEffectMask + = VK_ACCESS_SHADER_WRITE_BIT + | VK_ACCESS_TRANSFORM_FEEDBACK_WRITE_BIT_EXT + | VK_ACCESS_TRANSFORM_FEEDBACK_COUNTER_WRITE_BIT_EXT; + constexpr static VkPipelineStageFlags StageDeviceMask = VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT | VK_PIPELINE_STAGE_DRAW_INDIRECT_BIT