Mirror of https://github.com/doitsujin/dxvk.git

Merge branch 'low-latency-framepacing' into low-latency-framepacing-PR
Commit 9bf588873e: 56 changed files with 1443 additions and 766 deletions

@@ -39,7 +39,7 @@
 #
 # Supported values: "max-frame-latency", "low-latency", "min-latency"

-# dxvk.framePacing = ""
+# dxvk.framePace = ""


 # Allows fine-tuning the low-latency frame pacing mode.
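
With the renamed option, a minimal dxvk.conf entry would look as follows; "low-latency" is just one of the supported values listed above and is used here purely for illustration:

    # enable the low-latency frame pacing mode
    dxvk.framePace = "low-latency"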
@@ -149,7 +149,7 @@
 # The implementation will either use VK_NV_low_latency2 if supported
 # by the driver, or a custom algorithm.
 # - False: Disable Reflex support as well as built-in latency reduction.
-# This build defaults to False to enable dxvk.framePacing. You need to
+# This build defaults to False to enable dxvk.framePace. You need to
 # enable Reflex manually (Auto) until we support switching back and
 # forth between Reflex and the low-latency frame pacing - for example
 # via the ingame options - and more critically we want to enable
@@ -1009,10 +1009,22 @@ namespace dxvk {
     if (!ctrBuf.defined())
       return;

-    EmitCs([=] (DxvkContext* ctx) {
-      ctx->drawIndirectXfb(ctrBuf,
+    // We bind the SO counter as an indirect count buffer,
+    // so reset any tracking we may have been doing here.
+    m_state.id.reset();
+
+    EmitCs([=] (DxvkContext* ctx) mutable {
+      ctx->bindDrawBuffers(DxvkBufferSlice(),
+        Forwarder::move(ctrBuf));
+
+      ctx->drawIndirectXfb(0u,
         vtxBuf.buffer()->getXfbVertexStride(),
         vtxBuf.offset());
+
+      // Reset draw buffer right away so we don't
+      // keep the SO counter alive indefinitely
+      ctx->bindDrawBuffers(DxvkBufferSlice(),
+        DxvkBufferSlice());
     });
   }

@@ -1109,7 +1121,7 @@ namespace dxvk {
     } else {
       cmdData = EmitCsCmd<D3D11CmdDrawIndirectData>(
         [] (DxvkContext* ctx, const D3D11CmdDrawIndirectData* data) {
-          ctx->drawIndexedIndirect(data->offset, data->count, data->stride);
+          ctx->drawIndexedIndirect(data->offset, data->count, data->stride, true);
         });

       cmdData->type = D3D11CmdType::DrawIndirectIndexed;

@@ -1144,7 +1156,7 @@ namespace dxvk {
     } else {
       cmdData = EmitCsCmd<D3D11CmdDrawIndirectData>(
         [] (DxvkContext* ctx, const D3D11CmdDrawIndirectData* data) {
-          ctx->drawIndirect(data->offset, data->count, data->stride);
+          ctx->drawIndirect(data->offset, data->count, data->stride, true);
         });

       cmdData->type = D3D11CmdType::DrawIndirect;
@@ -4632,10 +4644,6 @@ namespace dxvk {
     ApplyRasterizerSampleCount();
     ApplyViewportState();

-    BindDrawBuffers(
-      m_state.id.argBuffer.ptr(),
-      m_state.id.cntBuffer.ptr());
-
     BindIndexBuffer(
       m_state.ia.indexBuffer.buffer.ptr(),
       m_state.ia.indexBuffer.offset,

@@ -4674,6 +4682,11 @@ namespace dxvk {
     RestoreSamplers<DxbcProgramType::GeometryShader>();
     RestoreSamplers<DxbcProgramType::PixelShader>();
     RestoreSamplers<DxbcProgramType::ComputeShader>();

+    // Draw buffer bindings aren't persistent at the API level, and
+    // we can't meaningfully track them. Just reset this state here
+    // and reapply on the next indirect draw.
+    SetDrawBuffers(nullptr, nullptr);
   }

@@ -5000,10 +5013,13 @@ namespace dxvk {
     auto argBuffer = static_cast<D3D11Buffer*>(pBufferForArgs);
     auto cntBuffer = static_cast<D3D11Buffer*>(pBufferForCount);

-    if (m_state.id.argBuffer != argBuffer
-     || m_state.id.cntBuffer != cntBuffer) {
-      m_state.id.argBuffer = argBuffer;
-      m_state.id.cntBuffer = cntBuffer;
+    auto argBufferCookie = argBuffer ? argBuffer->GetCookie() : 0u;
+    auto cntBufferCookie = cntBuffer ? cntBuffer->GetCookie() : 0u;
+
+    if (m_state.id.argBufferCookie != argBufferCookie
+     || m_state.id.cntBufferCookie != cntBufferCookie) {
+      m_state.id.argBufferCookie = argBufferCookie;
+      m_state.id.cntBufferCookie = cntBufferCookie;

       BindDrawBuffers(argBuffer, cntBuffer);
     }

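
The context state now compares plain integer cookies instead of COM references (see the D3D11ContextStateID change further down), so redundant BindDrawBuffers calls are still skipped without the state tracker keeping the buffers alive. A standalone sketch of that pattern, with hypothetical types; GetCookie() exists in the diff above, but its exact uniqueness guarantees are assumed here:

    #include <cstdint>

    // Hypothetical, simplified state holder mirroring D3D11ContextStateID.
    struct DrawBufferState {
      uint64_t argBufferCookie = 0u;
      uint64_t cntBufferCookie = 0u;
    };

    // GetCookie() is assumed to return a stable, unique ID per buffer object,
    // so equal cookies imply the same binding and the bind can be elided.
    template<typename Buffer, typename BindFn>
    void bindIfChanged(DrawBufferState& state, Buffer* arg, Buffer* cnt, BindFn bind) {
      uint64_t argCookie = arg ? arg->GetCookie() : 0u;
      uint64_t cntCookie = cnt ? cnt->GetCookie() : 0u;

      if (state.argBufferCookie != argCookie || state.cntBufferCookie != cntCookie) {
        state.argBufferCookie = argCookie;
        state.cntBufferCookie = cntCookie;
        bind(arg, cnt);   // only re-record the binding when something actually changed
      }
    }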
@@ -1084,7 +1084,7 @@ namespace dxvk {
           UINT SampleMask);

     template<bool AllowFlush = !IsDeferred, typename Cmd>
-    void EmitCs(Cmd&& command) {
+    void EmitCs(Cmd&& command, bool disableFlush = false) {
       m_cmdData = nullptr;

       if (unlikely(!m_csChunk->push(command))) {

@@ -1092,6 +1092,7 @@ namespace dxvk {
         m_csChunk = AllocCsChunk();

         if constexpr (AllowFlush)
+          if (!disableFlush)
             GetTypedContext()->ConsiderFlush(GpuFlushType::ImplicitWeakHint);

         m_csChunk->push(command);

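
Call sites opt in per command; a minimal usage sketch following the beginLatencyTracking call sites later in this diff (the capture names are illustrative):

    // Passing true for the new disableFlush parameter keeps the implicit-flush
    // heuristic from firing even if this command overflows the current chunk.
    EmitCs([cLatency = latency, cFrameId = frameId] (DxvkContext* ctx) {
      ctx->beginLatencyTracking(cLatency, cFrameId + 1u);
    }, true);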
@@ -53,7 +53,7 @@ namespace dxvk {
           cOffset = ByteOffsetForArgs,
           cStride = ByteStrideForArgs
         ] (DxvkContext* ctx) {
-          ctx->drawIndirect(cOffset, cCount, cStride);
+          ctx->drawIndirect(cOffset, cCount, cStride, false);
         });
       }

@@ -72,7 +72,7 @@ namespace dxvk {
           cOffset = ByteOffsetForArgs,
           cStride = ByteStrideForArgs
         ] (DxvkContext* ctx) {
-          ctx->drawIndexedIndirect(cOffset, cCount, cStride);
+          ctx->drawIndexedIndirect(cOffset, cCount, cStride, false);
         });
       }

@@ -146,11 +146,10 @@ namespace dxvk {
     D3D11Device* parent = static_cast<D3D11Device*>(m_ctx->GetParentInterface());
     DxvkBarrierControlFlags flags = parent->GetOptionsBarrierControlFlags();

-    if (ControlFlags & D3D11_VK_BARRIER_CONTROL_IGNORE_WRITE_AFTER_WRITE)
-      flags.set(DxvkBarrierControl::IgnoreWriteAfterWrite);
-
-    if (ControlFlags & D3D11_VK_BARRIER_CONTROL_IGNORE_GRAPHICS_UAV)
-      flags.set(DxvkBarrierControl::IgnoreGraphicsBarriers);
+    if (ControlFlags & D3D11_VK_BARRIER_CONTROL_IGNORE_WRITE_AFTER_WRITE) {
+      flags.set(DxvkBarrierControl::ComputeAllowReadWriteOverlap,
+                DxvkBarrierControl::GraphicsAllowReadWriteOverlap);
+    }

     m_ctx->EmitCs([cFlags = flags] (DxvkContext* ctx) {
       ctx->setBarrierControl(cFlags);

@@ -865,6 +865,11 @@ namespace dxvk {
           Rc<DxvkLatencyTracker> LatencyTracker) {
     D3D10DeviceLock lock = LockContext();

+    // Don't keep draw buffers alive indefinitely. This cannot be
+    // done in ExecuteFlush because command recording itself might
+    // flush, so no state changes are allowed to happen there.
+    SetDrawBuffers(nullptr, nullptr);
+
     EmitCs<false>([
       cTracker = std::move(LatencyTracker)
     ] (DxvkContext* ctx) {

@@ -232,12 +232,12 @@ namespace dxvk {
   * argument and draw count buffer.
   */
  struct D3D11ContextStateID {
-    Com<D3D11Buffer, false> argBuffer = nullptr;
-    Com<D3D11Buffer, false> cntBuffer = nullptr;
+    uint64_t argBufferCookie = 0u;
+    uint64_t cntBufferCookie = 0u;

    void reset() {
-      argBuffer = nullptr;
-      cntBuffer = nullptr;
+      argBufferCookie = 0u;
+      cntBufferCookie = 0u;
    }
  };

@@ -472,13 +472,13 @@ namespace dxvk {
         const Rc<DxvkAdapter>& Adapter);

     DxvkBarrierControlFlags GetOptionsBarrierControlFlags() {
-      DxvkBarrierControlFlags barrierControl;
+      DxvkBarrierControlFlags barrierControl = 0u;

       if (m_d3d11Options.relaxedBarriers)
-        barrierControl.set(DxvkBarrierControl::IgnoreWriteAfterWrite);
+        barrierControl.set(DxvkBarrierControl::ComputeAllowWriteOnlyOverlap);

-      if (m_d3d11Options.ignoreGraphicsBarriers)
-        barrierControl.set(DxvkBarrierControl::IgnoreGraphicsBarriers);
+      if (m_d3d11Options.relaxedBarriers || m_d3d11Options.relaxedGraphicsBarriers)
+        barrierControl.set(DxvkBarrierControl::GraphicsAllowReadWriteOverlap);

       return barrierControl;
     }

@@ -24,7 +24,9 @@ enum D3D11_VK_EXTENSION : uint32_t {
   */
  enum D3D11_VK_BARRIER_CONTROL : uint32_t {
    D3D11_VK_BARRIER_CONTROL_IGNORE_WRITE_AFTER_WRITE = 1 << 0,
-    D3D11_VK_BARRIER_CONTROL_IGNORE_GRAPHICS_UAV      = 1 << 1,
+
+    // Removed:
+    // D3D11_VK_BARRIER_CONTROL_IGNORE_GRAPHICS_UAV = 1 << 1,
  };


@@ -17,7 +17,7 @@ namespace dxvk {
     this->zeroInitWorkgroupMemory = config.getOption<bool>("d3d11.zeroInitWorkgroupMemory", false);
     this->forceVolatileTgsmAccess = config.getOption<bool>("d3d11.forceVolatileTgsmAccess", false);
     this->relaxedBarriers = config.getOption<bool>("d3d11.relaxedBarriers", false);
-    this->ignoreGraphicsBarriers = config.getOption<bool>("d3d11.ignoreGraphicsBarriers", false);
+    this->relaxedGraphicsBarriers = config.getOption<bool>("d3d11.relaxedGraphicsBarriers", false);
     this->maxTessFactor = config.getOption<int32_t>("d3d11.maxTessFactor", 0);
     this->samplerAnisotropy = config.getOption<int32_t>("d3d11.samplerAnisotropy", -1);
     this->samplerLodBias = config.getOption<float>("d3d11.samplerLodBias", 0.0f);

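
For users migrating configs, the key rename maps one-to-one; a minimal dxvk.conf sketch (the True/False value syntax follows the stock dxvk.conf convention and is assumed here):

    # formerly: d3d11.ignoreGraphicsBarriers = True
    d3d11.relaxedGraphicsBarriers = True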
@@ -43,7 +43,7 @@ namespace dxvk {
     ///
     /// May improve performance in some games,
     /// but might also cause rendering issues.
-    bool ignoreGraphicsBarriers = false;
+    bool relaxedGraphicsBarriers = false;

     /// Maximum tessellation factor.
     ///

@@ -472,7 +472,7 @@ namespace dxvk {
       cFrameId = m_frameId
     ] (DxvkContext* ctx) {
       ctx->beginLatencyTracking(cLatency, cFrameId + 1u);
-    });
+    }, true);
   }
 }

@@ -31,16 +31,28 @@ namespace dxvk {
     }

     ULONG STDMETHODCALLTYPE Release() {
-      // ignore Release calls on objects with 0 refCount
-      if(unlikely(!this->m_refCount))
-        return this->m_refCount;
+      uint32_t oldRefCount, refCount;
+
+      do {
+        oldRefCount = this->m_refCount.load(std::memory_order_acquire);
+
+        // clamp value to 0 to prevent underruns
+        if (unlikely(!oldRefCount))
+          return 0;
+
+        refCount = oldRefCount - 1;
+
+      } while (!this->m_refCount.compare_exchange_weak(oldRefCount,
+        refCount,
+        std::memory_order_release,
+        std::memory_order_acquire));

-      uint32_t refCount = --this->m_refCount;
       if (unlikely(!refCount)) {
         auto* pDevice = GetDevice();
         this->ReleasePrivate();
         pDevice->Release();
       }

       return refCount;
     }

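
The underflow clamp follows a standard compare-exchange pattern; a self-contained sketch of the same idea outside the COM wrapper (function and parameter names are illustrative):

    #include <atomic>
    #include <cstdint>

    // Same clamp-at-zero decrement as above: extra Release() calls on an
    // already-dead object return 0 instead of wrapping the counter around.
    uint32_t releaseClamped(std::atomic<uint32_t>& refCount) {
      uint32_t oldValue = refCount.load(std::memory_order_acquire);

      do {
        if (oldValue == 0u)
          return 0u;
      } while (!refCount.compare_exchange_weak(oldValue, oldValue - 1u,
                 std::memory_order_release, std::memory_order_acquire));

      return oldValue - 1u;
    }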
@@ -9,7 +9,7 @@
 namespace dxvk {

   static inline uint32_t parseDword(std::string_view str) {
-    uint32_t value = UINT32_MAX;
+    uint32_t value = std::numeric_limits<uint32_t>::max();
     std::from_chars(str.data(), str.data() + str.size(), value);
     return value;
   }

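
The sentinel initialisation relies on std::from_chars leaving the output variable untouched when parsing fails; a small sketch of that behaviour (the function name is illustrative):

    #include <charconv>
    #include <cstdint>
    #include <limits>
    #include <string_view>

    // Malformed input leaves 'value' at the sentinel, so the caller sees
    // the maximum uint32_t value instead of garbage.
    uint32_t parseDwordSketch(std::string_view str) {
      uint32_t value = std::numeric_limits<uint32_t>::max();
      std::from_chars(str.data(), str.data() + str.size(), value);
      return value;   // parseDwordSketch("123") == 123, parseDwordSketch("oops") == max
    }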
@@ -588,7 +588,7 @@ namespace dxvk {
     // Max Vertex Shader Const
     pCaps->MaxVertexShaderConst = MaxFloatConstantsVS;
     // Max PS1 Value
-    pCaps->PixelShader1xMaxValue = options.shaderModel > 0 ? FLT_MAX : 0.0f;
+    pCaps->PixelShader1xMaxValue = options.shaderModel > 0 ? std::numeric_limits<float>::max() : 0.0f;
     // Dev Caps 2
     pCaps->DevCaps2 = D3DDEVCAPS2_STREAMOFFSET
                   /* | D3DDEVCAPS2_DMAPNPATCH */

@@ -75,7 +75,7 @@ namespace dxvk {

   public:

-    static constexpr UINT AllLayers = UINT32_MAX;
+    static constexpr UINT AllLayers = std::numeric_limits<uint32_t>::max();

     D3D9CommonTexture(
             D3D9DeviceEx* pDevice,

@@ -1760,7 +1760,7 @@ namespace dxvk {

     m_state.depthStencil = ds;

-    UpdateActiveHazardsDS(UINT32_MAX);
+    UpdateActiveHazardsDS(std::numeric_limits<uint32_t>::max());

     return D3D_OK;
   }

@@ -2157,7 +2157,7 @@ namespace dxvk {
     if (m_state.IsLightEnabled(Index) == !!Enable)
       return D3D_OK;

-    uint32_t searchIndex = UINT32_MAX;
+    uint32_t searchIndex = std::numeric_limits<uint32_t>::max();
     uint32_t setIndex = Index;

     if (!Enable)

@@ -2384,7 +2384,7 @@ namespace dxvk {

       case D3DRS_ZWRITEENABLE:
         if (likely(!old != !Value))
-          UpdateActiveHazardsDS(UINT32_MAX);
+          UpdateActiveHazardsDS(std::numeric_limits<uint32_t>::max());
         [[fallthrough]];
       case D3DRS_STENCILENABLE:
       case D3DRS_ZENABLE:

@@ -3387,7 +3387,7 @@ namespace dxvk {
       BindShader<DxsoProgramTypes::VertexShader>(GetCommonShader(shader));
       m_vsShaderMasks = newShader->GetShaderMask();

-      UpdateTextureTypeMismatchesForShader(newShader, m_vsShaderMasks.samplerMask, caps::MaxTexturesPS + 1);
+      UpdateTextureTypeMismatchesForShader(newShader, m_vsShaderMasks.samplerMask, FirstVSSamplerSlot);
     }
     else {
       m_vsShaderMasks = D3D9ShaderMasks();

@@ -3795,8 +3795,8 @@ namespace dxvk {
     if (m_psShaderMasks.samplerMask != newShaderMasks.samplerMask ||
         m_psShaderMasks.rtMask != newShaderMasks.rtMask) {
       m_psShaderMasks = newShaderMasks;
-      UpdateActiveHazardsRT(UINT32_MAX);
-      UpdateActiveHazardsDS(UINT32_MAX);
+      UpdateActiveHazardsRT(std::numeric_limits<uint32_t>::max());
+      UpdateActiveHazardsDS(std::numeric_limits<uint32_t>::max());
     }

     return D3D_OK;

@@ -6129,7 +6129,7 @@ namespace dxvk {
     ] (DxvkContext* ctx) {
       if (cTracker && cTracker->needsAutoMarkers())
         ctx->beginLatencyTracking(cTracker, cFrameId);
-    });
+    }, true);
   }

@@ -6415,12 +6415,15 @@ namespace dxvk {
   void D3D9DeviceEx::UpdateTextureTypeMismatchesForTexture(uint32_t stateSampler) {
     uint32_t shaderTextureIndex;
     const D3D9CommonShader* shader;
-    if (unlikely(stateSampler > caps::MaxTexturesPS + 1)) {
+    if (likely(IsPSSampler(stateSampler))) {
+      shader = GetCommonShader(m_state.pixelShader);
+      shaderTextureIndex = stateSampler;
+    } else if (unlikely(IsVSSampler(stateSampler))) {
       shader = GetCommonShader(m_state.vertexShader);
       shaderTextureIndex = stateSampler - caps::MaxTexturesPS - 1;
     } else {
-      shader = GetCommonShader(m_state.pixelShader);
-      shaderTextureIndex = stateSampler;
+      // Do not type check the fixed function displacement map texture.
+      return;
     }

     if (unlikely(shader == nullptr || shader->GetInfo().majorVersion() < 2 || m_d3d9Options.forceSamplerTypeSpecConstants)) {
|
||||||
|
|
||||||
if (key.Data.Contents.UseLighting) {
|
if (key.Data.Contents.UseLighting) {
|
||||||
for (uint32_t i = 0; i < caps::MaxEnabledLights; i++) {
|
for (uint32_t i = 0; i < caps::MaxEnabledLights; i++) {
|
||||||
if (m_state.enabledLightIndices[i] != UINT32_MAX)
|
if (m_state.enabledLightIndices[i] != std::numeric_limits<uint32_t>::max())
|
||||||
lightCount++;
|
lightCount++;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -7912,7 +7915,7 @@ namespace dxvk {
|
||||||
uint32_t lightIdx = 0;
|
uint32_t lightIdx = 0;
|
||||||
for (uint32_t i = 0; i < caps::MaxEnabledLights; i++) {
|
for (uint32_t i = 0; i < caps::MaxEnabledLights; i++) {
|
||||||
auto idx = m_state.enabledLightIndices[i];
|
auto idx = m_state.enabledLightIndices[i];
|
||||||
if (idx == UINT32_MAX)
|
if (idx == std::numeric_limits<uint32_t>::max())
|
||||||
continue;
|
continue;
|
||||||
|
|
||||||
data->Lights[lightIdx++] = D3D9Light(m_state.lights[idx].value(), m_state.transforms[GetTransformIndex(D3DTS_VIEW)]);
|
data->Lights[lightIdx++] = D3D9Light(m_state.lights[idx].value(), m_state.transforms[GetTransformIndex(D3DTS_VIEW)]);
|
||||||
|
|
|
@ -653,15 +653,41 @@ namespace dxvk {
|
||||||
const D3DDISPLAYMODEEX* pFullscreenDisplayMode,
|
const D3DDISPLAYMODEEX* pFullscreenDisplayMode,
|
||||||
IDirect3DSwapChain9** ppSwapChain);
|
IDirect3DSwapChain9** ppSwapChain);
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @brief Sets the given sampler state
|
||||||
|
*
|
||||||
|
* @param StateSampler Sampler index (according to our internal way of storing samplers)
|
||||||
|
* @param Type Sampler state type to change
|
||||||
|
* @param Value State value
|
||||||
|
*/
|
||||||
HRESULT SetStateSamplerState(
|
HRESULT SetStateSamplerState(
|
||||||
DWORD StateSampler,
|
DWORD StateSampler,
|
||||||
D3DSAMPLERSTATETYPE Type,
|
D3DSAMPLERSTATETYPE Type,
|
||||||
DWORD Value);
|
DWORD Value);
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @brief Sets the given sampler texture
|
||||||
|
*
|
||||||
|
* @param StateSampler Sampler index (according to our internal way of storing samplers)
|
||||||
|
* @param pTexture Texture to use
|
||||||
|
*/
|
||||||
HRESULT SetStateTexture(DWORD StateSampler, IDirect3DBaseTexture9* pTexture);
|
HRESULT SetStateTexture(DWORD StateSampler, IDirect3DBaseTexture9* pTexture);
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @brief Sets the transform for the given sampler
|
||||||
|
*
|
||||||
|
* @param idx Sampler index (according to our internal way of storing samplers)
|
||||||
|
* @param pMatrix Transform matrix
|
||||||
|
*/
|
||||||
HRESULT SetStateTransform(uint32_t idx, const D3DMATRIX* pMatrix);
|
HRESULT SetStateTransform(uint32_t idx, const D3DMATRIX* pMatrix);
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @brief Sets the fixed function texture processing state
|
||||||
|
*
|
||||||
|
* @param Stage Sampler index (according to our internal way of storing samplers)
|
||||||
|
* @param Type Fixed function texture stage type
|
||||||
|
* @param Value Value for the state
|
||||||
|
*/
|
||||||
HRESULT SetStateTextureStageState(
|
HRESULT SetStateTextureStageState(
|
||||||
DWORD Stage,
|
DWORD Stage,
|
||||||
D3D9TextureStageStateTypes Type,
|
D3D9TextureStageStateTypes Type,
|
||||||
|
@@ -818,8 +844,40 @@ namespace dxvk {

     void UpdateActiveFetch4(uint32_t stateSampler);

+    /**
+     * @brief Sets the mismatching texture type bits for all samplers if necessary.
+     *
+     * This function will check all samplers the shader uses and set the mismatching texture type bit for the given sampler if it does not
+     * match the texture type expected by the respective shader.
+     *
+     * It will *not* unset the bit if the texture type does match.
+     *
+     * @param stateSampler Sampler index (according to our internal way of storing samplers)
+     */
+
+    /**
+     * @brief Sets the mismatching texture type bits for all samplers if necessary.
+     *
+     * This function will check all samplers the shader uses and set the mismatching texture type bit for the given sampler if it does not
+     * match the texture type expected by the shader.
+     *
+     * @param shader The shader
+     * @param shaderSamplerMask Mask of all samplers that the shader uses (according to our internal way of storing samplers)
+     * @param shaderSamplerOffset First index of the shader's samplers according to our internal way of storing samplers.
+     *                            Used to transform the sampler indices that are relative to the entire pipeline to ones relative to the shader.
+     */
     void UpdateTextureTypeMismatchesForShader(const D3D9CommonShader* shader, uint32_t shaderSamplerMask, uint32_t shaderSamplerOffset);

+    /**
+     * @brief Sets the mismatching texture type bit for the given sampler.
+     *
+     * This function will set the mismatching texture type bit for the given sampler if it does not
+     * match the texture type expected by the respective shader.
+     *
+     * It will *not* unset the bit if the texture type does match.
+     *
+     * @param stateSampler Sampler index (according to our internal way of storing samplers)
+     */
     void UpdateTextureTypeMismatchesForTexture(uint32_t stateSampler);

     void UploadManagedTexture(D3D9CommonTexture* pResource);

@@ -1122,12 +1180,13 @@ namespace dxvk {
   private:

     template<bool AllowFlush = true, typename Cmd>
-    void EmitCs(Cmd&& command) {
+    void EmitCs(Cmd&& command, bool disableFlush = false) {
       if (unlikely(!m_csChunk->push(command))) {
         EmitCsChunk(std::move(m_csChunk));
         m_csChunk = AllocCsChunk();

         if constexpr (AllowFlush)
+          if (!disableFlush)
             ConsiderFlush(GpuFlushType::ImplicitWeakHint);

         m_csChunk->push(command);

@@ -25,12 +25,28 @@ namespace dxvk {
     }

     ULONG STDMETHODCALLTYPE Release() {
-      uint32_t refCount = --this->m_refCount;
+      uint32_t oldRefCount, refCount;
+
+      do {
+        oldRefCount = this->m_refCount.load(std::memory_order_acquire);
+
+        // clamp value to 0 to prevent underruns
+        if (unlikely(!oldRefCount))
+          return 0;
+
+        refCount = oldRefCount - 1;
+
+      } while (!this->m_refCount.compare_exchange_weak(oldRefCount,
+        refCount,
+        std::memory_order_release,
+        std::memory_order_acquire));
+
       if (unlikely(!refCount)) {
         auto* pDevice = GetDevice();
         this->ReleasePrivate();
         pDevice->Release();
       }

       return refCount;
     }

@@ -1297,7 +1297,7 @@ namespace dxvk {
     uint32_t atten = m_module.opFFma (m_floatType, d, atten2, atten1);
              atten = m_module.opFFma (m_floatType, d, atten, atten0);
              atten = m_module.opFDiv (m_floatType, m_module.constf32(1.0f), atten);
-             atten = m_module.opNMin (m_floatType, atten, m_module.constf32(FLT_MAX));
+             atten = m_module.opNMin (m_floatType, atten, m_module.constf32(std::numeric_limits<float>::max()));

     atten = m_module.opSelect(m_floatType, m_module.opFOrdGreaterThan(bool_t, d, range), m_module.constf32(0.0f), atten);
     atten = m_module.opSelect(m_floatType, isDirectional, m_module.constf32(1.0f), atten);

@@ -63,7 +63,7 @@ namespace dxvk {
     // do an or per-draw in the device.
     // We shift by 17 because 16 ps samplers + 1 dmap (tess)
     if (ShaderStage == VK_SHADER_STAGE_VERTEX_BIT)
-      m_usedSamplers <<= caps::MaxTexturesPS + 1;
+      m_usedSamplers <<= FirstVSSamplerSlot;

     m_usedRTs = pModule->usedRTs();

@@ -10,7 +10,7 @@ namespace dxvk {
       streamFreq[i] = 1;

     for (uint32_t i = 0; i < enabledLightIndices.size(); i++)
-      enabledLightIndices[i] = UINT32_MAX;
+      enabledLightIndices[i] = std::numeric_limits<uint32_t>::max();
   }

@@ -204,7 +204,7 @@ namespace dxvk {
     if (m_state.IsLightEnabled(Index) == !!Enable)
       return D3D_OK;

-    uint32_t searchIndex = UINT32_MAX;
+    uint32_t searchIndex = std::numeric_limits<uint32_t>::max();
     uint32_t setIndex = Index;

     if (!Enable)

@@ -436,7 +436,7 @@ namespace dxvk {
   void D3D9StateBlock::CapturePixelSamplerStates() {
     m_captures.flags.set(D3D9CapturedStateFlag::SamplerStates);

-    for (uint32_t i = 0; i < caps::MaxTexturesPS + 1; i++) {
+    for (uint32_t i = 0; i < FirstVSSamplerSlot; i++) {
       m_captures.samplers.set(i, true);

       m_captures.samplerStates[i].set(D3DSAMP_ADDRESSU, true);

@@ -519,7 +519,7 @@ namespace dxvk {
   void D3D9StateBlock::CaptureVertexSamplerStates() {
     m_captures.flags.set(D3D9CapturedStateFlag::SamplerStates);

-    for (uint32_t i = caps::MaxTexturesPS + 1; i < SamplerCount; i++) {
+    for (uint32_t i = FirstVSSamplerSlot; i < SamplerCount; i++) {
       m_captures.samplers.set(i, true);
       m_captures.samplerStates[i].set(D3DSAMP_DMAPOFFSET, true);
     }

@@ -652,17 +652,17 @@ namespace dxvk {

   static bool validateGammaRamp(const WORD (&ramp)[256]) {
     if (ramp[0] >= ramp[std::size(ramp) - 1]) {
-      Logger::err("validateGammaRamp: ramp inverted or flat");
+      Logger::warn("validateGammaRamp: ramp inverted or flat");
       return false;
     }

     for (size_t i = 1; i < std::size(ramp); i++) {
       if (ramp[i] < ramp[i - 1]) {
-        Logger::err("validateGammaRamp: ramp not monotonically increasing");
+        Logger::warn("validateGammaRamp: ramp not monotonically increasing");
         return false;
       }
       if (ramp[i] - ramp[i - 1] >= UINT16_MAX / 2) {
-        Logger::err("validateGammaRamp: huuuge jump");
+        Logger::warn("validateGammaRamp: huuuge jump");
         return false;
       }
     }

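
For reference, a ramp that passes all three checks above is simply a strictly increasing table with modest steps; a minimal sketch using uint16_t in place of WORD (the helper name is illustrative):

    #include <array>
    #include <cstdint>

    // Identity-style ramp: strictly increasing, first entry below the last,
    // and every step (257) far below the UINT16_MAX / 2 jump limit.
    std::array<uint16_t, 256> makeLinearGammaRamp() {
      std::array<uint16_t, 256> ramp = { };
      for (uint32_t i = 0; i < 256; i++)
        ramp[i] = static_cast<uint16_t>(i * 257);   // 0, 257, 514, ..., 65535
      return ramp;
    }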
@@ -987,7 +987,7 @@ namespace dxvk {
       entry->second.presenter = CreatePresenter(m_window, entry->second.frameLatencySignal);

       if (m_presentParams.hDeviceWindow == m_window && m_latencyTracking)
-        m_latencyTracker = m_device->createLatencyTracker(entry->second.presenter);
+        m_latencyTracker = m_device->createLatencyTracker(entry->second.presenter, entry->second.frameId + 1);
     }

     m_wctx = &entry->second;

@@ -44,6 +44,11 @@ namespace dxvk {
      }
    }

+  /**
+   * @brief Returns whether or not the sampler index is valid
+   *
+   * @param Sampler Sampler index (according to the API)
+   */
   inline bool InvalidSampler(DWORD Sampler) {
     if (Sampler >= caps::MaxTexturesPS && Sampler < D3DDMAPSAMPLER)
       return true;

@@ -54,6 +59,19 @@ namespace dxvk {
     return false;
   }

+  /**
+   * @brief The first sampler that belongs to the vertex shader according to our internal way of storing samplers
+   */
+  constexpr uint32_t FirstVSSamplerSlot = caps::MaxTexturesPS + 1;
+
+  /**
+   * @brief Remaps a sampler index by the API to an internal one
+   *
+   * Remaps the sampler index according to the way the API counts them to how we count and store them internally.
+   *
+   * @param Sampler Sampler index (according to API)
+   * @return DWORD Sampler index (according to our internal way of storing samplers)
+   */
   inline DWORD RemapSamplerState(DWORD Sampler) {
     if (Sampler >= D3DDMAPSAMPLER)
       Sampler = caps::MaxTexturesPS + (Sampler - D3DDMAPSAMPLER);

@@ -61,13 +79,62 @@ namespace dxvk {
     return Sampler;
   }

+  /**
+   * @brief Remaps the sampler from an index applying to the entire pipeline to one relative to the shader stage and returns the shader type
+   *
+   * The displacement map sampler will be treated as a 17th pixel shader sampler.
+   *
+   * @param Sampler Sampler index (according to our internal way of storing samplers)
+   * @return std::pair<DxsoProgramType, DWORD> Shader stage that it belongs to and the relative sampler index
+   */
   inline std::pair<DxsoProgramType, DWORD> RemapStateSamplerShader(DWORD Sampler) {
-    if (Sampler >= caps::MaxTexturesPS + 1)
-      return std::make_pair(DxsoProgramTypes::VertexShader, Sampler - caps::MaxTexturesPS - 1);
+    if (Sampler >= FirstVSSamplerSlot)
+      return std::make_pair(DxsoProgramTypes::VertexShader, Sampler - FirstVSSamplerSlot);

     return std::make_pair(DxsoProgramTypes::PixelShader, Sampler);
   }

+  /**
+   * @brief Returns whether the sampler belongs to the vertex shader.
+   *
+   * The displacement map sampler is part of a fixed function feature,
+   * so it does not belong to the vertex shader.
+   * Use IsDMAPSampler to check for that.
+   *
+   * @param Sampler Sampler index (according to our internal way of storing samplers)
+   */
+  inline bool IsVSSampler(uint32_t Sampler) {
+    return Sampler >= FirstVSSamplerSlot;
+  }
+
+  /**
+   * @brief Returns whether the sampler belongs to the pixel shader.
+   *
+   * The displacement map sampler is part of a fixed function feature,
+   * so (unlike in RemapStateSamplerShader) it does not belong to the pixel shader.
+   * Use IsDMAPSampler to check for that.
+   *
+   * @param Sampler Sampler index (according to our internal way of storing samplers)
+   */
+  inline bool IsPSSampler(uint32_t Sampler) {
+    return Sampler <= caps::MaxTexturesPS;
+  }
+
+  /**
+   * @brief Returns whether the sampler is the displacement map sampler
+   *
+   * @param Sampler Sampler index (according to our internal way of storing samplers)
+   */
+  inline bool IsDMAPSampler(uint32_t Sampler) {
+    return Sampler > caps::MaxTexturesPS;
+  }
+
+  /**
+   * @brief Remaps the sampler from an index (counted according to the API) to one relative to the shader stage and returns the shader type
+   *
+   * @param Sampler Sampler index (according to the API)
+   * @return std::pair<DxsoProgramType, DWORD> Shader stage that it belongs to and the relative sampler index
+   */
   inline std::pair<DxsoProgramType, DWORD> RemapSamplerShader(DWORD Sampler) {
     Sampler = RemapSamplerState(Sampler);

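
To make the slot arithmetic concrete, a standalone worked sketch of RemapSamplerState, assuming the stock D3D9 value D3DDMAPSAMPLER = 256 and caps::MaxTexturesPS = 16 (the "16 ps samplers + 1 dmap" layout mentioned earlier in this diff); the k-prefixed constants are stand-ins for the real ones:

    #include <cstdint>

    constexpr uint32_t kMaxTexturesPS      = 16u;
    constexpr uint32_t kDmapSamplerApi     = 256u;                 // D3DDMAPSAMPLER
    constexpr uint32_t kFirstVSSamplerSlot = kMaxTexturesPS + 1u;  // == 17

    constexpr uint32_t remapSamplerState(uint32_t sampler) {
      return sampler >= kDmapSamplerApi
        ? kMaxTexturesPS + (sampler - kDmapSamplerApi)
        : sampler;
    }

    static_assert(remapSamplerState(5)   == 5);    // PS sampler stays in place
    static_assert(remapSamplerState(256) == 16);   // DMAP becomes the "17th" PS slot
    static_assert(remapSamplerState(257) == 17);   // first VS sampler == FirstVSSamplerSlot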
@@ -243,6 +310,9 @@ namespace dxvk {
         uint32_t(offsets[1].y) > extent.height;
   }

+  /**
+   * @brief Mirrors D3DTEXTURESTAGESTATETYPE but starts at 0
+   */
   enum D3D9TextureStageStateTypes : uint32_t
   {
     DXVK_TSS_COLOROP = 0,

@@ -272,6 +342,12 @@ namespace dxvk {
   constexpr uint32_t DXVK_TSS_TCI_CAMERASPACEREFLECTIONVECTOR = 0x00030000;
   constexpr uint32_t DXVK_TSS_TCI_SPHEREMAP = 0x00040000;

+  /**
+   * @brief Remaps a texture stage type by the API to an internal one
+   *
+   * @param Type Texture stage type according to the API
+   * @return D3D9TextureStageStateTypes Texture stage type according to our internal way of storing them
+   */
   inline D3D9TextureStageStateTypes RemapTextureStageStateType(D3DTEXTURESTAGESTATETYPE Type) {
     return D3D9TextureStageStateTypes(Type - 1);
   }

@@ -35,6 +35,28 @@ namespace dxvk {
         const uint32_t registerId = ins.dst[operandId].idx[0].offset;
         m_analysis->uavInfos[registerId].accessAtomicOp = true;
         m_analysis->uavInfos[registerId].accessFlags |= VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_SHADER_WRITE_BIT;
+
+        // Check whether the atomic operation is order-invariant
+        DxvkAccessOp store = DxvkAccessOp::None;
+
+        switch (ins.op) {
+          case DxbcOpcode::AtomicAnd:  store = DxvkAccessOp::And;  break;
+          case DxbcOpcode::AtomicOr:   store = DxvkAccessOp::Or;   break;
+          case DxbcOpcode::AtomicXor:  store = DxvkAccessOp::Xor;  break;
+          case DxbcOpcode::AtomicIAdd: store = DxvkAccessOp::Add;  break;
+          case DxbcOpcode::AtomicIMax: store = DxvkAccessOp::IMax; break;
+          case DxbcOpcode::AtomicIMin: store = DxvkAccessOp::IMin; break;
+          case DxbcOpcode::AtomicUMax: store = DxvkAccessOp::UMax; break;
+          case DxbcOpcode::AtomicUMin: store = DxvkAccessOp::UMin; break;
+          default: break;
+        }
+
+        if (m_analysis->uavInfos[registerId].atomicStore == DxvkAccessOp::None)
+          m_analysis->uavInfos[registerId].atomicStore = store;
+
+        // Maintain ordering if the UAV is accessed via other operations as well
+        if (store == DxvkAccessOp::None || m_analysis->uavInfos[registerId].atomicStore != store)
+          m_analysis->uavInfos[registerId].nonInvariantAccess = true;
       }
     } break;

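
The whitelist above covers the commutative, associative atomics, where the final memory value does not depend on the order in which threads apply their operations; a small illustrative sketch (not DXVK code) of why those qualify and an exchange does not:

    #include <algorithm>
    #include <cstdint>

    // Order-invariant: either interleaving produces the same final value.
    uint32_t addEitherOrder(uint32_t mem, uint32_t a, uint32_t b) {
      return (mem + a) + b;                    // == (mem + b) + a
    }
    uint32_t umaxEitherOrder(uint32_t mem, uint32_t a, uint32_t b) {
      return std::max(std::max(mem, a), b);    // == std::max(std::max(mem, b), a)
    }

    // Not order-invariant: the result is whatever was written last, so an
    // exchange-style atomic falls through to 'default' above and the UAV
    // keeps nonInvariantAccess = true.
    uint32_t exchangeLastWriterWins(uint32_t /*mem*/, uint32_t last) {
      return last;
    }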
@@ -58,6 +80,7 @@ namespace dxvk {
         const uint32_t registerId = ins.src[operandId].idx[0].offset;
         m_analysis->uavInfos[registerId].accessFlags |= VK_ACCESS_SHADER_READ_BIT;
         m_analysis->uavInfos[registerId].sparseFeedback |= sparseFeedback;
+        m_analysis->uavInfos[registerId].nonInvariantAccess = true;
       } else if (ins.src[operandId].type == DxbcOperandType::Resource) {
         const uint32_t registerId = ins.src[operandId].idx[0].offset;
         m_analysis->srvInfos[registerId].sparseFeedback |= sparseFeedback;

@@ -68,6 +91,7 @@ namespace dxvk {
       if (ins.dst[0].type == DxbcOperandType::UnorderedAccessView) {
         const uint32_t registerId = ins.dst[0].idx[0].offset;
         m_analysis->uavInfos[registerId].accessFlags |= VK_ACCESS_SHADER_WRITE_BIT;
+        m_analysis->uavInfos[registerId].nonInvariantAccess = true;
       }
     } break;

@@ -75,11 +99,13 @@ namespace dxvk {
       const uint32_t registerId = ins.src[1].idx[0].offset;
       m_analysis->uavInfos[registerId].accessTypedLoad = true;
       m_analysis->uavInfos[registerId].accessFlags |= VK_ACCESS_SHADER_READ_BIT;
+      m_analysis->uavInfos[registerId].nonInvariantAccess = true;
     } break;

     case DxbcInstClass::TypedUavStore: {
       const uint32_t registerId = ins.dst[0].idx[0].offset;
       m_analysis->uavInfos[registerId].accessFlags |= VK_ACCESS_SHADER_WRITE_BIT;
+      m_analysis->uavInfos[registerId].nonInvariantAccess = true;
     } break;

     default:

@@ -20,6 +20,8 @@ namespace dxvk {
     bool accessTypedLoad = false;
     bool accessAtomicOp = false;
     bool sparseFeedback = false;
+    bool nonInvariantAccess = false;
+    DxvkAccessOp atomicStore = DxvkAccessOp::None;
     VkAccessFlags accessFlags = 0;
   };

@@ -1098,6 +1098,9 @@ namespace dxvk {
         : VK_DESCRIPTOR_TYPE_STORAGE_IMAGE;
       binding.access = m_analysis->uavInfos[registerId].accessFlags;

+      if (!m_analysis->uavInfos[registerId].nonInvariantAccess)
+        binding.accessOp = m_analysis->uavInfos[registerId].atomicStore;
+
       if (!(binding.access & VK_ACCESS_SHADER_WRITE_BIT))
         m_module.decorate(varId, spv::DecorationNonWritable);
       if (!(binding.access & VK_ACCESS_SHADER_READ_BIT))

@@ -1234,9 +1237,14 @@ namespace dxvk {
         : (isUav ? VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER : VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER);
       binding.viewType = VK_IMAGE_VIEW_TYPE_MAX_ENUM;
       binding.resourceBinding = bindingId;
-      binding.access = isUav
-        ? m_analysis->uavInfos[registerId].accessFlags
-        : VkAccessFlags(VK_ACCESS_SHADER_READ_BIT);
+      binding.access = VK_ACCESS_SHADER_READ_BIT;
+
+      if (isUav) {
+        binding.access = m_analysis->uavInfos[registerId].accessFlags;
+
+        if (!m_analysis->uavInfos[registerId].nonInvariantAccess)
+          binding.accessOp = m_analysis->uavInfos[registerId].atomicStore;
+      }

       if (useRawSsbo || isUav) {
         if (!(binding.access & VK_ACCESS_SHADER_WRITE_BIT))

@@ -1957,7 +1957,7 @@ namespace dxvk {

       if (m_moduleInfo.options.d3d9FloatEmulation == D3D9FloatEmulation::Enabled) {
         result.id = m_module.opNMin(typeId, result.id,
-          m_module.constfReplicant(FLT_MAX, result.type.ccount));
+          m_module.constfReplicant(std::numeric_limits<float>::max(), result.type.ccount));
       }
       break;
     case DxsoOpcode::Rsq:

@@ -1969,7 +1969,7 @@ namespace dxvk {

       if (m_moduleInfo.options.d3d9FloatEmulation == D3D9FloatEmulation::Enabled) {
         result.id = m_module.opNMin(typeId, result.id,
-          m_module.constfReplicant(FLT_MAX, result.type.ccount));
+          m_module.constfReplicant(std::numeric_limits<float>::max(), result.type.ccount));
       }
       break;
     case DxsoOpcode::Dp3: {

@@ -2029,7 +2029,7 @@ namespace dxvk {

       if (m_moduleInfo.options.d3d9FloatEmulation == D3D9FloatEmulation::Enabled) {
         result.id = m_module.opNMin(typeId, result.id,
-          m_module.constfReplicant(FLT_MAX, result.type.ccount));
+          m_module.constfReplicant(std::numeric_limits<float>::max(), result.type.ccount));
       }
       break;
     }

@@ -2040,7 +2040,7 @@ namespace dxvk {

       if (m_moduleInfo.options.d3d9FloatEmulation == D3D9FloatEmulation::Enabled) {
         result.id = m_module.opNMin(typeId, result.id,
-          m_module.constfReplicant(FLT_MAX, result.type.ccount));
+          m_module.constfReplicant(std::numeric_limits<float>::max(), result.type.ccount));
       }
       break;
     case DxsoOpcode::Pow: {

@@ -2102,7 +2102,7 @@ namespace dxvk {
       rcpLength.type = scalarType;
       rcpLength.id = m_module.opInverseSqrt(scalarTypeId, dot.id);
       if (m_moduleInfo.options.d3d9FloatEmulation == D3D9FloatEmulation::Enabled) {
-        rcpLength.id = m_module.opNMin(scalarTypeId, rcpLength.id, m_module.constf32(FLT_MAX));
+        rcpLength.id = m_module.opNMin(scalarTypeId, rcpLength.id, m_module.constf32(std::numeric_limits<float>::max()));
       }

       // r * rsq(r . r)

@@ -2216,7 +2216,7 @@ namespace dxvk {
       result.id = m_module.opLog2(typeId, result.id);
       if (m_moduleInfo.options.d3d9FloatEmulation == D3D9FloatEmulation::Enabled) {
         result.id = m_module.opNMax(typeId, result.id,
-          m_module.constfReplicant(-FLT_MAX, result.type.ccount));
+          m_module.constfReplicant(-std::numeric_limits<float>::max(), result.type.ccount));
       }
       break;
     case DxsoOpcode::Lrp:

@@ -2980,7 +2980,7 @@ void DxsoCompiler::emitControlFlowGenericLoop(

     auto SampleType = [&](DxsoSamplerType samplerType) {
       uint32_t bitOffset = m_programInfo.type() == DxsoProgramTypes::VertexShader
-        ? samplerIdx + caps::MaxTexturesPS + 1
+        ? samplerIdx + FirstVSSamplerSlot
        : samplerIdx;

       uint32_t isNull = m_spec.get(m_module, m_specUbo, SpecSamplerNull, bitOffset, 1);

@@ -86,7 +86,7 @@ namespace dxvk {
       case DxsoOpcode::SetP: return 3;
       case DxsoOpcode::TexLdl: return 3;
       case DxsoOpcode::BreakP: return 2;
-      default: Logger::warn("DxsoGetDefaultOpcodeLength: unknown opcode to get default length for."); return UINT32_MAX;
+      default: Logger::warn("DxsoGetDefaultOpcodeLength: unknown opcode to get default length for."); return std::numeric_limits<uint32_t>::max();
     }
   }

@@ -4,7 +4,7 @@

 namespace dxvk {

-  constexpr uint32_t InvalidOpcodeLength = UINT32_MAX;
+  constexpr uint32_t InvalidOpcodeLength = std::numeric_limits<uint32_t>::max();

   uint32_t DxsoGetDefaultOpcodeLength(DxsoOpcode opcode);

@@ -20,20 +20,37 @@ namespace dxvk {

   bool DxvkBarrierTracker::findRange(
     const DxvkAddressRange&         range,
-          DxvkAccess                accessType) const {
+          DxvkAccess                accessType,
+          DxvkAccessOp              accessOp) const {
     uint32_t rootIndex = computeRootIndex(range, accessType);
-    return findNode(range, rootIndex);
+    uint32_t nodeIndex = findNode(range, rootIndex);
+
+    if (likely(!nodeIndex || accessOp == DxvkAccessOp::None))
+      return nodeIndex;
+
+    // If we are checking for a specific order-invariant store
+    // op, the op must have been the only op used to access the
+    // resource, and the tracked range must cover the requested
+    // range in its entirety so we can rule out that other parts
+    // of the resource have been accessed in a different way.
+    auto& node = m_nodes[nodeIndex];
+
+    return node.payload.accessOps != DxvkAccessOps(accessOp)
+        || !node.addressRange.contains(range);
   }


   void DxvkBarrierTracker::insertRange(
     const DxvkAddressRange&         range,
-          DxvkAccess                accessType) {
-    uint32_t rootIndex = computeRootIndex(range, accessType);
+          DxvkAccess                accessType,
+          DxvkAccessOp              accessOp) {
+    DxvkBarrierPayload payload = { };
+    payload.accessOps.set(accessOp);

     // If we can just insert the node with no conflicts,
     // we don't have to do anything.
-    uint32_t nodeIndex = insertNode(range, rootIndex);
+    uint32_t rootIndex = computeRootIndex(range, accessType);
+    uint32_t nodeIndex = insertNode(range, rootIndex, payload);

     if (likely(!nodeIndex))
       return;
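
The extra parameter lets callers distinguish "any pending access" from "pending access by anything other than this one order-invariant op". A simplified standalone sketch of the decision the return value encodes; the types and names here are illustrative, not the real DXVK ones:

    #include <cstdint>

    struct Range {
      uint64_t lo = 0u, hi = 0u;
      bool contains(const Range& r) const { return lo <= r.lo && hi >= r.hi; }
    };

    // Mirrors the logic above: no hazard is reported only if the tracked node
    // was produced exclusively by the same access op and covers the whole
    // queried range; anything else keeps the conservative barrier behaviour.
    bool hasHazard(uint32_t trackedOpsMask, uint32_t queriedOpBit,
                   const Range& tracked, const Range& queried) {
      return trackedOpsMask != queriedOpBit || !tracked.contains(queried);
    }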
@ -41,6 +58,7 @@ namespace dxvk {
|
||||||
// If there's an existing node and it contains the entire
|
// If there's an existing node and it contains the entire
|
||||||
// range we want to add already, also don't do anything.
|
// range we want to add already, also don't do anything.
|
||||||
auto& node = m_nodes[nodeIndex];
|
auto& node = m_nodes[nodeIndex];
|
||||||
|
node.payload.accessOps.set(payload.accessOps);
|
||||||
|
|
||||||
if (node.addressRange.contains(range))
|
if (node.addressRange.contains(range))
|
||||||
return;
|
return;
|
||||||
|
@ -82,12 +100,14 @@ namespace dxvk {
|
||||||
mergedRange.rangeStart = std::min(mergedRange.rangeStart, node.addressRange.rangeStart);
|
mergedRange.rangeStart = std::min(mergedRange.rangeStart, node.addressRange.rangeStart);
|
||||||
mergedRange.rangeEnd = std::max(mergedRange.rangeEnd, node.addressRange.rangeEnd);
|
mergedRange.rangeEnd = std::max(mergedRange.rangeEnd, node.addressRange.rangeEnd);
|
||||||
|
|
||||||
|
payload.accessOps.set(node.payload.accessOps);
|
||||||
|
|
||||||
removeNode(nodeIndex, rootIndex);
|
removeNode(nodeIndex, rootIndex);
|
||||||
|
|
||||||
nodeIndex = findNode(range, rootIndex);
|
nodeIndex = findNode(range, rootIndex);
|
||||||
}
|
}
|
||||||
|
|
||||||
insertNode(mergedRange, rootIndex);
|
insertNode(mergedRange, rootIndex, payload);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
@ -166,7 +186,8 @@ namespace dxvk {
|
||||||
|
|
||||||
uint32_t DxvkBarrierTracker::insertNode(
|
uint32_t DxvkBarrierTracker::insertNode(
|
||||||
const DxvkAddressRange& range,
|
const DxvkAddressRange& range,
|
||||||
uint32_t rootIndex) {
|
uint32_t rootIndex,
|
||||||
|
DxvkBarrierPayload payload) {
|
||||||
// Check if the given root is valid at all
|
// Check if the given root is valid at all
|
||||||
uint64_t rootBit = uint64_t(1u) << (rootIndex - 1u);
|
uint64_t rootBit = uint64_t(1u) << (rootIndex - 1u);
|
||||||
|
|
||||||
|
@ -178,6 +199,7 @@ namespace dxvk {
|
||||||
auto& node = m_nodes[rootIndex];
|
auto& node = m_nodes[rootIndex];
|
||||||
node.header = 0;
|
node.header = 0;
|
||||||
node.addressRange = range;
|
node.addressRange = range;
|
||||||
|
node.payload = payload;
|
||||||
return 0;
|
return 0;
|
||||||
} else {
|
} else {
|
||||||
// Traverse tree and abort if we find any range
|
// Traverse tree and abort if we find any range
|
||||||
|
@ -209,6 +231,7 @@ namespace dxvk {
|
||||||
node.setRed(true);
|
node.setRed(true);
|
||||||
node.setParent(parentIndex);
|
node.setParent(parentIndex);
|
||||||
node.addressRange = range;
|
node.addressRange = range;
|
||||||
|
node.payload = payload;
|
||||||
|
|
||||||
// Only do the fixup to maintain red-black properties if
|
// Only do the fixup to maintain red-black properties if
|
||||||
// we haven't marked the root node as red in a deletion.
|
// we haven't marked the root node as red in a deletion.
|
||||||
|
@ -238,6 +261,7 @@ namespace dxvk {
|
||||||
childIndex = m_nodes[childIndex].child(0);
|
childIndex = m_nodes[childIndex].child(0);
|
||||||
|
|
||||||
node.addressRange = m_nodes[childIndex].addressRange;
|
node.addressRange = m_nodes[childIndex].addressRange;
|
||||||
|
node.payload = m_nodes[childIndex].payload;
|
||||||
removeNode(childIndex, rootIndex);
|
removeNode(childIndex, rootIndex);
|
||||||
} else {
|
} else {
|
||||||
// Deletion is expected to be exceptionally rare, to the point of
|
// Deletion is expected to be exceptionally rare, to the point of
|
||||||
|
@ -268,6 +292,7 @@ namespace dxvk {
|
||||||
node.setRed(child.isRed());
|
node.setRed(child.isRed());
|
||||||
|
|
||||||
node.addressRange = child.addressRange;
|
node.addressRange = child.addressRange;
|
||||||
|
node.payload = child.payload;
|
||||||
|
|
||||||
if (cl) m_nodes[cl].setParent(nodeIndex);
|
if (cl) m_nodes[cl].setParent(nodeIndex);
|
||||||
if (cr) m_nodes[cr].setParent(nodeIndex);
|
if (cr) m_nodes[cr].setParent(nodeIndex);
|
||||||
|
@ -378,6 +403,7 @@ namespace dxvk {
|
||||||
node.setChild(1, rr);
|
node.setChild(1, rr);
|
||||||
|
|
||||||
std::swap(node.addressRange, m_nodes[r].addressRange);
|
std::swap(node.addressRange, m_nodes[r].addressRange);
|
||||||
|
std::swap(node.payload, m_nodes[r].payload);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
@ -406,6 +432,7 @@ namespace dxvk {
|
||||||
node.setChild(1, l);
|
node.setChild(1, l);
|
||||||
|
|
||||||
std::swap(node.addressRange, m_nodes[l].addressRange);
|
std::swap(node.addressRange, m_nodes[l].addressRange);
|
||||||
|
std::swap(node.payload, m_nodes[l].payload);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
|
@@ -42,6 +42,14 @@ namespace dxvk {
  };


+ /**
+  * \brief Barrier node payload
+  */
+ struct DxvkBarrierPayload {
+   DxvkAccessOps accessOps = 0u;
+ };
+
+
  /**
   * \brief Barrier tree node
   *
@@ -62,6 +70,9 @@ namespace dxvk {
    // Address range of the node
    DxvkAddressRange addressRange = { };

+   // Node payload
+   DxvkBarrierPayload payload = { };
+
    void setRed(bool red) {
      header &= ~uint64_t(1u);
      header |= uint64_t(red);
@@ -117,21 +128,25 @@ namespace dxvk {
     *
     * \param [in] range Resource range
     * \param [in] accessType Access type
+    * \param [in] accessOp Access operation
     * \returns \c true if the range has a pending access
     */
    bool findRange(
      const DxvkAddressRange& range,
-     DxvkAccess accessType) const;
+     DxvkAccess accessType,
+     DxvkAccessOp accessOp) const;

    /**
     * \brief Inserts address range for a given access type
     *
     * \param [in] range Resource range
     * \param [in] accessType Access type
+    * \param [in] accessOp Access operation
     */
    void insertRange(
      const DxvkAddressRange& range,
-     DxvkAccess accessType);
+     DxvkAccess accessType,
+     DxvkAccessOp accessOp);

    /**
     * \brief Clears the entire structure
@@ -166,7 +181,8 @@ namespace dxvk {

    uint32_t insertNode(
      const DxvkAddressRange& range,
-     uint32_t rootIndex);
+     uint32_t rootIndex,
+     DxvkBarrierPayload payload);

    void removeNode(
      uint32_t nodeIndex,
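The payload change above boils down to tagging every tracked range with the set of order-invariant store ops that touched it, and only reporting a hazard when a new access uses a different op. A minimal standalone sketch of that idea (plain C++, not DXVK's actual classes; Tracker, Payload and the cookie key are made up for illustration):

    #include <cstdint>
    #include <unordered_map>

    enum class AccessOp : uint32_t { None, Or, And, Xor, Add };

    struct Payload {
      uint32_t accessOps = 0u;                        // one bit per AccessOp seen so far
      void set(AccessOp op) { accessOps |= 1u << uint32_t(op); }
    };

    class Tracker {
      std::unordered_map<uint64_t, Payload> m_ranges; // keyed by a resource cookie (simplified)
    public:
      void insertRange(uint64_t cookie, AccessOp op) {
        m_ranges[cookie].set(op);                     // merge into the existing payload, like insertRange()
      }

      bool findRange(uint64_t cookie, AccessOp op) const {
        auto it = m_ranges.find(cookie);
        if (it == m_ranges.end())
          return false;                               // nothing pending, no barrier needed
        if (op == AccessOp::None)
          return true;                                // a regular access conflicts with any pending write
        // Only a conflict if the range was also touched by a *different* op
        return it->second.accessOps != (1u << uint32_t(op));
      }
    };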
File diff suppressed because it is too large
@@ -763,11 +763,14 @@ namespace dxvk {
     * \param [in] offset Draw buffer offset
     * \param [in] count Number of draws
     * \param [in] stride Stride between dispatch calls
+    * \param [in] unroll Whether to unroll multiple draws if
+    *    there are any potential data dependencies between them.
     */
    void drawIndirect(
      VkDeviceSize offset,
      uint32_t count,
-     uint32_t stride);
+     uint32_t stride,
+     bool unroll);

    /**
     * \brief Indirect draw call
@@ -809,11 +812,14 @@ namespace dxvk {
     * \param [in] offset Draw buffer offset
     * \param [in] count Number of draws
     * \param [in] stride Stride between dispatch calls
+    * \param [in] unroll Whether to unroll multiple draws if
+    *    there are any potential data dependencies between them.
     */
    void drawIndexedIndirect(
      VkDeviceSize offset,
      uint32_t count,
-     uint32_t stride);
+     uint32_t stride,
+     bool unroll);

    /**
     * \brief Indirect indexed draw call
@@ -832,14 +838,14 @@ namespace dxvk {
      uint32_t stride);

    /**
-    * \brief Transform feddback draw call
+    * \brief Transform feedback draw call
+    *
-    * \param [in] counterBuffer Xfb counter buffer
+    * \param [in] counterOffset Draw count offset
     * \param [in] counterDivisor Vertex stride
     * \param [in] counterBias Counter bias
     */
    void drawIndirectXfb(
-     const DxvkBufferSlice& counterBuffer,
+     VkDeviceSize counterOffset,
      uint32_t counterDivisor,
      uint32_t counterBias);

@@ -1435,8 +1441,8 @@ namespace dxvk {

    DxvkGpuQueryManager m_queryManager;

-   DxvkGlobalPipelineBarrier m_globalRoGraphicsBarrier;
-   DxvkGlobalPipelineBarrier m_globalRwGraphicsBarrier;
+   DxvkGlobalPipelineBarrier m_renderPassBarrierSrc = { };
+   DxvkGlobalPipelineBarrier m_renderPassBarrierDst = { };

    DxvkRenderTargetLayouts m_rtLayouts = { };

@@ -1453,7 +1459,6 @@ namespace dxvk {
    std::vector<VkImageMemoryBarrier2> m_imageLayoutTransitions;

    std::vector<util::DxvkDebugLabel> m_debugLabelStack;
-   bool m_debugLabelInternalActive = false;

    Rc<DxvkLatencyTracker> m_latencyTracker;
    uint64_t m_latencyFrameId = 0u;
@@ -1590,6 +1595,20 @@ namespace dxvk {
      const Rc<DxvkBuffer>& buffer,
      VkDeviceSize offset);

+   template<bool Indexed>
+   void drawIndirectGeneric(
+     VkDeviceSize offset,
+     uint32_t count,
+     uint32_t stride,
+     bool unroll);
+
+   template<bool Indexed>
+   void drawIndirectCountGeneric(
+     VkDeviceSize offset,
+     VkDeviceSize countOffset,
+     uint32_t maxCount,
+     uint32_t stride);
+
    void resolveImageHw(
      const Rc<DxvkImage>& dstImage,
      const Rc<DxvkImage>& srcImage,
@@ -1691,7 +1710,9 @@ namespace dxvk {

    void unbindGraphicsPipeline();
    bool updateGraphicsPipeline();
-   bool updateGraphicsPipelineState(DxvkGlobalPipelineBarrier srcBarrier);
+   bool updateGraphicsPipelineState();

+   uint32_t getGraphicsPipelineDebugColor() const;
+
    template<VkPipelineBindPoint BindPoint>
    void resetSpecConstants(
@@ -1757,34 +1778,61 @@ namespace dxvk {
    template<bool Indexed, bool Indirect>
    bool commitGraphicsState();

-   template<bool DoEmit>
-   void commitComputeBarriers();
+   template<VkPipelineBindPoint BindPoint>
+   bool checkResourceHazards(
+     const DxvkBindingLayout& layout,
+     uint32_t setMask);
+
-   void commitComputePostBarriers();
+   bool checkComputeHazards();

-   template<bool Indexed, bool Indirect, bool DoEmit>
-   void commitGraphicsBarriers();
+   template<bool Indexed, bool Indirect>
+   bool checkGraphicsHazards();

-   template<bool DoEmit>
+   template<VkPipelineBindPoint BindPoint>
    bool checkBufferBarrier(
      const DxvkBufferSlice& bufferSlice,
-     VkPipelineStageFlags stages,
-     VkAccessFlags access);
+     VkAccessFlags access,
+     DxvkAccessOp accessOp);

-   template<bool DoEmit>
+   template<VkPipelineBindPoint BindPoint>
    bool checkBufferViewBarrier(
      const Rc<DxvkBufferView>& bufferView,
-     VkPipelineStageFlags stages,
-     VkAccessFlags access);
+     VkAccessFlags access,
+     DxvkAccessOp accessOp);

-   template<bool DoEmit>
+   template<VkPipelineBindPoint BindPoint>
    bool checkImageViewBarrier(
      const Rc<DxvkImageView>& imageView,
-     VkPipelineStageFlags stages,
-     VkAccessFlags access);
+     VkAccessFlags access,
+     DxvkAccessOp accessOp);

+   template<VkPipelineBindPoint BindPoint>
+   DxvkAccessFlags getAllowedStorageHazards() {
+     if (m_barrierControl.isClear() || m_flags.test(DxvkContextFlag::ForceWriteAfterWriteSync))
+       return DxvkAccessFlags();
+
+     if constexpr (BindPoint == VK_PIPELINE_BIND_POINT_COMPUTE) {
+       // If there are any pending accesses that are not directly related
+       // to shader dispatches, always insert a barrier if there is a hazard.
+       VkPipelineStageFlags2 stageMask = VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT
+         | VK_PIPELINE_STAGE_2_DRAW_INDIRECT_BIT;
+
+       if (!m_execBarriers.hasPendingStages(~stageMask)) {
+         if (m_barrierControl.test(DxvkBarrierControl::ComputeAllowReadWriteOverlap))
+           return DxvkAccessFlags(DxvkAccess::Write, DxvkAccess::Read);
+         else if (m_barrierControl.test(DxvkBarrierControl::ComputeAllowWriteOnlyOverlap))
+           return DxvkAccessFlags(DxvkAccess::Write);
+       }
+     } else {
+       // For graphics, the only type of unrelated access we have to worry about
+       // is transform feedback writes, in which case inserting a barrier is fine.
+       if (m_barrierControl.test(DxvkBarrierControl::GraphicsAllowReadWriteOverlap))
+         return DxvkAccessFlags(DxvkAccess::Write, DxvkAccess::Read);
+     }
+
+     return DxvkAccessFlags();
+   }
+
-   bool canIgnoreWawHazards(
-     VkPipelineStageFlags stages);

    void emitMemoryBarrier(
      VkPipelineStageFlags srcStages,
@@ -1876,7 +1924,15 @@ namespace dxvk {
      const VkImageSubresourceRange& subresources,
      VkImageLayout srcLayout,
      VkPipelineStageFlags2 srcStages,
-     VkAccessFlags2 srcAccess);
+     VkAccessFlags2 srcAccess,
+     DxvkAccessOp accessOp);
+
+   void accessImage(
+     DxvkCmdBuffer cmdBuffer,
+     const DxvkImageView& imageView,
+     VkPipelineStageFlags2 srcStages,
+     VkAccessFlags2 srcAccess,
+     DxvkAccessOp accessOp);
+
    void accessImage(
      DxvkCmdBuffer cmdBuffer,
@@ -1887,7 +1943,8 @@ namespace dxvk {
      VkAccessFlags2 srcAccess,
      VkImageLayout dstLayout,
      VkPipelineStageFlags2 dstStages,
-     VkAccessFlags2 dstAccess);
+     VkAccessFlags2 dstAccess,
+     DxvkAccessOp accessOp);

    void accessBuffer(
      DxvkCmdBuffer cmdBuffer,
@@ -1895,7 +1952,8 @@ namespace dxvk {
      VkDeviceSize offset,
      VkDeviceSize size,
      VkPipelineStageFlags2 srcStages,
-     VkAccessFlags2 srcAccess);
+     VkAccessFlags2 srcAccess,
+     DxvkAccessOp accessOp);

    void accessBuffer(
      DxvkCmdBuffer cmdBuffer,
@@ -1905,13 +1963,31 @@ namespace dxvk {
      VkPipelineStageFlags2 srcStages,
      VkAccessFlags2 srcAccess,
      VkPipelineStageFlags2 dstStages,
-     VkAccessFlags2 dstAccess);
+     VkAccessFlags2 dstAccess,
+     DxvkAccessOp accessOp);
+
+   void accessBuffer(
+     DxvkCmdBuffer cmdBuffer,
+     const DxvkBufferSlice& bufferSlice,
+     VkPipelineStageFlags2 srcStages,
+     VkAccessFlags2 srcAccess,
+     DxvkAccessOp accessOp);
+
+   void accessBuffer(
+     DxvkCmdBuffer cmdBuffer,
+     const DxvkBufferSlice& bufferSlice,
+     VkPipelineStageFlags2 srcStages,
+     VkAccessFlags2 srcAccess,
+     VkPipelineStageFlags2 dstStages,
+     VkAccessFlags2 dstAccess,
+     DxvkAccessOp accessOp);
+
    void accessBuffer(
      DxvkCmdBuffer cmdBuffer,
      DxvkBufferView& bufferView,
      VkPipelineStageFlags2 srcStages,
-     VkAccessFlags2 srcAccess);
+     VkAccessFlags2 srcAccess,
+     DxvkAccessOp accessOp);

    void accessBuffer(
      DxvkCmdBuffer cmdBuffer,
@@ -1919,7 +1995,17 @@ namespace dxvk {
      VkPipelineStageFlags2 srcStages,
      VkAccessFlags2 srcAccess,
      VkPipelineStageFlags2 dstStages,
-     VkAccessFlags2 dstAccess);
+     VkAccessFlags2 dstAccess,
+     DxvkAccessOp accessOp);
+
+   void accessDrawBuffer(
+     VkDeviceSize offset,
+     uint32_t count,
+     uint32_t stride,
+     uint32_t size);
+
+   void accessDrawCountBuffer(
+     VkDeviceSize offset);
+
    void flushPendingAccesses(
      DxvkBuffer& buffer,
@@ -1946,20 +2032,24 @@ namespace dxvk {
      DxvkBuffer& buffer,
      VkDeviceSize offset,
      VkDeviceSize size,
-     DxvkAccess access);
+     DxvkAccess access,
+     DxvkAccessOp accessOp);

    bool resourceHasAccess(
      DxvkBufferView& bufferView,
-     DxvkAccess access);
+     DxvkAccess access,
+     DxvkAccessOp accessOp);

    bool resourceHasAccess(
      DxvkImage& image,
      const VkImageSubresourceRange& subresources,
-     DxvkAccess access);
+     DxvkAccess access,
+     DxvkAccessOp accessOp);

    bool resourceHasAccess(
      DxvkImageView& imageView,
-     DxvkAccess access);
+     DxvkAccess access,
+     DxvkAccessOp accessOp);

    DxvkBarrierBatch& getBarrierBatch(
      DxvkCmdBuffer cmdBuffer);
@@ -1980,34 +2070,55 @@ namespace dxvk {
      const Rc<DxvkImage>& image,
      DxvkAccess access);

-   template<typename Pred>
+   template<VkPipelineBindPoint BindPoint, typename Pred>
    bool checkResourceBarrier(
      const Pred& pred,
-     VkPipelineStageFlags stages,
      VkAccessFlags access) {
-     // Check for read-after-write first, this is common
+     // If we're only reading the resource, only pending
+     // writes matter for synchronization purposes.
      bool hasPendingWrite = pred(DxvkAccess::Write);

-     if (access & vk::AccessReadMask)
+     if (!(access & vk::AccessWriteMask))
        return hasPendingWrite;

-     // Check for a write-after-write hazard, but
-     // ignore it if there are no reads involved.
-     bool ignoreWaW = canIgnoreWawHazards(stages);
+     if (hasPendingWrite) {
+       // If there is a write-after-write hazard and synchronization
+       // for those is not explicitly disabled, insert a barrier.
+       DxvkAccessFlags allowedHazards = getAllowedStorageHazards<BindPoint>();

-     if (hasPendingWrite && !ignoreWaW)
-       return true;
+       if (!allowedHazards.test(DxvkAccess::Write))
+         return true;

-     // Check whether there are any pending reads.
+       // Skip barrier if overlapping read-modify-write ops are allowed.
+       // This includes shader atomics, but also non-atomic load-stores.
+       if (allowedHazards.test(DxvkAccess::Read))
+         return false;
+
+       // Otherwise, check if there is a read-after-write hazard.
+       if (access & vk::AccessReadMask)
+         return true;
+     }
+
+     // Check if there are any pending reads to avoid write-after-read issues.
      return pred(DxvkAccess::Read);
    }

+   void invalidateWriteAfterWriteTracking();
+
    void beginRenderPassDebugRegion();

-   void beginInternalDebugRegion(
-     const VkDebugUtilsLabelEXT& label);
+   template<VkPipelineBindPoint BindPoint>
+   void beginBarrierControlDebugRegion();

-   void endInternalDebugRegion();
+   void pushDebugRegion(
+     const VkDebugUtilsLabelEXT& label,
+     util::DxvkDebugLabelType type);
+
+   void popDebugRegion(
+     util::DxvkDebugLabelType type);
+
+   bool hasDebugRegion(
+     util::DxvkDebugLabelType type);
+
    void beginActiveDebugRegions();

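The rewritten checkResourceBarrier above reduces to a small decision tree. A plain C++ sketch of the same flow, with illustrative names rather than DXVK's API (pendingWrite/pendingRead stand in for the tracker predicate, allowWrite/allowRead for the getAllowedStorageHazards result):

    enum AccessBits : unsigned { Read = 1u, Write = 2u };

    bool needsBarrier(bool pendingWrite, bool pendingRead,
                      unsigned access, bool allowWrite, bool allowRead) {
      if (!(access & Write))
        return pendingWrite;          // read-only access: only pending writes matter

      if (pendingWrite) {
        if (!allowWrite)
          return true;                // write-after-write must be synchronized
        if (allowRead)
          return false;               // overlapping read-modify-write explicitly allowed
        if (access & Read)
          return true;                // read-after-write hazard
      }

      return pendingRead;             // write-after-read
    }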
@@ -20,10 +20,11 @@ namespace dxvk {
   * of the graphics and compute pipelines
   * has changed and/or needs to be updated.
   */
- enum class DxvkContextFlag : uint32_t {
+ enum class DxvkContextFlag : uint64_t {
    GpRenderPassBound,          ///< Render pass is currently bound
    GpRenderPassSuspended,      ///< Render pass is currently suspended
    GpRenderPassSecondaryCmd,   ///< Render pass uses secondary command buffer
+   GpRenderPassSideEffects,    ///< Render pass has side effects
    GpXfbActive,                ///< Transform feedback is enabled
    GpDirtyFramebuffer,         ///< Framebuffer binding is out of date
    GpDirtyPipeline,            ///< Graphics pipeline binding is out of date
@@ -56,10 +57,12 @@ namespace dxvk {
    DirtyDrawBuffer,            ///< Indirect argument buffer is dirty
    DirtyPushConstants,         ///< Push constant data has changed

+   ForceWriteAfterWriteSync,   ///< Ignores barrier control flags for write-after-write hazards
+
    Count
  };

- static_assert(uint32_t(DxvkContextFlag::Count) <= 32u);
+ static_assert(uint32_t(DxvkContextFlag::Count) <= 64u);

  using DxvkContextFlags = Flags<DxvkContextFlag>;

@@ -85,8 +88,11 @@ namespace dxvk {
   * synchronize implicitly.
   */
  enum class DxvkBarrierControl : uint32_t {
-   IgnoreWriteAfterWrite = 1,
-   IgnoreGraphicsBarriers = 2,
+   // Ignores write-after-write hazard
+   ComputeAllowWriteOnlyOverlap  = 0,
+   ComputeAllowReadWriteOverlap  = 1,
+
+   GraphicsAllowReadWriteOverlap = 2,
  };

  using DxvkBarrierControlFlags = Flags<DxvkBarrierControl>;
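Widening DxvkContextFlag to a 64-bit underlying type is what lets the flag count grow past 32. A self-contained sketch in the spirit of the Flags<E> bit-set used here (this is not DXVK's actual Flags implementation, just the idea):

    #include <cstdint>
    #include <type_traits>

    template<typename E>
    class Flags {
      using U = std::underlying_type_t<E>;   // storage must fit one bit per enumerator
      U m_bits = 0;
    public:
      void set(E bit)        { m_bits |= U(1) << U(bit); }
      bool test(E bit) const { return m_bits & (U(1) << U(bit)); }
    };

    enum class ContextFlag : uint64_t {
      GpRenderPassBound,
      GpRenderPassSideEffects,
      ForceWriteAfterWriteSync,
      // ... up to 64 enumerators fit now
      Count
    };

    static_assert(uint64_t(ContextFlag::Count) <= 64u);

    using ContextFlags = Flags<ContextFlag>;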
@@ -309,15 +309,16 @@ namespace dxvk {


  Rc<DxvkLatencyTracker> DxvkDevice::createLatencyTracker(
-   const Rc<Presenter>& presenter) {
+   const Rc<Presenter>& presenter,
+   uint64_t firstFrameId ) {
    if (m_options.latencySleep == Tristate::False)
-     return new FramePacer(m_options);
+     return new FramePacer(m_options, firstFrameId);

    if (m_options.latencySleep == Tristate::Auto) {
      if (m_features.nvLowLatency2)
        return new DxvkReflexLatencyTrackerNv(presenter);
      else
-       return new FramePacer(m_options);
+       return new FramePacer(m_options, firstFrameId);
    }

    return new DxvkBuiltInLatencyTracker(presenter,

@@ -489,7 +489,8 @@ namespace dxvk {
   * \param [in] presenter Presenter instance
   */
  Rc<DxvkLatencyTracker> createLatencyTracker(
-   const Rc<Presenter>& presenter);
+   const Rc<Presenter>& presenter,
+   uint64_t firstFrameId = 17);

  /**
   * \brief Presents a swap chain image

@@ -955,8 +955,7 @@ namespace dxvk {
    if (m_shaders.gs->flags().test(DxvkShaderFlag::HasTransformFeedback)) {
      m_flags.set(DxvkGraphicsPipelineFlag::HasTransformFeedback);

-     m_barrier.stages |= VK_PIPELINE_STAGE_DRAW_INDIRECT_BIT
-       | VK_PIPELINE_STAGE_TRANSFORM_FEEDBACK_BIT_EXT;
+     m_barrier.stages |= VK_PIPELINE_STAGE_TRANSFORM_FEEDBACK_BIT_EXT;
      m_barrier.access |= VK_ACCESS_TRANSFORM_FEEDBACK_COUNTER_READ_BIT_EXT
        | VK_ACCESS_TRANSFORM_FEEDBACK_COUNTER_WRITE_BIT_EXT
        | VK_ACCESS_TRANSFORM_FEEDBACK_WRITE_BIT_EXT;
@@ -966,9 +965,13 @@ namespace dxvk {
      m_flags.set(DxvkGraphicsPipelineFlag::HasRasterizerDiscard);
    }

-   if (m_barrier.access & VK_ACCESS_SHADER_WRITE_BIT)
+   if (m_barrier.access & VK_ACCESS_SHADER_WRITE_BIT) {
      m_flags.set(DxvkGraphicsPipelineFlag::HasStorageDescriptors);

+     if (layout->layout().getHazardousSetMask())
+       m_flags.set(DxvkGraphicsPipelineFlag::UnrollMergedDraws);
+   }

    if (m_shaders.fs != nullptr) {
      if (m_shaders.fs->flags().test(DxvkShaderFlag::HasSampleRateShading))
        m_flags.set(DxvkGraphicsPipelineFlag::HasSampleRateShading);

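The hunk above only marks a pipeline for draw unrolling when it actually contains storage writes that are not order-invariant. A tiny illustrative sketch of that decision (names are made up, not DXVK's):

    #include <cstdint>

    struct PipelineInfo {
      bool     hasStorageWrites = false;  // any binding with VK_ACCESS_SHADER_WRITE_BIT
      uint32_t hazardousSetMask = 0u;     // sets whose writes are *not* order-invariant
    };

    // Merged indirect draws only need to be unrolled (and fenced off from each
    // other) when consecutive draws could depend on each other's stores.
    bool shouldUnrollMergedDraws(const PipelineInfo& info) {
      return info.hasStorageWrites && info.hazardousSetMask != 0u;
    }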
@@ -31,6 +31,7 @@ namespace dxvk {
    HasStorageDescriptors,
    HasSampleRateShading,
    HasSampleMaskExport,
+   UnrollMergedDraws,
  };

  using DxvkGraphicsPipelineFlags = Flags<DxvkGraphicsPipelineFlag>;

@@ -128,7 +128,8 @@ namespace dxvk {
    virtual void notifyCpuPresentEnd(
      uint64_t frameId) = 0;

-   virtual void notifySubmit() { }
+   virtual void notifySubmit(
+     uint64_t frameId) { }
    virtual void notifyPresent(
      uint64_t frameId) { }

@@ -205,7 +205,7 @@ namespace dxvk {


  DxvkBindingLayout::DxvkBindingLayout(VkShaderStageFlags stages)
- : m_pushConst { 0, 0, 0 }, m_pushConstStages(0), m_stages(stages) {
+ : m_pushConst { 0, 0, 0 }, m_pushConstStages(0), m_stages(stages), m_hazards(0u) {

  }

@@ -236,6 +236,9 @@ namespace dxvk {
  void DxvkBindingLayout::addBinding(const DxvkBindingInfo& binding) {
    uint32_t set = binding.computeSetIndex();
    m_bindings[set].addBinding(binding);
+
+   if ((binding.access & VK_ACCESS_2_SHADER_WRITE_BIT) && binding.accessOp == DxvkAccessOp::None)
+     m_hazards |= 1u << set;
  }


@@ -260,6 +263,8 @@ namespace dxvk {

    addPushConstantRange(layout.m_pushConst);
    m_pushConstStages |= layout.m_pushConstStages;
+
+   m_hazards |= layout.m_hazards;
  }


@@ -11,6 +11,27 @@ namespace dxvk {

  class DxvkDevice;

+ /**
+  * \brief Order-invariant atomic access operation
+  *
+  * Information used to optimize barriers when a resource
+  * is accessed exclusively via order-invariant stores.
+  */
+ enum class DxvkAccessOp : uint32_t {
+   None  = 0,
+   Or    = 1,
+   And   = 2,
+   Xor   = 3,
+   Add   = 4,
+   IMin  = 5,
+   IMax  = 6,
+   UMin  = 7,
+   UMax  = 8,
+ };
+
+ using DxvkAccessOps = Flags<DxvkAccessOp>;
+
+
  /**
   * \brief Descriptor set indices
   */
@@ -37,6 +58,7 @@ namespace dxvk {
    VkShaderStageFlagBits stage;    ///< Shader stage
    VkAccessFlags access;           ///< Access mask for the resource
    VkBool32 uboSet;                ///< Whether to include this in the UBO set
+   DxvkAccessOp accessOp;          ///< Order-invariant store type, if any

    /**
     * \brief Computes descriptor set index for the given binding
@@ -315,6 +337,16 @@ namespace dxvk {
      return m_stages;
    }

+   /**
+    * \brief Queries hazardous sets
+    *
+    * \returns Mask of sets with storage descriptors
+    *    that are not accessed in an order-invariant way.
+    */
+   uint32_t getHazardousSetMask() const {
+     return m_hazards;
+   }
+
    /**
     * \brief Queries defined descriptor set layouts
     *
@@ -372,6 +404,7 @@ namespace dxvk {
    VkPushConstantRange m_pushConst;
    VkShaderStageFlags m_pushConstStages;
    VkShaderStageFlags m_stages;
+   uint32_t m_hazards;

  };

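A hedged sketch of how a binding layout can accumulate the hazardous-set mask from per-binding access ops (standalone and simplified; DXVK's real DxvkBindingInfo carries many more fields):

    #include <cstdint>

    enum class AccessOp : uint32_t { None, Or, And, Xor, Add, IMin, IMax, UMin, UMax };

    struct BindingInfo {
      uint32_t set      = 0u;              // descriptor set index
      bool     writes   = false;           // has VK_ACCESS_2_SHADER_WRITE_BIT
      AccessOp accessOp = AccessOp::None;  // order-invariant store type, if any
    };

    struct BindingLayout {
      uint32_t hazards = 0u;

      void addBinding(const BindingInfo& b) {
        // A write only counts as hazardous if it is not declared order-invariant.
        if (b.writes && b.accessOp == AccessOp::None)
          hazards |= 1u << b.set;
      }

      uint32_t getHazardousSetMask() const { return hazards; }
    };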
@@ -48,7 +48,7 @@ namespace dxvk {
    DxvkLatencyInfo latencyInfo,
    DxvkSubmitStatus* status) {
    if (latencyInfo.tracker)
-     latencyInfo.tracker->notifySubmit();
+     latencyInfo.tracker->notifySubmit(latencyInfo.frameId);
    std::unique_lock<dxvk::mutex> lock(m_mutex);

    m_finishCond.wait(lock, [this] {

@@ -580,6 +580,27 @@ namespace dxvk {
      m_trackId = 0u;
    }

+   /**
+    * \brief Checks whether the buffer has been used for gfx stores
+    *
+    * \returns \c true if any graphics pipeline has written this
+    *    resource via transform feedback or a storage descriptor.
+    */
+   bool hasGfxStores() const {
+     return m_hasGfxStores;
+   }
+
+   /**
+    * \brief Tracks graphics pipeline side effects
+    *
+    * Must be called whenever the resource is written via graphics
+    * pipeline storage descriptors or transform feedback.
+    * \returns \c true if side effects were already tracked.
+    */
+   bool trackGfxStores() {
+     return std::exchange(m_hasGfxStores, true);
+   }
+
    /**
     * \brief Queries sparse page table
     *
@@ -622,6 +643,8 @@ namespace dxvk {
    uint64_t m_trackId = { 0u };
    uint64_t m_cookie = { 0u };

+   bool m_hasGfxStores = false;
+
    static constexpr uint64_t getIncrement(DxvkAccess access) {
      return uint64_t(1u) << (uint32_t(access) * 20u);
    }

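trackGfxStores() above is the classic "sticky flag" latch: std::exchange sets the flag and returns what it was before, so the caller can react only to the very first graphics-side write. A standalone sketch of the pattern:

    #include <utility>

    class Resource {
      bool m_hasGfxStores = false;
    public:
      bool hasGfxStores() const { return m_hasGfxStores; }

      // Returns the previous value: false on the first call, true afterwards.
      bool trackGfxStores() { return std::exchange(m_hasGfxStores, true); }
    };

    // Usage: only the first graphics-side store needs extra handling, e.g.
    //   if (!resource.trackGfxStores()) { /* force a barrier or flush once */ }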
@@ -4,6 +4,15 @@

  namespace dxvk::util {

+ /**
+  * \brief Debug utils label type
+  */
+ enum class DxvkDebugLabelType : uint32_t {
+   External,                 ///< App-provided scope
+   InternalRenderPass,       ///< Internal render pass markers
+   InternalBarrierControl,   ///< Barrier control markers
+ };
+
  /**
   * \brief Debug label wrapper
   *
@@ -16,12 +25,16 @@ namespace dxvk::util {

    DxvkDebugLabel() = default;

-   DxvkDebugLabel(const VkDebugUtilsLabelEXT& label)
-   : m_text(label.pLabelName ? label.pLabelName : "") {
+   DxvkDebugLabel(const VkDebugUtilsLabelEXT& label, DxvkDebugLabelType type)
+   : m_text(label.pLabelName ? label.pLabelName : ""), m_type(type) {
      for (uint32_t i = 0; i < m_color.size(); i++)
        m_color[i] = label.color[i];
    }

+   DxvkDebugLabelType type() const {
+     return m_type;
+   }
+
    VkDebugUtilsLabelEXT get() const {
      VkDebugUtilsLabelEXT label = { VK_STRUCTURE_TYPE_DEBUG_UTILS_LABEL_EXT };
      label.pLabelName = m_text.c_str();
@@ -34,6 +47,7 @@ namespace dxvk::util {

    std::string m_text;
    std::array<float, 4> m_color = { };
+   DxvkDebugLabelType m_type;

  };

@@ -2,13 +2,14 @@
  #include "dxvk_framepacer_mode_low_latency.h"
  #include "dxvk_framepacer_mode_min_latency.h"
  #include "dxvk_options.h"
+ #include "../../util/util_flush.h"
  #include "../../util/util_env.h"
  #include "../../util/log/log.h"

  namespace dxvk {


- FramePacer::FramePacer( const DxvkOptions& options ) {
+ FramePacer::FramePacer( const DxvkOptions& options, uint64_t firstFrameId ) {
    // we'll default to LOW_LATENCY in the draft-PR for now, for demonstration purposes,
    // highlighting the generally much better input lag and medium-term time consistency.
    // although MAX_FRAME_LATENCY has advantages in many games and is likely the better default,
@@ -40,11 +41,15 @@ namespace dxvk {

    case FramePacerMode::LOW_LATENCY:
      Logger::info( "Frame pace: low-latency" );
+     GpuFlushTracker::m_minPendingSubmissions = 1;
+     GpuFlushTracker::m_minChunkCount = 1;
      m_mode = std::make_unique<LowLatencyMode>(mode, &m_latencyMarkersStorage, options);
      break;

    case FramePacerMode::MIN_LATENCY:
      Logger::info( "Frame pace: min-latency" );
+     GpuFlushTracker::m_minPendingSubmissions = 1;
+     GpuFlushTracker::m_minChunkCount = 1;
      m_mode = std::make_unique<MinLatencyMode>(mode, &m_latencyMarkersStorage);
      break;
    }
@@ -53,9 +58,20 @@ namespace dxvk {
      gpuStart.store(0);
    }

-   // be consistent that every frame has a gpuReady event from the previous frame
-   LatencyMarkers* m = m_latencyMarkersStorage.getMarkers(DXGI_MAX_SWAP_CHAIN_BUFFERS+1);
-   m->gpuReady.push_back(high_resolution_clock::now());
+   // be consistent that every frame has a gpuReady event from finishing the previous frame
+   LatencyMarkers* m = m_latencyMarkersStorage.getMarkers( firstFrameId );
+   m->gpuReady.push_back( high_resolution_clock::now() );
+   m_gpuStarts[ firstFrameId % m_gpuStarts.size() ] = gpuReadyBit;
+
+   LatencyMarkersTimeline& timeline = m_latencyMarkersStorage.m_timeline;
+   timeline.cpuFinished.store   ( firstFrameId-1 );
+   timeline.gpuStart.store      ( firstFrameId-1 );
+   timeline.gpuFinished.store   ( firstFrameId-1 );
+   timeline.frameFinished.store ( firstFrameId-1 );
+
+   m_mode->signalGpuStart       ( firstFrameId-1 );
+   m_mode->signalRenderFinished ( firstFrameId-1 );
+   m_mode->signalCsFinished     ( firstFrameId );
  }


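The constructor change above seeds every timeline counter as if frame firstFrameId-1 had already completed, so the pacer never waits on events that were never recorded. A compressed, self-contained sketch of that seeding idea with plain atomics (names are illustrative):

    #include <atomic>
    #include <cstdint>

    struct Timeline {
      std::atomic<uint64_t> cpuFinished   { 0 };
      std::atomic<uint64_t> gpuStart      { 0 };
      std::atomic<uint64_t> gpuFinished   { 0 };
      std::atomic<uint64_t> frameFinished { 0 };

      // Pretend everything up to firstFrameId-1 is already done so the
      // first real frame never blocks on a missing predecessor.
      void seed(uint64_t firstFrameId) {
        cpuFinished.store(firstFrameId - 1);
        gpuStart.store(firstFrameId - 1);
        gpuFinished.store(firstFrameId - 1);
        frameFinished.store(firstFrameId - 1);
      }
    };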
@@ -23,7 +23,7 @@ namespace dxvk {
    using microseconds = std::chrono::microseconds;
  public:

-   FramePacer( const DxvkOptions& options );
+   FramePacer( const DxvkOptions& options, uint64_t firstFrameId );
    ~FramePacer();

    void sleepAndBeginFrame(
@@ -34,13 +34,13 @@ namespace dxvk {
      // potentially wait some more if the cpu gets too much ahead
      m_mode->startFrame(frameId);
      m_latencyMarkersStorage.registerFrameStart(frameId);
-     m_gpuStarts[ frameId % m_gpuStarts.size() ].store(0);
    }

    void notifyGpuPresentEnd( uint64_t frameId ) override {
      // the frame has been displayed to the screen
      m_latencyMarkersStorage.registerFrameEnd(frameId);
      m_mode->endFrame(frameId);
+     m_gpuStarts[ (frameId-1) % m_gpuStarts.size() ].store(0);
    }

    void notifyCsRenderBegin( uint64_t frameId ) override {
@@ -56,8 +56,8 @@ namespace dxvk {
      m_mode->signalCsFinished( frameId );
    }

-   void notifySubmit() override {
-     LatencyMarkers* m = m_latencyMarkersStorage.getMarkers(m_lastSubmitFrameId+1);
+   void notifySubmit( uint64_t frameId ) override {
+     LatencyMarkers* m = m_latencyMarkersStorage.getMarkers(frameId);
      m->gpuSubmit.push_back(high_resolution_clock::now());
    }

@@ -65,7 +65,6 @@ namespace dxvk {
      // dx to vk translation is finished
      if (frameId != 0) {
        auto now = high_resolution_clock::now();
-       m_lastSubmitFrameId = frameId;
        LatencyMarkers* m = m_latencyMarkersStorage.getMarkers(frameId);
        LatencyMarkers* next = m_latencyMarkersStorage.getMarkers(frameId+1);
        m->gpuSubmit.push_back(now);
@@ -77,7 +76,6 @@ namespace dxvk {
    }

    void notifyQueueSubmit( uint64_t frameId ) override {
-     assert( frameId == m_lastQueueSubmitFrameId + 1 );
      auto now = high_resolution_clock::now();
      LatencyMarkers* m = m_latencyMarkersStorage.getMarkers(frameId);
      m->gpuQueueSubmit.push_back(now);
@@ -87,7 +85,6 @@ namespace dxvk {
    void notifyQueuePresentBegin( uint64_t frameId ) override {
      if (frameId != 0) {
        auto now = high_resolution_clock::now();
-       m_lastQueueSubmitFrameId = frameId;
        LatencyMarkers* m = m_latencyMarkersStorage.getMarkers(frameId);
        LatencyMarkers* next = m_latencyMarkersStorage.getMarkers(frameId+1);
        m->gpuQueueSubmit.push_back(now);
@@ -96,22 +93,15 @@ namespace dxvk {
      }
    }

-   void notifyGpuExecutionBegin( uint64_t frameId ) override {
-     assert( frameId == m_lastFinishedFrameId+1 );
-     LatencyMarkers* m = m_latencyMarkersStorage.getMarkers(m_lastFinishedFrameId+1);
-     gpuExecutionCheckGpuStart(frameId, m, high_resolution_clock::now());
-   }
-
    void notifyGpuExecutionEnd( uint64_t frameId ) override {
      auto now = high_resolution_clock::now();
-     LatencyMarkers* m = m_latencyMarkersStorage.getMarkers(m_lastFinishedFrameId+1);
+     LatencyMarkers* m = m_latencyMarkersStorage.getMarkers(frameId);
      m->gpuReady.push_back(now);
    }

    virtual void notifyGpuPresentBegin( uint64_t frameId ) override {
      // we get frameId == 0 for repeated presents (SyncInterval)
      if (frameId != 0) {
-       m_lastFinishedFrameId = frameId;
        auto now = high_resolution_clock::now();

        LatencyMarkers* m = m_latencyMarkersStorage.getMarkers(frameId);
@@ -121,7 +111,7 @@ namespace dxvk {
        next->gpuReady.clear();
        next->gpuReady.push_back(now);

-       gpuExecutionCheckGpuStart(frameId, m, now);
+       gpuExecutionCheckGpuStart(frameId+1, next, now);

        m_latencyMarkersStorage.m_timeline.gpuFinished.store(frameId);
        m_mode->finishRender(frameId);
@@ -147,9 +137,10 @@ namespace dxvk {
    // not implemented methods

-   void notifyCpuPresentBegin( uint64_t frameId) override { }
+   void notifyCpuPresentBegin( uint64_t frameId ) override { }
    void notifyCpuPresentEnd( uint64_t frameId ) override { }
    void notifyQueuePresentEnd( uint64_t frameId, VkResult status) override { }
+   void notifyGpuExecutionBegin( uint64_t frameId ) override { }
    void discardTimings() override { }
    DxvkLatencyStats getStatistics( uint64_t frameId ) override
      { return DxvkLatencyStats(); }
@@ -178,11 +169,7 @@ namespace dxvk {

    std::unique_ptr<FramePacerMode> m_mode;

-   uint64_t m_lastSubmitFrameId = { DXGI_MAX_SWAP_CHAIN_BUFFERS };
-   uint64_t m_lastQueueSubmitFrameId = { DXGI_MAX_SWAP_CHAIN_BUFFERS };
-   uint64_t m_lastFinishedFrameId = { DXGI_MAX_SWAP_CHAIN_BUFFERS };
-
-   std::array< std::atomic< uint16_t >, 16 > m_gpuStarts = { };
+   std::array< std::atomic< uint16_t >, 8 > m_gpuStarts = { };
    static constexpr uint16_t queueSubmitBit = 1;
    static constexpr uint16_t gpuReadyBit = 2;

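With the per-frame counters removed, each m_gpuStarts slot acts as a small bit-latch: the queue-submit and gpu-ready notifications each OR in their bit, and whichever event arrives second knows the GPU actually started working on that frame. A hedged standalone sketch of the pattern (not the exact DXVK code path):

    #include <array>
    #include <atomic>
    #include <cstdint>

    constexpr uint16_t queueSubmitBit = 1;
    constexpr uint16_t gpuReadyBit    = 2;

    std::array<std::atomic<uint16_t>, 8> gpuStarts = { };

    // Returns true exactly once per frame, on whichever event completes the pair.
    bool markGpuStart(uint64_t frameId, uint16_t bit) {
      auto& slot = gpuStarts[frameId % gpuStarts.size()];
      uint16_t prev = slot.fetch_or(bit);
      constexpr uint16_t both = queueSubmitBit | gpuReadyBit;
      return prev != both && (prev | bit) == both;
    }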
@@ -68,7 +68,7 @@ namespace dxvk {

    sync::Fence m_fenceGpuStart = { sync::Fence(DXGI_MAX_SWAP_CHAIN_BUFFERS) };
    sync::Fence m_fenceGpuFinished = { sync::Fence(DXGI_MAX_SWAP_CHAIN_BUFFERS) };
-   sync::Fence m_fenceCsFinished = { sync::Fence(DXGI_MAX_SWAP_CHAIN_BUFFERS+50) };
+   sync::Fence m_fenceCsFinished = { sync::Fence(DXGI_MAX_SWAP_CHAIN_BUFFERS) };

  };

@@ -83,11 +83,19 @@ namespace dxvk {
    // and calculate backwards when we want to start this frame

    const SyncProps props = getSyncPrediction();
-   int32_t gpuReadyPrediction = duration_cast<microseconds>(
-     m->start + microseconds(m->gpuStart+getGpuStartToFinishPrediction()) - now).count();
+   int32_t lastFrameStart = duration_cast<microseconds>( m->start - now ).count();
+   int32_t gpuReadyPrediction = lastFrameStart
+     + std::max( props.cpuUntilGpuStart, m->gpuStart )
+     + props.optimizedGpuTime;

    int32_t targetGpuSync = gpuReadyPrediction + props.gpuSync;
-   int32_t delay = targetGpuSync - props.cpuUntilGpuSync + m_lowLatencyOffset;
+   int32_t gpuDelay = targetGpuSync - props.cpuUntilGpuSync;
+
+   int32_t cpuReadyPrediction = duration_cast<microseconds>(
+     m->start + microseconds(props.csFinished) - now).count();
+   int32_t cpuDelay = cpuReadyPrediction - props.csStart;
+
+   int32_t delay = std::max(gpuDelay, cpuDelay) + m_lowLatencyOffset;

    m_lastStart = sleepFor( now, delay );

@@ -112,9 +120,7 @@ namespace dxvk {
    // where gpuSubmit[i] <= gpuRun[i] for all i

    std::vector<int32_t>& gpuRun = m_tempGpuRun;
-   std::vector<int32_t>& gpuRunDurations = m_tempGpuRunDurations;
    gpuRun.clear();
-   gpuRunDurations.clear();
    int32_t optimizedGpuTime = 0;
    gpuRun.push_back(optimizedGpuTime);

@@ -123,7 +129,6 @@ namespace dxvk {
      int32_t duration = duration_cast<microseconds>( m->gpuReady[i+1] - _gpuRun ).count();
      optimizedGpuTime += duration;
      gpuRun.push_back(optimizedGpuTime);
-     gpuRunDurations.push_back(duration);
    }

    int32_t alignment = duration_cast<microseconds>( m->gpuSubmit[numLoop-1] - m->gpuSubmit[0] ).count()
@@ -142,7 +147,10 @@ namespace dxvk {
    SyncProps& props = m_props[frameId % m_props.size()];
    props.gpuSync = gpuRun[numLoop-1];
    props.cpuUntilGpuSync = offset + duration_cast<microseconds>( m->gpuSubmit[numLoop-1] - m->start ).count();
+   props.cpuUntilGpuStart = props.cpuUntilGpuSync - props.gpuSync;
    props.optimizedGpuTime = optimizedGpuTime;
+   props.csStart = m->csStart;
+   props.csFinished = m->csFinished;
    props.isOutlier = isOutlier(frameId);

    m_propsFinished.store( frameId );
@@ -156,7 +164,8 @@ namespace dxvk {
    int32_t frametime = std::chrono::duration_cast<microseconds>( t - m_lastStart ).count();
    int32_t frametimeDiff = std::max( 0, m_fpsLimitFrametime.load() - frametime );
    delay = std::max( delay, frametimeDiff );
-   delay = std::max( 0, std::min( delay, 20000 ) );
+   int32_t maxDelay = std::max( m_fpsLimitFrametime.load(), 20000 );
+   delay = std::max( 0, std::min( delay, maxDelay ) );

    Sleep::TimePoint nextStart = t + microseconds(delay);
    Sleep::sleepUntil( t, nextStart );
@@ -169,16 +178,24 @@ namespace dxvk {

    struct SyncProps {
      int32_t optimizedGpuTime; // gpu executing packed submits in one go
-     int32_t gpuSync; // us after gpuStart
+     int32_t gpuSync; // gpuStart to this sync point, in microseconds
      int32_t cpuUntilGpuSync;
+     int32_t cpuUntilGpuStart;
+     int32_t csStart;
+     int32_t csFinished;
      bool isOutlier;
    };


    SyncProps getSyncPrediction() {
-     // in the future we might use more samples to get a prediction
-     // however, simple averaging gives a slightly artificial mouse input
-     // more advanced methods will be investigated
+     // In the future we might use more samples to get a prediction.
+     // Possibly this will be optional, as until now, basing it on
+     // just the previous frame gave us the best mouse input feel.
+     // Simple averaging or median filtering is surely not the way
+     // to go, but more advanced methods will be investigated.
+     // The best place to filter should be on the Present() timeline,
+     // so not sure if we really will do any filtering here other
+     // than outlier removal, which will dampen stuttering effects.
      SyncProps res = {};
      uint64_t id = m_propsFinished;
      if (id < DXGI_MAX_SWAP_CHAIN_BUFFERS+7)
@@ -196,41 +213,17 @@ namespace dxvk {
    };


-   int32_t getGpuStartToFinishPrediction() {
-     uint64_t id = m_propsFinished;
-     if (id < DXGI_MAX_SWAP_CHAIN_BUFFERS+7)
-       return 0;
-
-     for (size_t i=0; i<7; ++i) {
-       const SyncProps& props = m_props[ (id-i) % m_props.size() ];
-       if (!props.isOutlier) {
-         const LatencyMarkers* m = m_latencyMarkersStorage->getConstMarkers(id-i);
-         if (m->gpuReady.empty() || m->gpuSubmit.empty())
-           return m->gpuFinished - m->gpuStart;
-
-         time_point t = std::max( m->gpuReady[0], m->gpuSubmit[0] );
-         return std::chrono::duration_cast<microseconds>( t - m->start ).count()
-           + props.optimizedGpuTime
-           - m->gpuStart;
-       }
-     }
-
-     const LatencyMarkers* m = m_latencyMarkersStorage->getConstMarkers(id);
-     return m->gpuFinished - m->gpuStart;
-   };
-
-
    bool isOutlier( uint64_t frameId ) {
      constexpr size_t numLoop = 7;
      int32_t totalCpuTime = 0;
-     for (size_t i=0; i<numLoop; ++i) {
+     for (size_t i=1; i<numLoop; ++i) {
        const LatencyMarkers* m = m_latencyMarkersStorage->getConstMarkers(frameId-i);
        totalCpuTime += m->cpuFinished;
      }

-     int32_t avgCpuTime = totalCpuTime / numLoop;
+     int32_t avgCpuTime = totalCpuTime / (numLoop-1);
      const LatencyMarkers* m = m_latencyMarkersStorage->getConstMarkers(frameId);
-     if (m->cpuFinished > 1.7*avgCpuTime || m->gpuSubmit.empty() || m->gpuReady.size() != (m->gpuSubmit.size()+1) )
+     if (m->cpuFinished > 1.3*avgCpuTime || m->gpuSubmit.empty() || m->gpuReady.size() != (m->gpuSubmit.size()+1) )
        return true;

      return false;
@@ -248,7 +241,6 @@ namespace dxvk {
    std::atomic<uint64_t> m_propsFinished = { 0 };

    std::vector<int32_t> m_tempGpuRun;
-   std::vector<int32_t> m_tempGpuRunDurations;

  };

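Numerically, the new sleep logic takes the later of a GPU-limited and a CPU-limited start time. A worked, runnable example with made-up microsecond values (all figures are illustrative, not measurements):

    #include <algorithm>
    #include <cstdint>
    #include <cstdio>

    int main() {
      // All values in microseconds, relative to "now".
      int32_t lastFrameStart     = -9000;  // previous frame started 9 ms ago
      int32_t cpuUntilGpuStart   =  1500;  // predicted CPU time before the GPU can start
      int32_t gpuStart           =  1800;  // measured GPU start offset of the last frame
      int32_t optimizedGpuTime   =  6000;  // GPU time if submits were packed back to back
      int32_t gpuSync            =  5500;  // gpuStart to the chosen sync point
      int32_t cpuUntilGpuSync    =  4000;  // CPU work until that sync point is submitted

      int32_t gpuReadyPrediction = lastFrameStart
        + std::max(cpuUntilGpuStart, gpuStart) + optimizedGpuTime;
      int32_t gpuDelay = (gpuReadyPrediction + gpuSync) - cpuUntilGpuSync;

      int32_t cpuReadyPrediction = -2000;  // csFinished prediction relative to now
      int32_t csStart            =   500;
      int32_t cpuDelay = cpuReadyPrediction - csStart;

      // DXVK additionally adds the configurable low-latency offset here.
      int32_t delay = std::max(gpuDelay, cpuDelay);
      std::printf("sleep for %d us\n", (int)std::max(delay, 0));
      return 0;
    }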
@@ -28,7 +28,8 @@ namespace dxvk {
      now - m_lastStart ).count();
    int32_t frametimeDiff = std::max( 0, m_fpsLimitFrametime.load() - frametime );
    int32_t delay = std::max( 0, frametimeDiff );
-   delay = std::min( delay, 20000 );
+   int32_t maxDelay = std::max( m_fpsLimitFrametime.load(), 20000 );
+   delay = std::min( delay, maxDelay );

    Sleep::TimePoint nextStart = now + std::chrono::microseconds(delay);
    Sleep::sleepUntil( now, nextStart );

@@ -1,5 +1,7 @@
 #pragma once
 
+#include <cstddef>
+#include <cstdint>
 #include <vector>
 
 #include "com_include.h"
@@ -9,7 +11,7 @@ namespace dxvk {
   /**
    * \brief COM private data entry type
    */
-  enum ComPrivateDataType {
+  enum class ComPrivateDataType : uint32_t {
     None,
     Data,
     Iface,
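Switching to a scoped enum with an explicit `uint32_t` underlying type keeps the enumerators out of the enclosing namespace and makes the value's size well defined; the trade-off is that call sites need the `ComPrivateDataType::` prefix and explicit casts for integer conversions. A small standalone illustration of that difference, not DXVK code:

```cpp
#include <cstdint>

// Mirrors the scoped enum introduced by the patch.
enum class ComPrivateDataType : uint32_t {
  None,
  Data,
  Iface,
};

int main() {
  ComPrivateDataType type = ComPrivateDataType::Data;

  // The underlying type is guaranteed to be uint32_t, so converting the
  // value for storage is an explicit, well-defined cast rather than an
  // implementation-defined enum size.
  uint32_t raw = static_cast<uint32_t>(type);
  return raw == 1u ? 0 : 1;
}
```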
@@ -291,12 +291,11 @@ namespace dxvk {
     /* Final Fantasy XV: VXAO does thousands of *
      * draw calls with the same UAV bound */
     { R"(\\ffxv_s\.exe$)", {{
-      { "d3d11.ignoreGraphicsBarriers", "True" },
+      { "d3d11.relaxedGraphicsBarriers", "True" },
     }} },
     /* God of War - relies on NVAPI/AMDAGS for *
      * barrier stuff, needs nvapi for DLSS */
     { R"(\\GoW\.exe$)", {{
-      { "d3d11.ignoreGraphicsBarriers", "True" },
       { "d3d11.relaxedBarriers", "True" },
       { "dxgi.hideNvidiaGpu", "False" },
       { "dxgi.maxFrameLatency", "1" },
@@ -334,7 +333,7 @@ namespace dxvk {
      * presumably for culling, which doesn't play *
      * nicely with D3D11 without vendor libraries */
     { R"(\\Stray-Win64-Shipping\.exe$)", {{
-      { "d3d11.ignoreGraphicsBarriers", "True" },
+      { "d3d11.relaxedGraphicsBarriers", "True" },
     }} },
     /* Metal Gear Solid V: Ground Zeroes *
      * Texture quality can break at high vram */
@@ -433,7 +432,7 @@ namespace dxvk {
      * and assumes that AMD GPUs do not expose *
      * native command lists for AGS usage */
     { R"(\\granblue_fantasy_relink\.exe$)", {{
-      { "d3d11.ignoreGraphicsBarriers", "True" },
+      { "d3d11.relaxedGraphicsBarriers", "True" },
       { "d3d11.exposeDriverCommandLists", "False" },
       { "dxgi.hideNvidiaGpu", "False" },
     }} },
@@ -2,6 +2,10 @@
 
 namespace dxvk {
 
+  std::atomic<uint32_t> GpuFlushTracker::m_minPendingSubmissions = { 2 };
+  std::atomic<uint32_t> GpuFlushTracker::m_minChunkCount = { 3 };
+  std::atomic<uint32_t> GpuFlushTracker::m_maxChunkCount = { 20 };
+
   GpuFlushTracker::GpuFlushTracker(GpuFlushType maxType)
   : m_maxType(maxType) {
 
@@ -11,10 +15,6 @@ namespace dxvk {
           GpuFlushType flushType,
           uint64_t chunkId,
           uint32_t lastCompleteSubmissionId) {
-    constexpr uint32_t minPendingSubmissions = 2;
-
-    constexpr uint32_t minChunkCount = 3u;
-    constexpr uint32_t maxChunkCount = 20u;
 
     // Do not flush if there is nothing to flush
     uint32_t chunkCount = uint32_t(chunkId - m_lastFlushChunkId);
@@ -42,14 +42,14 @@ namespace dxvk {
 
       case GpuFlushType::ImplicitStrongHint: {
         // Flush aggressively with a strong hint to reduce readback latency.
-        return chunkCount >= minChunkCount;
+        return chunkCount >= m_minChunkCount;
       }
 
       case GpuFlushType::ImplicitMediumHint:
       case GpuFlushType::ImplicitWeakHint: {
         // Aim for a higher number of chunks per submission with
         // a weak hint in order to avoid submitting too often.
-        if (chunkCount < 2 * minChunkCount)
+        if (chunkCount < 2 * m_minChunkCount)
          return false;
 
        // Actual heuristic is shared with synchronization commands
@@ -60,13 +60,13 @@ namespace dxvk {
     // required if the application is spinning on a query or resource.
     uint32_t pendingSubmissions = uint32_t(m_lastFlushSubmissionId - lastCompleteSubmissionId);
 
-    if (pendingSubmissions < minPendingSubmissions)
+    if (pendingSubmissions < m_minPendingSubmissions)
       return true;
 
     // Use the number of pending submissions to decide whether to flush. Other
     // than ignoring the minimum chunk count condition, we should treat this
     // the same as weak hints to avoid unnecessary synchronization.
-    uint32_t threshold = std::min(maxChunkCount, pendingSubmissions * minChunkCount);
+    uint32_t threshold = std::min(m_maxChunkCount.load(), pendingSubmissions * m_minChunkCount.load());
     return chunkCount >= threshold;
   }
 }
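The flush heuristic itself is unchanged; the fixed `constexpr` thresholds are simply promoted to static `std::atomic` members so this branch can tune them at runtime. A condensed, standalone sketch of the weak-hint decision under those assumptions (names are illustrative, not the exact DXVK interface):

```cpp
#include <algorithm>
#include <atomic>
#include <cstdint>

// Illustrative tunables, mirroring the patch's static atomics.
static std::atomic<uint32_t> g_minPendingSubmissions = { 2 };
static std::atomic<uint32_t> g_minChunkCount         = { 3 };
static std::atomic<uint32_t> g_maxChunkCount         = { 20 };

// Decides whether to flush on a weak implicit hint: flush early while few
// submissions are in flight, otherwise require more recorded chunks the
// more submissions are already pending, up to a fixed ceiling.
bool shouldFlushWeakHint(uint32_t chunkCount, uint32_t pendingSubmissions) {
  if (chunkCount < 2 * g_minChunkCount.load())
    return false;

  if (pendingSubmissions < g_minPendingSubmissions.load())
    return true;

  uint32_t threshold = std::min(g_maxChunkCount.load(),
    pendingSubmissions * g_minChunkCount.load());
  return chunkCount >= threshold;
}
```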
@@ -3,6 +3,7 @@
 #include <cstddef>
 #include <cstdint>
 #include <vector>
+#include <atomic>
 
 namespace dxvk {
 
@@ -64,6 +65,10 @@ namespace dxvk {
           uint64_t chunkId,
           uint64_t submissionId);
 
+    static std::atomic<uint32_t> m_minPendingSubmissions;
+    static std::atomic<uint32_t> m_minChunkCount;
+    static std::atomic<uint32_t> m_maxChunkCount;
+
   private:
 
     GpuFlushType m_maxType = GpuFlushType::ImplicitWeakHint;
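Declaring the thresholds as public static `std::atomic` members makes them process-wide knobs that any thread can adjust without taking a lock. The patch does not show where they are written, so the following is only a hypothetical tuning call site built around a stand-in type:

```cpp
#include <atomic>
#include <cstdint>

// Stand-in for the tracker's public static tunables from the patch.
struct FlushTunables {
  static std::atomic<uint32_t> minPendingSubmissions;
  static std::atomic<uint32_t> minChunkCount;
  static std::atomic<uint32_t> maxChunkCount;
};

std::atomic<uint32_t> FlushTunables::minPendingSubmissions = { 2 };
std::atomic<uint32_t> FlushTunables::minChunkCount         = { 3 };
std::atomic<uint32_t> FlushTunables::maxChunkCount         = { 20 };

// Hypothetical call site: lowering the chunk thresholds makes the tracker
// request implicit flushes sooner, trading submission overhead for
// latency. Relaxed ordering suffices for plain tuning values.
void enableLowLatencyFlushing() {
  FlushTunables::minChunkCount.store(1, std::memory_order_relaxed);
  FlushTunables::maxChunkCount.store(10, std::memory_order_relaxed);
}
```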
@@ -57,6 +57,8 @@ namespace dxvk {
       return;
     }
 
+    m_isActive.store(false);
+
     std::unique_lock<dxvk::mutex> lock(m_mutex);
     auto interval = m_targetInterval;
     auto latency = m_maxLatency;
@@ -79,7 +81,6 @@ namespace dxvk {
     // that can be written by setTargetFrameRate
     lock.unlock();
 
-    m_isActive.store(false);
     if (t1 < m_nextFrame) {
       m_isActive.store(true);
       Sleep::sleepUntil(t1, m_nextFrame);
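Moving `m_isActive.store(false)` to the top of the limiter clears the flag while the deadline is being computed, so it only reads true for the duration of an actual sleep. A rough standalone sketch of that pattern, with hypothetical names:

```cpp
#include <atomic>
#include <chrono>
#include <thread>

std::atomic<bool> g_limiterActive = { false };

// Caps the frame rate by sleeping until the next frame deadline. The
// activity flag is cleared up front and only set while we are actually
// sleeping, so observers never see a stale "active" state carried over
// from the previous frame; it is cleared again on the next call.
void limitFrameRate(std::chrono::steady_clock::time_point nextFrame) {
  g_limiterActive.store(false);

  auto now = std::chrono::steady_clock::now();
  if (now < nextFrame) {
    g_limiterActive.store(true);
    std::this_thread::sleep_until(nextFrame);
  }
}
```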
@@ -40,6 +40,11 @@ namespace dxvk::vk {
         = VK_ACCESS_HOST_READ_BIT
         | VK_ACCESS_HOST_WRITE_BIT;
 
+    constexpr static VkAccessFlags AccessGfxSideEffectMask
+        = VK_ACCESS_SHADER_WRITE_BIT
+        | VK_ACCESS_TRANSFORM_FEEDBACK_WRITE_BIT_EXT
+        | VK_ACCESS_TRANSFORM_FEEDBACK_COUNTER_WRITE_BIT_EXT;
+
     constexpr static VkPipelineStageFlags StageDeviceMask
         = VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT
         | VK_PIPELINE_STAGE_DRAW_INDIRECT_BIT
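The new `AccessGfxSideEffectMask` groups the write accesses a graphics pipeline can produce as side effects: shader writes plus transform-feedback data and counter writes. A mask like this is typically used to decide whether earlier work must be ordered behind a barrier before a subsequent access; a minimal sketch under that assumption, not DXVK's actual tracking code:

```cpp
#include <vulkan/vulkan.h>

// Write accesses a graphics pipeline may produce as side effects,
// matching the mask introduced in the patch.
constexpr VkAccessFlags AccessGfxSideEffectMask
  = VK_ACCESS_SHADER_WRITE_BIT
  | VK_ACCESS_TRANSFORM_FEEDBACK_WRITE_BIT_EXT
  | VK_ACCESS_TRANSFORM_FEEDBACK_COUNTER_WRITE_BIT_EXT;

// Returns true if the recorded accesses include graphics side-effect
// writes, i.e. a later consumer needs a pipeline barrier to be ordered
// behind them. (Illustrative helper, not the DXVK API.)
bool needsBarrierAfterGfxWrites(VkAccessFlags recordedAccess) {
  return (recordedAccess & AccessGfxSideEffectMask) != 0;
}
```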