mirror of
https://github.com/doitsujin/dxvk.git
synced 2025-03-06 20:58:37 +01:00
Merge branch 'doitsujin:master' into low-latency-framepacing
This commit is contained in:
commit
b59a1b9083
41 changed files with 1345 additions and 675 deletions
|
@ -1009,10 +1009,22 @@ namespace dxvk {
|
|||
if (!ctrBuf.defined())
|
||||
return;
|
||||
|
||||
EmitCs([=] (DxvkContext* ctx) {
|
||||
ctx->drawIndirectXfb(ctrBuf,
|
||||
// We bind the SO counter as an indirect count buffer,
|
||||
// so reset any tracking we may have been doing here.
|
||||
m_state.id.reset();
|
||||
|
||||
EmitCs([=] (DxvkContext* ctx) mutable {
|
||||
ctx->bindDrawBuffers(DxvkBufferSlice(),
|
||||
Forwarder::move(ctrBuf));
|
||||
|
||||
ctx->drawIndirectXfb(0u,
|
||||
vtxBuf.buffer()->getXfbVertexStride(),
|
||||
vtxBuf.offset());
|
||||
|
||||
// Reset draw buffer right away so we don't
|
||||
// keep the SO counter alive indefinitely
|
||||
ctx->bindDrawBuffers(DxvkBufferSlice(),
|
||||
DxvkBufferSlice());
|
||||
});
|
||||
}
|
||||
|
||||
|
@ -1109,7 +1121,7 @@ namespace dxvk {
|
|||
} else {
|
||||
cmdData = EmitCsCmd<D3D11CmdDrawIndirectData>(
|
||||
[] (DxvkContext* ctx, const D3D11CmdDrawIndirectData* data) {
|
||||
ctx->drawIndexedIndirect(data->offset, data->count, data->stride);
|
||||
ctx->drawIndexedIndirect(data->offset, data->count, data->stride, true);
|
||||
});
|
||||
|
||||
cmdData->type = D3D11CmdType::DrawIndirectIndexed;
|
||||
|
@ -1144,7 +1156,7 @@ namespace dxvk {
|
|||
} else {
|
||||
cmdData = EmitCsCmd<D3D11CmdDrawIndirectData>(
|
||||
[] (DxvkContext* ctx, const D3D11CmdDrawIndirectData* data) {
|
||||
ctx->drawIndirect(data->offset, data->count, data->stride);
|
||||
ctx->drawIndirect(data->offset, data->count, data->stride, true);
|
||||
});
|
||||
|
||||
cmdData->type = D3D11CmdType::DrawIndirect;
|
||||
|
@ -4632,10 +4644,6 @@ namespace dxvk {
|
|||
ApplyRasterizerSampleCount();
|
||||
ApplyViewportState();
|
||||
|
||||
BindDrawBuffers(
|
||||
m_state.id.argBuffer.ptr(),
|
||||
m_state.id.cntBuffer.ptr());
|
||||
|
||||
BindIndexBuffer(
|
||||
m_state.ia.indexBuffer.buffer.ptr(),
|
||||
m_state.ia.indexBuffer.offset,
|
||||
|
@ -4674,6 +4682,11 @@ namespace dxvk {
|
|||
RestoreSamplers<DxbcProgramType::GeometryShader>();
|
||||
RestoreSamplers<DxbcProgramType::PixelShader>();
|
||||
RestoreSamplers<DxbcProgramType::ComputeShader>();
|
||||
|
||||
// Draw buffer bindings aren't persistent at the API level, and
|
||||
// we can't meaningfully track them. Just reset this state here
|
||||
// and reapply on the next indirect draw.
|
||||
SetDrawBuffers(nullptr, nullptr);
|
||||
}
|
||||
|
||||
|
||||
|
@ -5000,10 +5013,13 @@ namespace dxvk {
|
|||
auto argBuffer = static_cast<D3D11Buffer*>(pBufferForArgs);
|
||||
auto cntBuffer = static_cast<D3D11Buffer*>(pBufferForCount);
|
||||
|
||||
if (m_state.id.argBuffer != argBuffer
|
||||
|| m_state.id.cntBuffer != cntBuffer) {
|
||||
m_state.id.argBuffer = argBuffer;
|
||||
m_state.id.cntBuffer = cntBuffer;
|
||||
auto argBufferCookie = argBuffer ? argBuffer->GetCookie() : 0u;
|
||||
auto cntBufferCookie = cntBuffer ? cntBuffer->GetCookie() : 0u;
|
||||
|
||||
if (m_state.id.argBufferCookie != argBufferCookie
|
||||
|| m_state.id.cntBufferCookie != cntBufferCookie) {
|
||||
m_state.id.argBufferCookie = argBufferCookie;
|
||||
m_state.id.cntBufferCookie = cntBufferCookie;
|
||||
|
||||
BindDrawBuffers(argBuffer, cntBuffer);
|
||||
}
|
||||
|
|
|
@ -53,7 +53,7 @@ namespace dxvk {
|
|||
cOffset = ByteOffsetForArgs,
|
||||
cStride = ByteStrideForArgs
|
||||
] (DxvkContext* ctx) {
|
||||
ctx->drawIndirect(cOffset, cCount, cStride);
|
||||
ctx->drawIndirect(cOffset, cCount, cStride, false);
|
||||
});
|
||||
}
|
||||
|
||||
|
@ -72,7 +72,7 @@ namespace dxvk {
|
|||
cOffset = ByteOffsetForArgs,
|
||||
cStride = ByteStrideForArgs
|
||||
] (DxvkContext* ctx) {
|
||||
ctx->drawIndexedIndirect(cOffset, cCount, cStride);
|
||||
ctx->drawIndexedIndirect(cOffset, cCount, cStride, false);
|
||||
});
|
||||
}
|
||||
|
||||
|
@ -146,11 +146,10 @@ namespace dxvk {
|
|||
D3D11Device* parent = static_cast<D3D11Device*>(m_ctx->GetParentInterface());
|
||||
DxvkBarrierControlFlags flags = parent->GetOptionsBarrierControlFlags();
|
||||
|
||||
if (ControlFlags & D3D11_VK_BARRIER_CONTROL_IGNORE_WRITE_AFTER_WRITE)
|
||||
flags.set(DxvkBarrierControl::IgnoreWriteAfterWrite);
|
||||
|
||||
if (ControlFlags & D3D11_VK_BARRIER_CONTROL_IGNORE_GRAPHICS_UAV)
|
||||
flags.set(DxvkBarrierControl::IgnoreGraphicsBarriers);
|
||||
if (ControlFlags & D3D11_VK_BARRIER_CONTROL_IGNORE_WRITE_AFTER_WRITE) {
|
||||
flags.set(DxvkBarrierControl::ComputeAllowReadWriteOverlap,
|
||||
DxvkBarrierControl::GraphicsAllowReadWriteOverlap);
|
||||
}
|
||||
|
||||
m_ctx->EmitCs([cFlags = flags] (DxvkContext* ctx) {
|
||||
ctx->setBarrierControl(cFlags);
|
||||
|
|
|
@ -865,6 +865,11 @@ namespace dxvk {
|
|||
Rc<DxvkLatencyTracker> LatencyTracker) {
|
||||
D3D10DeviceLock lock = LockContext();
|
||||
|
||||
// Don't keep draw buffers alive indefinitely. This cannot be
|
||||
// done in ExecuteFlush because command recording itself might
|
||||
// flush, so no state changes are allowed to happen there.
|
||||
SetDrawBuffers(nullptr, nullptr);
|
||||
|
||||
EmitCs<false>([
|
||||
cTracker = std::move(LatencyTracker)
|
||||
] (DxvkContext* ctx) {
|
||||
|
|
|
@ -232,12 +232,12 @@ namespace dxvk {
|
|||
* argument and draw count buffer.
|
||||
*/
|
||||
struct D3D11ContextStateID {
|
||||
Com<D3D11Buffer, false> argBuffer = nullptr;
|
||||
Com<D3D11Buffer, false> cntBuffer = nullptr;
|
||||
uint64_t argBufferCookie = 0u;
|
||||
uint64_t cntBufferCookie = 0u;
|
||||
|
||||
void reset() {
|
||||
argBuffer = nullptr;
|
||||
cntBuffer = nullptr;
|
||||
argBufferCookie = 0u;
|
||||
cntBufferCookie = 0u;
|
||||
}
|
||||
};
|
||||
|
||||
|
@ -347,4 +347,4 @@ namespace dxvk {
|
|||
uint32_t soCount;
|
||||
};
|
||||
|
||||
}
|
||||
}
|
||||
|
|
|
@ -472,13 +472,13 @@ namespace dxvk {
|
|||
const Rc<DxvkAdapter>& Adapter);
|
||||
|
||||
DxvkBarrierControlFlags GetOptionsBarrierControlFlags() {
|
||||
DxvkBarrierControlFlags barrierControl;
|
||||
DxvkBarrierControlFlags barrierControl = 0u;
|
||||
|
||||
if (m_d3d11Options.relaxedBarriers)
|
||||
barrierControl.set(DxvkBarrierControl::IgnoreWriteAfterWrite);
|
||||
barrierControl.set(DxvkBarrierControl::ComputeAllowWriteOnlyOverlap);
|
||||
|
||||
if (m_d3d11Options.ignoreGraphicsBarriers)
|
||||
barrierControl.set(DxvkBarrierControl::IgnoreGraphicsBarriers);
|
||||
if (m_d3d11Options.relaxedBarriers || m_d3d11Options.relaxedGraphicsBarriers)
|
||||
barrierControl.set(DxvkBarrierControl::GraphicsAllowReadWriteOverlap);
|
||||
|
||||
return barrierControl;
|
||||
}
|
||||
|
|
|
@ -24,7 +24,9 @@ enum D3D11_VK_EXTENSION : uint32_t {
|
|||
*/
|
||||
enum D3D11_VK_BARRIER_CONTROL : uint32_t {
|
||||
D3D11_VK_BARRIER_CONTROL_IGNORE_WRITE_AFTER_WRITE = 1 << 0,
|
||||
D3D11_VK_BARRIER_CONTROL_IGNORE_GRAPHICS_UAV = 1 << 1,
|
||||
|
||||
// Removed:
|
||||
// D3D11_VK_BARRIER_CONTROL_IGNORE_GRAPHICS_UAV = 1 << 1,
|
||||
};
|
||||
|
||||
|
||||
|
|
|
@ -17,7 +17,7 @@ namespace dxvk {
|
|||
this->zeroInitWorkgroupMemory = config.getOption<bool>("d3d11.zeroInitWorkgroupMemory", false);
|
||||
this->forceVolatileTgsmAccess = config.getOption<bool>("d3d11.forceVolatileTgsmAccess", false);
|
||||
this->relaxedBarriers = config.getOption<bool>("d3d11.relaxedBarriers", false);
|
||||
this->ignoreGraphicsBarriers = config.getOption<bool>("d3d11.ignoreGraphicsBarriers", false);
|
||||
this->relaxedGraphicsBarriers = config.getOption<bool>("d3d11.relaxedGraphicsBarriers", false);
|
||||
this->maxTessFactor = config.getOption<int32_t>("d3d11.maxTessFactor", 0);
|
||||
this->samplerAnisotropy = config.getOption<int32_t>("d3d11.samplerAnisotropy", -1);
|
||||
this->samplerLodBias = config.getOption<float>("d3d11.samplerLodBias", 0.0f);
|
||||
|
@ -61,4 +61,4 @@ namespace dxvk {
|
|||
this->shaderDumpPath = env::getEnvVar("DXVK_SHADER_DUMP_PATH");
|
||||
}
|
||||
|
||||
}
|
||||
}
|
||||
|
|
|
@ -43,7 +43,7 @@ namespace dxvk {
|
|||
///
|
||||
/// May improve performance in some games,
|
||||
/// but might also cause rendering issues.
|
||||
bool ignoreGraphicsBarriers = false;
|
||||
bool relaxedGraphicsBarriers = false;
|
||||
|
||||
/// Maximum tessellation factor.
|
||||
///
|
||||
|
@ -114,4 +114,4 @@ namespace dxvk {
|
|||
std::string shaderDumpPath;
|
||||
};
|
||||
|
||||
}
|
||||
}
|
||||
|
|
|
@ -31,16 +31,28 @@ namespace dxvk {
|
|||
}
|
||||
|
||||
ULONG STDMETHODCALLTYPE Release() {
|
||||
// ignore Release calls on objects with 0 refCount
|
||||
if(unlikely(!this->m_refCount))
|
||||
return this->m_refCount;
|
||||
uint32_t oldRefCount, refCount;
|
||||
|
||||
do {
|
||||
oldRefCount = this->m_refCount.load(std::memory_order_acquire);
|
||||
|
||||
// clamp value to 0 to prevent underruns
|
||||
if (unlikely(!oldRefCount))
|
||||
return 0;
|
||||
|
||||
refCount = oldRefCount - 1;
|
||||
|
||||
} while (!this->m_refCount.compare_exchange_weak(oldRefCount,
|
||||
refCount,
|
||||
std::memory_order_release,
|
||||
std::memory_order_acquire));
|
||||
|
||||
uint32_t refCount = --this->m_refCount;
|
||||
if (unlikely(!refCount)) {
|
||||
auto* pDevice = GetDevice();
|
||||
this->ReleasePrivate();
|
||||
pDevice->Release();
|
||||
}
|
||||
|
||||
return refCount;
|
||||
}
|
||||
|
||||
|
|
|
@ -9,7 +9,7 @@
|
|||
namespace dxvk {
|
||||
|
||||
static inline uint32_t parseDword(std::string_view str) {
|
||||
uint32_t value = UINT32_MAX;
|
||||
uint32_t value = std::numeric_limits<uint32_t>::max();
|
||||
std::from_chars(str.data(), str.data() + str.size(), value);
|
||||
return value;
|
||||
}
|
||||
|
|
|
@ -588,7 +588,7 @@ namespace dxvk {
|
|||
// Max Vertex Shader Const
|
||||
pCaps->MaxVertexShaderConst = MaxFloatConstantsVS;
|
||||
// Max PS1 Value
|
||||
pCaps->PixelShader1xMaxValue = options.shaderModel > 0 ? FLT_MAX : 0.0f;
|
||||
pCaps->PixelShader1xMaxValue = options.shaderModel > 0 ? std::numeric_limits<float>::max() : 0.0f;
|
||||
// Dev Caps 2
|
||||
pCaps->DevCaps2 = D3DDEVCAPS2_STREAMOFFSET
|
||||
/* | D3DDEVCAPS2_DMAPNPATCH */
|
||||
|
|
|
@ -75,7 +75,7 @@ namespace dxvk {
|
|||
|
||||
public:
|
||||
|
||||
static constexpr UINT AllLayers = UINT32_MAX;
|
||||
static constexpr UINT AllLayers = std::numeric_limits<uint32_t>::max();
|
||||
|
||||
D3D9CommonTexture(
|
||||
D3D9DeviceEx* pDevice,
|
||||
|
|
|
@ -1760,7 +1760,7 @@ namespace dxvk {
|
|||
|
||||
m_state.depthStencil = ds;
|
||||
|
||||
UpdateActiveHazardsDS(UINT32_MAX);
|
||||
UpdateActiveHazardsDS(std::numeric_limits<uint32_t>::max());
|
||||
|
||||
return D3D_OK;
|
||||
}
|
||||
|
@ -2157,7 +2157,7 @@ namespace dxvk {
|
|||
if (m_state.IsLightEnabled(Index) == !!Enable)
|
||||
return D3D_OK;
|
||||
|
||||
uint32_t searchIndex = UINT32_MAX;
|
||||
uint32_t searchIndex = std::numeric_limits<uint32_t>::max();
|
||||
uint32_t setIndex = Index;
|
||||
|
||||
if (!Enable)
|
||||
|
@ -2384,7 +2384,7 @@ namespace dxvk {
|
|||
|
||||
case D3DRS_ZWRITEENABLE:
|
||||
if (likely(!old != !Value))
|
||||
UpdateActiveHazardsDS(UINT32_MAX);
|
||||
UpdateActiveHazardsDS(std::numeric_limits<uint32_t>::max());
|
||||
[[fallthrough]];
|
||||
case D3DRS_STENCILENABLE:
|
||||
case D3DRS_ZENABLE:
|
||||
|
@ -3387,7 +3387,7 @@ namespace dxvk {
|
|||
BindShader<DxsoProgramTypes::VertexShader>(GetCommonShader(shader));
|
||||
m_vsShaderMasks = newShader->GetShaderMask();
|
||||
|
||||
UpdateTextureTypeMismatchesForShader(newShader, m_vsShaderMasks.samplerMask, caps::MaxTexturesPS + 1);
|
||||
UpdateTextureTypeMismatchesForShader(newShader, m_vsShaderMasks.samplerMask, FirstVSSamplerSlot);
|
||||
}
|
||||
else {
|
||||
m_vsShaderMasks = D3D9ShaderMasks();
|
||||
|
@ -3795,8 +3795,8 @@ namespace dxvk {
|
|||
if (m_psShaderMasks.samplerMask != newShaderMasks.samplerMask ||
|
||||
m_psShaderMasks.rtMask != newShaderMasks.rtMask) {
|
||||
m_psShaderMasks = newShaderMasks;
|
||||
UpdateActiveHazardsRT(UINT32_MAX);
|
||||
UpdateActiveHazardsDS(UINT32_MAX);
|
||||
UpdateActiveHazardsRT(std::numeric_limits<uint32_t>::max());
|
||||
UpdateActiveHazardsDS(std::numeric_limits<uint32_t>::max());
|
||||
}
|
||||
|
||||
return D3D_OK;
|
||||
|
@ -6415,12 +6415,15 @@ namespace dxvk {
|
|||
void D3D9DeviceEx::UpdateTextureTypeMismatchesForTexture(uint32_t stateSampler) {
|
||||
uint32_t shaderTextureIndex;
|
||||
const D3D9CommonShader* shader;
|
||||
if (unlikely(stateSampler > caps::MaxTexturesPS + 1)) {
|
||||
if (likely(IsPSSampler(stateSampler))) {
|
||||
shader = GetCommonShader(m_state.pixelShader);
|
||||
shaderTextureIndex = stateSampler;
|
||||
} else if (unlikely(IsVSSampler(stateSampler))) {
|
||||
shader = GetCommonShader(m_state.vertexShader);
|
||||
shaderTextureIndex = stateSampler - caps::MaxTexturesPS - 1;
|
||||
} else {
|
||||
shader = GetCommonShader(m_state.pixelShader);
|
||||
shaderTextureIndex = stateSampler;
|
||||
// Do not type check the fixed function displacement map texture.
|
||||
return;
|
||||
}
|
||||
|
||||
if (unlikely(shader == nullptr || shader->GetInfo().majorVersion() < 2 || m_d3d9Options.forceSamplerTypeSpecConstants)) {
|
||||
|
@ -7815,7 +7818,7 @@ namespace dxvk {
|
|||
|
||||
if (key.Data.Contents.UseLighting) {
|
||||
for (uint32_t i = 0; i < caps::MaxEnabledLights; i++) {
|
||||
if (m_state.enabledLightIndices[i] != UINT32_MAX)
|
||||
if (m_state.enabledLightIndices[i] != std::numeric_limits<uint32_t>::max())
|
||||
lightCount++;
|
||||
}
|
||||
}
|
||||
|
@ -7912,7 +7915,7 @@ namespace dxvk {
|
|||
uint32_t lightIdx = 0;
|
||||
for (uint32_t i = 0; i < caps::MaxEnabledLights; i++) {
|
||||
auto idx = m_state.enabledLightIndices[i];
|
||||
if (idx == UINT32_MAX)
|
||||
if (idx == std::numeric_limits<uint32_t>::max())
|
||||
continue;
|
||||
|
||||
data->Lights[lightIdx++] = D3D9Light(m_state.lights[idx].value(), m_state.transforms[GetTransformIndex(D3DTS_VIEW)]);
|
||||
|
|
|
@ -653,15 +653,41 @@ namespace dxvk {
|
|||
const D3DDISPLAYMODEEX* pFullscreenDisplayMode,
|
||||
IDirect3DSwapChain9** ppSwapChain);
|
||||
|
||||
/**
|
||||
* @brief Sets the given sampler state
|
||||
*
|
||||
* @param StateSampler Sampler index (according to our internal way of storing samplers)
|
||||
* @param Type Sampler state type to change
|
||||
* @param Value State value
|
||||
*/
|
||||
HRESULT SetStateSamplerState(
|
||||
DWORD StateSampler,
|
||||
D3DSAMPLERSTATETYPE Type,
|
||||
DWORD Value);
|
||||
|
||||
/**
|
||||
* @brief Sets the given sampler texture
|
||||
*
|
||||
* @param StateSampler Sampler index (according to our internal way of storing samplers)
|
||||
* @param pTexture Texture to use
|
||||
*/
|
||||
HRESULT SetStateTexture(DWORD StateSampler, IDirect3DBaseTexture9* pTexture);
|
||||
|
||||
/**
|
||||
* @brief Sets the transform for the given sampler
|
||||
*
|
||||
* @param idx Sampler index (according to our internal way of storing samplers)
|
||||
* @param pMatrix Transform matrix
|
||||
*/
|
||||
HRESULT SetStateTransform(uint32_t idx, const D3DMATRIX* pMatrix);
|
||||
|
||||
/**
|
||||
* @brief Sets the fixed function texture processing state
|
||||
*
|
||||
* @param Stage Sampler index (according to our internal way of storing samplers)
|
||||
* @param Type Fixed function texture stage type
|
||||
* @param Value Value for the state
|
||||
*/
|
||||
HRESULT SetStateTextureStageState(
|
||||
DWORD Stage,
|
||||
D3D9TextureStageStateTypes Type,
|
||||
|
@ -818,8 +844,40 @@ namespace dxvk {
|
|||
|
||||
void UpdateActiveFetch4(uint32_t stateSampler);
|
||||
|
||||
/**
|
||||
* @brief Sets the mismatching texture type bits for all samplers if necessary.
|
||||
*
|
||||
* This function will check all samplers the shader uses and set the mismatching texture type bit for each sampler that does not
|
||||
* match the texture type expected by the respective shader.
|
||||
*
|
||||
* It will *not* unset the bit if the texture type does match.
|
||||
*
|
||||
* @param stateSampler Sampler index (according to our internal way of storing samplers)
|
||||
*/
|
||||
|
||||
/**
|
||||
* @brief Sets the mismatching texture type bits for all samplers if necessary.
|
||||
*
|
||||
* This function will check all samplers the shader uses and set the mismatching texture type bit for each sampler that does not
|
||||
* match the texture type expected by the shader.
|
||||
*
|
||||
* @param shader The shader
|
||||
* @param shaderSamplerMask Mask of all samplers that the shader uses (according to our internal way of storing samplers)
|
||||
* @param shaderSamplerOffset First index of the shader's samplers according to our internal way of storing samplers.
|
||||
* Used to transform the sampler indices that are relative to the entire pipeline to ones relative to the shader.
|
||||
*/
|
||||
void UpdateTextureTypeMismatchesForShader(const D3D9CommonShader* shader, uint32_t shaderSamplerMask, uint32_t shaderSamplerOffset);
|
||||
|
||||
/**
|
||||
* @brief Sets the mismatching texture type bit for the given sampler.
|
||||
*
|
||||
* This function will set the mismatching texture type bit for the given sampler if it does not
|
||||
* match the texture type expected by the respective shader.
|
||||
*
|
||||
* It will *not* unset the bit if the texture type does match.
|
||||
*
|
||||
* @param stateSampler Sampler index (according to our internal way of storing samplers)
|
||||
*/
|
||||
void UpdateTextureTypeMismatchesForTexture(uint32_t stateSampler);
|
||||
|
||||
void UploadManagedTexture(D3D9CommonTexture* pResource);
|
||||
|
|
|
@ -25,12 +25,28 @@ namespace dxvk {
|
|||
}
|
||||
|
||||
ULONG STDMETHODCALLTYPE Release() {
|
||||
uint32_t refCount = --this->m_refCount;
|
||||
uint32_t oldRefCount, refCount;
|
||||
|
||||
do {
|
||||
oldRefCount = this->m_refCount.load(std::memory_order_acquire);
|
||||
|
||||
// clamp value to 0 to prevent underruns
|
||||
if (unlikely(!oldRefCount))
|
||||
return 0;
|
||||
|
||||
refCount = oldRefCount - 1;
|
||||
|
||||
} while (!this->m_refCount.compare_exchange_weak(oldRefCount,
|
||||
refCount,
|
||||
std::memory_order_release,
|
||||
std::memory_order_acquire));
|
||||
|
||||
if (unlikely(!refCount)) {
|
||||
auto* pDevice = GetDevice();
|
||||
this->ReleasePrivate();
|
||||
pDevice->Release();
|
||||
}
|
||||
|
||||
return refCount;
|
||||
}
|
||||
|
||||
|
|
|
@ -1297,7 +1297,7 @@ namespace dxvk {
|
|||
uint32_t atten = m_module.opFFma (m_floatType, d, atten2, atten1);
|
||||
atten = m_module.opFFma (m_floatType, d, atten, atten0);
|
||||
atten = m_module.opFDiv (m_floatType, m_module.constf32(1.0f), atten);
|
||||
atten = m_module.opNMin (m_floatType, atten, m_module.constf32(FLT_MAX));
|
||||
atten = m_module.opNMin (m_floatType, atten, m_module.constf32(std::numeric_limits<float>::max()));
|
||||
|
||||
atten = m_module.opSelect(m_floatType, m_module.opFOrdGreaterThan(bool_t, d, range), m_module.constf32(0.0f), atten);
|
||||
atten = m_module.opSelect(m_floatType, isDirectional, m_module.constf32(1.0f), atten);
|
||||
|
|
|
@ -63,7 +63,7 @@ namespace dxvk {
|
|||
// do an or per-draw in the device.
|
||||
// We shift by 17 because 16 ps samplers + 1 dmap (tess)
|
||||
if (ShaderStage == VK_SHADER_STAGE_VERTEX_BIT)
|
||||
m_usedSamplers <<= caps::MaxTexturesPS + 1;
|
||||
m_usedSamplers <<= FirstVSSamplerSlot;
|
||||
|
||||
m_usedRTs = pModule->usedRTs();
|
||||
|
||||
|
|
|
@ -10,7 +10,7 @@ namespace dxvk {
|
|||
streamFreq[i] = 1;
|
||||
|
||||
for (uint32_t i = 0; i < enabledLightIndices.size(); i++)
|
||||
enabledLightIndices[i] = UINT32_MAX;
|
||||
enabledLightIndices[i] = std::numeric_limits<uint32_t>::max();
|
||||
}
|
||||
|
||||
|
||||
|
|
|
@ -204,7 +204,7 @@ namespace dxvk {
|
|||
if (m_state.IsLightEnabled(Index) == !!Enable)
|
||||
return D3D_OK;
|
||||
|
||||
uint32_t searchIndex = UINT32_MAX;
|
||||
uint32_t searchIndex = std::numeric_limits<uint32_t>::max();
|
||||
uint32_t setIndex = Index;
|
||||
|
||||
if (!Enable)
|
||||
|
@ -436,7 +436,7 @@ namespace dxvk {
|
|||
void D3D9StateBlock::CapturePixelSamplerStates() {
|
||||
m_captures.flags.set(D3D9CapturedStateFlag::SamplerStates);
|
||||
|
||||
for (uint32_t i = 0; i < caps::MaxTexturesPS + 1; i++) {
|
||||
for (uint32_t i = 0; i < FirstVSSamplerSlot; i++) {
|
||||
m_captures.samplers.set(i, true);
|
||||
|
||||
m_captures.samplerStates[i].set(D3DSAMP_ADDRESSU, true);
|
||||
|
@ -519,7 +519,7 @@ namespace dxvk {
|
|||
void D3D9StateBlock::CaptureVertexSamplerStates() {
|
||||
m_captures.flags.set(D3D9CapturedStateFlag::SamplerStates);
|
||||
|
||||
for (uint32_t i = caps::MaxTexturesPS + 1; i < SamplerCount; i++) {
|
||||
for (uint32_t i = FirstVSSamplerSlot; i < SamplerCount; i++) {
|
||||
m_captures.samplers.set(i, true);
|
||||
m_captures.samplerStates[i].set(D3DSAMP_DMAPOFFSET, true);
|
||||
}
|
||||
|
|
|
@ -652,17 +652,17 @@ namespace dxvk {
|
|||
|
||||
static bool validateGammaRamp(const WORD (&ramp)[256]) {
|
||||
if (ramp[0] >= ramp[std::size(ramp) - 1]) {
|
||||
Logger::err("validateGammaRamp: ramp inverted or flat");
|
||||
Logger::warn("validateGammaRamp: ramp inverted or flat");
|
||||
return false;
|
||||
}
|
||||
|
||||
for (size_t i = 1; i < std::size(ramp); i++) {
|
||||
if (ramp[i] < ramp[i - 1]) {
|
||||
Logger::err("validateGammaRamp: ramp not monotonically increasing");
|
||||
Logger::warn("validateGammaRamp: ramp not monotonically increasing");
|
||||
return false;
|
||||
}
|
||||
if (ramp[i] - ramp[i - 1] >= UINT16_MAX / 2) {
|
||||
Logger::err("validateGammaRamp: huuuge jump");
|
||||
Logger::warn("validateGammaRamp: huuuge jump");
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
|
|
@ -44,6 +44,11 @@ namespace dxvk {
|
|||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* @brief Returns whether or not the sampler index is valid
|
||||
*
|
||||
* @param Sampler Sampler index (according to the API)
|
||||
*/
|
||||
inline bool InvalidSampler(DWORD Sampler) {
|
||||
if (Sampler >= caps::MaxTexturesPS && Sampler < D3DDMAPSAMPLER)
|
||||
return true;
|
||||
|
@ -54,6 +59,19 @@ namespace dxvk {
|
|||
return false;
|
||||
}
|
||||
|
||||
/**
|
||||
* @brief The first sampler that belongs to the vertex shader according to our internal way of storing samplers
|
||||
*/
|
||||
constexpr uint32_t FirstVSSamplerSlot = caps::MaxTexturesPS + 1;
|
||||
|
||||
/**
|
||||
* @brief Remaps a sampler index by the API to an internal one
|
||||
*
|
||||
* Remaps the sampler index according to the way the API counts them to how we count and store them internally.
|
||||
*
|
||||
* @param Sampler Sampler index (according to API)
|
||||
* @return DWORD Sampler index (according to our internal way of storing samplers)
|
||||
*/
|
||||
inline DWORD RemapSamplerState(DWORD Sampler) {
|
||||
if (Sampler >= D3DDMAPSAMPLER)
|
||||
Sampler = caps::MaxTexturesPS + (Sampler - D3DDMAPSAMPLER);
|
||||
|
@ -61,13 +79,62 @@ namespace dxvk {
|
|||
return Sampler;
|
||||
}
|
||||
|
||||
/**
|
||||
* @brief Remaps the sampler from an index applying to the entire pipeline to one relative to the shader stage and returns the shader type
|
||||
*
|
||||
* The displacement map sampler will be treated as a 17th pixel shader sampler.
|
||||
*
|
||||
* @param Sampler Sampler index (according to our internal way of storing samplers)
|
||||
* @return std::pair<DxsoProgramType, DWORD> Shader stage that it belongs to and the relative sampler index
|
||||
*/
|
||||
inline std::pair<DxsoProgramType, DWORD> RemapStateSamplerShader(DWORD Sampler) {
|
||||
if (Sampler >= caps::MaxTexturesPS + 1)
|
||||
return std::make_pair(DxsoProgramTypes::VertexShader, Sampler - caps::MaxTexturesPS - 1);
|
||||
if (Sampler >= FirstVSSamplerSlot)
|
||||
return std::make_pair(DxsoProgramTypes::VertexShader, Sampler - FirstVSSamplerSlot);
|
||||
|
||||
return std::make_pair(DxsoProgramTypes::PixelShader, Sampler);
|
||||
}
|
||||
|
||||
/**
|
||||
* @brief Returns whether the sampler belongs to the vertex shader.
|
||||
*
|
||||
* The displacement map sampler is part of a fixed function feature,
|
||||
* so it does not belong to the vertex shader.
|
||||
* Use IsDMAPSampler to check for that.
|
||||
*
|
||||
* @param Sampler Sampler index (according to our internal way of storing samplers)
|
||||
*/
|
||||
inline bool IsVSSampler(uint32_t Sampler) {
|
||||
return Sampler >= FirstVSSamplerSlot;
|
||||
}
|
||||
|
||||
/**
|
||||
* @brief Returns whether the sampler belongs to the pixel shader.
|
||||
*
|
||||
* The displacement map sampler is part of a fixed function feature,
|
||||
* so (unlike in RemapStateSamplerShader) it does not belong to the pixel shader.
|
||||
* Use IsDMAPSampler to check for that.
|
||||
*
|
||||
* @param Sampler Sampler index (according to our internal way of storing samplers)
|
||||
*/
|
||||
inline bool IsPSSampler(uint32_t Sampler) {
|
||||
return Sampler <= caps::MaxTexturesPS;
|
||||
}
|
||||
|
||||
/**
|
||||
* @brief Returns whether the sampler is the displacement map sampler
|
||||
*
|
||||
* @param Sampler Sampler index (according to our internal way of storing samplers)
|
||||
*/
|
||||
inline bool IsDMAPSampler(uint32_t Sampler) {
|
||||
return Sampler > caps::MaxTexturesPS;
|
||||
}
|
||||
|
||||
/**
|
||||
* @brief Remaps the sampler from an index (counted according to the API) to one relative to the shader stage and returns the shader type
|
||||
*
|
||||
* @param Sampler Sampler index (according to the API)
|
||||
* @return std::pair<DxsoProgramType, DWORD> Shader stage that it belongs to and the relative sampler index
|
||||
*/
|
||||
inline std::pair<DxsoProgramType, DWORD> RemapSamplerShader(DWORD Sampler) {
|
||||
Sampler = RemapSamplerState(Sampler);
|
||||
|
||||
|
@ -243,6 +310,9 @@ namespace dxvk {
|
|||
uint32_t(offsets[1].y) > extent.height;
|
||||
}
|
||||
|
||||
/**
|
||||
* @brief Mirrors D3DTEXTURESTAGESTATETYPE but starts at 0
|
||||
*/
|
||||
enum D3D9TextureStageStateTypes : uint32_t
|
||||
{
|
||||
DXVK_TSS_COLOROP = 0,
|
||||
|
@ -272,6 +342,12 @@ namespace dxvk {
|
|||
constexpr uint32_t DXVK_TSS_TCI_CAMERASPACEREFLECTIONVECTOR = 0x00030000;
|
||||
constexpr uint32_t DXVK_TSS_TCI_SPHEREMAP = 0x00040000;
|
||||
|
||||
/**
|
||||
* @brief Remaps a texture stage type by the API to an internal one
|
||||
*
|
||||
* @param Type Texture stage type according to the API
|
||||
* @return D3D9TextureStageStateTypes Texture stage type according to our internal way of storing them
|
||||
*/
|
||||
inline D3D9TextureStageStateTypes RemapTextureStageStateType(D3DTEXTURESTAGESTATETYPE Type) {
  // Shift the API value down by one to obtain the internal stage state index.
  const auto shifted = Type - 1;
  return static_cast<D3D9TextureStageStateTypes>(shifted);
}
|
||||
|
|
|
@ -30,26 +30,48 @@ namespace dxvk {
|
|||
switch (ins.opClass) {
|
||||
case DxbcInstClass::Atomic: {
|
||||
const uint32_t operandId = ins.dstCount - 1;
|
||||
|
||||
|
||||
if (ins.dst[operandId].type == DxbcOperandType::UnorderedAccessView) {
|
||||
const uint32_t registerId = ins.dst[operandId].idx[0].offset;
|
||||
m_analysis->uavInfos[registerId].accessAtomicOp = true;
|
||||
m_analysis->uavInfos[registerId].accessFlags |= VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_SHADER_WRITE_BIT;
|
||||
|
||||
// Check whether the atomic operation is order-invariant
|
||||
DxvkAccessOp store = DxvkAccessOp::None;
|
||||
|
||||
switch (ins.op) {
|
||||
case DxbcOpcode::AtomicAnd: store = DxvkAccessOp::And; break;
|
||||
case DxbcOpcode::AtomicOr: store = DxvkAccessOp::Or; break;
|
||||
case DxbcOpcode::AtomicXor: store = DxvkAccessOp::Xor; break;
|
||||
case DxbcOpcode::AtomicIAdd: store = DxvkAccessOp::Add; break;
|
||||
case DxbcOpcode::AtomicIMax: store = DxvkAccessOp::IMax; break;
|
||||
case DxbcOpcode::AtomicIMin: store = DxvkAccessOp::IMin; break;
|
||||
case DxbcOpcode::AtomicUMax: store = DxvkAccessOp::UMax; break;
|
||||
case DxbcOpcode::AtomicUMin: store = DxvkAccessOp::UMin; break;
|
||||
default: break;
|
||||
}
|
||||
|
||||
if (m_analysis->uavInfos[registerId].atomicStore == DxvkAccessOp::None)
|
||||
m_analysis->uavInfos[registerId].atomicStore = store;
|
||||
|
||||
// Maintain ordering if the UAV is accessed via other operations as well
|
||||
if (store == DxvkAccessOp::None || m_analysis->uavInfos[registerId].atomicStore != store)
|
||||
m_analysis->uavInfos[registerId].nonInvariantAccess = true;
|
||||
}
|
||||
} break;
|
||||
|
||||
|
||||
case DxbcInstClass::TextureSample:
|
||||
case DxbcInstClass::TextureGather:
|
||||
case DxbcInstClass::TextureQueryLod:
|
||||
case DxbcInstClass::VectorDeriv: {
|
||||
m_analysis->usesDerivatives = true;
|
||||
} break;
|
||||
|
||||
|
||||
case DxbcInstClass::ControlFlow: {
|
||||
if (ins.op == DxbcOpcode::Discard)
|
||||
m_analysis->usesKill = true;
|
||||
} break;
|
||||
|
||||
|
||||
case DxbcInstClass::BufferLoad: {
|
||||
uint32_t operandId = ins.op == DxbcOpcode::LdStructured ? 2 : 1;
|
||||
bool sparseFeedback = ins.dstCount == 2;
|
||||
|
@ -58,16 +80,18 @@ namespace dxvk {
|
|||
const uint32_t registerId = ins.src[operandId].idx[0].offset;
|
||||
m_analysis->uavInfos[registerId].accessFlags |= VK_ACCESS_SHADER_READ_BIT;
|
||||
m_analysis->uavInfos[registerId].sparseFeedback |= sparseFeedback;
|
||||
m_analysis->uavInfos[registerId].nonInvariantAccess = true;
|
||||
} else if (ins.src[operandId].type == DxbcOperandType::Resource) {
|
||||
const uint32_t registerId = ins.src[operandId].idx[0].offset;
|
||||
m_analysis->srvInfos[registerId].sparseFeedback |= sparseFeedback;
|
||||
}
|
||||
} break;
|
||||
|
||||
|
||||
case DxbcInstClass::BufferStore: {
|
||||
if (ins.dst[0].type == DxbcOperandType::UnorderedAccessView) {
|
||||
const uint32_t registerId = ins.dst[0].idx[0].offset;
|
||||
m_analysis->uavInfos[registerId].accessFlags |= VK_ACCESS_SHADER_WRITE_BIT;
|
||||
m_analysis->uavInfos[registerId].nonInvariantAccess = true;
|
||||
}
|
||||
} break;
|
||||
|
||||
|
@ -75,13 +99,15 @@ namespace dxvk {
|
|||
const uint32_t registerId = ins.src[1].idx[0].offset;
|
||||
m_analysis->uavInfos[registerId].accessTypedLoad = true;
|
||||
m_analysis->uavInfos[registerId].accessFlags |= VK_ACCESS_SHADER_READ_BIT;
|
||||
m_analysis->uavInfos[registerId].nonInvariantAccess = true;
|
||||
} break;
|
||||
|
||||
case DxbcInstClass::TypedUavStore: {
|
||||
const uint32_t registerId = ins.dst[0].idx[0].offset;
|
||||
m_analysis->uavInfos[registerId].accessFlags |= VK_ACCESS_SHADER_WRITE_BIT;
|
||||
m_analysis->uavInfos[registerId].nonInvariantAccess = true;
|
||||
} break;
|
||||
|
||||
|
||||
default:
|
||||
break;
|
||||
}
|
||||
|
|
|
@ -17,9 +17,11 @@ namespace dxvk {
|
|||
* will be used to generate image types.
|
||||
*/
|
||||
struct DxbcUavInfo {
|
||||
bool accessTypedLoad = false;
|
||||
bool accessAtomicOp = false;
|
||||
bool sparseFeedback = false;
|
||||
bool accessTypedLoad = false;
|
||||
bool accessAtomicOp = false;
|
||||
bool sparseFeedback = false;
|
||||
bool nonInvariantAccess = false;
|
||||
DxvkAccessOp atomicStore = DxvkAccessOp::None;
|
||||
VkAccessFlags accessFlags = 0;
|
||||
};
|
||||
|
||||
|
|
|
@ -1098,6 +1098,9 @@ namespace dxvk {
|
|||
: VK_DESCRIPTOR_TYPE_STORAGE_IMAGE;
|
||||
binding.access = m_analysis->uavInfos[registerId].accessFlags;
|
||||
|
||||
if (!m_analysis->uavInfos[registerId].nonInvariantAccess)
|
||||
binding.accessOp = m_analysis->uavInfos[registerId].atomicStore;
|
||||
|
||||
if (!(binding.access & VK_ACCESS_SHADER_WRITE_BIT))
|
||||
m_module.decorate(varId, spv::DecorationNonWritable);
|
||||
if (!(binding.access & VK_ACCESS_SHADER_READ_BIT))
|
||||
|
@ -1234,9 +1237,14 @@ namespace dxvk {
|
|||
: (isUav ? VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER : VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER);
|
||||
binding.viewType = VK_IMAGE_VIEW_TYPE_MAX_ENUM;
|
||||
binding.resourceBinding = bindingId;
|
||||
binding.access = isUav
|
||||
? m_analysis->uavInfos[registerId].accessFlags
|
||||
: VkAccessFlags(VK_ACCESS_SHADER_READ_BIT);
|
||||
binding.access = VK_ACCESS_SHADER_READ_BIT;
|
||||
|
||||
if (isUav) {
|
||||
binding.access = m_analysis->uavInfos[registerId].accessFlags;
|
||||
|
||||
if (!m_analysis->uavInfos[registerId].nonInvariantAccess)
|
||||
binding.accessOp = m_analysis->uavInfos[registerId].atomicStore;
|
||||
}
|
||||
|
||||
if (useRawSsbo || isUav) {
|
||||
if (!(binding.access & VK_ACCESS_SHADER_WRITE_BIT))
|
||||
|
|
|
@ -1957,7 +1957,7 @@ namespace dxvk {
|
|||
|
||||
if (m_moduleInfo.options.d3d9FloatEmulation == D3D9FloatEmulation::Enabled) {
|
||||
result.id = m_module.opNMin(typeId, result.id,
|
||||
m_module.constfReplicant(FLT_MAX, result.type.ccount));
|
||||
m_module.constfReplicant(std::numeric_limits<float>::max(), result.type.ccount));
|
||||
}
|
||||
break;
|
||||
case DxsoOpcode::Rsq:
|
||||
|
@ -1969,7 +1969,7 @@ namespace dxvk {
|
|||
|
||||
if (m_moduleInfo.options.d3d9FloatEmulation == D3D9FloatEmulation::Enabled) {
|
||||
result.id = m_module.opNMin(typeId, result.id,
|
||||
m_module.constfReplicant(FLT_MAX, result.type.ccount));
|
||||
m_module.constfReplicant(std::numeric_limits<float>::max(), result.type.ccount));
|
||||
}
|
||||
break;
|
||||
case DxsoOpcode::Dp3: {
|
||||
|
@ -2029,7 +2029,7 @@ namespace dxvk {
|
|||
|
||||
if (m_moduleInfo.options.d3d9FloatEmulation == D3D9FloatEmulation::Enabled) {
|
||||
result.id = m_module.opNMin(typeId, result.id,
|
||||
m_module.constfReplicant(FLT_MAX, result.type.ccount));
|
||||
m_module.constfReplicant(std::numeric_limits<float>::max(), result.type.ccount));
|
||||
}
|
||||
break;
|
||||
}
|
||||
|
@ -2040,7 +2040,7 @@ namespace dxvk {
|
|||
|
||||
if (m_moduleInfo.options.d3d9FloatEmulation == D3D9FloatEmulation::Enabled) {
|
||||
result.id = m_module.opNMin(typeId, result.id,
|
||||
m_module.constfReplicant(FLT_MAX, result.type.ccount));
|
||||
m_module.constfReplicant(std::numeric_limits<float>::max(), result.type.ccount));
|
||||
}
|
||||
break;
|
||||
case DxsoOpcode::Pow: {
|
||||
|
@ -2102,7 +2102,7 @@ namespace dxvk {
|
|||
rcpLength.type = scalarType;
|
||||
rcpLength.id = m_module.opInverseSqrt(scalarTypeId, dot.id);
|
||||
if (m_moduleInfo.options.d3d9FloatEmulation == D3D9FloatEmulation::Enabled) {
|
||||
rcpLength.id = m_module.opNMin(scalarTypeId, rcpLength.id, m_module.constf32(FLT_MAX));
|
||||
rcpLength.id = m_module.opNMin(scalarTypeId, rcpLength.id, m_module.constf32(std::numeric_limits<float>::max()));
|
||||
}
|
||||
|
||||
// r * rsq(r . r)
|
||||
|
@ -2216,7 +2216,7 @@ namespace dxvk {
|
|||
result.id = m_module.opLog2(typeId, result.id);
|
||||
if (m_moduleInfo.options.d3d9FloatEmulation == D3D9FloatEmulation::Enabled) {
|
||||
result.id = m_module.opNMax(typeId, result.id,
|
||||
m_module.constfReplicant(-FLT_MAX, result.type.ccount));
|
||||
m_module.constfReplicant(-std::numeric_limits<float>::max(), result.type.ccount));
|
||||
}
|
||||
break;
|
||||
case DxsoOpcode::Lrp:
|
||||
|
@ -2965,7 +2965,7 @@ void DxsoCompiler::emitControlFlowGenericLoop(
|
|||
uint32_t lOffset = m_module.opAccessChain(m_module.defPointerType(float_t, spv::StorageClassUniform),
|
||||
m_ps.sharedState, 1, &index);
|
||||
lOffset = m_module.opLoad(float_t, lOffset);
|
||||
|
||||
|
||||
uint32_t zIndex = 2;
|
||||
uint32_t scale = m_module.opCompositeExtract(float_t, result.id, 1, &zIndex);
|
||||
scale = m_module.opFMul(float_t, scale, lScale);
|
||||
|
@ -2980,7 +2980,7 @@ void DxsoCompiler::emitControlFlowGenericLoop(
|
|||
|
||||
auto SampleType = [&](DxsoSamplerType samplerType) {
|
||||
uint32_t bitOffset = m_programInfo.type() == DxsoProgramTypes::VertexShader
|
||||
? samplerIdx + caps::MaxTexturesPS + 1
|
||||
? samplerIdx + FirstVSSamplerSlot
|
||||
: samplerIdx;
|
||||
|
||||
uint32_t isNull = m_spec.get(m_module, m_specUbo, SpecSamplerNull, bitOffset, 1);
|
||||
|
|
|
@ -86,7 +86,7 @@ namespace dxvk {
|
|||
case DxsoOpcode::SetP: return 3;
|
||||
case DxsoOpcode::TexLdl: return 3;
|
||||
case DxsoOpcode::BreakP: return 2;
|
||||
default: Logger::warn("DxsoGetDefaultOpcodeLength: unknown opcode to get default length for."); return UINT32_MAX;
|
||||
default: Logger::warn("DxsoGetDefaultOpcodeLength: unknown opcode to get default length for."); return std::numeric_limits<uint32_t>::max();
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -4,7 +4,7 @@
|
|||
|
||||
namespace dxvk {
|
||||
|
||||
constexpr uint32_t InvalidOpcodeLength = UINT32_MAX;
|
||||
constexpr uint32_t InvalidOpcodeLength = std::numeric_limits<uint32_t>::max();
|
||||
|
||||
uint32_t DxsoGetDefaultOpcodeLength(DxsoOpcode opcode);
|
||||
|
||||
|
|
|
@ -20,20 +20,37 @@ namespace dxvk {
|
|||
|
||||
bool DxvkBarrierTracker::findRange(
|
||||
const DxvkAddressRange& range,
|
||||
DxvkAccess accessType) const {
|
||||
DxvkAccess accessType,
|
||||
DxvkAccessOp accessOp) const {
|
||||
uint32_t rootIndex = computeRootIndex(range, accessType);
|
||||
return findNode(range, rootIndex);
|
||||
uint32_t nodeIndex = findNode(range, rootIndex);
|
||||
|
||||
if (likely(!nodeIndex || accessOp == DxvkAccessOp::None))
|
||||
return nodeIndex;
|
||||
|
||||
// If we are checking for a specific order-invariant store
|
||||
// op, the op must have been the only op used to access the
|
||||
// resource, and the tracked range must cover the requested
|
||||
// range in its entirety so we can rule out that other parts
|
||||
// of the resource have been accessed in a different way.
|
||||
auto& node = m_nodes[nodeIndex];
|
||||
|
||||
return node.payload.accessOps != DxvkAccessOps(accessOp)
|
||||
|| !node.addressRange.contains(range);
|
||||
}
|
||||
|
||||
|
||||
void DxvkBarrierTracker::insertRange(
|
||||
const DxvkAddressRange& range,
|
||||
DxvkAccess accessType) {
|
||||
uint32_t rootIndex = computeRootIndex(range, accessType);
|
||||
DxvkAccess accessType,
|
||||
DxvkAccessOp accessOp) {
|
||||
DxvkBarrierPayload payload = { };
|
||||
payload.accessOps.set(accessOp);
|
||||
|
||||
// If we can just insert the node with no conflicts,
|
||||
// we don't have to do anything.
|
||||
uint32_t nodeIndex = insertNode(range, rootIndex);
|
||||
uint32_t rootIndex = computeRootIndex(range, accessType);
|
||||
uint32_t nodeIndex = insertNode(range, rootIndex, payload);
|
||||
|
||||
if (likely(!nodeIndex))
|
||||
return;
|
||||
|
@ -41,6 +58,7 @@ namespace dxvk {
|
|||
// If there's an existing node and it contains the entire
|
||||
// range we want to add already, also don't do anything.
|
||||
auto& node = m_nodes[nodeIndex];
|
||||
node.payload.accessOps.set(payload.accessOps);
|
||||
|
||||
if (node.addressRange.contains(range))
|
||||
return;
|
||||
|
@ -82,12 +100,14 @@ namespace dxvk {
|
|||
mergedRange.rangeStart = std::min(mergedRange.rangeStart, node.addressRange.rangeStart);
|
||||
mergedRange.rangeEnd = std::max(mergedRange.rangeEnd, node.addressRange.rangeEnd);
|
||||
|
||||
payload.accessOps.set(node.payload.accessOps);
|
||||
|
||||
removeNode(nodeIndex, rootIndex);
|
||||
|
||||
nodeIndex = findNode(range, rootIndex);
|
||||
}
|
||||
|
||||
insertNode(mergedRange, rootIndex);
|
||||
insertNode(mergedRange, rootIndex, payload);
|
||||
}
|
||||
|
||||
|
||||
|
@ -166,7 +186,8 @@ namespace dxvk {
|
|||
|
||||
uint32_t DxvkBarrierTracker::insertNode(
|
||||
const DxvkAddressRange& range,
|
||||
uint32_t rootIndex) {
|
||||
uint32_t rootIndex,
|
||||
DxvkBarrierPayload payload) {
|
||||
// Check if the given root is valid at all
|
||||
uint64_t rootBit = uint64_t(1u) << (rootIndex - 1u);
|
||||
|
||||
|
@ -178,6 +199,7 @@ namespace dxvk {
|
|||
auto& node = m_nodes[rootIndex];
|
||||
node.header = 0;
|
||||
node.addressRange = range;
|
||||
node.payload = payload;
|
||||
return 0;
|
||||
} else {
|
||||
// Traverse tree and abort if we find any range
|
||||
|
@ -209,6 +231,7 @@ namespace dxvk {
|
|||
node.setRed(true);
|
||||
node.setParent(parentIndex);
|
||||
node.addressRange = range;
|
||||
node.payload = payload;
|
||||
|
||||
// Only do the fixup to maintain red-black properties if
|
||||
// we haven't marked the root node as red in a deletion.
|
||||
|
@ -238,6 +261,7 @@ namespace dxvk {
|
|||
childIndex = m_nodes[childIndex].child(0);
|
||||
|
||||
node.addressRange = m_nodes[childIndex].addressRange;
|
||||
node.payload = m_nodes[childIndex].payload;
|
||||
removeNode(childIndex, rootIndex);
|
||||
} else {
|
||||
// Deletion is expected to be exceptionally rare, to the point of
|
||||
|
@ -268,6 +292,7 @@ namespace dxvk {
|
|||
node.setRed(child.isRed());
|
||||
|
||||
node.addressRange = child.addressRange;
|
||||
node.payload = child.payload;
|
||||
|
||||
if (cl) m_nodes[cl].setParent(nodeIndex);
|
||||
if (cr) m_nodes[cr].setParent(nodeIndex);
|
||||
|
@ -378,6 +403,7 @@ namespace dxvk {
|
|||
node.setChild(1, rr);
|
||||
|
||||
std::swap(node.addressRange, m_nodes[r].addressRange);
|
||||
std::swap(node.payload, m_nodes[r].payload);
|
||||
}
|
||||
|
||||
|
||||
|
@ -406,6 +432,7 @@ namespace dxvk {
|
|||
node.setChild(1, l);
|
||||
|
||||
std::swap(node.addressRange, m_nodes[l].addressRange);
|
||||
std::swap(node.payload, m_nodes[l].payload);
|
||||
}
|
||||
|
||||
|
||||
|
@ -498,4 +525,4 @@ namespace dxvk {
|
|||
flush(list);
|
||||
}
|
||||
|
||||
}
|
||||
}
|
||||
|
|
|
@ -42,6 +42,14 @@ namespace dxvk {
|
|||
};
|
||||
|
||||
|
||||
/**
|
||||
* \brief Barrier node payload
|
||||
*/
|
||||
struct DxvkBarrierPayload {
|
||||
DxvkAccessOps accessOps = 0u;
|
||||
};
|
||||
|
||||
|
||||
/**
|
||||
* \brief Barrier tree node
|
||||
*
|
||||
|
@ -62,6 +70,9 @@ namespace dxvk {
|
|||
// Address range of the node
|
||||
DxvkAddressRange addressRange = { };
|
||||
|
||||
// Node payload
|
||||
DxvkBarrierPayload payload = { };
|
||||
|
||||
void setRed(bool red) {
|
||||
header &= ~uint64_t(1u);
|
||||
header |= uint64_t(red);
|
||||
|
@ -117,21 +128,25 @@ namespace dxvk {
|
|||
*
|
||||
* \param [in] range Resource range
|
||||
* \param [in] accessType Access type
|
||||
* \param [in] accessOp Access operation
|
||||
* \returns \c true if the range has a pending access
|
||||
*/
|
||||
bool findRange(
|
||||
const DxvkAddressRange& range,
|
||||
DxvkAccess accessType) const;
|
||||
DxvkAccess accessType,
|
||||
DxvkAccessOp accessOp) const;
|
||||
|
||||
/**
|
||||
* \brief Inserts address range for a given access type
|
||||
*
|
||||
* \param [in] range Resource range
|
||||
* \param [in] accessType Access type
|
||||
* \param [in] accessOp Access operation
|
||||
*/
|
||||
void insertRange(
|
||||
const DxvkAddressRange& range,
|
||||
DxvkAccess accessType);
|
||||
DxvkAccess accessType,
|
||||
DxvkAccessOp accessOp);
|
||||
|
||||
/**
|
||||
* \brief Clears the entire structure
|
||||
|
@ -166,7 +181,8 @@ namespace dxvk {
|
|||
|
||||
uint32_t insertNode(
|
||||
const DxvkAddressRange& range,
|
||||
uint32_t rootIndex);
|
||||
uint32_t rootIndex,
|
||||
DxvkBarrierPayload payload);
|
||||
|
||||
void removeNode(
|
||||
uint32_t nodeIndex,
|
||||
|
@ -285,4 +301,4 @@ namespace dxvk {
|
|||
|
||||
};
|
||||
|
||||
}
|
||||
}
|
||||
|
|
File diff suppressed because it is too large
Load diff
|
@ -763,11 +763,14 @@ namespace dxvk {
|
|||
* \param [in] offset Draw buffer offset
|
||||
* \param [in] count Number of draws
|
||||
* \param [in] stride Stride between dispatch calls
|
||||
* \param [in] unroll Whether to unroll multiple draws if
|
||||
* there are any potential data dependencies between them.
|
||||
*/
|
||||
void drawIndirect(
|
||||
VkDeviceSize offset,
|
||||
uint32_t count,
|
||||
uint32_t stride);
|
||||
uint32_t stride,
|
||||
bool unroll);
|
||||
|
||||
/**
|
||||
* \brief Indirect draw call
|
||||
|
@ -809,12 +812,15 @@ namespace dxvk {
|
|||
* \param [in] offset Draw buffer offset
|
||||
* \param [in] count Number of draws
|
||||
* \param [in] stride Stride between dispatch calls
|
||||
* \param [in] unroll Whether to unroll multiple draws if
|
||||
* there are any potential data dependencies between them.
|
||||
*/
|
||||
void drawIndexedIndirect(
|
||||
VkDeviceSize offset,
|
||||
uint32_t count,
|
||||
uint32_t stride);
|
||||
|
||||
uint32_t stride,
|
||||
bool unroll);
|
||||
|
||||
/**
|
||||
* \brief Indirect indexed draw call
|
||||
*
|
||||
|
@ -832,14 +838,14 @@ namespace dxvk {
|
|||
uint32_t stride);
|
||||
|
||||
/**
|
||||
* \brief Transform feddback draw call
|
||||
|
||||
* \param [in] counterBuffer Xfb counter buffer
|
||||
* \brief Transform feedback draw call
|
||||
*
|
||||
* \param [in] counterOffset Draw count offset
|
||||
* \param [in] counterDivisor Vertex stride
|
||||
* \param [in] counterBias Counter bias
|
||||
*/
|
||||
void drawIndirectXfb(
|
||||
const DxvkBufferSlice& counterBuffer,
|
||||
VkDeviceSize counterOffset,
|
||||
uint32_t counterDivisor,
|
||||
uint32_t counterBias);
|
||||
|
||||
|
@ -1434,9 +1440,9 @@ namespace dxvk {
|
|||
DxvkBarrierControlFlags m_barrierControl;
|
||||
|
||||
DxvkGpuQueryManager m_queryManager;
|
||||
|
||||
DxvkGlobalPipelineBarrier m_globalRoGraphicsBarrier;
|
||||
DxvkGlobalPipelineBarrier m_globalRwGraphicsBarrier;
|
||||
|
||||
DxvkGlobalPipelineBarrier m_renderPassBarrierSrc = { };
|
||||
DxvkGlobalPipelineBarrier m_renderPassBarrierDst = { };
|
||||
|
||||
DxvkRenderTargetLayouts m_rtLayouts = { };
|
||||
|
||||
|
@ -1453,7 +1459,6 @@ namespace dxvk {
|
|||
std::vector<VkImageMemoryBarrier2> m_imageLayoutTransitions;
|
||||
|
||||
std::vector<util::DxvkDebugLabel> m_debugLabelStack;
|
||||
bool m_debugLabelInternalActive = false;
|
||||
|
||||
Rc<DxvkLatencyTracker> m_latencyTracker;
|
||||
uint64_t m_latencyFrameId = 0u;
|
||||
|
@ -1590,6 +1595,20 @@ namespace dxvk {
|
|||
const Rc<DxvkBuffer>& buffer,
|
||||
VkDeviceSize offset);
|
||||
|
||||
template<bool Indexed>
|
||||
void drawIndirectGeneric(
|
||||
VkDeviceSize offset,
|
||||
uint32_t count,
|
||||
uint32_t stride,
|
||||
bool unroll);
|
||||
|
||||
template<bool Indexed>
|
||||
void drawIndirectCountGeneric(
|
||||
VkDeviceSize offset,
|
||||
VkDeviceSize countOffset,
|
||||
uint32_t maxCount,
|
||||
uint32_t stride);
|
||||
|
||||
void resolveImageHw(
|
||||
const Rc<DxvkImage>& dstImage,
|
||||
const Rc<DxvkImage>& srcImage,
|
||||
|
@ -1691,7 +1710,9 @@ namespace dxvk {
|
|||
|
||||
void unbindGraphicsPipeline();
|
||||
bool updateGraphicsPipeline();
|
||||
bool updateGraphicsPipelineState(DxvkGlobalPipelineBarrier srcBarrier);
|
||||
bool updateGraphicsPipelineState();
|
||||
|
||||
uint32_t getGraphicsPipelineDebugColor() const;
|
||||
|
||||
template<VkPipelineBindPoint BindPoint>
|
||||
void resetSpecConstants(
|
||||
|
@ -1757,34 +1778,61 @@ namespace dxvk {
|
|||
template<bool Indexed, bool Indirect>
|
||||
bool commitGraphicsState();
|
||||
|
||||
template<bool DoEmit>
|
||||
void commitComputeBarriers();
|
||||
template<VkPipelineBindPoint BindPoint>
|
||||
bool checkResourceHazards(
|
||||
const DxvkBindingLayout& layout,
|
||||
uint32_t setMask);
|
||||
|
||||
void commitComputePostBarriers();
|
||||
|
||||
template<bool Indexed, bool Indirect, bool DoEmit>
|
||||
void commitGraphicsBarriers();
|
||||
bool checkComputeHazards();
|
||||
|
||||
template<bool DoEmit>
|
||||
template<bool Indexed, bool Indirect>
|
||||
bool checkGraphicsHazards();
|
||||
|
||||
template<VkPipelineBindPoint BindPoint>
|
||||
bool checkBufferBarrier(
|
||||
const DxvkBufferSlice& bufferSlice,
|
||||
VkPipelineStageFlags stages,
|
||||
VkAccessFlags access);
|
||||
VkAccessFlags access,
|
||||
DxvkAccessOp accessOp);
|
||||
|
||||
template<bool DoEmit>
|
||||
template<VkPipelineBindPoint BindPoint>
|
||||
bool checkBufferViewBarrier(
|
||||
const Rc<DxvkBufferView>& bufferView,
|
||||
VkPipelineStageFlags stages,
|
||||
VkAccessFlags access);
|
||||
VkAccessFlags access,
|
||||
DxvkAccessOp accessOp);
|
||||
|
||||
template<bool DoEmit>
|
||||
template<VkPipelineBindPoint BindPoint>
|
||||
bool checkImageViewBarrier(
|
||||
const Rc<DxvkImageView>& imageView,
|
||||
VkPipelineStageFlags stages,
|
||||
VkAccessFlags access);
|
||||
VkAccessFlags access,
|
||||
DxvkAccessOp accessOp);
|
||||
|
||||
template<VkPipelineBindPoint BindPoint>
|
||||
DxvkAccessFlags getAllowedStorageHazards() {
|
||||
if (m_barrierControl.isClear() || m_flags.test(DxvkContextFlag::ForceWriteAfterWriteSync))
|
||||
return DxvkAccessFlags();
|
||||
|
||||
if constexpr (BindPoint == VK_PIPELINE_BIND_POINT_COMPUTE) {
|
||||
// If there are any pending accesses that are not directly related
|
||||
// to shader dispatches, always insert a barrier if there is a hazard.
|
||||
VkPipelineStageFlags2 stageMask = VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT
|
||||
| VK_PIPELINE_STAGE_2_DRAW_INDIRECT_BIT;
|
||||
|
||||
if (!m_execBarriers.hasPendingStages(~stageMask)) {
|
||||
if (m_barrierControl.test(DxvkBarrierControl::ComputeAllowReadWriteOverlap))
|
||||
return DxvkAccessFlags(DxvkAccess::Write, DxvkAccess::Read);
|
||||
else if (m_barrierControl.test(DxvkBarrierControl::ComputeAllowWriteOnlyOverlap))
|
||||
return DxvkAccessFlags(DxvkAccess::Write);
|
||||
}
|
||||
} else {
|
||||
// For graphics, the only type of unrelated access we have to worry about
|
||||
// is transform feedback writes, in which case inserting a barrier is fine.
|
||||
if (m_barrierControl.test(DxvkBarrierControl::GraphicsAllowReadWriteOverlap))
|
||||
return DxvkAccessFlags(DxvkAccess::Write, DxvkAccess::Read);
|
||||
}
|
||||
|
||||
return DxvkAccessFlags();
|
||||
}
|
||||
|
||||
bool canIgnoreWawHazards(
|
||||
VkPipelineStageFlags stages);
|
||||
|
||||
void emitMemoryBarrier(
|
||||
VkPipelineStageFlags srcStages,
|
||||
|
@ -1876,7 +1924,15 @@ namespace dxvk {
|
|||
const VkImageSubresourceRange& subresources,
|
||||
VkImageLayout srcLayout,
|
||||
VkPipelineStageFlags2 srcStages,
|
||||
VkAccessFlags2 srcAccess);
|
||||
VkAccessFlags2 srcAccess,
|
||||
DxvkAccessOp accessOp);
|
||||
|
||||
void accessImage(
|
||||
DxvkCmdBuffer cmdBuffer,
|
||||
const DxvkImageView& imageView,
|
||||
VkPipelineStageFlags2 srcStages,
|
||||
VkAccessFlags2 srcAccess,
|
||||
DxvkAccessOp accessOp);
|
||||
|
||||
void accessImage(
|
||||
DxvkCmdBuffer cmdBuffer,
|
||||
|
@ -1887,7 +1943,8 @@ namespace dxvk {
|
|||
VkAccessFlags2 srcAccess,
|
||||
VkImageLayout dstLayout,
|
||||
VkPipelineStageFlags2 dstStages,
|
||||
VkAccessFlags2 dstAccess);
|
||||
VkAccessFlags2 dstAccess,
|
||||
DxvkAccessOp accessOp);
|
||||
|
||||
void accessBuffer(
|
||||
DxvkCmdBuffer cmdBuffer,
|
||||
|
@ -1895,7 +1952,8 @@ namespace dxvk {
|
|||
VkDeviceSize offset,
|
||||
VkDeviceSize size,
|
||||
VkPipelineStageFlags2 srcStages,
|
||||
VkAccessFlags2 srcAccess);
|
||||
VkAccessFlags2 srcAccess,
|
||||
DxvkAccessOp accessOp);
|
||||
|
||||
void accessBuffer(
|
||||
DxvkCmdBuffer cmdBuffer,
|
||||
|
@ -1905,13 +1963,31 @@ namespace dxvk {
|
|||
VkPipelineStageFlags2 srcStages,
|
||||
VkAccessFlags2 srcAccess,
|
||||
VkPipelineStageFlags2 dstStages,
|
||||
VkAccessFlags2 dstAccess);
|
||||
VkAccessFlags2 dstAccess,
|
||||
DxvkAccessOp accessOp);
|
||||
|
||||
void accessBuffer(
|
||||
DxvkCmdBuffer cmdBuffer,
|
||||
const DxvkBufferSlice& bufferSlice,
|
||||
VkPipelineStageFlags2 srcStages,
|
||||
VkAccessFlags2 srcAccess,
|
||||
DxvkAccessOp accessOp);
|
||||
|
||||
void accessBuffer(
|
||||
DxvkCmdBuffer cmdBuffer,
|
||||
const DxvkBufferSlice& bufferSlice,
|
||||
VkPipelineStageFlags2 srcStages,
|
||||
VkAccessFlags2 srcAccess,
|
||||
VkPipelineStageFlags2 dstStages,
|
||||
VkAccessFlags2 dstAccess,
|
||||
DxvkAccessOp accessOp);
|
||||
|
||||
void accessBuffer(
|
||||
DxvkCmdBuffer cmdBuffer,
|
||||
DxvkBufferView& bufferView,
|
||||
VkPipelineStageFlags2 srcStages,
|
||||
VkAccessFlags2 srcAccess);
|
||||
VkAccessFlags2 srcAccess,
|
||||
DxvkAccessOp accessOp);
|
||||
|
||||
void accessBuffer(
|
||||
DxvkCmdBuffer cmdBuffer,
|
||||
|
@ -1919,7 +1995,17 @@ namespace dxvk {
|
|||
VkPipelineStageFlags2 srcStages,
|
||||
VkAccessFlags2 srcAccess,
|
||||
VkPipelineStageFlags2 dstStages,
|
||||
VkAccessFlags2 dstAccess);
|
||||
VkAccessFlags2 dstAccess,
|
||||
DxvkAccessOp accessOp);
|
||||
|
||||
void accessDrawBuffer(
|
||||
VkDeviceSize offset,
|
||||
uint32_t count,
|
||||
uint32_t stride,
|
||||
uint32_t size);
|
||||
|
||||
void accessDrawCountBuffer(
|
||||
VkDeviceSize offset);
|
||||
|
||||
void flushPendingAccesses(
|
||||
DxvkBuffer& buffer,
|
||||
|
@ -1946,20 +2032,24 @@ namespace dxvk {
|
|||
DxvkBuffer& buffer,
|
||||
VkDeviceSize offset,
|
||||
VkDeviceSize size,
|
||||
DxvkAccess access);
|
||||
DxvkAccess access,
|
||||
DxvkAccessOp accessOp);
|
||||
|
||||
bool resourceHasAccess(
|
||||
DxvkBufferView& bufferView,
|
||||
DxvkAccess access);
|
||||
DxvkAccess access,
|
||||
DxvkAccessOp accessOp);
|
||||
|
||||
bool resourceHasAccess(
|
||||
DxvkImage& image,
|
||||
const VkImageSubresourceRange& subresources,
|
||||
DxvkAccess access);
|
||||
DxvkAccess access,
|
||||
DxvkAccessOp accessOp);
|
||||
|
||||
bool resourceHasAccess(
|
||||
DxvkImageView& imageView,
|
||||
DxvkAccess access);
|
||||
DxvkAccess access,
|
||||
DxvkAccessOp accessOp);
|
||||
|
||||
DxvkBarrierBatch& getBarrierBatch(
|
||||
DxvkCmdBuffer cmdBuffer);
|
||||
|
@ -1980,34 +2070,55 @@ namespace dxvk {
|
|||
const Rc<DxvkImage>& image,
|
||||
DxvkAccess access);
|
||||
|
||||
template<typename Pred>
|
||||
template<VkPipelineBindPoint BindPoint, typename Pred>
|
||||
bool checkResourceBarrier(
|
||||
const Pred& pred,
|
||||
VkPipelineStageFlags stages,
|
||||
VkAccessFlags access) {
|
||||
// Check for read-after-write first, this is common
|
||||
// If we're only reading the resource, only pending
|
||||
// writes matter for synchronization purposes.
|
||||
bool hasPendingWrite = pred(DxvkAccess::Write);
|
||||
|
||||
if (access & vk::AccessReadMask)
|
||||
if (!(access & vk::AccessWriteMask))
|
||||
return hasPendingWrite;
|
||||
|
||||
// Check for a write-after-write hazard, but
|
||||
// ignore it if there are no reads involved.
|
||||
bool ignoreWaW = canIgnoreWawHazards(stages);
|
||||
if (hasPendingWrite) {
|
||||
// If there is a write-after-write hazard and synchronization
|
||||
// for those is not explicitly disabled, insert a barrier.
|
||||
DxvkAccessFlags allowedHazards = getAllowedStorageHazards<BindPoint>();
|
||||
|
||||
if (hasPendingWrite && !ignoreWaW)
|
||||
return true;
|
||||
if (!allowedHazards.test(DxvkAccess::Write))
|
||||
return true;
|
||||
|
||||
// Check whether there are any pending reads.
|
||||
// Skip barrier if overlapping read-modify-write ops are allowed.
|
||||
// This includes shader atomics, but also non-atomic load-stores.
|
||||
if (allowedHazards.test(DxvkAccess::Read))
|
||||
return false;
|
||||
|
||||
// Otherwise, check if there is a read-after-write hazard.
|
||||
if (access & vk::AccessReadMask)
|
||||
return true;
|
||||
}
|
||||
|
||||
// Check if there are any pending reads to avoid write-after-read issues.
|
||||
return pred(DxvkAccess::Read);
|
||||
}
|
||||
|
||||
void invalidateWriteAfterWriteTracking();
|
||||
|
||||
void beginRenderPassDebugRegion();
|
||||
|
||||
void beginInternalDebugRegion(
|
||||
const VkDebugUtilsLabelEXT& label);
|
||||
template<VkPipelineBindPoint BindPoint>
|
||||
void beginBarrierControlDebugRegion();
|
||||
|
||||
void endInternalDebugRegion();
|
||||
void pushDebugRegion(
|
||||
const VkDebugUtilsLabelEXT& label,
|
||||
util::DxvkDebugLabelType type);
|
||||
|
||||
void popDebugRegion(
|
||||
util::DxvkDebugLabelType type);
|
||||
|
||||
bool hasDebugRegion(
|
||||
util::DxvkDebugLabelType type);
|
||||
|
||||
void beginActiveDebugRegions();
|
||||
|
||||
|
|
|
@ -20,10 +20,11 @@ namespace dxvk {
|
|||
* of the graphics and compute pipelines
|
||||
* has changed and/or needs to be updated.
|
||||
*/
|
||||
enum class DxvkContextFlag : uint32_t {
|
||||
enum class DxvkContextFlag : uint64_t {
|
||||
GpRenderPassBound, ///< Render pass is currently bound
|
||||
GpRenderPassSuspended, ///< Render pass is currently suspended
|
||||
GpRenderPassSecondaryCmd, ///< Render pass uses secondary command buffer
|
||||
GpRenderPassSideEffects, ///< Render pass has side effects
|
||||
GpXfbActive, ///< Transform feedback is enabled
|
||||
GpDirtyFramebuffer, ///< Framebuffer binding is out of date
|
||||
GpDirtyPipeline, ///< Graphics pipeline binding is out of date
|
||||
|
@ -56,10 +57,12 @@ namespace dxvk {
|
|||
DirtyDrawBuffer, ///< Indirect argument buffer is dirty
|
||||
DirtyPushConstants, ///< Push constant data has changed
|
||||
|
||||
ForceWriteAfterWriteSync, ///< Ignores barrier control flags for write-after-write hazards
|
||||
|
||||
Count
|
||||
};
|
||||
|
||||
static_assert(uint32_t(DxvkContextFlag::Count) <= 32u);
|
||||
static_assert(uint32_t(DxvkContextFlag::Count) <= 64u);
|
||||
|
||||
using DxvkContextFlags = Flags<DxvkContextFlag>;
|
||||
|
||||
|
@ -85,8 +88,11 @@ namespace dxvk {
|
|||
* synchronize implicitly.
|
||||
*/
|
||||
enum class DxvkBarrierControl : uint32_t {
|
||||
IgnoreWriteAfterWrite = 1,
|
||||
IgnoreGraphicsBarriers = 2,
|
||||
// Ignores write-after-write hazard
|
||||
ComputeAllowWriteOnlyOverlap = 0,
|
||||
ComputeAllowReadWriteOverlap = 1,
|
||||
|
||||
GraphicsAllowReadWriteOverlap = 2,
|
||||
};
|
||||
|
||||
using DxvkBarrierControlFlags = Flags<DxvkBarrierControl>;
|
||||
|
|
|
@ -955,8 +955,7 @@ namespace dxvk {
|
|||
if (m_shaders.gs->flags().test(DxvkShaderFlag::HasTransformFeedback)) {
|
||||
m_flags.set(DxvkGraphicsPipelineFlag::HasTransformFeedback);
|
||||
|
||||
m_barrier.stages |= VK_PIPELINE_STAGE_DRAW_INDIRECT_BIT
|
||||
| VK_PIPELINE_STAGE_TRANSFORM_FEEDBACK_BIT_EXT;
|
||||
m_barrier.stages |= VK_PIPELINE_STAGE_TRANSFORM_FEEDBACK_BIT_EXT;
|
||||
m_barrier.access |= VK_ACCESS_TRANSFORM_FEEDBACK_COUNTER_READ_BIT_EXT
|
||||
| VK_ACCESS_TRANSFORM_FEEDBACK_COUNTER_WRITE_BIT_EXT
|
||||
| VK_ACCESS_TRANSFORM_FEEDBACK_WRITE_BIT_EXT;
|
||||
|
@ -966,9 +965,13 @@ namespace dxvk {
|
|||
m_flags.set(DxvkGraphicsPipelineFlag::HasRasterizerDiscard);
|
||||
}
|
||||
|
||||
if (m_barrier.access & VK_ACCESS_SHADER_WRITE_BIT)
|
||||
if (m_barrier.access & VK_ACCESS_SHADER_WRITE_BIT) {
|
||||
m_flags.set(DxvkGraphicsPipelineFlag::HasStorageDescriptors);
|
||||
|
||||
if (layout->layout().getHazardousSetMask())
|
||||
m_flags.set(DxvkGraphicsPipelineFlag::UnrollMergedDraws);
|
||||
}
|
||||
|
||||
if (m_shaders.fs != nullptr) {
|
||||
if (m_shaders.fs->flags().test(DxvkShaderFlag::HasSampleRateShading))
|
||||
m_flags.set(DxvkGraphicsPipelineFlag::HasSampleRateShading);
|
||||
|
|
|
@ -31,6 +31,7 @@ namespace dxvk {
|
|||
HasStorageDescriptors,
|
||||
HasSampleRateShading,
|
||||
HasSampleMaskExport,
|
||||
UnrollMergedDraws,
|
||||
};
|
||||
|
||||
using DxvkGraphicsPipelineFlags = Flags<DxvkGraphicsPipelineFlag>;
|
||||
|
@ -660,4 +661,4 @@ namespace dxvk {
|
|||
|
||||
};
|
||||
|
||||
}
|
||||
}
|
||||
|
|
|
@ -205,7 +205,7 @@ namespace dxvk {
|
|||
|
||||
|
||||
DxvkBindingLayout::DxvkBindingLayout(VkShaderStageFlags stages)
|
||||
: m_pushConst { 0, 0, 0 }, m_pushConstStages(0), m_stages(stages) {
|
||||
: m_pushConst { 0, 0, 0 }, m_pushConstStages(0), m_stages(stages), m_hazards(0u) {
|
||||
|
||||
}
|
||||
|
||||
|
@ -236,6 +236,9 @@ namespace dxvk {
|
|||
void DxvkBindingLayout::addBinding(const DxvkBindingInfo& binding) {
|
||||
uint32_t set = binding.computeSetIndex();
|
||||
m_bindings[set].addBinding(binding);
|
||||
|
||||
if ((binding.access & VK_ACCESS_2_SHADER_WRITE_BIT) && binding.accessOp == DxvkAccessOp::None)
|
||||
m_hazards |= 1u << set;
|
||||
}
|
||||
|
||||
|
||||
|
@ -260,6 +263,8 @@ namespace dxvk {
|
|||
|
||||
addPushConstantRange(layout.m_pushConst);
|
||||
m_pushConstStages |= layout.m_pushConstStages;
|
||||
|
||||
m_hazards |= layout.m_hazards;
|
||||
}
|
||||
|
||||
|
||||
|
@ -400,4 +405,4 @@ namespace dxvk {
|
|||
return barrier;
|
||||
}
|
||||
|
||||
}
|
||||
}
|
||||
|
|
|
@ -11,6 +11,27 @@ namespace dxvk {
|
|||
|
||||
class DxvkDevice;
|
||||
|
||||
/**
|
||||
* \brief Order-invariant atomic access operation
|
||||
*
|
||||
* Information used to optimize barriers when a resource
|
||||
* is accessed exclusively via order-invariant stores.
|
||||
*/
|
||||
enum class DxvkAccessOp : uint32_t {
|
||||
None = 0,
|
||||
Or = 1,
|
||||
And = 2,
|
||||
Xor = 3,
|
||||
Add = 4,
|
||||
IMin = 5,
|
||||
IMax = 6,
|
||||
UMin = 7,
|
||||
UMax = 8,
|
||||
};
|
||||
|
||||
using DxvkAccessOps = Flags<DxvkAccessOp>;
|
||||
|
||||
|
||||
/**
|
||||
* \brief Descriptor set indices
|
||||
*/
|
||||
|
@ -37,6 +58,7 @@ namespace dxvk {
|
|||
VkShaderStageFlagBits stage; ///< Shader stage
|
||||
VkAccessFlags access; ///< Access mask for the resource
|
||||
VkBool32 uboSet; ///< Whether to include this in the UBO set
|
||||
DxvkAccessOp accessOp; ///< Order-invariant store type, if any
|
||||
|
||||
/**
|
||||
* \brief Computes descriptor set index for the given binding
|
||||
|
@ -315,6 +337,16 @@ namespace dxvk {
|
|||
return m_stages;
|
||||
}
|
||||
|
||||
/**
|
||||
* \brief Queries hazardous sets
|
||||
*
|
||||
* \returns Mask of sets with storage descriptors
|
||||
* that are not accessed in an order-invariant way.
|
||||
*/
|
||||
uint32_t getHazardousSetMask() const {
|
||||
return m_hazards;
|
||||
}
|
||||
|
||||
/**
|
||||
* \brief Queries defined descriptor set layouts
|
||||
*
|
||||
|
@ -372,6 +404,7 @@ namespace dxvk {
|
|||
VkPushConstantRange m_pushConst;
|
||||
VkShaderStageFlags m_pushConstStages;
|
||||
VkShaderStageFlags m_stages;
|
||||
uint32_t m_hazards;
|
||||
|
||||
};
|
||||
|
||||
|
@ -590,4 +623,4 @@ namespace dxvk {
|
|||
|
||||
};
|
||||
|
||||
}
|
||||
}
|
||||
|
|
|
@ -580,6 +580,27 @@ namespace dxvk {
|
|||
m_trackId = 0u;
|
||||
}
|
||||
|
||||
/**
|
||||
* \brief Checks whether the buffer has been used for gfx stores
|
||||
*
|
||||
* \returns \c true if any graphics pipeline has written this
|
||||
* resource via transform feedback or a storage descriptor.
|
||||
*/
|
||||
bool hasGfxStores() const {
|
||||
return m_hasGfxStores;
|
||||
}
|
||||
|
||||
/**
|
||||
* \brief Tracks graphics pipeline side effects
|
||||
*
|
||||
* Must be called whenever the resource is written via graphics
|
||||
* pipeline storage descriptors or transform feedback.
|
||||
* \returns \c true if side effects were already tracked.
|
||||
*/
|
||||
bool trackGfxStores() {
|
||||
return std::exchange(m_hasGfxStores, true);
|
||||
}
|
||||
|
||||
/**
|
||||
* \brief Queries sparse page table
|
||||
*
|
||||
|
@ -622,6 +643,8 @@ namespace dxvk {
|
|||
uint64_t m_trackId = { 0u };
|
||||
uint64_t m_cookie = { 0u };
|
||||
|
||||
bool m_hasGfxStores = false;
|
||||
|
||||
static constexpr uint64_t getIncrement(DxvkAccess access) {
|
||||
return uint64_t(1u) << (uint32_t(access) * 20u);
|
||||
}
|
||||
|
|
|
@ -4,6 +4,15 @@
|
|||
|
||||
namespace dxvk::util {
|
||||
|
||||
/**
|
||||
* \brief Debug utils label type
|
||||
*/
|
||||
enum class DxvkDebugLabelType : uint32_t {
|
||||
External, ///< App-provided scope
|
||||
InternalRenderPass, ///< Internal render pass markers
|
||||
InternalBarrierControl, ///< Barrier control markers
|
||||
};
|
||||
|
||||
/**
|
||||
* \brief Debug label wrapper
|
||||
*
|
||||
|
@ -16,12 +25,16 @@ namespace dxvk::util {
|
|||
|
||||
DxvkDebugLabel() = default;
|
||||
|
||||
DxvkDebugLabel(const VkDebugUtilsLabelEXT& label)
|
||||
: m_text(label.pLabelName ? label.pLabelName : "") {
|
||||
DxvkDebugLabel(const VkDebugUtilsLabelEXT& label, DxvkDebugLabelType type)
|
||||
: m_text(label.pLabelName ? label.pLabelName : ""), m_type(type) {
|
||||
for (uint32_t i = 0; i < m_color.size(); i++)
|
||||
m_color[i] = label.color[i];
|
||||
}
|
||||
|
||||
DxvkDebugLabelType type() const {
|
||||
return m_type;
|
||||
}
|
||||
|
||||
VkDebugUtilsLabelEXT get() const {
|
||||
VkDebugUtilsLabelEXT label = { VK_STRUCTURE_TYPE_DEBUG_UTILS_LABEL_EXT };
|
||||
label.pLabelName = m_text.c_str();
|
||||
|
@ -34,6 +47,7 @@ namespace dxvk::util {
|
|||
|
||||
std::string m_text;
|
||||
std::array<float, 4> m_color = { };
|
||||
DxvkDebugLabelType m_type;
|
||||
|
||||
};
|
||||
|
||||
|
|
|
@ -1,5 +1,7 @@
|
|||
#pragma once
|
||||
|
||||
#include <cstddef>
|
||||
#include <cstdint>
|
||||
#include <vector>
|
||||
|
||||
#include "com_include.h"
|
||||
|
@ -9,7 +11,7 @@ namespace dxvk {
|
|||
/**
|
||||
* \brief COM private data entry type
|
||||
*/
|
||||
enum ComPrivateDataType {
|
||||
enum class ComPrivateDataType : uint32_t {
|
||||
None,
|
||||
Data,
|
||||
Iface,
|
||||
|
|
|
@ -291,12 +291,11 @@ namespace dxvk {
|
|||
/* Final Fantasy XV: VXAO does thousands of *
|
||||
* draw calls with the same UAV bound */
|
||||
{ R"(\\ffxv_s\.exe$)", {{
|
||||
{ "d3d11.ignoreGraphicsBarriers", "True" },
|
||||
{ "d3d11.relaxedGraphicsBarriers", "True" },
|
||||
}} },
|
||||
/* God of War - relies on NVAPI/AMDAGS for *
|
||||
* barrier stuff, needs nvapi for DLSS */
|
||||
{ R"(\\GoW\.exe$)", {{
|
||||
{ "d3d11.ignoreGraphicsBarriers", "True" },
|
||||
{ "d3d11.relaxedBarriers", "True" },
|
||||
{ "dxgi.hideNvidiaGpu", "False" },
|
||||
{ "dxgi.maxFrameLatency", "1" },
|
||||
|
@ -334,7 +333,7 @@ namespace dxvk {
|
|||
* presumably for culling, which doesn't play *
|
||||
* nicely with D3D11 without vendor libraries */
|
||||
{ R"(\\Stray-Win64-Shipping\.exe$)", {{
|
||||
{ "d3d11.ignoreGraphicsBarriers", "True" },
|
||||
{ "d3d11.relaxedGraphicsBarriers", "True" },
|
||||
}} },
|
||||
/* Metal Gear Solid V: Ground Zeroes *
|
||||
* Texture quality can break at high vram */
|
||||
|
@ -433,7 +432,7 @@ namespace dxvk {
|
|||
* and assumes that AMD GPUs do not expose *
|
||||
* native command lists for AGS usage */
|
||||
{ R"(\\granblue_fantasy_relink\.exe$)", {{
|
||||
{ "d3d11.ignoreGraphicsBarriers", "True" },
|
||||
{ "d3d11.relaxedGraphicsBarriers", "True" },
|
||||
{ "d3d11.exposeDriverCommandLists", "False" },
|
||||
{ "dxgi.hideNvidiaGpu", "False" },
|
||||
}} },
|
||||
|
|
|
@ -40,6 +40,11 @@ namespace dxvk::vk {
|
|||
= VK_ACCESS_HOST_READ_BIT
|
||||
| VK_ACCESS_HOST_WRITE_BIT;
|
||||
|
||||
constexpr static VkAccessFlags AccessGfxSideEffectMask
|
||||
= VK_ACCESS_SHADER_WRITE_BIT
|
||||
| VK_ACCESS_TRANSFORM_FEEDBACK_WRITE_BIT_EXT
|
||||
| VK_ACCESS_TRANSFORM_FEEDBACK_COUNTER_WRITE_BIT_EXT;
|
||||
|
||||
constexpr static VkPipelineStageFlags StageDeviceMask
|
||||
= VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT
|
||||
| VK_PIPELINE_STAGE_DRAW_INDIRECT_BIT
|
||||
|
|
Loading…
Add table
Reference in a new issue