Merge branch 'doitsujin:master' into low-latency-framepacing

This commit is contained in:
netborg-afps 2025-02-21 14:47:36 +01:00 committed by GitHub
commit b59a1b9083
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
41 changed files with 1345 additions and 675 deletions

View file

@ -1009,10 +1009,22 @@ namespace dxvk {
if (!ctrBuf.defined())
return;
EmitCs([=] (DxvkContext* ctx) {
ctx->drawIndirectXfb(ctrBuf,
// We bind the SO counter as an indirect count buffer,
// so reset any tracking we may have been doing here.
m_state.id.reset();
EmitCs([=] (DxvkContext* ctx) mutable {
ctx->bindDrawBuffers(DxvkBufferSlice(),
Forwarder::move(ctrBuf));
ctx->drawIndirectXfb(0u,
vtxBuf.buffer()->getXfbVertexStride(),
vtxBuf.offset());
// Reset draw buffer right away so we don't
// keep the SO counter alive indefinitely
ctx->bindDrawBuffers(DxvkBufferSlice(),
DxvkBufferSlice());
});
}
@ -1109,7 +1121,7 @@ namespace dxvk {
} else {
cmdData = EmitCsCmd<D3D11CmdDrawIndirectData>(
[] (DxvkContext* ctx, const D3D11CmdDrawIndirectData* data) {
ctx->drawIndexedIndirect(data->offset, data->count, data->stride);
ctx->drawIndexedIndirect(data->offset, data->count, data->stride, true);
});
cmdData->type = D3D11CmdType::DrawIndirectIndexed;
@ -1144,7 +1156,7 @@ namespace dxvk {
} else {
cmdData = EmitCsCmd<D3D11CmdDrawIndirectData>(
[] (DxvkContext* ctx, const D3D11CmdDrawIndirectData* data) {
ctx->drawIndirect(data->offset, data->count, data->stride);
ctx->drawIndirect(data->offset, data->count, data->stride, true);
});
cmdData->type = D3D11CmdType::DrawIndirect;
@ -4632,10 +4644,6 @@ namespace dxvk {
ApplyRasterizerSampleCount();
ApplyViewportState();
BindDrawBuffers(
m_state.id.argBuffer.ptr(),
m_state.id.cntBuffer.ptr());
BindIndexBuffer(
m_state.ia.indexBuffer.buffer.ptr(),
m_state.ia.indexBuffer.offset,
@ -4674,6 +4682,11 @@ namespace dxvk {
RestoreSamplers<DxbcProgramType::GeometryShader>();
RestoreSamplers<DxbcProgramType::PixelShader>();
RestoreSamplers<DxbcProgramType::ComputeShader>();
// Draw buffer bindings aren't persistent at the API level, and
// we can't meaningfully track them. Just reset this state here
// and reapply on the next indirect draw.
SetDrawBuffers(nullptr, nullptr);
}
@ -5000,10 +5013,13 @@ namespace dxvk {
auto argBuffer = static_cast<D3D11Buffer*>(pBufferForArgs);
auto cntBuffer = static_cast<D3D11Buffer*>(pBufferForCount);
if (m_state.id.argBuffer != argBuffer
|| m_state.id.cntBuffer != cntBuffer) {
m_state.id.argBuffer = argBuffer;
m_state.id.cntBuffer = cntBuffer;
auto argBufferCookie = argBuffer ? argBuffer->GetCookie() : 0u;
auto cntBufferCookie = cntBuffer ? cntBuffer->GetCookie() : 0u;
if (m_state.id.argBufferCookie != argBufferCookie
|| m_state.id.cntBufferCookie != cntBufferCookie) {
m_state.id.argBufferCookie = argBufferCookie;
m_state.id.cntBufferCookie = cntBufferCookie;
BindDrawBuffers(argBuffer, cntBuffer);
}

View file

@ -53,7 +53,7 @@ namespace dxvk {
cOffset = ByteOffsetForArgs,
cStride = ByteStrideForArgs
] (DxvkContext* ctx) {
ctx->drawIndirect(cOffset, cCount, cStride);
ctx->drawIndirect(cOffset, cCount, cStride, false);
});
}
@ -72,7 +72,7 @@ namespace dxvk {
cOffset = ByteOffsetForArgs,
cStride = ByteStrideForArgs
] (DxvkContext* ctx) {
ctx->drawIndexedIndirect(cOffset, cCount, cStride);
ctx->drawIndexedIndirect(cOffset, cCount, cStride, false);
});
}
@ -146,11 +146,10 @@ namespace dxvk {
D3D11Device* parent = static_cast<D3D11Device*>(m_ctx->GetParentInterface());
DxvkBarrierControlFlags flags = parent->GetOptionsBarrierControlFlags();
if (ControlFlags & D3D11_VK_BARRIER_CONTROL_IGNORE_WRITE_AFTER_WRITE)
flags.set(DxvkBarrierControl::IgnoreWriteAfterWrite);
if (ControlFlags & D3D11_VK_BARRIER_CONTROL_IGNORE_GRAPHICS_UAV)
flags.set(DxvkBarrierControl::IgnoreGraphicsBarriers);
if (ControlFlags & D3D11_VK_BARRIER_CONTROL_IGNORE_WRITE_AFTER_WRITE) {
flags.set(DxvkBarrierControl::ComputeAllowReadWriteOverlap,
DxvkBarrierControl::GraphicsAllowReadWriteOverlap);
}
m_ctx->EmitCs([cFlags = flags] (DxvkContext* ctx) {
ctx->setBarrierControl(cFlags);

View file

@ -865,6 +865,11 @@ namespace dxvk {
Rc<DxvkLatencyTracker> LatencyTracker) {
D3D10DeviceLock lock = LockContext();
// Don't keep draw buffers alive indefinitely. This cannot be
// done in ExecuteFlush because command recording itself might
// flush, so no state changes are allowed to happen there.
SetDrawBuffers(nullptr, nullptr);
EmitCs<false>([
cTracker = std::move(LatencyTracker)
] (DxvkContext* ctx) {

View file

@ -232,12 +232,12 @@ namespace dxvk {
* argument and draw count buffer.
*/
struct D3D11ContextStateID {
Com<D3D11Buffer, false> argBuffer = nullptr;
Com<D3D11Buffer, false> cntBuffer = nullptr;
// Cookies of the currently tracked argument and count buffers.
// A value of 0 is stored when the corresponding buffer is null.
uint64_t argBufferCookie = 0u;
uint64_t cntBufferCookie = 0u;
// Clears the tracked draw buffer state. Afterwards a draw that
// uses a non-null buffer no longer compares equal to this state.
void reset() {
argBuffer = nullptr;
cntBuffer = nullptr;
argBufferCookie = 0u;
cntBufferCookie = 0u;
}
};

View file

@ -472,13 +472,13 @@ namespace dxvk {
const Rc<DxvkAdapter>& Adapter);
// Translates the barrier-related entries of m_d3d11Options into a
// DxvkBarrierControlFlags value and returns it. No flag is set if
// none of the queried options is enabled.
DxvkBarrierControlFlags GetOptionsBarrierControlFlags() {
DxvkBarrierControlFlags barrierControl;
DxvkBarrierControlFlags barrierControl = 0u;
if (m_d3d11Options.relaxedBarriers)
barrierControl.set(DxvkBarrierControl::IgnoreWriteAfterWrite);
barrierControl.set(DxvkBarrierControl::ComputeAllowWriteOnlyOverlap);
if (m_d3d11Options.ignoreGraphicsBarriers)
barrierControl.set(DxvkBarrierControl::IgnoreGraphicsBarriers);
if (m_d3d11Options.relaxedBarriers || m_d3d11Options.relaxedGraphicsBarriers)
barrierControl.set(DxvkBarrierControl::GraphicsAllowReadWriteOverlap);
return barrierControl;
}

View file

@ -24,7 +24,9 @@ enum D3D11_VK_EXTENSION : uint32_t {
*/
enum D3D11_VK_BARRIER_CONTROL : uint32_t {
D3D11_VK_BARRIER_CONTROL_IGNORE_WRITE_AFTER_WRITE = 1 << 0,
D3D11_VK_BARRIER_CONTROL_IGNORE_GRAPHICS_UAV = 1 << 1,
// Removed:
// D3D11_VK_BARRIER_CONTROL_IGNORE_GRAPHICS_UAV = 1 << 1,
};

View file

@ -17,7 +17,7 @@ namespace dxvk {
this->zeroInitWorkgroupMemory = config.getOption<bool>("d3d11.zeroInitWorkgroupMemory", false);
this->forceVolatileTgsmAccess = config.getOption<bool>("d3d11.forceVolatileTgsmAccess", false);
this->relaxedBarriers = config.getOption<bool>("d3d11.relaxedBarriers", false);
this->ignoreGraphicsBarriers = config.getOption<bool>("d3d11.ignoreGraphicsBarriers", false);
this->relaxedGraphicsBarriers = config.getOption<bool>("d3d11.relaxedGraphicsBarriers", false);
this->maxTessFactor = config.getOption<int32_t>("d3d11.maxTessFactor", 0);
this->samplerAnisotropy = config.getOption<int32_t>("d3d11.samplerAnisotropy", -1);
this->samplerLodBias = config.getOption<float>("d3d11.samplerLodBias", 0.0f);

View file

@ -43,7 +43,7 @@ namespace dxvk {
///
/// May improve performance in some games,
/// but might also cause rendering issues.
bool ignoreGraphicsBarriers = false;
bool relaxedGraphicsBarriers = false;
/// Maximum tessellation factor.
///

View file

@ -31,16 +31,28 @@ namespace dxvk {
}
// Drops one public reference and returns the resulting count.
// A call on an object whose count is already zero is ignored and
// returns 0. When the count reaches zero, the private reference is
// released and the reference held on the device is dropped.
ULONG STDMETHODCALLTYPE Release() {
// ignore Release calls on objects with 0 refCount
if(unlikely(!this->m_refCount))
return this->m_refCount;
uint32_t oldRefCount, refCount;
// Decrement via compare-exchange so that the zero check and
// the decrement are applied to the same observed value.
do {
oldRefCount = this->m_refCount.load(std::memory_order_acquire);
// clamp value to 0 to prevent underruns
if (unlikely(!oldRefCount))
return 0;
refCount = oldRefCount - 1;
} while (!this->m_refCount.compare_exchange_weak(oldRefCount,
refCount,
std::memory_order_release,
std::memory_order_acquire));
uint32_t refCount = --this->m_refCount;
if (unlikely(!refCount)) {
// The device pointer is fetched before ReleasePrivate is called,
// presumably because that call may destroy this object - TODO confirm.
auto* pDevice = GetDevice();
this->ReleasePrivate();
pDevice->Release();
}
return refCount;
}

View file

@ -9,7 +9,7 @@
namespace dxvk {
// Parses an unsigned 32-bit integer from str using std::from_chars.
// The result of from_chars is not inspected: if nothing could be
// parsed, the initial value (the maximum uint32_t) is returned.
static inline uint32_t parseDword(std::string_view str) {
uint32_t value = UINT32_MAX;
uint32_t value = std::numeric_limits<uint32_t>::max();
std::from_chars(str.data(), str.data() + str.size(), value);
return value;
}

View file

@ -588,7 +588,7 @@ namespace dxvk {
// Max Vertex Shader Const
pCaps->MaxVertexShaderConst = MaxFloatConstantsVS;
// Max PS1 Value
pCaps->PixelShader1xMaxValue = options.shaderModel > 0 ? FLT_MAX : 0.0f;
pCaps->PixelShader1xMaxValue = options.shaderModel > 0 ? std::numeric_limits<float>::max() : 0.0f;
// Dev Caps 2
pCaps->DevCaps2 = D3DDEVCAPS2_STREAMOFFSET
/* | D3DDEVCAPS2_DMAPNPATCH */

View file

@ -75,7 +75,7 @@ namespace dxvk {
public:
static constexpr UINT AllLayers = UINT32_MAX;
static constexpr UINT AllLayers = std::numeric_limits<uint32_t>::max();
D3D9CommonTexture(
D3D9DeviceEx* pDevice,

View file

@ -1760,7 +1760,7 @@ namespace dxvk {
m_state.depthStencil = ds;
UpdateActiveHazardsDS(UINT32_MAX);
UpdateActiveHazardsDS(std::numeric_limits<uint32_t>::max());
return D3D_OK;
}
@ -2157,7 +2157,7 @@ namespace dxvk {
if (m_state.IsLightEnabled(Index) == !!Enable)
return D3D_OK;
uint32_t searchIndex = UINT32_MAX;
uint32_t searchIndex = std::numeric_limits<uint32_t>::max();
uint32_t setIndex = Index;
if (!Enable)
@ -2384,7 +2384,7 @@ namespace dxvk {
case D3DRS_ZWRITEENABLE:
if (likely(!old != !Value))
UpdateActiveHazardsDS(UINT32_MAX);
UpdateActiveHazardsDS(std::numeric_limits<uint32_t>::max());
[[fallthrough]];
case D3DRS_STENCILENABLE:
case D3DRS_ZENABLE:
@ -3387,7 +3387,7 @@ namespace dxvk {
BindShader<DxsoProgramTypes::VertexShader>(GetCommonShader(shader));
m_vsShaderMasks = newShader->GetShaderMask();
UpdateTextureTypeMismatchesForShader(newShader, m_vsShaderMasks.samplerMask, caps::MaxTexturesPS + 1);
UpdateTextureTypeMismatchesForShader(newShader, m_vsShaderMasks.samplerMask, FirstVSSamplerSlot);
}
else {
m_vsShaderMasks = D3D9ShaderMasks();
@ -3795,8 +3795,8 @@ namespace dxvk {
if (m_psShaderMasks.samplerMask != newShaderMasks.samplerMask ||
m_psShaderMasks.rtMask != newShaderMasks.rtMask) {
m_psShaderMasks = newShaderMasks;
UpdateActiveHazardsRT(UINT32_MAX);
UpdateActiveHazardsDS(UINT32_MAX);
UpdateActiveHazardsRT(std::numeric_limits<uint32_t>::max());
UpdateActiveHazardsDS(std::numeric_limits<uint32_t>::max());
}
return D3D_OK;
@ -6415,12 +6415,15 @@ namespace dxvk {
void D3D9DeviceEx::UpdateTextureTypeMismatchesForTexture(uint32_t stateSampler) {
uint32_t shaderTextureIndex;
const D3D9CommonShader* shader;
if (unlikely(stateSampler > caps::MaxTexturesPS + 1)) {
if (likely(IsPSSampler(stateSampler))) {
shader = GetCommonShader(m_state.pixelShader);
shaderTextureIndex = stateSampler;
} else if (unlikely(IsVSSampler(stateSampler))) {
shader = GetCommonShader(m_state.vertexShader);
shaderTextureIndex = stateSampler - caps::MaxTexturesPS - 1;
} else {
shader = GetCommonShader(m_state.pixelShader);
shaderTextureIndex = stateSampler;
// Do not type check the fixed function displacement map texture.
return;
}
if (unlikely(shader == nullptr || shader->GetInfo().majorVersion() < 2 || m_d3d9Options.forceSamplerTypeSpecConstants)) {
@ -7815,7 +7818,7 @@ namespace dxvk {
if (key.Data.Contents.UseLighting) {
for (uint32_t i = 0; i < caps::MaxEnabledLights; i++) {
if (m_state.enabledLightIndices[i] != UINT32_MAX)
if (m_state.enabledLightIndices[i] != std::numeric_limits<uint32_t>::max())
lightCount++;
}
}
@ -7912,7 +7915,7 @@ namespace dxvk {
uint32_t lightIdx = 0;
for (uint32_t i = 0; i < caps::MaxEnabledLights; i++) {
auto idx = m_state.enabledLightIndices[i];
if (idx == UINT32_MAX)
if (idx == std::numeric_limits<uint32_t>::max())
continue;
data->Lights[lightIdx++] = D3D9Light(m_state.lights[idx].value(), m_state.transforms[GetTransformIndex(D3DTS_VIEW)]);

View file

@ -653,15 +653,41 @@ namespace dxvk {
const D3DDISPLAYMODEEX* pFullscreenDisplayMode,
IDirect3DSwapChain9** ppSwapChain);
/**
* @brief Sets the given sampler state
*
* @param StateSampler Sampler index (according to our internal way of storing samplers)
* @param Type Sampler state type to change
* @param Value State value
*/
HRESULT SetStateSamplerState(
DWORD StateSampler,
D3DSAMPLERSTATETYPE Type,
DWORD Value);
/**
* @brief Sets the given sampler texture
*
* @param StateSampler Sampler index (according to our internal way of storing samplers)
* @param pTexture Texture to use
*/
HRESULT SetStateTexture(DWORD StateSampler, IDirect3DBaseTexture9* pTexture);
/**
* @brief Sets the transform for the given sampler
*
* @param idx Sampler index (according to our internal way of storing samplers)
* @param pMatrix Transform matrix
*/
HRESULT SetStateTransform(uint32_t idx, const D3DMATRIX* pMatrix);
/**
* @brief Sets the fixed function texture processing state
*
* @param Stage Sampler index (according to our internal way of storing samplers)
* @param Type Fixed function texture stage type
* @param Value Value for the state
*/
HRESULT SetStateTextureStageState(
DWORD Stage,
D3D9TextureStageStateTypes Type,
@ -818,8 +844,40 @@ namespace dxvk {
void UpdateActiveFetch4(uint32_t stateSampler);
/**
* @brief Sets the mismatching texture type bits for all samplers if necessary.
*
* This function will check all samplers the shader uses and set the mismatching texture type bit for the given sampler if it does not
* match the texture type expected by the respective shader.
*
* It will *not* unset the bit if the texture type does match.
*
* @param stateSampler Sampler index (according to our internal way of storing samplers)
*/
/**
* @brief Sets the mismatching texture type bits for all samplers if necessary.
*
* This function will check all samplers the shader uses and set the mismatching texture type bit for the given sampler if it does not
* match the texture type expected by the shader.
*
* @param shader The shader
* @param shaderSamplerMask Mask of all samplers that the shader uses (according to our internal way of storing samplers)
* @param shaderSamplerOffset First index of the shader's samplers according to our internal way of storing samplers.
* Used to transform the sampler indices that are relative to the entire pipeline to ones relative to the shader.
*/
void UpdateTextureTypeMismatchesForShader(const D3D9CommonShader* shader, uint32_t shaderSamplerMask, uint32_t shaderSamplerOffset);
/**
* @brief Sets the mismatching texture type bit for the given sampler.
*
* This function will set the mismatching texture type bit for the given sampler if it does not
* match the texture type expected by the respective shader.
*
* It will *not* unset the bit if the texture type does match.
*
* @param stateSampler Sampler index (according to our internal way of storing samplers)
*/
void UpdateTextureTypeMismatchesForTexture(uint32_t stateSampler);
void UploadManagedTexture(D3D9CommonTexture* pResource);

View file

@ -25,12 +25,28 @@ namespace dxvk {
}
// Drops one public reference and returns the resulting count.
// A call on an object whose count is already zero returns 0 without
// decrementing. When the count reaches zero, the private reference
// is released and the reference held on the device is dropped.
ULONG STDMETHODCALLTYPE Release() {
uint32_t refCount = --this->m_refCount;
uint32_t oldRefCount, refCount;
// Decrement via compare-exchange so that the zero check and
// the decrement are applied to the same observed value.
do {
oldRefCount = this->m_refCount.load(std::memory_order_acquire);
// clamp value to 0 to prevent underruns
if (unlikely(!oldRefCount))
return 0;
refCount = oldRefCount - 1;
} while (!this->m_refCount.compare_exchange_weak(oldRefCount,
refCount,
std::memory_order_release,
std::memory_order_acquire));
if (unlikely(!refCount)) {
// The device pointer is fetched before ReleasePrivate is called,
// presumably because that call may destroy this object - TODO confirm.
auto* pDevice = GetDevice();
this->ReleasePrivate();
pDevice->Release();
}
return refCount;
}

View file

@ -1297,7 +1297,7 @@ namespace dxvk {
uint32_t atten = m_module.opFFma (m_floatType, d, atten2, atten1);
atten = m_module.opFFma (m_floatType, d, atten, atten0);
atten = m_module.opFDiv (m_floatType, m_module.constf32(1.0f), atten);
atten = m_module.opNMin (m_floatType, atten, m_module.constf32(FLT_MAX));
atten = m_module.opNMin (m_floatType, atten, m_module.constf32(std::numeric_limits<float>::max()));
atten = m_module.opSelect(m_floatType, m_module.opFOrdGreaterThan(bool_t, d, range), m_module.constf32(0.0f), atten);
atten = m_module.opSelect(m_floatType, isDirectional, m_module.constf32(1.0f), atten);

View file

@ -63,7 +63,7 @@ namespace dxvk {
// do an or per-draw in the device.
// We shift by 17 because 16 ps samplers + 1 dmap (tess)
if (ShaderStage == VK_SHADER_STAGE_VERTEX_BIT)
m_usedSamplers <<= caps::MaxTexturesPS + 1;
m_usedSamplers <<= FirstVSSamplerSlot;
m_usedRTs = pModule->usedRTs();

View file

@ -10,7 +10,7 @@ namespace dxvk {
streamFreq[i] = 1;
for (uint32_t i = 0; i < enabledLightIndices.size(); i++)
enabledLightIndices[i] = UINT32_MAX;
enabledLightIndices[i] = std::numeric_limits<uint32_t>::max();
}

View file

@ -204,7 +204,7 @@ namespace dxvk {
if (m_state.IsLightEnabled(Index) == !!Enable)
return D3D_OK;
uint32_t searchIndex = UINT32_MAX;
uint32_t searchIndex = std::numeric_limits<uint32_t>::max();
uint32_t setIndex = Index;
if (!Enable)
@ -436,7 +436,7 @@ namespace dxvk {
void D3D9StateBlock::CapturePixelSamplerStates() {
m_captures.flags.set(D3D9CapturedStateFlag::SamplerStates);
for (uint32_t i = 0; i < caps::MaxTexturesPS + 1; i++) {
for (uint32_t i = 0; i < FirstVSSamplerSlot; i++) {
m_captures.samplers.set(i, true);
m_captures.samplerStates[i].set(D3DSAMP_ADDRESSU, true);
@ -519,7 +519,7 @@ namespace dxvk {
void D3D9StateBlock::CaptureVertexSamplerStates() {
m_captures.flags.set(D3D9CapturedStateFlag::SamplerStates);
for (uint32_t i = caps::MaxTexturesPS + 1; i < SamplerCount; i++) {
for (uint32_t i = FirstVSSamplerSlot; i < SamplerCount; i++) {
m_captures.samplers.set(i, true);
m_captures.samplerStates[i].set(D3DSAMP_DMAPOFFSET, true);
}

View file

@ -652,17 +652,17 @@ namespace dxvk {
static bool validateGammaRamp(const WORD (&ramp)[256]) {
if (ramp[0] >= ramp[std::size(ramp) - 1]) {
Logger::err("validateGammaRamp: ramp inverted or flat");
Logger::warn("validateGammaRamp: ramp inverted or flat");
return false;
}
for (size_t i = 1; i < std::size(ramp); i++) {
if (ramp[i] < ramp[i - 1]) {
Logger::err("validateGammaRamp: ramp not monotonically increasing");
Logger::warn("validateGammaRamp: ramp not monotonically increasing");
return false;
}
if (ramp[i] - ramp[i - 1] >= UINT16_MAX / 2) {
Logger::err("validateGammaRamp: huuuge jump");
Logger::warn("validateGammaRamp: huuuge jump");
return false;
}
}

View file

@ -44,6 +44,11 @@ namespace dxvk {
}
}
/**
* @brief Returns whether or not the sampler index is valid
*
* @param Sampler Sampler index (according to the API)
*/
inline bool InvalidSampler(DWORD Sampler) {
if (Sampler >= caps::MaxTexturesPS && Sampler < D3DDMAPSAMPLER)
return true;
@ -54,6 +59,19 @@ namespace dxvk {
return false;
}
/**
* @brief The first sampler that belongs to the vertex shader according to our internal way of storing samplers
*/
constexpr uint32_t FirstVSSamplerSlot = caps::MaxTexturesPS + 1;
/**
* @brief Remaps a sampler index by the API to an internal one
*
* Remaps the sampler index according to the way the API counts them to how we count and store them internally.
*
* @param Sampler Sampler index (according to API)
* @return DWORD Sampler index (according to our internal way of storing samplers)
*/
inline DWORD RemapSamplerState(DWORD Sampler) {
if (Sampler >= D3DDMAPSAMPLER)
Sampler = caps::MaxTexturesPS + (Sampler - D3DDMAPSAMPLER);
@ -61,13 +79,62 @@ namespace dxvk {
return Sampler;
}
/**
* @brief Remaps the sampler from an index applying to the entire pipeline to one relative to the shader stage and returns the shader type
*
* The displacement map sampler will be treated as a 17th pixel shader sampler.
*
* @param Sampler Sampler index (according to our internal way of storing samplers)
* @return std::pair<DxsoProgramType, DWORD> Shader stage that it belongs to and the relative sampler index
*/
inline std::pair<DxsoProgramType, DWORD> RemapStateSamplerShader(DWORD Sampler) {
// Slots at or above the first vertex shader slot are rebased so
// that the first vertex shader sampler gets relative index 0.
if (Sampler >= caps::MaxTexturesPS + 1)
return std::make_pair(DxsoProgramTypes::VertexShader, Sampler - caps::MaxTexturesPS - 1);
if (Sampler >= FirstVSSamplerSlot)
return std::make_pair(DxsoProgramTypes::VertexShader, Sampler - FirstVSSamplerSlot);
// Everything below that is reported as a pixel shader sampler
// with its index unchanged.
return std::make_pair(DxsoProgramTypes::PixelShader, Sampler);
}
/**
* @brief Returns whether the sampler belongs to the vertex shader.
*
* The displacement map sampler is part of a fixed function feature,
* so it does not belong to the vertex shader.
* Use IsDMAPSampler to check for that.
*
* @param Sampler Sampler index (according to our internal way of storing samplers)
*/
inline bool IsVSSampler(uint32_t Sampler) {
  // Vertex shader samplers occupy every internal slot
  // starting at FirstVSSamplerSlot.
  const bool belowFirstVSSlot = Sampler < FirstVSSamplerSlot;
  return !belowFirstVSSlot;
}
/**
* @brief Returns whether the sampler belongs to the pixel shader.
*
* The displacement map sampler is part of a fixed function feature,
* so (unlike in RemapStateSamplerShader) it does not belong to the pixel shader.
* Use IsDMAPSampler to check for that.
*
* @param Sampler Sampler index (according to our internal way of storing samplers)
*/
inline bool IsPSSampler(uint32_t Sampler) {
  // Internal slots [0, MaxTexturesPS) hold the pixel shader samplers.
  // Slot MaxTexturesPS itself is where the displacement map sampler
  // is stored, so it must be excluded: the comparison is strict.
  return Sampler < caps::MaxTexturesPS;
}
/**
* @brief Returns whether the sampler is the displacement map sampler
*
* @param Sampler Sampler index (according to our internal way of storing samplers)
*/
inline bool IsDMAPSampler(uint32_t Sampler) {
  // The displacement map sampler is stored in exactly one internal
  // slot, directly after the pixel shader samplers. Every slot above
  // it belongs to the vertex shader and must not match here.
  return Sampler == caps::MaxTexturesPS;
}
/**
* @brief Remaps the sampler from an index (counted according to the API) to one relative to the shader stage and returns the shader type
*
* @param Sampler Sampler index (according to the API)
* @return std::pair<DxsoProgramType, DWORD> Shader stage that it belongs to and the relative sampler index
*/
inline std::pair<DxsoProgramType, DWORD> RemapSamplerShader(DWORD Sampler) {
Sampler = RemapSamplerState(Sampler);
@ -243,6 +310,9 @@ namespace dxvk {
uint32_t(offsets[1].y) > extent.height;
}
/**
* @brief Mirrors D3DTEXTURESTAGESTATETYPE but starts at 0
*/
enum D3D9TextureStageStateTypes : uint32_t
{
DXVK_TSS_COLOROP = 0,
@ -272,6 +342,12 @@ namespace dxvk {
constexpr uint32_t DXVK_TSS_TCI_CAMERASPACEREFLECTIONVECTOR = 0x00030000;
constexpr uint32_t DXVK_TSS_TCI_SPHEREMAP = 0x00040000;
/**
* @brief Remaps a texture stage type by the API to an internal one
*
* @param Type Texture stage type according to the API
* @return D3D9TextureStageStateTypes Texture stage type according to our internal way of storing them
*/
inline D3D9TextureStageStateTypes RemapTextureStageStateType(D3DTEXTURESTAGESTATETYPE Type) {
  // The internal enum mirrors the API enum but starts at 0,
  // so the API value is shifted down by one.
  const auto internalValue = Type - 1;
  return static_cast<D3D9TextureStageStateTypes>(internalValue);
}

View file

@ -35,6 +35,28 @@ namespace dxvk {
const uint32_t registerId = ins.dst[operandId].idx[0].offset;
m_analysis->uavInfos[registerId].accessAtomicOp = true;
m_analysis->uavInfos[registerId].accessFlags |= VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_SHADER_WRITE_BIT;
// Check whether the atomic operation is order-invariant
DxvkAccessOp store = DxvkAccessOp::None;
switch (ins.op) {
case DxbcOpcode::AtomicAnd: store = DxvkAccessOp::And; break;
case DxbcOpcode::AtomicOr: store = DxvkAccessOp::Or; break;
case DxbcOpcode::AtomicXor: store = DxvkAccessOp::Xor; break;
case DxbcOpcode::AtomicIAdd: store = DxvkAccessOp::Add; break;
case DxbcOpcode::AtomicIMax: store = DxvkAccessOp::IMax; break;
case DxbcOpcode::AtomicIMin: store = DxvkAccessOp::IMin; break;
case DxbcOpcode::AtomicUMax: store = DxvkAccessOp::UMax; break;
case DxbcOpcode::AtomicUMin: store = DxvkAccessOp::UMin; break;
default: break;
}
if (m_analysis->uavInfos[registerId].atomicStore == DxvkAccessOp::None)
m_analysis->uavInfos[registerId].atomicStore = store;
// Maintain ordering if the UAV is accessed via other operations as well
if (store == DxvkAccessOp::None || m_analysis->uavInfos[registerId].atomicStore != store)
m_analysis->uavInfos[registerId].nonInvariantAccess = true;
}
} break;
@ -58,6 +80,7 @@ namespace dxvk {
const uint32_t registerId = ins.src[operandId].idx[0].offset;
m_analysis->uavInfos[registerId].accessFlags |= VK_ACCESS_SHADER_READ_BIT;
m_analysis->uavInfos[registerId].sparseFeedback |= sparseFeedback;
m_analysis->uavInfos[registerId].nonInvariantAccess = true;
} else if (ins.src[operandId].type == DxbcOperandType::Resource) {
const uint32_t registerId = ins.src[operandId].idx[0].offset;
m_analysis->srvInfos[registerId].sparseFeedback |= sparseFeedback;
@ -68,6 +91,7 @@ namespace dxvk {
if (ins.dst[0].type == DxbcOperandType::UnorderedAccessView) {
const uint32_t registerId = ins.dst[0].idx[0].offset;
m_analysis->uavInfos[registerId].accessFlags |= VK_ACCESS_SHADER_WRITE_BIT;
m_analysis->uavInfos[registerId].nonInvariantAccess = true;
}
} break;
@ -75,11 +99,13 @@ namespace dxvk {
const uint32_t registerId = ins.src[1].idx[0].offset;
m_analysis->uavInfos[registerId].accessTypedLoad = true;
m_analysis->uavInfos[registerId].accessFlags |= VK_ACCESS_SHADER_READ_BIT;
m_analysis->uavInfos[registerId].nonInvariantAccess = true;
} break;
case DxbcInstClass::TypedUavStore: {
const uint32_t registerId = ins.dst[0].idx[0].offset;
m_analysis->uavInfos[registerId].accessFlags |= VK_ACCESS_SHADER_WRITE_BIT;
m_analysis->uavInfos[registerId].nonInvariantAccess = true;
} break;
default:

View file

@ -20,6 +20,8 @@ namespace dxvk {
bool accessTypedLoad = false;
bool accessAtomicOp = false;
bool sparseFeedback = false;
bool nonInvariantAccess = false;
DxvkAccessOp atomicStore = DxvkAccessOp::None;
VkAccessFlags accessFlags = 0;
};

View file

@ -1098,6 +1098,9 @@ namespace dxvk {
: VK_DESCRIPTOR_TYPE_STORAGE_IMAGE;
binding.access = m_analysis->uavInfos[registerId].accessFlags;
if (!m_analysis->uavInfos[registerId].nonInvariantAccess)
binding.accessOp = m_analysis->uavInfos[registerId].atomicStore;
if (!(binding.access & VK_ACCESS_SHADER_WRITE_BIT))
m_module.decorate(varId, spv::DecorationNonWritable);
if (!(binding.access & VK_ACCESS_SHADER_READ_BIT))
@ -1234,9 +1237,14 @@ namespace dxvk {
: (isUav ? VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER : VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER);
binding.viewType = VK_IMAGE_VIEW_TYPE_MAX_ENUM;
binding.resourceBinding = bindingId;
binding.access = isUav
? m_analysis->uavInfos[registerId].accessFlags
: VkAccessFlags(VK_ACCESS_SHADER_READ_BIT);
binding.access = VK_ACCESS_SHADER_READ_BIT;
if (isUav) {
binding.access = m_analysis->uavInfos[registerId].accessFlags;
if (!m_analysis->uavInfos[registerId].nonInvariantAccess)
binding.accessOp = m_analysis->uavInfos[registerId].atomicStore;
}
if (useRawSsbo || isUav) {
if (!(binding.access & VK_ACCESS_SHADER_WRITE_BIT))

View file

@ -1957,7 +1957,7 @@ namespace dxvk {
if (m_moduleInfo.options.d3d9FloatEmulation == D3D9FloatEmulation::Enabled) {
result.id = m_module.opNMin(typeId, result.id,
m_module.constfReplicant(FLT_MAX, result.type.ccount));
m_module.constfReplicant(std::numeric_limits<float>::max(), result.type.ccount));
}
break;
case DxsoOpcode::Rsq:
@ -1969,7 +1969,7 @@ namespace dxvk {
if (m_moduleInfo.options.d3d9FloatEmulation == D3D9FloatEmulation::Enabled) {
result.id = m_module.opNMin(typeId, result.id,
m_module.constfReplicant(FLT_MAX, result.type.ccount));
m_module.constfReplicant(std::numeric_limits<float>::max(), result.type.ccount));
}
break;
case DxsoOpcode::Dp3: {
@ -2029,7 +2029,7 @@ namespace dxvk {
if (m_moduleInfo.options.d3d9FloatEmulation == D3D9FloatEmulation::Enabled) {
result.id = m_module.opNMin(typeId, result.id,
m_module.constfReplicant(FLT_MAX, result.type.ccount));
m_module.constfReplicant(std::numeric_limits<float>::max(), result.type.ccount));
}
break;
}
@ -2040,7 +2040,7 @@ namespace dxvk {
if (m_moduleInfo.options.d3d9FloatEmulation == D3D9FloatEmulation::Enabled) {
result.id = m_module.opNMin(typeId, result.id,
m_module.constfReplicant(FLT_MAX, result.type.ccount));
m_module.constfReplicant(std::numeric_limits<float>::max(), result.type.ccount));
}
break;
case DxsoOpcode::Pow: {
@ -2102,7 +2102,7 @@ namespace dxvk {
rcpLength.type = scalarType;
rcpLength.id = m_module.opInverseSqrt(scalarTypeId, dot.id);
if (m_moduleInfo.options.d3d9FloatEmulation == D3D9FloatEmulation::Enabled) {
rcpLength.id = m_module.opNMin(scalarTypeId, rcpLength.id, m_module.constf32(FLT_MAX));
rcpLength.id = m_module.opNMin(scalarTypeId, rcpLength.id, m_module.constf32(std::numeric_limits<float>::max()));
}
// r * rsq(r . r)
@ -2216,7 +2216,7 @@ namespace dxvk {
result.id = m_module.opLog2(typeId, result.id);
if (m_moduleInfo.options.d3d9FloatEmulation == D3D9FloatEmulation::Enabled) {
result.id = m_module.opNMax(typeId, result.id,
m_module.constfReplicant(-FLT_MAX, result.type.ccount));
m_module.constfReplicant(-std::numeric_limits<float>::max(), result.type.ccount));
}
break;
case DxsoOpcode::Lrp:
@ -2980,7 +2980,7 @@ void DxsoCompiler::emitControlFlowGenericLoop(
auto SampleType = [&](DxsoSamplerType samplerType) {
uint32_t bitOffset = m_programInfo.type() == DxsoProgramTypes::VertexShader
? samplerIdx + caps::MaxTexturesPS + 1
? samplerIdx + FirstVSSamplerSlot
: samplerIdx;
uint32_t isNull = m_spec.get(m_module, m_specUbo, SpecSamplerNull, bitOffset, 1);

View file

@ -86,7 +86,7 @@ namespace dxvk {
case DxsoOpcode::SetP: return 3;
case DxsoOpcode::TexLdl: return 3;
case DxsoOpcode::BreakP: return 2;
default: Logger::warn("DxsoGetDefaultOpcodeLength: unknown opcode to get default length for."); return UINT32_MAX;
default: Logger::warn("DxsoGetDefaultOpcodeLength: unknown opcode to get default length for."); return std::numeric_limits<uint32_t>::max();
}
}

View file

@ -4,7 +4,7 @@
namespace dxvk {
constexpr uint32_t InvalidOpcodeLength = UINT32_MAX;
constexpr uint32_t InvalidOpcodeLength = std::numeric_limits<uint32_t>::max();
uint32_t DxsoGetDefaultOpcodeLength(DxsoOpcode opcode);

View file

@ -20,20 +20,37 @@ namespace dxvk {
// Looks up a tracked node overlapping the given range for the given
// access type. Returns false if no node is found. If a node is found
// and accessOp is None, returns true. For any other accessOp, returns
// true only if the node was accessed with a different set of ops or
// does not fully contain the requested range.
bool DxvkBarrierTracker::findRange(
const DxvkAddressRange& range,
DxvkAccess accessType) const {
DxvkAccess accessType,
DxvkAccessOp accessOp) const {
uint32_t rootIndex = computeRootIndex(range, accessType);
return findNode(range, rootIndex);
uint32_t nodeIndex = findNode(range, rootIndex);
// A node index of zero means nothing was found and converts to
// false; any other index converts to true.
if (likely(!nodeIndex || accessOp == DxvkAccessOp::None))
return nodeIndex;
// If we are checking for a specific order-invariant store
// op, the op must have been the only op used to access the
// resource, and the tracked range must cover the requested
// range in its entirety so we can rule out that other parts
// of the resource have been accessed in a different way.
auto& node = m_nodes[nodeIndex];
return node.payload.accessOps != DxvkAccessOps(accessOp)
|| !node.addressRange.contains(range);
}
void DxvkBarrierTracker::insertRange(
const DxvkAddressRange& range,
DxvkAccess accessType) {
uint32_t rootIndex = computeRootIndex(range, accessType);
DxvkAccess accessType,
DxvkAccessOp accessOp) {
DxvkBarrierPayload payload = { };
payload.accessOps.set(accessOp);
// If we can just insert the node with no conflicts,
// we don't have to do anything.
uint32_t nodeIndex = insertNode(range, rootIndex);
uint32_t rootIndex = computeRootIndex(range, accessType);
uint32_t nodeIndex = insertNode(range, rootIndex, payload);
if (likely(!nodeIndex))
return;
@ -41,6 +58,7 @@ namespace dxvk {
// If there's an existing node and it contains the entire
// range we want to add already, also don't do anything.
auto& node = m_nodes[nodeIndex];
node.payload.accessOps.set(payload.accessOps);
if (node.addressRange.contains(range))
return;
@ -82,12 +100,14 @@ namespace dxvk {
mergedRange.rangeStart = std::min(mergedRange.rangeStart, node.addressRange.rangeStart);
mergedRange.rangeEnd = std::max(mergedRange.rangeEnd, node.addressRange.rangeEnd);
payload.accessOps.set(node.payload.accessOps);
removeNode(nodeIndex, rootIndex);
nodeIndex = findNode(range, rootIndex);
}
insertNode(mergedRange, rootIndex);
insertNode(mergedRange, rootIndex, payload);
}
@ -166,7 +186,8 @@ namespace dxvk {
uint32_t DxvkBarrierTracker::insertNode(
const DxvkAddressRange& range,
uint32_t rootIndex) {
uint32_t rootIndex,
DxvkBarrierPayload payload) {
// Check if the given root is valid at all
uint64_t rootBit = uint64_t(1u) << (rootIndex - 1u);
@ -178,6 +199,7 @@ namespace dxvk {
auto& node = m_nodes[rootIndex];
node.header = 0;
node.addressRange = range;
node.payload = payload;
return 0;
} else {
// Traverse tree and abort if we find any range
@ -209,6 +231,7 @@ namespace dxvk {
node.setRed(true);
node.setParent(parentIndex);
node.addressRange = range;
node.payload = payload;
// Only do the fixup to maintain red-black properties if
// we haven't marked the root node as red in a deletion.
@ -238,6 +261,7 @@ namespace dxvk {
childIndex = m_nodes[childIndex].child(0);
node.addressRange = m_nodes[childIndex].addressRange;
node.payload = m_nodes[childIndex].payload;
removeNode(childIndex, rootIndex);
} else {
// Deletion is expected to be exceptionally rare, to the point of
@ -268,6 +292,7 @@ namespace dxvk {
node.setRed(child.isRed());
node.addressRange = child.addressRange;
node.payload = child.payload;
if (cl) m_nodes[cl].setParent(nodeIndex);
if (cr) m_nodes[cr].setParent(nodeIndex);
@ -378,6 +403,7 @@ namespace dxvk {
node.setChild(1, rr);
std::swap(node.addressRange, m_nodes[r].addressRange);
std::swap(node.payload, m_nodes[r].payload);
}
@ -406,6 +432,7 @@ namespace dxvk {
node.setChild(1, l);
std::swap(node.addressRange, m_nodes[l].addressRange);
std::swap(node.payload, m_nodes[l].payload);
}

View file

@ -42,6 +42,14 @@ namespace dxvk {
};
/**
* \brief Barrier node payload
*
* Extra data stored in each barrier tree node next to its
* address range. When nodes are merged during insertion,
* their access op sets are combined as well.
*/
struct DxvkBarrierPayload {
// Set of access operations recorded for the node's address range
DxvkAccessOps accessOps = 0u;
};
/**
* \brief Barrier tree node
*
@ -62,6 +70,9 @@ namespace dxvk {
// Address range of the node
DxvkAddressRange addressRange = { };
// Node payload
DxvkBarrierPayload payload = { };
void setRed(bool red) {
header &= ~uint64_t(1u);
header |= uint64_t(red);
@ -117,21 +128,25 @@ namespace dxvk {
*
* \param [in] range Resource range
* \param [in] accessType Access type
* \param [in] accessOp Access operation
* \returns \c true if the range has a pending access
*/
bool findRange(
const DxvkAddressRange& range,
DxvkAccess accessType) const;
DxvkAccess accessType,
DxvkAccessOp accessOp) const;
/**
* \brief Inserts address range for a given access type
*
* \param [in] range Resource range
* \param [in] accessType Access type
* \param [in] accessOp Access operation
*/
void insertRange(
const DxvkAddressRange& range,
DxvkAccess accessType);
DxvkAccess accessType,
DxvkAccessOp accessOp);
/**
* \brief Clears the entire structure
@ -166,7 +181,8 @@ namespace dxvk {
uint32_t insertNode(
const DxvkAddressRange& range,
uint32_t rootIndex);
uint32_t rootIndex,
DxvkBarrierPayload payload);
void removeNode(
uint32_t nodeIndex,

File diff suppressed because it is too large Load diff

View file

@ -763,11 +763,14 @@ namespace dxvk {
* \param [in] offset Draw buffer offset
* \param [in] count Number of draws
* \param [in] stride Stride between draw calls
* \param [in] unroll Whether to unroll multiple draws if
* there are any potential data dependencies between them.
*/
void drawIndirect(
VkDeviceSize offset,
uint32_t count,
uint32_t stride);
uint32_t stride,
bool unroll);
/**
* \brief Indirect draw call
@ -809,11 +812,14 @@ namespace dxvk {
* \param [in] offset Draw buffer offset
* \param [in] count Number of draws
* \param [in] stride Stride between draw calls
* \param [in] unroll Whether to unroll multiple draws if
* there are any potential data dependencies between them.
*/
void drawIndexedIndirect(
VkDeviceSize offset,
uint32_t count,
uint32_t stride);
uint32_t stride,
bool unroll);
/**
* \brief Indirect indexed draw call
@ -832,14 +838,14 @@ namespace dxvk {
uint32_t stride);
/**
* \brief Transform feddback draw call
* \param [in] counterBuffer Xfb counter buffer
* \brief Transform feedback draw call
*
* \param [in] counterOffset Draw count offset
* \param [in] counterDivisor Vertex stride
* \param [in] counterBias Counter bias
*/
void drawIndirectXfb(
const DxvkBufferSlice& counterBuffer,
VkDeviceSize counterOffset,
uint32_t counterDivisor,
uint32_t counterBias);
@ -1435,8 +1441,8 @@ namespace dxvk {
DxvkGpuQueryManager m_queryManager;
DxvkGlobalPipelineBarrier m_globalRoGraphicsBarrier;
DxvkGlobalPipelineBarrier m_globalRwGraphicsBarrier;
DxvkGlobalPipelineBarrier m_renderPassBarrierSrc = { };
DxvkGlobalPipelineBarrier m_renderPassBarrierDst = { };
DxvkRenderTargetLayouts m_rtLayouts = { };
@ -1453,7 +1459,6 @@ namespace dxvk {
std::vector<VkImageMemoryBarrier2> m_imageLayoutTransitions;
std::vector<util::DxvkDebugLabel> m_debugLabelStack;
bool m_debugLabelInternalActive = false;
Rc<DxvkLatencyTracker> m_latencyTracker;
uint64_t m_latencyFrameId = 0u;
@ -1590,6 +1595,20 @@ namespace dxvk {
const Rc<DxvkBuffer>& buffer,
VkDeviceSize offset);
template<bool Indexed>
void drawIndirectGeneric(
VkDeviceSize offset,
uint32_t count,
uint32_t stride,
bool unroll);
template<bool Indexed>
void drawIndirectCountGeneric(
VkDeviceSize offset,
VkDeviceSize countOffset,
uint32_t maxCount,
uint32_t stride);
void resolveImageHw(
const Rc<DxvkImage>& dstImage,
const Rc<DxvkImage>& srcImage,
@ -1691,7 +1710,9 @@ namespace dxvk {
void unbindGraphicsPipeline();
bool updateGraphicsPipeline();
bool updateGraphicsPipelineState(DxvkGlobalPipelineBarrier srcBarrier);
bool updateGraphicsPipelineState();
uint32_t getGraphicsPipelineDebugColor() const;
template<VkPipelineBindPoint BindPoint>
void resetSpecConstants(
@ -1757,34 +1778,61 @@ namespace dxvk {
template<bool Indexed, bool Indirect>
bool commitGraphicsState();
template<bool DoEmit>
void commitComputeBarriers();
template<VkPipelineBindPoint BindPoint>
bool checkResourceHazards(
const DxvkBindingLayout& layout,
uint32_t setMask);
void commitComputePostBarriers();
bool checkComputeHazards();
template<bool Indexed, bool Indirect, bool DoEmit>
void commitGraphicsBarriers();
template<bool Indexed, bool Indirect>
bool checkGraphicsHazards();
template<bool DoEmit>
template<VkPipelineBindPoint BindPoint>
bool checkBufferBarrier(
const DxvkBufferSlice& bufferSlice,
VkPipelineStageFlags stages,
VkAccessFlags access);
VkAccessFlags access,
DxvkAccessOp accessOp);
template<bool DoEmit>
template<VkPipelineBindPoint BindPoint>
bool checkBufferViewBarrier(
const Rc<DxvkBufferView>& bufferView,
VkPipelineStageFlags stages,
VkAccessFlags access);
VkAccessFlags access,
DxvkAccessOp accessOp);
template<bool DoEmit>
template<VkPipelineBindPoint BindPoint>
bool checkImageViewBarrier(
const Rc<DxvkImageView>& imageView,
VkPipelineStageFlags stages,
VkAccessFlags access);
VkAccessFlags access,
DxvkAccessOp accessOp);
/**
 * \brief Queries storage hazards that may be left unsynchronized
 *
 * Evaluates the barrier control flags for the given bind point.
 * \returns Access types for which overlapping storage accesses
 *    do not need a barrier. Empty if every hazard must be
 *    synchronized.
 */
template<VkPipelineBindPoint BindPoint>
DxvkAccessFlags getAllowedStorageHazards() {
  // Nothing can be relaxed if no barrier control flags are set,
  // or while write-after-write synchronization is being forced.
  const bool mustSynchronize = m_barrierControl.isClear()
    || m_flags.test(DxvkContextFlag::ForceWriteAfterWriteSync);

  if (mustSynchronize)
    return DxvkAccessFlags();

  if constexpr (BindPoint == VK_PIPELINE_BIND_POINT_COMPUTE) {
    // Pending work in any stage other than compute shaders and
    // indirect argument reads is unrelated to shader dispatches,
    // in which case hazards always get a barrier.
    const VkPipelineStageFlags2 dispatchStages = VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT
                                               | VK_PIPELINE_STAGE_2_DRAW_INDIRECT_BIT;

    const bool hasUnrelatedAccess = m_execBarriers.hasPendingStages(~dispatchStages);

    if (!hasUnrelatedAccess) {
      // The read-write flag takes precedence over the write-only flag
      if (m_barrierControl.test(DxvkBarrierControl::ComputeAllowReadWriteOverlap))
        return DxvkAccessFlags(DxvkAccess::Write, DxvkAccess::Read);

      if (m_barrierControl.test(DxvkBarrierControl::ComputeAllowWriteOnlyOverlap))
        return DxvkAccessFlags(DxvkAccess::Write);
    }
  } else {
    // On the graphics bind point, the only unrelated accesses are
    // transform feedback writes, and a barrier for those is fine.
    if (m_barrierControl.test(DxvkBarrierControl::GraphicsAllowReadWriteOverlap))
      return DxvkAccessFlags(DxvkAccess::Write, DxvkAccess::Read);
  }

  return DxvkAccessFlags();
}
bool canIgnoreWawHazards(
VkPipelineStageFlags stages);
void emitMemoryBarrier(
VkPipelineStageFlags srcStages,
@ -1876,7 +1924,15 @@ namespace dxvk {
const VkImageSubresourceRange& subresources,
VkImageLayout srcLayout,
VkPipelineStageFlags2 srcStages,
VkAccessFlags2 srcAccess);
VkAccessFlags2 srcAccess,
DxvkAccessOp accessOp);
void accessImage(
DxvkCmdBuffer cmdBuffer,
const DxvkImageView& imageView,
VkPipelineStageFlags2 srcStages,
VkAccessFlags2 srcAccess,
DxvkAccessOp accessOp);
void accessImage(
DxvkCmdBuffer cmdBuffer,
@ -1887,7 +1943,8 @@ namespace dxvk {
VkAccessFlags2 srcAccess,
VkImageLayout dstLayout,
VkPipelineStageFlags2 dstStages,
VkAccessFlags2 dstAccess);
VkAccessFlags2 dstAccess,
DxvkAccessOp accessOp);
void accessBuffer(
DxvkCmdBuffer cmdBuffer,
@ -1895,7 +1952,8 @@ namespace dxvk {
VkDeviceSize offset,
VkDeviceSize size,
VkPipelineStageFlags2 srcStages,
VkAccessFlags2 srcAccess);
VkAccessFlags2 srcAccess,
DxvkAccessOp accessOp);
void accessBuffer(
DxvkCmdBuffer cmdBuffer,
@ -1905,13 +1963,31 @@ namespace dxvk {
VkPipelineStageFlags2 srcStages,
VkAccessFlags2 srcAccess,
VkPipelineStageFlags2 dstStages,
VkAccessFlags2 dstAccess);
VkAccessFlags2 dstAccess,
DxvkAccessOp accessOp);
void accessBuffer(
DxvkCmdBuffer cmdBuffer,
const DxvkBufferSlice& bufferSlice,
VkPipelineStageFlags2 srcStages,
VkAccessFlags2 srcAccess,
DxvkAccessOp accessOp);
void accessBuffer(
DxvkCmdBuffer cmdBuffer,
const DxvkBufferSlice& bufferSlice,
VkPipelineStageFlags2 srcStages,
VkAccessFlags2 srcAccess,
VkPipelineStageFlags2 dstStages,
VkAccessFlags2 dstAccess,
DxvkAccessOp accessOp);
void accessBuffer(
DxvkCmdBuffer cmdBuffer,
DxvkBufferView& bufferView,
VkPipelineStageFlags2 srcStages,
VkAccessFlags2 srcAccess);
VkAccessFlags2 srcAccess,
DxvkAccessOp accessOp);
void accessBuffer(
DxvkCmdBuffer cmdBuffer,
@ -1919,7 +1995,17 @@ namespace dxvk {
VkPipelineStageFlags2 srcStages,
VkAccessFlags2 srcAccess,
VkPipelineStageFlags2 dstStages,
VkAccessFlags2 dstAccess);
VkAccessFlags2 dstAccess,
DxvkAccessOp accessOp);
void accessDrawBuffer(
VkDeviceSize offset,
uint32_t count,
uint32_t stride,
uint32_t size);
void accessDrawCountBuffer(
VkDeviceSize offset);
void flushPendingAccesses(
DxvkBuffer& buffer,
@ -1946,20 +2032,24 @@ namespace dxvk {
DxvkBuffer& buffer,
VkDeviceSize offset,
VkDeviceSize size,
DxvkAccess access);
DxvkAccess access,
DxvkAccessOp accessOp);
bool resourceHasAccess(
DxvkBufferView& bufferView,
DxvkAccess access);
DxvkAccess access,
DxvkAccessOp accessOp);
bool resourceHasAccess(
DxvkImage& image,
const VkImageSubresourceRange& subresources,
DxvkAccess access);
DxvkAccess access,
DxvkAccessOp accessOp);
bool resourceHasAccess(
DxvkImageView& imageView,
DxvkAccess access);
DxvkAccess access,
DxvkAccessOp accessOp);
DxvkBarrierBatch& getBarrierBatch(
DxvkCmdBuffer cmdBuffer);
@ -1980,34 +2070,55 @@ namespace dxvk {
const Rc<DxvkImage>& image,
DxvkAccess access);
template<typename Pred>
template<VkPipelineBindPoint BindPoint, typename Pred>
bool checkResourceBarrier(
const Pred& pred,
VkPipelineStageFlags stages,
VkAccessFlags access) {
// Check for read-after-write first, this is common
// If we're only reading the resource, only pending
// writes matter for synchronization purposes.
bool hasPendingWrite = pred(DxvkAccess::Write);
if (access & vk::AccessReadMask)
if (!(access & vk::AccessWriteMask))
return hasPendingWrite;
// Check for a write-after-write hazard, but
// ignore it if there are no reads involved.
bool ignoreWaW = canIgnoreWawHazards(stages);
if (hasPendingWrite) {
// If there is a write-after-write hazard and synchronization
// for those is not explicitly disabled, insert a barrier.
DxvkAccessFlags allowedHazards = getAllowedStorageHazards<BindPoint>();
if (hasPendingWrite && !ignoreWaW)
if (!allowedHazards.test(DxvkAccess::Write))
return true;
// Check whether there are any pending reads.
// Skip barrier if overlapping read-modify-write ops are allowed.
// This includes shader atomics, but also non-atomic load-stores.
if (allowedHazards.test(DxvkAccess::Read))
return false;
// Otherwise, check if there is a read-after-write hazard.
if (access & vk::AccessReadMask)
return true;
}
// Check if there are any pending reads to avoid write-after-read issues.
return pred(DxvkAccess::Read);
}
void invalidateWriteAfterWriteTracking();
void beginRenderPassDebugRegion();
void beginInternalDebugRegion(
const VkDebugUtilsLabelEXT& label);
template<VkPipelineBindPoint BindPoint>
void beginBarrierControlDebugRegion();
void endInternalDebugRegion();
void pushDebugRegion(
const VkDebugUtilsLabelEXT& label,
util::DxvkDebugLabelType type);
void popDebugRegion(
util::DxvkDebugLabelType type);
bool hasDebugRegion(
util::DxvkDebugLabelType type);
void beginActiveDebugRegions();

View file

@ -20,10 +20,11 @@ namespace dxvk {
* of the graphics and compute pipelines
* has changed and/or needs to be updated.
*/
enum class DxvkContextFlag : uint32_t {
enum class DxvkContextFlag : uint64_t {
GpRenderPassBound, ///< Render pass is currently bound
GpRenderPassSuspended, ///< Render pass is currently suspended
GpRenderPassSecondaryCmd, ///< Render pass uses secondary command buffer
GpRenderPassSideEffects, ///< Render pass has side effects
GpXfbActive, ///< Transform feedback is enabled
GpDirtyFramebuffer, ///< Framebuffer binding is out of date
GpDirtyPipeline, ///< Graphics pipeline binding is out of date
@ -56,10 +57,12 @@ namespace dxvk {
DirtyDrawBuffer, ///< Indirect argument buffer is dirty
DirtyPushConstants, ///< Push constant data has changed
ForceWriteAfterWriteSync, ///< Ignores barrier control flags for write-after-write hazards
Count
};
static_assert(uint32_t(DxvkContextFlag::Count) <= 32u);
static_assert(uint32_t(DxvkContextFlag::Count) <= 64u);
using DxvkContextFlags = Flags<DxvkContextFlag>;
@ -85,8 +88,11 @@ namespace dxvk {
* synchronize implicitly.
*/
enum class DxvkBarrierControl : uint32_t {
IgnoreWriteAfterWrite = 1,
IgnoreGraphicsBarriers = 2,
// Ignores write-after-write hazard
ComputeAllowWriteOnlyOverlap = 0,
ComputeAllowReadWriteOverlap = 1,
GraphicsAllowReadWriteOverlap = 2,
};
using DxvkBarrierControlFlags = Flags<DxvkBarrierControl>;

View file

@ -955,8 +955,7 @@ namespace dxvk {
if (m_shaders.gs->flags().test(DxvkShaderFlag::HasTransformFeedback)) {
m_flags.set(DxvkGraphicsPipelineFlag::HasTransformFeedback);
m_barrier.stages |= VK_PIPELINE_STAGE_DRAW_INDIRECT_BIT
| VK_PIPELINE_STAGE_TRANSFORM_FEEDBACK_BIT_EXT;
m_barrier.stages |= VK_PIPELINE_STAGE_TRANSFORM_FEEDBACK_BIT_EXT;
m_barrier.access |= VK_ACCESS_TRANSFORM_FEEDBACK_COUNTER_READ_BIT_EXT
| VK_ACCESS_TRANSFORM_FEEDBACK_COUNTER_WRITE_BIT_EXT
| VK_ACCESS_TRANSFORM_FEEDBACK_WRITE_BIT_EXT;
@ -966,9 +965,13 @@ namespace dxvk {
m_flags.set(DxvkGraphicsPipelineFlag::HasRasterizerDiscard);
}
if (m_barrier.access & VK_ACCESS_SHADER_WRITE_BIT)
if (m_barrier.access & VK_ACCESS_SHADER_WRITE_BIT) {
m_flags.set(DxvkGraphicsPipelineFlag::HasStorageDescriptors);
if (layout->layout().getHazardousSetMask())
m_flags.set(DxvkGraphicsPipelineFlag::UnrollMergedDraws);
}
if (m_shaders.fs != nullptr) {
if (m_shaders.fs->flags().test(DxvkShaderFlag::HasSampleRateShading))
m_flags.set(DxvkGraphicsPipelineFlag::HasSampleRateShading);

View file

@ -31,6 +31,7 @@ namespace dxvk {
HasStorageDescriptors,
HasSampleRateShading,
HasSampleMaskExport,
UnrollMergedDraws,
};
using DxvkGraphicsPipelineFlags = Flags<DxvkGraphicsPipelineFlag>;

View file

@ -205,7 +205,7 @@ namespace dxvk {
DxvkBindingLayout::DxvkBindingLayout(VkShaderStageFlags stages)
: m_pushConst { 0, 0, 0 }, m_pushConstStages(0), m_stages(stages) {
: m_pushConst { 0, 0, 0 }, m_pushConstStages(0), m_stages(stages), m_hazards(0u) {
}
@ -236,6 +236,9 @@ namespace dxvk {
/**
 * Registers a binding with the descriptor set it belongs to, and
 * flags that set as hazardous if the binding is written by shaders
 * without using an order-invariant access operation.
 */
void DxvkBindingLayout::addBinding(const DxvkBindingInfo& binding) {
  const uint32_t setIndex = binding.computeSetIndex();
  m_bindings[setIndex].addBinding(binding);

  const bool isShaderWritten = (binding.access & VK_ACCESS_2_SHADER_WRITE_BIT) != 0;
  const bool isOrderInvariant = binding.accessOp != DxvkAccessOp::None;

  if (isShaderWritten && !isOrderInvariant)
    m_hazards |= 1u << setIndex;
}
@ -260,6 +263,8 @@ namespace dxvk {
addPushConstantRange(layout.m_pushConst);
m_pushConstStages |= layout.m_pushConstStages;
m_hazards |= layout.m_hazards;
}

View file

@ -11,6 +11,27 @@ namespace dxvk {
class DxvkDevice;
/**
* \brief Order-invariant atomic access operation
*
* Information used to optimize barriers when a resource
* is accessed exclusively via order-invariant stores.
*/
enum class DxvkAccessOp : uint32_t {
// No order-invariant operation. Shader-written bindings that
// use this value mark their descriptor set as hazardous.
None = 0,
// NOTE(review): the remaining values presumably name the atomic
// bitwise, arithmetic and signed / unsigned min-max operation
// used for the store; confirm against the shader analysis code.
Or = 1,
And = 2,
Xor = 3,
Add = 4,
IMin = 5,
IMax = 6,
UMin = 7,
UMax = 8,
};
using DxvkAccessOps = Flags<DxvkAccessOp>;
/**
* \brief Descriptor set indices
*/
@ -37,6 +58,7 @@ namespace dxvk {
VkShaderStageFlagBits stage; ///< Shader stage
VkAccessFlags access; ///< Access mask for the resource
VkBool32 uboSet; ///< Whether to include this in the UBO set
DxvkAccessOp accessOp; ///< Order-invariant store type, if any
/**
* \brief Computes descriptor set index for the given binding
@ -315,6 +337,16 @@ namespace dxvk {
return m_stages;
}
/**
* \brief Queries hazardous sets
*
* Bit \c n of the returned mask corresponds to descriptor set \c n.
* \returns Mask of sets with storage descriptors
* that are not accessed in an order-invariant way.
*/
uint32_t getHazardousSetMask() const {
return m_hazards;
}
/**
* \brief Queries defined descriptor set layouts
*
@ -372,6 +404,7 @@ namespace dxvk {
VkPushConstantRange m_pushConst;
VkShaderStageFlags m_pushConstStages;
VkShaderStageFlags m_stages;
uint32_t m_hazards;
};

View file

@ -580,6 +580,27 @@ namespace dxvk {
m_trackId = 0u;
}
/**
* \brief Checks whether the buffer has been used for gfx stores
*
* \returns \c true if any graphics pipeline has written this
* resource via transform feedback or a storage descriptor.
*/
bool hasGfxStores() const {
return m_hasGfxStores;
}
/**
 * \brief Marks the resource as written by a graphics pipeline
 *
 * Needs to be called whenever a graphics pipeline writes the
 * resource through a storage descriptor or transform feedback.
 * \returns \c true if the resource was already marked before
 *    this call, \c false if this call set the flag.
 */
bool trackGfxStores() {
  const bool alreadyTracked = m_hasGfxStores;
  m_hasGfxStores = true;
  return alreadyTracked;
}
/**
* \brief Queries sparse page table
*
@ -622,6 +643,8 @@ namespace dxvk {
uint64_t m_trackId = { 0u };
uint64_t m_cookie = { 0u };
bool m_hasGfxStores = false;
/**
 * Computes the 64-bit increment for the given access type: a
 * single set bit, shifted left by 20 bits per access type index.
 */
static constexpr uint64_t getIncrement(DxvkAccess access) {
  const uint32_t shift = 20u * uint32_t(access);
  return uint64_t(1u) << shift;
}

View file

@ -4,6 +4,15 @@
namespace dxvk::util {
/**
* \brief Debug utils label type
*/
enum class DxvkDebugLabelType : uint32_t {
External, ///< App-provided scope
InternalRenderPass, ///< Internal render pass markers
InternalBarrierControl, ///< Barrier control markers
};
/**
* \brief Debug label wrapper
*
@ -16,12 +25,16 @@ namespace dxvk::util {
DxvkDebugLabel() = default;
DxvkDebugLabel(const VkDebugUtilsLabelEXT& label)
: m_text(label.pLabelName ? label.pLabelName : "") {
DxvkDebugLabel(const VkDebugUtilsLabelEXT& label, DxvkDebugLabelType type)
: m_text(label.pLabelName ? label.pLabelName : ""), m_type(type) {
for (uint32_t i = 0; i < m_color.size(); i++)
m_color[i] = label.color[i];
}
// Returns the label type stored in this object
DxvkDebugLabelType type() const {
return m_type;
}
VkDebugUtilsLabelEXT get() const {
VkDebugUtilsLabelEXT label = { VK_STRUCTURE_TYPE_DEBUG_UTILS_LABEL_EXT };
label.pLabelName = m_text.c_str();
@ -34,6 +47,7 @@ namespace dxvk::util {
std::string m_text;
std::array<float, 4> m_color = { };
// Label type. Initialized here so that objects created through the
// defaulted default constructor do not carry an indeterminate value.
DxvkDebugLabelType m_type = DxvkDebugLabelType::External;
};

View file

@ -1,5 +1,7 @@
#pragma once
#include <cstddef>
#include <cstdint>
#include <vector>
#include "com_include.h"
@ -9,7 +11,7 @@ namespace dxvk {
/**
* \brief COM private data entry type
*/
enum ComPrivateDataType {
enum class ComPrivateDataType : uint32_t {
None,
Data,
Iface,

View file

@ -291,12 +291,11 @@ namespace dxvk {
/* Final Fantasy XV: VXAO does thousands of *
* draw calls with the same UAV bound */
{ R"(\\ffxv_s\.exe$)", {{
{ "d3d11.ignoreGraphicsBarriers", "True" },
{ "d3d11.relaxedGraphicsBarriers", "True" },
}} },
/* God of War - relies on NVAPI/AMDAGS for *
* barrier stuff, needs nvapi for DLSS */
{ R"(\\GoW\.exe$)", {{
{ "d3d11.ignoreGraphicsBarriers", "True" },
{ "d3d11.relaxedBarriers", "True" },
{ "dxgi.hideNvidiaGpu", "False" },
{ "dxgi.maxFrameLatency", "1" },
@ -334,7 +333,7 @@ namespace dxvk {
* presumably for culling, which doesn't play *
* nicely with D3D11 without vendor libraries */
{ R"(\\Stray-Win64-Shipping\.exe$)", {{
{ "d3d11.ignoreGraphicsBarriers", "True" },
{ "d3d11.relaxedGraphicsBarriers", "True" },
}} },
/* Metal Gear Solid V: Ground Zeroes *
* Texture quality can break at high vram */
@ -433,7 +432,7 @@ namespace dxvk {
* and assumes that AMD GPUs do not expose *
* native command lists for AGS usage */
{ R"(\\granblue_fantasy_relink\.exe$)", {{
{ "d3d11.ignoreGraphicsBarriers", "True" },
{ "d3d11.relaxedGraphicsBarriers", "True" },
{ "d3d11.exposeDriverCommandLists", "False" },
{ "dxgi.hideNvidiaGpu", "False" },
}} },

View file

@ -40,6 +40,11 @@ namespace dxvk::vk {
= VK_ACCESS_HOST_READ_BIT
| VK_ACCESS_HOST_WRITE_BIT;
// Access flags treated as graphics pipeline side effects: shader
// writes as well as transform feedback data and counter writes
constexpr static VkAccessFlags AccessGfxSideEffectMask
= VK_ACCESS_SHADER_WRITE_BIT
| VK_ACCESS_TRANSFORM_FEEDBACK_WRITE_BIT_EXT
| VK_ACCESS_TRANSFORM_FEEDBACK_COUNTER_WRITE_BIT_EXT;
constexpr static VkPipelineStageFlags StageDeviceMask
= VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT
| VK_PIPELINE_STAGE_DRAW_INDIRECT_BIT