mirror of
https://github.com/doitsujin/dxvk.git
synced 2025-03-06 20:58:37 +01:00
[dxbc,d3d11] Add option to explicitly emit long dot products
This commit is contained in:
parent
5c987ea3d1
commit
4ee907a6df
6 changed files with 56 additions and 11 deletions
11
dxvk.conf
11
dxvk.conf
|
@ -529,6 +529,17 @@
|
||||||
# d3d11.longMad = False
|
# d3d11.longMad = False
|
||||||
# d3d9.longMad = False
|
# d3d9.longMad = False
|
||||||
|
|
||||||
|
|
||||||
|
# Long Dot
|
||||||
|
#
|
||||||
|
# Whether to emit dot products as a precise FMA chain (True) or as a plain SPIR-V dot product (False).
|
||||||
|
#
|
||||||
|
# Supported values:
|
||||||
|
# - True/False
|
||||||
|
|
||||||
|
# d3d11.longDot = False
|
||||||
|
|
||||||
|
|
||||||
# Device Local Constant Buffers
|
# Device Local Constant Buffers
|
||||||
#
|
#
|
||||||
# Enables using device local, host accessible memory for constant buffers in D3D9.
|
# Enables using device local, host accessible memory for constant buffers in D3D9.
|
||||||
|
|
|
@ -32,6 +32,7 @@ namespace dxvk {
|
||||||
this->maxFrameLatency = config.getOption<int32_t>("dxgi.maxFrameLatency", 0);
|
this->maxFrameLatency = config.getOption<int32_t>("dxgi.maxFrameLatency", 0);
|
||||||
this->exposeDriverCommandLists = config.getOption<bool>("d3d11.exposeDriverCommandLists", true);
|
this->exposeDriverCommandLists = config.getOption<bool>("d3d11.exposeDriverCommandLists", true);
|
||||||
this->longMad = config.getOption<bool>("d3d11.longMad", false);
|
this->longMad = config.getOption<bool>("d3d11.longMad", false);
|
||||||
|
this->longDot = config.getOption<bool>("d3d11.longDot", false);
|
||||||
this->reproducibleCommandStream = config.getOption<bool>("d3d11.reproducibleCommandStream", false);
|
this->reproducibleCommandStream = config.getOption<bool>("d3d11.reproducibleCommandStream", false);
|
||||||
|
|
||||||
// Clamp LOD bias so that people don't abuse this in unintended ways
|
// Clamp LOD bias so that people don't abuse this in unintended ways
|
||||||
|
|
|
@ -118,9 +118,12 @@ namespace dxvk {
|
||||||
/// Shader dump path
|
/// Shader dump path
|
||||||
std::string shaderDumpPath;
|
std::string shaderDumpPath;
|
||||||
|
|
||||||
/// Should we make our Mads a FFma or do it the long way with an FMul and an FAdd?
|
/// Translate Mad/Dfma to separate FMul+FAdd
|
||||||
bool longMad;
|
bool longMad;
|
||||||
|
|
||||||
|
/// Translate DpX to a precise FMul+FFma chain
|
||||||
|
bool longDot;
|
||||||
|
|
||||||
/// Ensure that for the same D3D commands the output VK commands
|
/// Ensure that for the same D3D commands the output VK commands
|
||||||
/// don't change between runs. Useful for comparative benchmarking,
|
/// don't change between runs. Useful for comparative benchmarking,
|
||||||
/// can negatively affect performance.
|
/// can negatively affect performance.
|
||||||
|
|
|
@ -2044,15 +2044,41 @@ namespace dxvk {
|
||||||
DxbcRegisterValue dst;
|
DxbcRegisterValue dst;
|
||||||
dst.type.ctype = ins.dst[0].dataType;
|
dst.type.ctype = ins.dst[0].dataType;
|
||||||
dst.type.ccount = 1;
|
dst.type.ccount = 1;
|
||||||
|
dst.id = 0;
|
||||||
dst.id = m_module.opDot(
|
|
||||||
getVectorTypeId(dst.type),
|
if (!m_moduleInfo.options.longDot) {
|
||||||
src.at(0).id,
|
dst.id = m_module.opDot(
|
||||||
src.at(1).id);
|
getVectorTypeId(dst.type),
|
||||||
|
src.at(0).id,
|
||||||
if (ins.controls.precise() || m_precise)
|
src.at(1).id);
|
||||||
m_module.decorate(dst.id, spv::DecorationNoContraction);
|
|
||||||
|
if (ins.controls.precise() || m_precise)
|
||||||
|
m_module.decorate(dst.id, spv::DecorationNoContraction);
|
||||||
|
} else {
|
||||||
|
uint32_t componentType = getVectorTypeId(dst.type);
|
||||||
|
uint32_t componentCount = srcMask.popCount();
|
||||||
|
|
||||||
|
for (uint32_t i = 1; i <= componentCount; i++) {
|
||||||
|
uint32_t idx = componentCount - i;
|
||||||
|
|
||||||
|
if (dst.id) {
|
||||||
|
dst.id = m_module.opFFma(componentType,
|
||||||
|
m_module.opCompositeExtract(componentType, src.at(0).id, 1, &idx),
|
||||||
|
m_module.opCompositeExtract(componentType, src.at(1).id, 1, &idx),
|
||||||
|
dst.id);
|
||||||
|
} else {
|
||||||
|
dst.id = m_module.opFMul(componentType,
|
||||||
|
m_module.opCompositeExtract(componentType, src.at(0).id, 1, &idx),
|
||||||
|
m_module.opCompositeExtract(componentType, src.at(1).id, 1, &idx));
|
||||||
|
}
|
||||||
|
|
||||||
|
// Unconditionally mark as precise since the exact order of operations
|
||||||
|
// matters for some games, even if the instruction itself is not marked
|
||||||
|
// as precise.
|
||||||
|
m_module.decorate(dst.id, spv::DecorationNoContraction);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
dst = emitDstOperandModifiers(dst, ins.modifiers);
|
dst = emitDstOperandModifiers(dst, ins.modifiers);
|
||||||
emitRegisterStore(ins.dst[0], dst);
|
emitRegisterStore(ins.dst[0], dst);
|
||||||
}
|
}
|
||||||
|
|
|
@ -39,6 +39,7 @@ namespace dxvk {
|
||||||
forceSampleRateShading = options.forceSampleRateShading;
|
forceSampleRateShading = options.forceSampleRateShading;
|
||||||
enableSampleShadingInterlock = device->features().extFragmentShaderInterlock.fragmentShaderSampleInterlock;
|
enableSampleShadingInterlock = device->features().extFragmentShaderInterlock.fragmentShaderSampleInterlock;
|
||||||
longMad = options.longMad;
|
longMad = options.longMad;
|
||||||
|
longDot = options.longDot;
|
||||||
|
|
||||||
// Figure out float control flags to match D3D11 rules
|
// Figure out float control flags to match D3D11 rules
|
||||||
if (options.floatControls) {
|
if (options.floatControls) {
|
||||||
|
|
|
@ -55,8 +55,11 @@ namespace dxvk {
|
||||||
/// Minimum storage buffer alignment
|
/// Minimum storage buffer alignment
|
||||||
VkDeviceSize minSsboAlignment = 0;
|
VkDeviceSize minSsboAlignment = 0;
|
||||||
|
|
||||||
/// Should we make our Mads a FFma or do it the long way with an FMul and an FAdd?
|
/// Translate Mad/Dfma to separate FMul+FAdd
|
||||||
bool longMad;
|
bool longMad;
|
||||||
|
|
||||||
|
/// Translate DpX to a precise FMul+FFma chain
|
||||||
|
bool longDot;
|
||||||
};
|
};
|
||||||
|
|
||||||
}
|
}
|
Loading…
Add table
Reference in a new issue