mirror of
https://github.com/doitsujin/dxvk.git
synced 2025-03-06 20:58:37 +01:00
[dxbc] Rework embedded immediate constant buffers
Considerably reduces the size of immediate constant buffer arrays when not all vector components are used. Also adds bounds checking.
This commit is contained in:
parent
31a4679960
commit
31192b6d3f
2 changed files with 108 additions and 51 deletions
|
@ -241,8 +241,8 @@ namespace dxvk {
|
||||||
info.bindings = m_bindings.data();
|
info.bindings = m_bindings.data();
|
||||||
info.inputMask = m_inputMask;
|
info.inputMask = m_inputMask;
|
||||||
info.outputMask = m_outputMask;
|
info.outputMask = m_outputMask;
|
||||||
info.uniformSize = m_immConstData.size();
|
info.uniformSize = m_icbData.size();
|
||||||
info.uniformData = m_immConstData.data();
|
info.uniformData = m_icbData.data();
|
||||||
info.pushConstStages = VK_SHADER_STAGE_FRAGMENT_BIT;
|
info.pushConstStages = VK_SHADER_STAGE_FRAGMENT_BIT;
|
||||||
info.pushConstSize = sizeof(DxbcPushConstants);
|
info.pushConstSize = sizeof(DxbcPushConstants);
|
||||||
info.outputTopology = m_outputTopology;
|
info.outputTopology = m_outputTopology;
|
||||||
|
@ -1491,77 +1491,108 @@ namespace dxvk {
|
||||||
|
|
||||||
|
|
||||||
void DxbcCompiler::emitDclImmediateConstantBuffer(const DxbcShaderInstruction& ins) {
|
void DxbcCompiler::emitDclImmediateConstantBuffer(const DxbcShaderInstruction& ins) {
|
||||||
if (m_immConstBuf != 0)
|
if (m_icbArray)
|
||||||
throw DxvkError("DxbcCompiler: Immediate constant buffer already declared");
|
throw DxvkError("DxbcCompiler: Immediate constant buffer already declared");
|
||||||
|
|
||||||
if ((ins.customDataSize & 0x3) != 0)
|
if ((ins.customDataSize & 0x3) != 0)
|
||||||
throw DxvkError("DxbcCompiler: Immediate constant buffer size not a multiple of four DWORDs");
|
throw DxvkError("DxbcCompiler: Immediate constant buffer size not a multiple of four DWORDs");
|
||||||
|
|
||||||
if (ins.customDataSize <= Icb_MaxBakedDwords) {
|
// A lot of the time we'll be dealing with a scalar or vec2
|
||||||
|
// array here, there's no reason to emit all those zeroes.
|
||||||
|
uint32_t componentCount = 1u;
|
||||||
|
|
||||||
|
for (uint32_t i = 0; i < ins.customDataSize; i += 4u) {
|
||||||
|
for (uint32_t c = componentCount; c < 4u; c++) {
|
||||||
|
if (ins.customData[i + c])
|
||||||
|
componentCount = c + 1u;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (componentCount == 4u)
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
|
uint32_t vectorCount = (ins.customDataSize / 4u);
|
||||||
|
uint32_t dwordCount = vectorCount * componentCount;
|
||||||
|
|
||||||
|
if (dwordCount <= Icb_MaxBakedDwords) {
|
||||||
this->emitDclImmediateConstantBufferBaked(
|
this->emitDclImmediateConstantBufferBaked(
|
||||||
ins.customDataSize, ins.customData);
|
ins.customDataSize, ins.customData, componentCount);
|
||||||
} else {
|
} else {
|
||||||
this->emitDclImmediateConstantBufferUbo(
|
this->emitDclImmediateConstantBufferUbo(
|
||||||
ins.customDataSize, ins.customData);
|
ins.customDataSize, ins.customData, componentCount);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
void DxbcCompiler::emitDclImmediateConstantBufferBaked(
|
void DxbcCompiler::emitDclImmediateConstantBufferBaked(
|
||||||
uint32_t dwordCount,
|
uint32_t dwordCount,
|
||||||
const uint32_t* dwordArray) {
|
const uint32_t* dwordArray,
|
||||||
|
uint32_t componentCount) {
|
||||||
// Declare individual vector constants as 4x32-bit vectors
|
// Declare individual vector constants as 4x32-bit vectors
|
||||||
std::array<uint32_t, 4096> vectorIds;
|
small_vector<uint32_t, Icb_MaxBakedDwords> vectorIds;
|
||||||
|
|
||||||
DxbcVectorType vecType;
|
DxbcVectorType vecType;
|
||||||
vecType.ctype = DxbcScalarType::Uint32;
|
vecType.ctype = DxbcScalarType::Uint32;
|
||||||
vecType.ccount = 4;
|
vecType.ccount = componentCount;
|
||||||
|
|
||||||
const uint32_t vectorTypeId = getVectorTypeId(vecType);
|
uint32_t vectorTypeId = getVectorTypeId(vecType);
|
||||||
const uint32_t vectorCount = dwordCount / 4;
|
|
||||||
|
|
||||||
for (uint32_t i = 0; i < vectorCount; i++) {
|
for (uint32_t i = 0; i < dwordCount; i += 4u) {
|
||||||
std::array<uint32_t, 4> scalarIds = {
|
std::array<uint32_t, 4> scalarIds = { };
|
||||||
m_module.constu32(dwordArray[4 * i + 0]),
|
|
||||||
m_module.constu32(dwordArray[4 * i + 1]),
|
for (uint32_t c = 0; c < componentCount; c++)
|
||||||
m_module.constu32(dwordArray[4 * i + 2]),
|
scalarIds[c] = m_module.constu32(dwordArray[i + c]);
|
||||||
m_module.constu32(dwordArray[4 * i + 3]),
|
|
||||||
};
|
uint32_t id = scalarIds[0];
|
||||||
|
|
||||||
vectorIds.at(i) = m_module.constComposite(
|
if (componentCount > 1u)
|
||||||
vectorTypeId, scalarIds.size(), scalarIds.data());
|
id = m_module.constComposite(vectorTypeId, componentCount, scalarIds.data());
|
||||||
|
|
||||||
|
vectorIds.push_back(id);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Pad array with one entry of zeroes so that we can
|
||||||
|
// handle out-of-bounds accesses more conveniently.
|
||||||
|
vectorIds.push_back(emitBuildZeroVector(vecType).id);
|
||||||
|
|
||||||
// Declare the array that contains all the vectors
|
// Declare the array that contains all the vectors
|
||||||
DxbcArrayType arrInfo;
|
DxbcArrayType arrInfo;
|
||||||
arrInfo.ctype = DxbcScalarType::Uint32;
|
arrInfo.ctype = DxbcScalarType::Uint32;
|
||||||
arrInfo.ccount = 4;
|
arrInfo.ccount = componentCount;
|
||||||
arrInfo.alength = vectorCount;
|
arrInfo.alength = vectorIds.size();
|
||||||
|
|
||||||
const uint32_t arrayTypeId = getArrayTypeId(arrInfo);
|
uint32_t arrayTypeId = getArrayTypeId(arrInfo);
|
||||||
const uint32_t arrayId = m_module.constComposite(
|
uint32_t arrayId = m_module.constComposite(
|
||||||
arrayTypeId, vectorCount, vectorIds.data());
|
arrayTypeId, vectorIds.size(), vectorIds.data());
|
||||||
|
|
||||||
// Declare the variable that will hold the constant
|
// Declare the variable that will hold the constant
|
||||||
// data and initialize it with the constant array.
|
// data and initialize it with the constant array.
|
||||||
const uint32_t pointerTypeId = m_module.defPointerType(
|
uint32_t pointerTypeId = m_module.defPointerType(
|
||||||
arrayTypeId, spv::StorageClassPrivate);
|
arrayTypeId, spv::StorageClassPrivate);
|
||||||
|
|
||||||
m_immConstBuf = m_module.newVarInit(
|
m_icbArray = m_module.newVarInit(
|
||||||
pointerTypeId, spv::StorageClassPrivate,
|
pointerTypeId, spv::StorageClassPrivate,
|
||||||
arrayId);
|
arrayId);
|
||||||
|
|
||||||
m_module.setDebugName(m_immConstBuf, "icb");
|
m_module.setDebugName(m_icbArray, "icb");
|
||||||
m_module.decorate(m_immConstBuf, spv::DecorationNonWritable);
|
m_module.decorate(m_icbArray, spv::DecorationNonWritable);
|
||||||
|
|
||||||
|
m_icbComponents = componentCount;
|
||||||
|
m_icbSize = dwordCount / 4u;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
void DxbcCompiler::emitDclImmediateConstantBufferUbo(
|
void DxbcCompiler::emitDclImmediateConstantBufferUbo(
|
||||||
uint32_t dwordCount,
|
uint32_t dwordCount,
|
||||||
const uint32_t* dwordArray) {
|
const uint32_t* dwordArray,
|
||||||
|
uint32_t componentCount) {
|
||||||
this->emitDclConstantBufferVar(Icb_BindingSlotId, dwordCount / 4, "icb");
|
this->emitDclConstantBufferVar(Icb_BindingSlotId, dwordCount / 4, "icb");
|
||||||
m_immConstData.resize(dwordCount * sizeof(uint32_t));
|
|
||||||
std::memcpy(m_immConstData.data(), dwordArray, m_immConstData.size());
|
m_icbData.resize(dwordCount * sizeof(uint32_t));
|
||||||
|
std::memcpy(m_icbData.data(), dwordArray, m_icbData.size());
|
||||||
|
|
||||||
|
m_icbComponents = 4u;
|
||||||
|
m_icbSize = dwordCount / 4u;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
@ -5282,13 +5313,17 @@ namespace dxvk {
|
||||||
|
|
||||||
DxbcRegisterPointer DxbcCompiler::emitGetImmConstBufPtr(
|
DxbcRegisterPointer DxbcCompiler::emitGetImmConstBufPtr(
|
||||||
const DxbcRegister& operand) {
|
const DxbcRegister& operand) {
|
||||||
const DxbcRegisterValue constId
|
DxbcRegisterValue constId = emitIndexLoad(operand.idx[0]);
|
||||||
= emitIndexLoad(operand.idx[0]);
|
|
||||||
|
if (m_icbArray) {
|
||||||
if (m_immConstBuf != 0) {
|
// We pad the icb array with an extra zero vector, so we can
|
||||||
|
// clamp the index and get correct robustness behaviour.
|
||||||
|
constId.id = m_module.opUMin(getVectorTypeId(constId.type),
|
||||||
|
constId.id, m_module.constu32(m_icbSize));
|
||||||
|
|
||||||
DxbcRegisterInfo ptrInfo;
|
DxbcRegisterInfo ptrInfo;
|
||||||
ptrInfo.type.ctype = DxbcScalarType::Uint32;
|
ptrInfo.type.ctype = DxbcScalarType::Uint32;
|
||||||
ptrInfo.type.ccount = 4;
|
ptrInfo.type.ccount = m_icbComponents;
|
||||||
ptrInfo.type.alength = 0;
|
ptrInfo.type.alength = 0;
|
||||||
ptrInfo.sclass = spv::StorageClassPrivate;
|
ptrInfo.sclass = spv::StorageClassPrivate;
|
||||||
|
|
||||||
|
@ -5297,7 +5332,7 @@ namespace dxvk {
|
||||||
result.type.ccount = ptrInfo.type.ccount;
|
result.type.ccount = ptrInfo.type.ccount;
|
||||||
result.id = m_module.opAccessChain(
|
result.id = m_module.opAccessChain(
|
||||||
getPointerTypeId(ptrInfo),
|
getPointerTypeId(ptrInfo),
|
||||||
m_immConstBuf, 1, &constId.id);
|
m_icbArray, 1, &constId.id);
|
||||||
return result;
|
return result;
|
||||||
} else if (m_constantBuffers.at(Icb_BindingSlotId).varId != 0) {
|
} else if (m_constantBuffers.at(Icb_BindingSlotId).varId != 0) {
|
||||||
const std::array<uint32_t, 2> indices =
|
const std::array<uint32_t, 2> indices =
|
||||||
|
@ -5305,7 +5340,7 @@ namespace dxvk {
|
||||||
|
|
||||||
DxbcRegisterInfo ptrInfo;
|
DxbcRegisterInfo ptrInfo;
|
||||||
ptrInfo.type.ctype = DxbcScalarType::Float32;
|
ptrInfo.type.ctype = DxbcScalarType::Float32;
|
||||||
ptrInfo.type.ccount = 4;
|
ptrInfo.type.ccount = m_icbComponents;
|
||||||
ptrInfo.type.alength = 0;
|
ptrInfo.type.alength = 0;
|
||||||
ptrInfo.sclass = spv::StorageClassUniform;
|
ptrInfo.sclass = spv::StorageClassUniform;
|
||||||
|
|
||||||
|
@ -5343,7 +5378,7 @@ namespace dxvk {
|
||||||
|
|
||||||
case DxbcOperandType::ImmediateConstantBuffer:
|
case DxbcOperandType::ImmediateConstantBuffer:
|
||||||
return emitGetImmConstBufPtr(operand);
|
return emitGetImmConstBufPtr(operand);
|
||||||
|
|
||||||
case DxbcOperandType::InputThreadId:
|
case DxbcOperandType::InputThreadId:
|
||||||
return DxbcRegisterPointer {
|
return DxbcRegisterPointer {
|
||||||
{ DxbcScalarType::Uint32, 3 },
|
{ DxbcScalarType::Uint32, 3 },
|
||||||
|
@ -5812,7 +5847,24 @@ namespace dxvk {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
return emitValueLoad(emitGetOperandPtr(reg));
|
DxbcRegisterValue value = emitValueLoad(emitGetOperandPtr(reg));
|
||||||
|
|
||||||
|
// Pad icb values to a vec4 since the app may access components that are always 0
|
||||||
|
if (reg.type == DxbcOperandType::ImmediateConstantBuffer && value.type.ccount < 4u) {
|
||||||
|
DxbcVectorType zeroType;
|
||||||
|
zeroType.ctype = value.type.ctype;
|
||||||
|
zeroType.ccount = 4u - value.type.ccount;
|
||||||
|
|
||||||
|
uint32_t zeroVector = emitBuildZeroVector(zeroType).id;
|
||||||
|
|
||||||
|
std::array<uint32_t, 2> constituents = { value.id, zeroVector };
|
||||||
|
|
||||||
|
value.type.ccount = 4u;
|
||||||
|
value.id = m_module.opCompositeConstruct(getVectorTypeId(value.type),
|
||||||
|
constituents.size(), constituents.data());
|
||||||
|
}
|
||||||
|
|
||||||
|
return value;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
|
@ -503,8 +503,11 @@ namespace dxvk {
|
||||||
//////////////////////////////////////////////////
|
//////////////////////////////////////////////////
|
||||||
// Immediate constant buffer. If defined, this is
|
// Immediate constant buffer. If defined, this is
|
||||||
// an array of four-component uint32 vectors.
|
// an array of uint32 vectors with one to four components.
|
||||||
uint32_t m_immConstBuf = 0;
|
uint32_t m_icbArray = 0;
|
||||||
std::vector<char> m_immConstData;
|
std::vector<char> m_icbData;
|
||||||
|
|
||||||
|
uint32_t m_icbComponents = 0u;
|
||||||
|
uint32_t m_icbSize = 0u;
|
||||||
|
|
||||||
///////////////////////////////////////////////////
|
///////////////////////////////////////////////////
|
||||||
// Sample pos array. If defined, this is an array
|
// Sample pos array. If defined, this is an array
|
||||||
|
@ -650,11 +653,13 @@ namespace dxvk {
|
||||||
|
|
||||||
void emitDclImmediateConstantBufferBaked(
|
void emitDclImmediateConstantBufferBaked(
|
||||||
uint32_t dwordCount,
|
uint32_t dwordCount,
|
||||||
const uint32_t* dwordArray);
|
const uint32_t* dwordArray,
|
||||||
|
uint32_t componentCount);
|
||||||
|
|
||||||
void emitDclImmediateConstantBufferUbo(
|
void emitDclImmediateConstantBufferUbo(
|
||||||
uint32_t dwordCount,
|
uint32_t dwordCount,
|
||||||
const uint32_t* dwordArray);
|
const uint32_t* dwordArray,
|
||||||
|
uint32_t componentCount);
|
||||||
|
|
||||||
void emitCustomData(
|
void emitCustomData(
|
||||||
const DxbcShaderInstruction& ins);
|
const DxbcShaderInstruction& ins);
|
||||||
|
|
Loading…
Add table
Reference in a new issue