Merge branch 'doitsujin:master' into filter-uuid

This commit is contained in:
meladath 2022-04-29 18:04:27 +01:00 committed by GitHub
commit 85d6ea36ad
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
34 changed files with 601 additions and 573 deletions

View file

@ -184,7 +184,7 @@
# Supported values: True, False
# d3d11.invariantPosition = True
# d3d9.invariantPosition = False
# d3d9.invariantPosition = True
# Forces the sample count of all textures to 1, and performs
@ -426,16 +426,6 @@
# d3d9.deviceLocalConstantBuffers = False
# Allow Read Only
#
# Enables using the D3DLOCK_READONLY flag. Some apps use this
# incorrectly, and write when they should be reading.
#
# Supported values:
# - True/False
# d3d9.allowLockFlagReadonly = True
# No Explicit Front Buffer
#
# Disables the front buffer
@ -566,4 +556,4 @@
# Supported values:
# - True/False
# dxvk.enableDebugUtils = True
# dxvk.enableDebugUtils = False

View file

@ -578,25 +578,22 @@ namespace dxvk {
return;
}
// Set up clear color struct
VkClearValue clearValue;
clearValue.color.uint32[0] = Values[0];
clearValue.color.uint32[1] = Values[1];
clearValue.color.uint32[2] = Values[2];
clearValue.color.uint32[3] = Values[3];
// R11G11B10 is a special cases since there's no corresponding integer format
// with the same bit layout, and creating an R32 view may disable compression,
// so if we can't preserve the bit pattern for non-zero values, we can create
// a temporary buffer instead and perform a copy from that.
bool useBuffer = false;
if (rawFormat == VK_FORMAT_B10G11R11_UFLOAT_PACK32) {
useBuffer = (Values[0] | Values[1] | Values[2]) != 0;
// R11G11B10 is a special case since there's no corresponding
// integer format with the same bit layout. Use R32 instead.
if (uavFormat == VK_FORMAT_B10G11R11_UFLOAT_PACK32) {
clearValue.color.uint32[0] = ((Values[0] & 0x7FF) << 0)
| ((Values[1] & 0x7FF) << 11)
| ((Values[2] & 0x3FF) << 22);
clearValue.color.uint32[1] = 0;
clearValue.color.uint32[2] = 0;
clearValue.color.uint32[3] = 0;
} else {
clearValue.color.uint32[0] = Values[0];
clearValue.color.uint32[1] = Values[1];
clearValue.color.uint32[2] = Values[2];
clearValue.color.uint32[3] = Values[3];
}
if (uav->GetResourceType() == D3D11_RESOURCE_DIMENSION_BUFFER) {
@ -639,57 +636,85 @@ namespace dxvk {
cClearValue.color);
});
}
} else if (useBuffer) {
Rc<DxvkImageView> imageView = uav->GetImageView();
DxvkBufferCreateInfo bufferInfo;
bufferInfo.size = imageView->formatInfo()->elementSize
* imageView->info().numLayers
* util::flattenImageExtent(imageView->mipLevelExtent(0));
bufferInfo.usage = VK_BUFFER_USAGE_TRANSFER_SRC_BIT
| VK_BUFFER_USAGE_TRANSFER_DST_BIT;
bufferInfo.stages = VK_PIPELINE_STAGE_TRANSFER_BIT;
bufferInfo.access = VK_ACCESS_TRANSFER_READ_BIT
| VK_ACCESS_TRANSFER_WRITE_BIT;
Rc<DxvkBuffer> buffer = m_device->createBuffer(bufferInfo,
VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT);
EmitCs([
cDstView = std::move(imageView),
cSrcBuffer = std::move(buffer),
cClearValue = clearValue.color.uint32[0]
] (DxvkContext* ctx) {
ctx->clearBuffer(cSrcBuffer, 0,
cSrcBuffer->info().size, cClearValue);
ctx->copyBufferToImage(cDstView->image(),
vk::pickSubresourceLayers(cDstView->subresources(), 0),
VkOffset3D { 0, 0, 0 },
cDstView->mipLevelExtent(0),
cSrcBuffer, 0, 0, 0);
});
} else {
// Create a view with an integer format if necessary
Rc<DxvkImageView> imageView = uav->GetImageView();
if (uavFormat != rawFormat) {
DxvkImageViewCreateInfo info = imageView->info();
info.format = rawFormat;
imageView = m_device->createImageView(imageView->image(), info);
// If the clear value is zero, we can use the original view regardless of
// the format since the bit pattern will not change in any supported format.
bool isZeroClearValue = !(clearValue.color.uint32[0] | clearValue.color.uint32[1]
| clearValue.color.uint32[2] | clearValue.color.uint32[3]);
// Check if we can create an image view with the given raw format. If not,
// we'll have to use a fallback using a texel buffer view and buffer copies.
bool isViewCompatible = uavFormat == rawFormat;
if (!isViewCompatible && (imageView->imageInfo().flags & VK_IMAGE_CREATE_MUTABLE_FORMAT_BIT)) {
uint32_t formatCount = imageView->imageInfo().viewFormatCount;
isViewCompatible = formatCount == 0;
for (uint32_t i = 0; i < formatCount && !isViewCompatible; i++)
isViewCompatible = imageView->imageInfo().viewFormats[i] == rawFormat;
}
if (isViewCompatible || isZeroClearValue) {
// Create a view with an integer format if necessary
if (uavFormat != rawFormat && !isZeroClearValue) {
DxvkImageViewCreateInfo info = imageView->info();
info.format = rawFormat;
imageView = m_device->createImageView(imageView->image(), info);
}
EmitCs([
cClearValue = clearValue,
cDstView = imageView
] (DxvkContext* ctx) {
ctx->clearImageView(cDstView,
VkOffset3D { 0, 0, 0 },
cDstView->mipLevelExtent(0),
VK_IMAGE_ASPECT_COLOR_BIT,
cClearValue);
});
} else {
DxvkBufferCreateInfo bufferInfo;
bufferInfo.size = imageView->formatInfo()->elementSize
* imageView->info().numLayers
* util::flattenImageExtent(imageView->mipLevelExtent(0));
bufferInfo.usage = VK_BUFFER_USAGE_TRANSFER_SRC_BIT
| VK_BUFFER_USAGE_STORAGE_TEXEL_BUFFER_BIT;
bufferInfo.stages = VK_PIPELINE_STAGE_TRANSFER_BIT
| VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT;
bufferInfo.access = VK_ACCESS_TRANSFER_READ_BIT
| VK_ACCESS_SHADER_WRITE_BIT;
Rc<DxvkBuffer> buffer = m_device->createBuffer(bufferInfo,
VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT);
DxvkBufferViewCreateInfo bufferViewInfo;
bufferViewInfo.format = rawFormat;
bufferViewInfo.rangeOffset = 0;
bufferViewInfo.rangeLength = bufferInfo.size;
Rc<DxvkBufferView> bufferView = m_device->createBufferView(buffer,
bufferViewInfo);
EmitCs([
cDstView = std::move(imageView),
cSrcView = std::move(bufferView),
cClearValue = clearValue.color
] (DxvkContext* ctx) {
ctx->clearBufferView(
cSrcView, 0,
cSrcView->elementCount(),
cClearValue);
ctx->copyBufferToImage(cDstView->image(),
vk::pickSubresourceLayers(cDstView->subresources(), 0),
VkOffset3D { 0, 0, 0 },
cDstView->mipLevelExtent(0),
cSrcView->buffer(), 0, 0, 0);
});
}
EmitCs([
cClearValue = clearValue,
cDstView = imageView
] (DxvkContext* ctx) {
ctx->clearImageView(cDstView,
VkOffset3D { 0, 0, 0 },
cDstView->mipLevelExtent(0),
VK_IMAGE_ASPECT_COLOR_BIT,
cClearValue);
});
}
}

View file

@ -54,11 +54,13 @@ namespace dxvk {
}
// Create shader constant buffer if necessary
if (m_shader->shaderConstants().data() != nullptr) {
const DxvkShaderCreateInfo& shaderInfo = m_shader->info();
if (shaderInfo.uniformSize) {
DxvkBufferCreateInfo info;
info.size = m_shader->shaderConstants().sizeInBytes();
info.size = shaderInfo.uniformSize;
info.usage = VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT;
info.stages = util::pipelineStages(m_shader->stage());
info.stages = util::pipelineStages(shaderInfo.stage);
info.access = VK_ACCESS_UNIFORM_READ_BIT;
VkMemoryPropertyFlags memFlags
@ -67,10 +69,7 @@ namespace dxvk {
| VK_MEMORY_PROPERTY_HOST_COHERENT_BIT;
m_buffer = pDevice->GetDXVKDevice()->createBuffer(info, memFlags);
std::memcpy(m_buffer->mapPtr(0),
m_shader->shaderConstants().data(),
m_shader->shaderConstants().sizeInBytes());
std::memcpy(m_buffer->mapPtr(0), shaderInfo.uniformData, shaderInfo.uniformSize);
}
pDevice->GetDXVKDevice()->registerShader(m_shader);

View file

@ -60,25 +60,11 @@ namespace dxvk {
if (!pDevice->GetOptions()->disableMsaa)
DecodeSampleCount(m_desc.SampleDesc.Count, &imageInfo.sampleCount);
// Integer clear operations on UAVs are implemented using
// a view with a bit-compatible integer format, so we'll
// have to include that format in the format family
if (m_desc.BindFlags & D3D11_BIND_UNORDERED_ACCESS) {
DXGI_VK_FORMAT_INFO formatBase = m_device->LookupFormat(
m_desc.Format, DXGI_VK_FORMAT_MODE_RAW);
if (formatBase.Format != formatInfo.Format
&& formatBase.Format != VK_FORMAT_UNDEFINED) {
formatFamily.Add(formatInfo.Format);
formatFamily.Add(formatBase.Format);
}
if (IsR32UavCompatibleFormat(m_desc.Format)) {
formatFamily.Add(formatInfo.Format);
formatFamily.Add(VK_FORMAT_R32_SFLOAT);
formatFamily.Add(VK_FORMAT_R32_UINT);
formatFamily.Add(VK_FORMAT_R32_SINT);
}
if ((m_desc.BindFlags & D3D11_BIND_UNORDERED_ACCESS) && IsR32UavCompatibleFormat(m_desc.Format)) {
formatFamily.Add(formatInfo.Format);
formatFamily.Add(VK_FORMAT_R32_SFLOAT);
formatFamily.Add(VK_FORMAT_R32_UINT);
formatFamily.Add(VK_FORMAT_R32_SINT);
}
// The image must be marked as mutable if it can be reinterpreted

View file

@ -326,8 +326,8 @@ namespace dxvk {
D3D11ImmediateContext* pContext,
const Rc<DxvkDevice>& Device)
: m_ctx(pContext) {
const SpirvCodeBuffer vsCode(d3d11_video_blit_vert);
const SpirvCodeBuffer fsCode(d3d11_video_blit_frag);
SpirvCodeBuffer vsCode(d3d11_video_blit_vert);
SpirvCodeBuffer fsCode(d3d11_video_blit_frag);
const std::array<DxvkResourceSlot, 4> fsResourceSlots = {{
{ 0, VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC },
@ -336,17 +336,18 @@ namespace dxvk {
{ 3, VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE, VK_IMAGE_VIEW_TYPE_2D },
}};
m_vs = Device->createShader(
VK_SHADER_STAGE_VERTEX_BIT,
0, nullptr, { 0u, 1u },
vsCode);
m_fs = Device->createShader(
VK_SHADER_STAGE_FRAGMENT_BIT,
fsResourceSlots.size(),
fsResourceSlots.data(),
{ 1u, 1u, 0u, 0u },
fsCode);
DxvkShaderCreateInfo vsInfo;
vsInfo.stage = VK_SHADER_STAGE_VERTEX_BIT;
vsInfo.outputMask = 0x1;
m_vs = new DxvkShader(vsInfo, std::move(vsCode));
DxvkShaderCreateInfo fsInfo;
fsInfo.stage = VK_SHADER_STAGE_FRAGMENT_BIT;
fsInfo.resourceSlotCount = fsResourceSlots.size();
fsInfo.resourceSlots = fsResourceSlots.data();
fsInfo.inputMask = 0x1;
fsInfo.outputMask = 0x1;
m_fs = new DxvkShader(fsInfo, std::move(fsCode));
DxvkSamplerCreateInfo samplerInfo;
samplerInfo.magFilter = VK_FILTER_LINEAR;

View file

@ -45,8 +45,7 @@ namespace dxvk {
template <size_t N>
static void copyToStringArray(char (&dst)[N], const char* src) {
std::strncpy(dst, src, N);
dst[N - 1] = '\0';
dxvk::str::strlcpy(dst, src, N);
}

View file

@ -1795,8 +1795,9 @@ namespace dxvk {
return m_recorder->SetRenderState(State, Value);
auto& states = m_state.renderStates;
DWORD old = states[State];
bool changed = states[State] != Value;
bool changed = old != Value;
if (likely(changed)) {
const bool oldClipPlaneEnabled = IsClipPlaneEnabled();
@ -1880,19 +1881,31 @@ namespace dxvk {
break;
case D3DRS_COLORWRITEENABLE:
UpdateActiveRTs(0);
if (likely(!old != !Value)) {
m_flags.set(D3D9DeviceFlag::DirtyFramebuffer);
UpdateActiveRTs(0);
}
m_flags.set(D3D9DeviceFlag::DirtyBlendState);
break;
case D3DRS_COLORWRITEENABLE1:
UpdateActiveRTs(1);
if (likely(!old != !Value && m_state.renderTargets[1] != nullptr)) {
m_flags.set(D3D9DeviceFlag::DirtyFramebuffer);
UpdateActiveRTs(1);
}
m_flags.set(D3D9DeviceFlag::DirtyBlendState);
break;
case D3DRS_COLORWRITEENABLE2:
UpdateActiveRTs(2);
if (likely(!old != !Value && m_state.renderTargets[2] != nullptr)) {
m_flags.set(D3D9DeviceFlag::DirtyFramebuffer);
UpdateActiveRTs(2);
}
m_flags.set(D3D9DeviceFlag::DirtyBlendState);
break;
case D3DRS_COLORWRITEENABLE3:
UpdateActiveRTs(3);
if (likely(!old != !Value && m_state.renderTargets[3] != nullptr)) {
m_flags.set(D3D9DeviceFlag::DirtyFramebuffer);
UpdateActiveRTs(3);
}
m_flags.set(D3D9DeviceFlag::DirtyBlendState);
break;
@ -1922,17 +1935,17 @@ namespace dxvk {
m_flags.set(D3D9DeviceFlag::DirtyMultiSampleState);
break;
case D3DRS_STENCILENABLE:
case D3DRS_ZWRITEENABLE:
if (m_activeHazardsDS != 0)
case D3DRS_ZENABLE:
if (likely(m_state.depthStencil != nullptr))
m_flags.set(D3D9DeviceFlag::DirtyFramebuffer);
m_flags.set(D3D9DeviceFlag::DirtyDepthStencilState);
break;
case D3DRS_ZENABLE:
case D3DRS_ZFUNC:
case D3DRS_TWOSIDEDSTENCILMODE:
case D3DRS_STENCILENABLE:
case D3DRS_STENCILFAIL:
case D3DRS_STENCILZFAIL:
case D3DRS_STENCILPASS:
@ -5555,12 +5568,18 @@ namespace dxvk {
else if (unlikely(sampleCount != rtImageInfo.sampleCount))
continue;
if (!m_state.renderStates[ColorWriteIndex(i)])
continue;
attachments.color[i] = {
m_state.renderTargets[i]->GetRenderTargetView(srgb),
m_state.renderTargets[i]->GetRenderTargetLayout() };
}
if (m_state.depthStencil != nullptr) {
if (m_state.depthStencil != nullptr &&
(m_state.renderStates[D3DRS_ZENABLE]
|| m_state.renderStates[D3DRS_ZWRITEENABLE]
|| m_state.renderStates[D3DRS_STENCILENABLE])) {
const DxvkImageCreateInfo& dsImageInfo = m_state.depthStencil->GetCommonTexture()->GetImage()->info();
const bool depthWrite = m_state.renderStates[D3DRS_ZWRITEENABLE];

View file

@ -604,9 +604,13 @@ namespace dxvk {
SpirvModule m_module;
std::vector
<DxvkResourceSlot> m_resourceSlots;
DxvkInterfaceSlots m_interfaceSlots;
std::vector<uint32_t> m_entryPointInterfaces;
uint32_t m_inputMask = 0u;
uint32_t m_outputMask = 0u;
uint32_t m_pushConstOffset = 0u;
uint32_t m_pushConstSize = 0u;
DxsoProgramType m_programType;
D3D9FFShaderKeyVS m_vsKey;
D3D9FFShaderKeyFS m_fsKey;
@ -706,19 +710,17 @@ namespace dxvk {
m_entryPointInterfaces.size(),
m_entryPointInterfaces.data());
DxvkShaderOptions shaderOptions = { };
DxvkShaderConstData constData = { };
// Create the shader module object
return new DxvkShader(
isVS() ? VK_SHADER_STAGE_VERTEX_BIT : VK_SHADER_STAGE_FRAGMENT_BIT,
m_resourceSlots.size(),
m_resourceSlots.data(),
m_interfaceSlots,
m_module.compile(),
shaderOptions,
std::move(constData));
// Describe the compiled module for DxvkShader: stage, resource slots,
// input/output masks and the push constant range gathered during emission.
DxvkShaderCreateInfo info;
info.stage = isVS() ? VK_SHADER_STAGE_VERTEX_BIT : VK_SHADER_STAGE_FRAGMENT_BIT;
info.resourceSlotCount = m_resourceSlots.size();
info.resourceSlots = m_resourceSlots.data();
info.inputMask = m_inputMask;
info.outputMask = m_outputMask;
info.pushConstOffset = m_pushConstOffset;
// The size must come from m_pushConstSize. Using the offset here yields a
// size of zero whenever the range starts at offset 0, in which case no
// push constant range gets defined for the shader at all.
info.pushConstSize = m_pushConstSize;
return new DxvkShader(info, m_module.compile());
}
@ -728,8 +730,8 @@ namespace dxvk {
? m_isgn : m_osgn;
uint32_t& slots = input
? m_interfaceSlots.inputSlots
: m_interfaceSlots.outputSlots;
? m_inputMask
: m_outputMask;
uint32_t i = sgn.elemCount++;
@ -1186,13 +1188,13 @@ namespace dxvk {
uint32_t count;
if (m_programType == DxsoProgramType::PixelShader) {
m_interfaceSlots.pushConstOffset = 0;
m_interfaceSlots.pushConstSize = offsetof(D3D9RenderStateInfo, pointSize);
m_pushConstOffset = 0;
m_pushConstSize = offsetof(D3D9RenderStateInfo, pointSize);
count = 5;
}
else {
m_interfaceSlots.pushConstOffset = offsetof(D3D9RenderStateInfo, pointSize);
m_interfaceSlots.pushConstSize = sizeof(float) * 6;
m_pushConstOffset = offsetof(D3D9RenderStateInfo, pointSize);
m_pushConstSize = sizeof(float) * 6;
count = 11;
}

View file

@ -130,10 +130,14 @@ namespace dxvk {
{ BindingIds::Buffer, VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER, VK_IMAGE_VIEW_TYPE_1D },
} };
return m_device->createShader(
VK_SHADER_STAGE_COMPUTE_BIT,
resourceSlots.size(), resourceSlots.data(),
{ 0u, 0u, 0u, sizeof(VkExtent2D) }, code);
DxvkShaderCreateInfo info;
info.stage = VK_SHADER_STAGE_COMPUTE_BIT;
info.resourceSlotCount = resourceSlots.size();
info.resourceSlots = resourceSlots.data();
info.pushConstOffset = 0;
info.pushConstSize = sizeof(VkExtent2D);
return new DxvkShader(info, std::move(code));
}

View file

@ -57,7 +57,7 @@ namespace dxvk {
this->supportX4R4G4B4 = config.getOption<bool> ("d3d9.supportX4R4G4B4", true);
this->supportD32 = config.getOption<bool> ("d3d9.supportD32", true);
this->disableA8RT = config.getOption<bool> ("d3d9.disableA8RT", false);
this->invariantPosition = config.getOption<bool> ("d3d9.invariantPosition", false);
this->invariantPosition = config.getOption<bool> ("d3d9.invariantPosition", true);
this->memoryTrackTest = config.getOption<bool> ("d3d9.memoryTrackTest", false);
this->supportVCache = config.getOption<bool> ("d3d9.supportVCache", vendorId == 0x10de);
this->enableDialogMode = config.getOption<bool> ("d3d9.enableDialogMode", false);

View file

@ -130,12 +130,10 @@ namespace dxvk {
m_module.decorateDescriptorSet(buffer, 0);
m_module.decorateBinding(buffer, bufferSlot);
DxvkResourceSlot bufferRes;
bufferRes.slot = bufferSlot;
bufferRes.type = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER;
bufferRes.view = VK_IMAGE_VIEW_TYPE_MAX_ENUM;
bufferRes.access = VK_ACCESS_SHADER_WRITE_BIT;
m_resourceSlots.push_back(bufferRes);
m_bufferResource.slot = bufferSlot;
m_bufferResource.type = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER;
m_bufferResource.view = VK_IMAGE_VIEW_TYPE_MAX_ENUM;
m_bufferResource.access = VK_ACCESS_SHADER_WRITE_BIT;
// Load our builtins
uint32_t primitiveIdPtr = m_module.newVar(m_module.defPointerType(uint_t, spv::StorageClassInput), spv::StorageClassInput);
@ -168,7 +166,7 @@ namespace dxvk {
uint32_t slotIdx = RegisterLinkerSlot(semantic);
m_module.decorateLocation(elementPtr, slotIdx);
m_interfaceSlots.inputSlots |= 1u << slotIdx;
m_inputMask |= 1u << slotIdx;
}
uint32_t zero = m_module.constu32(0);
@ -283,16 +281,13 @@ namespace dxvk {
m_entryPointInterfaces.data());
m_module.setDebugName(m_entryPointId, "main");
DxvkShaderConstData constData = { };
DxvkShaderCreateInfo info;
info.stage = VK_SHADER_STAGE_GEOMETRY_BIT;
info.resourceSlotCount = 1;
info.resourceSlots = &m_bufferResource;
info.inputMask = m_inputMask;
return new DxvkShader(
VK_SHADER_STAGE_GEOMETRY_BIT,
m_resourceSlots.size(),
m_resourceSlots.data(),
m_interfaceSlots,
m_module.compile(),
DxvkShaderOptions(),
std::move(constData));
return new DxvkShader(info, m_module.compile());
}
private:
@ -301,9 +296,8 @@ namespace dxvk {
std::vector<uint32_t> m_entryPointInterfaces;
uint32_t m_entryPointId = 0;
std::vector<DxvkResourceSlot> m_resourceSlots;
DxvkInterfaceSlots m_interfaceSlots;
uint32_t m_inputMask = 0u;
DxvkResourceSlot m_bufferResource;
};

View file

@ -248,24 +248,24 @@ namespace dxvk {
m_entryPointInterfaces.data());
m_module.setDebugName(m_entryPointId, "main");
DxvkShaderOptions shaderOptions = { };
// Create the shader object
DxvkShaderCreateInfo info;
info.stage = m_programInfo.shaderStage();
info.resourceSlotCount = m_resourceSlots.size();
info.resourceSlots = m_resourceSlots.data();
info.inputMask = m_inputMask;
info.outputMask = m_outputMask;
info.uniformSize = m_immConstData.size();
info.uniformData = m_immConstData.data();
if (m_moduleInfo.xfb != nullptr) {
shaderOptions.rasterizedStream = m_moduleInfo.xfb->rasterizedStream;
if (m_moduleInfo.xfb) {
info.xfbRasterizedStream = m_moduleInfo.xfb->rasterizedStream;
for (uint32_t i = 0; i < 4; i++)
shaderOptions.xfbStrides[i] = m_moduleInfo.xfb->strides[i];
info.xfbStrides[i] = m_moduleInfo.xfb->strides[i];
}
// Create the shader module object
return new DxvkShader(
m_programInfo.shaderStage(),
m_resourceSlots.size(),
m_resourceSlots.data(),
m_interfaceSlots,
m_module.compile(),
shaderOptions,
std::move(m_immConstData));
return new DxvkShader(info, m_module.compile());
}
@ -686,7 +686,7 @@ namespace dxvk {
}
// Declare the input slot as defined
m_interfaceSlots.inputSlots |= 1u << regIdx;
m_inputMask |= 1u << regIdx;
m_vArrayLength = std::max(m_vArrayLength, regIdx + 1);
} else if (sv != DxbcSystemValue::None) {
// Add a new system value mapping if needed
@ -759,7 +759,7 @@ namespace dxvk {
m_oRegs.at(regIdx) = { regType, varId };
// Declare the output slot as defined
m_interfaceSlots.outputSlots |= 1u << regIdx;
m_outputMask |= 1u << regIdx;
}
}
@ -1534,7 +1534,8 @@ namespace dxvk {
const uint32_t* dwordArray) {
this->emitDclConstantBufferVar(Icb_BindingSlotId, dwordCount / 4, "icb",
m_moduleInfo.options.dynamicIndexedConstantBufferAsSsbo);
m_immConstData = DxvkShaderConstData(dwordCount, dwordArray);
m_immConstData.resize(dwordCount * sizeof(uint32_t));
std::memcpy(m_immConstData.data(), dwordArray, m_immConstData.size());
}
@ -5576,6 +5577,49 @@ namespace dxvk {
/**
 * \brief Loads the raw value of a register operand
 *
 * Operands of type \c IndexableTemp that are addressed through a
 * relative register get a bounds check: the element index is compared
 * against the \c alength stored for the array in \c m_xRegs, and the
 * result is a zero vector if the index is not below that length.
 * All other operands are loaded directly through their operand pointer.
 * \param [in] reg The register operand to load
 * \returns The loaded value, or zero for an out-of-bounds access
 */
DxbcRegisterValue DxbcCompiler::emitRegisterLoadRaw(
const DxbcRegister& reg) {
if (reg.type == DxbcOperandType::IndexableTemp) {
// idx[0] selects the array in m_xRegs, idx[1] is the element index.
// Only indices that involve a relative register are bounds-checked.
bool doBoundsCheck = reg.idx[1].relReg != nullptr;
// NOTE(review): vectorId is only consumed by the bounds check below
DxbcRegisterValue vectorId = emitIndexLoad(reg.idx[1]);
if (doBoundsCheck) {
// Condition for the in-bounds path: index < alength of the array
uint32_t boundsCheck = m_module.opULessThan(
m_module.defBoolType(), vectorId.id,
m_module.constu32(m_xRegs.at(reg.idx[0].offset).alength));
// Kind of ugly to have an empty else block here but there's no
// way for us to know the current block ID for the phi below
DxbcConditional cond;
cond.labelIf = m_module.allocateId();
cond.labelElse = m_module.allocateId();
cond.labelEnd = m_module.allocateId();
m_module.opSelectionMerge(cond.labelEnd, spv::SelectionControlMaskNone);
m_module.opBranchConditional(boundsCheck, cond.labelIf, cond.labelElse);
// In-bounds block: perform the actual load
m_module.opLabel(cond.labelIf);
DxbcRegisterValue returnValue = emitValueLoad(emitGetOperandPtr(reg));
m_module.opBranch(cond.labelEnd);
// Out-of-bounds block: build a zero vector of the same type
m_module.opLabel (cond.labelElse);
DxbcRegisterValue zeroValue = emitBuildZeroVector(returnValue.type);
m_module.opBranch(cond.labelEnd);
// Merge block: the phi picks the value of whichever block was taken
m_module.opLabel (cond.labelEnd);
std::array<SpirvPhiLabel, 2> phiLabels = {{
{ returnValue.id, cond.labelIf },
{ zeroValue.id, cond.labelElse },
}};
returnValue.id = m_module.opPhi(
getVectorTypeId(returnValue.type),
phiLabels.size(), phiLabels.data());
return returnValue;
}
}
// Not an indexable temp, or no relative index: plain unchecked load
return emitValueLoad(emitGetOperandPtr(reg));
}

View file

@ -498,8 +498,8 @@ namespace dxvk {
//////////////////////////////////////////////////
// Immediate constant buffer. If defined, this is
// an array of four-component uint32 vectors.
uint32_t m_immConstBuf = 0;
DxvkShaderConstData m_immConstData;
uint32_t m_immConstBuf = 0;
std::vector<char> m_immConstData;
///////////////////////////////////////////////////
// Sample pos array. If defined, this is an array
@ -524,7 +524,8 @@ namespace dxvk {
////////////////////////////////////////////
// Inter-stage shader interface slots. Also
// covers vertex input and fragment output.
DxvkInterfaceSlots m_interfaceSlots;
uint32_t m_inputMask = 0u;
uint32_t m_outputMask = 0u;
///////////////////////////////////
// Shader-specific data structures

View file

@ -129,7 +129,7 @@ namespace dxvk {
// DXGI_FORMAT_R11G11B10_FLOAT
{ VK_FORMAT_B10G11R11_UFLOAT_PACK32,
VK_FORMAT_UNDEFINED,
VK_FORMAT_B10G11R11_UFLOAT_PACK32,
VK_FORMAT_R32_UINT,
VK_IMAGE_ASPECT_COLOR_BIT },
// DXGI_FORMAT_R8G8B8A8_TYPELESS
{ VK_FORMAT_R8G8B8A8_UNORM,

View file

@ -246,17 +246,16 @@ namespace dxvk {
Rc<DxvkShader> DxsoCompiler::compileShader() {
DxvkShaderOptions shaderOptions = { };
DxvkShaderConstData constData = { };
// Describe the compiled module for DxvkShader: stage, resource slots,
// input/output masks and the push constant range gathered during emission.
DxvkShaderCreateInfo info;
info.stage = m_programInfo.shaderStage();
info.resourceSlotCount = m_resourceSlots.size();
info.resourceSlots = m_resourceSlots.data();
info.inputMask = m_inputMask;
info.outputMask = m_outputMask;
info.pushConstOffset = m_pushConstOffset;
// The size must come from m_pushConstSize. Using the offset here yields a
// size of zero whenever the range starts at offset 0, in which case no
// push constant range gets defined for the shader at all.
info.pushConstSize = m_pushConstSize;
return new DxvkShader(
m_programInfo.shaderStage(),
m_resourceSlots.size(),
m_resourceSlots.data(),
m_interfaceSlots,
m_module.compile(),
shaderOptions,
std::move(constData));
return new DxvkShader(info, m_module.compile());
}
void DxsoCompiler::emitInit() {
@ -709,8 +708,8 @@ namespace dxvk {
uint32_t slot = 0;
uint32_t& slots = input
? m_interfaceSlots.inputSlots
: m_interfaceSlots.outputSlots;
? m_inputMask
: m_outputMask;
uint16_t& explicits = input
? m_explicitInputs
@ -1200,8 +1199,8 @@ namespace dxvk {
uint32_t slot = RegisterLinkerSlot(semantic);
uint32_t& slots = input
? m_interfaceSlots.inputSlots
: m_interfaceSlots.outputSlots;
? m_inputMask
: m_outputMask;
slots |= 1u << slot;
@ -1243,7 +1242,7 @@ namespace dxvk {
m_module.constvec4f32(0.0f, 0.0f, 0.0f, 0.0f),
spv::StorageClassOutput);
m_interfaceSlots.outputSlots |= 1u << idx;
m_outputMask |= 1u << idx;
m_module.decorateLocation(m_ps.oColor[idx].id, idx);
m_module.decorateIndex(m_ps.oColor[idx].id, 0);
@ -3596,7 +3595,7 @@ void DxsoCompiler::emitControlFlowGenericLoop(
m_module.setDebugName(outputPtr, name.c_str());
m_interfaceSlots.outputSlots |= 1u << slot;
m_outputMask |= 1u << slot;
m_entryPointInterfaces.push_back(outputPtr);
};
@ -3714,20 +3713,20 @@ void DxsoCompiler::emitControlFlowGenericLoop(
// No FF fog component.
if (m_programInfo.type() == DxsoProgramType::PixelShader) {
if (m_programInfo.majorVersion() == 3) {
m_interfaceSlots.pushConstOffset = offsetof(D3D9RenderStateInfo, alphaRef);
m_interfaceSlots.pushConstSize = sizeof(float);
m_pushConstOffset = offsetof(D3D9RenderStateInfo, alphaRef);
m_pushConstSize = sizeof(float);
}
else {
m_interfaceSlots.pushConstOffset = 0;
m_interfaceSlots.pushConstSize = offsetof(D3D9RenderStateInfo, pointSize);
m_pushConstOffset = 0;
m_pushConstSize = offsetof(D3D9RenderStateInfo, pointSize);
}
count = 5;
}
else {
m_interfaceSlots.pushConstOffset = offsetof(D3D9RenderStateInfo, pointSize);
m_pushConstOffset = offsetof(D3D9RenderStateInfo, pointSize);
// Point scale never triggers on programmable
m_interfaceSlots.pushConstSize = sizeof(float) * 3;
m_pushConstSize = sizeof(float) * 3;
count = 8;
}

View file

@ -355,7 +355,10 @@ namespace dxvk {
////////////////////////////////////////////
// Inter-stage shader interface slots. Also
// covers vertex input and fragment output.
DxvkInterfaceSlots m_interfaceSlots;
uint32_t m_inputMask = 0u;
uint32_t m_outputMask = 0u;
uint32_t m_pushConstOffset = 0u;
uint32_t m_pushConstSize = 0u;
///////////////////////////////////
// Shader-specific data structures
@ -435,14 +438,14 @@ namespace dxvk {
DxsoTextureType type);
bool defineInput(uint32_t idx) {
bool alreadyDefined = m_interfaceSlots.inputSlots & 1u << idx;
m_interfaceSlots.inputSlots |= 1u << idx;
bool alreadyDefined = m_inputMask & 1u << idx;
m_inputMask |= 1u << idx;
return alreadyDefined;
}
bool defineOutput(uint32_t idx) {
bool alreadyDefined = m_interfaceSlots.outputSlots & 1u << idx;
m_interfaceSlots.outputSlots |= 1u << idx;
bool alreadyDefined = m_outputMask & 1u << idx;
m_outputMask |= 1u << idx;
return alreadyDefined;
}

View file

@ -4651,7 +4651,7 @@ namespace dxvk {
void DxvkContext::updateTransformFeedbackBuffers() {
auto gsOptions = m_state.gp.shaders.gs->shaderOptions();
const auto& gsInfo = m_state.gp.shaders.gs->info();
VkBuffer xfbBuffers[MaxNumXfbBuffers];
VkDeviceSize xfbOffsets[MaxNumXfbBuffers];
@ -4669,7 +4669,7 @@ namespace dxvk {
if (physSlice.handle != VK_NULL_HANDLE) {
const Rc<DxvkBuffer>& buffer = m_state.xfb.buffers[i].buffer();
buffer->setXfbVertexStride(gsOptions.xfbStrides[i]);
buffer->setXfbVertexStride(gsInfo.xfbStrides[i]);
m_cmd->trackResource<DxvkAccess::Write>(buffer);
}

View file

@ -158,19 +158,6 @@ namespace dxvk {
}
Rc<DxvkShader> DxvkDevice::createShader(
VkShaderStageFlagBits stage,
uint32_t slotCount,
const DxvkResourceSlot* slotInfos,
const DxvkInterfaceSlots& iface,
const SpirvCodeBuffer& code) {
return new DxvkShader(stage,
slotCount, slotInfos, iface, code,
DxvkShaderOptions(),
DxvkShaderConstData());
}
DxvkStatCounters DxvkDevice::getStatCounters() {
DxvkPipelineCount pipe = m_objects.pipelineManager().getPipelineCount();

View file

@ -340,23 +340,6 @@ namespace dxvk {
Rc<DxvkSampler> createSampler(
const DxvkSamplerCreateInfo& createInfo);
/**
* \brief Creates a shader module
*
* \param [in] stage Shader stage
* \param [in] slotCount Resource slot count
* \param [in] slotInfos Resource slot descriptions
* \param [in] iface Inter-stage interface slots
* \param [in] code Shader code
* \returns New shader module
*/
Rc<DxvkShader> createShader(
VkShaderStageFlagBits stage,
uint32_t slotCount,
const DxvkResourceSlot* slotInfos,
const DxvkInterfaceSlots& iface,
const SpirvCodeBuffer& code);
/**
* \brief Retrieves stat counters
*

View file

@ -26,8 +26,8 @@ namespace dxvk {
m_layout = new DxvkPipelineLayout(m_vkd,
m_slotMapping, VK_PIPELINE_BIND_POINT_GRAPHICS);
m_vsIn = m_shaders.vs != nullptr ? m_shaders.vs->interfaceSlots().inputSlots : 0;
m_fsOut = m_shaders.fs != nullptr ? m_shaders.fs->interfaceSlots().outputSlots : 0;
m_vsIn = m_shaders.vs != nullptr ? m_shaders.vs->info().inputMask : 0;
m_fsOut = m_shaders.fs != nullptr ? m_shaders.fs->info().outputMask : 0;
if (m_shaders.gs != nullptr && m_shaders.gs->flags().test(DxvkShaderFlag::HasTransformFeedback))
m_flags.set(DxvkGraphicsPipelineFlag::HasTransformFeedback);
@ -238,7 +238,7 @@ namespace dxvk {
}
int32_t rasterizedStream = m_shaders.gs != nullptr
? m_shaders.gs->shaderOptions().rasterizedStream
? m_shaders.gs->info().xfbRasterizedStream
: 0;
// Compact vertex bindings so that we can more easily update vertex buffers
@ -451,10 +451,11 @@ namespace dxvk {
if (shader == nullptr)
return DxvkShaderModule();
const DxvkShaderCreateInfo& shaderInfo = shader->info();
DxvkShaderModuleCreateInfo info;
// Fix up fragment shader outputs for dual-source blending
if (shader->stage() == VK_SHADER_STAGE_FRAGMENT_BIT) {
if (shaderInfo.stage == VK_SHADER_STAGE_FRAGMENT_BIT) {
info.fsDualSrcBlend = state.omBlend[0].blendEnable() && (
util::isDualSourceBlendFactor(state.omBlend[0].srcColorBlendFactor()) ||
util::isDualSourceBlendFactor(state.omBlend[0].dstColorBlendFactor()) ||
@ -463,15 +464,15 @@ namespace dxvk {
}
// Deal with undefined shader inputs
uint32_t consumedInputs = shader->interfaceSlots().inputSlots;
uint32_t consumedInputs = shaderInfo.inputMask;
uint32_t providedInputs = 0;
if (shader->stage() == VK_SHADER_STAGE_VERTEX_BIT) {
if (shaderInfo.stage == VK_SHADER_STAGE_VERTEX_BIT) {
for (uint32_t i = 0; i < state.il.attributeCount(); i++)
providedInputs |= 1u << state.ilAttributes[i].location();
} else if (shader->stage() != VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT) {
auto prevStage = getPrevStageShader(shader->stage());
providedInputs = prevStage->interfaceSlots().outputSlots;
} else if (shaderInfo.stage != VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT) {
auto prevStage = getPrevStageShader(shaderInfo.stage);
providedInputs = prevStage->info().outputMask;
} else {
// Technically not correct, but this
// would need a lot of extra care

View file

@ -6,43 +6,6 @@
namespace dxvk {
DxvkShaderConstData::DxvkShaderConstData()
: m_size(0), m_data(nullptr) {
}
DxvkShaderConstData::DxvkShaderConstData(
size_t dwordCount,
const uint32_t* dwordArray)
: m_size(dwordCount), m_data(new uint32_t[dwordCount]) {
for (size_t i = 0; i < dwordCount; i++)
m_data[i] = dwordArray[i];
}
DxvkShaderConstData::DxvkShaderConstData(DxvkShaderConstData&& other)
: m_size(other.m_size), m_data(other.m_data) {
other.m_size = 0;
other.m_data = nullptr;
}
DxvkShaderConstData& DxvkShaderConstData::operator = (DxvkShaderConstData&& other) {
delete[] m_data;
this->m_size = other.m_size;
this->m_data = other.m_data;
other.m_size = 0;
other.m_data = nullptr;
return *this;
}
DxvkShaderConstData::~DxvkShaderConstData() {
delete[] m_data;
}
DxvkShaderModule::DxvkShaderModule()
: m_vkd(nullptr), m_stage() {
@ -64,7 +27,7 @@ namespace dxvk {
m_stage.sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO;
m_stage.pNext = nullptr;
m_stage.flags = 0;
m_stage.stage = shader->stage();
m_stage.stage = shader->info().stage;
m_stage.module = VK_NULL_HANDLE;
m_stage.pName = "main";
m_stage.pSpecializationInfo = nullptr;
@ -98,21 +61,30 @@ namespace dxvk {
DxvkShader::DxvkShader(
VkShaderStageFlagBits stage,
uint32_t slotCount,
const DxvkResourceSlot* slotInfos,
const DxvkInterfaceSlots& iface,
SpirvCodeBuffer code,
const DxvkShaderOptions& options,
DxvkShaderConstData&& constData)
: m_stage(stage), m_code(code), m_interface(iface),
m_options(options), m_constData(std::move(constData)) {
// Write back resource slot infos
for (uint32_t i = 0; i < slotCount; i++)
m_slots.push_back(slotInfos[i]);
// Gather the offsets where the binding IDs
// are stored so we can quickly remap them.
const DxvkShaderCreateInfo& info,
SpirvCodeBuffer&& spirv)
: m_info(info), m_code(spirv) {
m_info.resourceSlots = nullptr;
m_info.uniformData = nullptr;
// Copy resource binding slot infos
if (info.resourceSlotCount) {
m_slots.resize(info.resourceSlotCount);
for (uint32_t i = 0; i < info.resourceSlotCount; i++)
m_slots[i] = info.resourceSlots[i];
m_info.resourceSlots = m_slots.data();
}
// Copy uniform buffer data
if (info.uniformSize) {
m_uniformData.resize(info.uniformSize);
std::memcpy(m_uniformData.data(), info.uniformData, info.uniformSize);
m_info.uniformData = m_uniformData.data();
}
// Run an analysis pass over the SPIR-V code to gather some
// info that we may need during pipeline compilation.
SpirvCodeBuffer code = std::move(spirv);
uint32_t o1VarId = 0;
for (auto ins : code) {
@ -147,8 +119,8 @@ namespace dxvk {
}
}
}
// The destructor body is empty, no explicit cleanup is performed here
DxvkShader::~DxvkShader() {
}
@ -157,12 +129,12 @@ namespace dxvk {
void DxvkShader::defineResourceSlots(
DxvkDescriptorSlotMapping& mapping) const {
for (const auto& slot : m_slots)
mapping.defineSlot(m_stage, slot);
mapping.defineSlot(m_info.stage, slot);
if (m_interface.pushConstSize) {
mapping.definePushConstRange(m_stage,
m_interface.pushConstOffset,
m_interface.pushConstSize);
if (m_info.pushConstSize) {
mapping.definePushConstRange(m_info.stage,
m_info.pushConstOffset,
m_info.pushConstSize);
}
}

View file

@ -50,70 +50,27 @@ namespace dxvk {
using DxvkShaderFlags = Flags<DxvkShaderFlag>;
/**
* \brief Shader interface slots
*
* Stores a bit mask of which shader
* interface slots are defined. Used
* purely for validation purposes.
* \brief Shader info
*/
struct DxvkInterfaceSlots {
uint32_t inputSlots = 0;
uint32_t outputSlots = 0;
struct DxvkShaderCreateInfo {
/// Shader stage
VkShaderStageFlagBits stage;
/// Descriptor info
uint32_t resourceSlotCount = 0;
const DxvkResourceSlot* resourceSlots = nullptr;
/// Input and output register mask
uint32_t inputMask = 0;
uint32_t outputMask = 0;
/// Push constant range
uint32_t pushConstOffset = 0;
uint32_t pushConstSize = 0;
};
/**
* \brief Additional shader options
*
* Contains additional properties that should be
* taken into account when creating pipelines.
*/
struct DxvkShaderOptions {
uint32_t pushConstSize = 0;
/// Uniform buffer data
uint32_t uniformSize = 0;
const char* uniformData = nullptr;
/// Rasterized stream, or -1
int32_t rasterizedStream;
/// Xfb vertex strides
uint32_t xfbStrides[MaxNumXfbBuffers];
};
/**
* \brief Shader constants
*
* Each shader can have constant data associated
* with it, which needs to be copied to a uniform
* buffer. The client API must then bind that buffer
* to an API-specific buffer binding when using the
* shader for rendering.
*/
class DxvkShaderConstData {
public:
DxvkShaderConstData();
DxvkShaderConstData(
size_t dwordCount,
const uint32_t* dwordArray);
DxvkShaderConstData (DxvkShaderConstData&& other);
DxvkShaderConstData& operator = (DxvkShaderConstData&& other);
~DxvkShaderConstData();
const uint32_t* data() const {
return m_data;
}
size_t sizeInBytes() const {
return m_size * sizeof(uint32_t);
}
private:
size_t m_size = 0;
uint32_t* m_data = nullptr;
int32_t xfbRasterizedStream = 0;
/// Transform feedback vertex strides
uint32_t xfbStrides[MaxNumXfbBuffers] = { };
};
@ -139,24 +96,19 @@ namespace dxvk {
public:
DxvkShader(
VkShaderStageFlagBits stage,
uint32_t slotCount,
const DxvkResourceSlot* slotInfos,
const DxvkInterfaceSlots& iface,
SpirvCodeBuffer code,
const DxvkShaderOptions& options,
DxvkShaderConstData&& constData);
const DxvkShaderCreateInfo& info,
SpirvCodeBuffer&& spirv);
~DxvkShader();
/**
* \brief Shader stage
* \returns Shader stage
* \brief Shader info
* \returns Shader info
*/
VkShaderStageFlagBits stage() const {
return m_stage;
const DxvkShaderCreateInfo& info() const {
return m_info;
}
/**
* \brief Retrieves shader flags
* \returns Shader flags
@ -189,37 +141,6 @@ namespace dxvk {
const DxvkDescriptorSlotMapping& mapping,
const DxvkShaderModuleCreateInfo& info);
/**
* \brief Inter-stage interface slots
*
* Retrieves the input and output
* registers used by the shader.
* \returns Shader interface slots
*/
DxvkInterfaceSlots interfaceSlots() const {
return m_interface;
}
/**
* \brief Shader options
* \returns Shader options
*/
DxvkShaderOptions shaderOptions() const {
return m_options;
}
/**
* \brief Shader constant data
*
* Returns a read-only reference to the
* constant data associated with this
* shader object.
* \returns Shader constant data
*/
const DxvkShaderConstData& shaderConstants() const {
return m_constData;
}
/**
* \brief Dumps SPIR-V shader
*
@ -279,20 +200,19 @@ namespace dxvk {
private:
VkShaderStageFlagBits m_stage;
SpirvCompressedBuffer m_code;
DxvkShaderCreateInfo m_info;
SpirvCompressedBuffer m_code;
std::vector<DxvkResourceSlot> m_slots;
std::vector<size_t> m_idOffsets;
DxvkInterfaceSlots m_interface;
DxvkShaderFlags m_flags;
DxvkShaderOptions m_options;
DxvkShaderConstData m_constData;
DxvkShaderKey m_key;
size_t m_hash = 0;
size_t m_o1IdxOffset = 0;
size_t m_o1LocOffset = 0;
size_t m_o1IdxOffset = 0;
size_t m_o1LocOffset = 0;
std::vector<DxvkResourceSlot> m_slots;
std::vector<char> m_uniformData;
std::vector<size_t> m_idOffsets;
static void eliminateInput(SpirvCodeBuffer& code, uint32_t location);

View file

@ -312,43 +312,36 @@ namespace dxvk {
}
void DxvkSwapchainBlitter::createShaders() {
const SpirvCodeBuffer vsCode(dxvk_present_vert);
const SpirvCodeBuffer fsCodeBlit(dxvk_present_frag_blit);
const SpirvCodeBuffer fsCodeCopy(dxvk_present_frag);
const SpirvCodeBuffer fsCodeResolve(dxvk_present_frag_ms);
const SpirvCodeBuffer fsCodeResolveAmd(dxvk_present_frag_ms_amd);
SpirvCodeBuffer vsCode(dxvk_present_vert);
SpirvCodeBuffer fsCodeBlit(dxvk_present_frag_blit);
SpirvCodeBuffer fsCodeCopy(dxvk_present_frag);
SpirvCodeBuffer fsCodeResolve(dxvk_present_frag_ms);
SpirvCodeBuffer fsCodeResolveAmd(dxvk_present_frag_ms_amd);
const std::array<DxvkResourceSlot, 2> fsResourceSlots = {{
{ BindingIds::Image, VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, VK_IMAGE_VIEW_TYPE_2D },
{ BindingIds::Gamma, VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, VK_IMAGE_VIEW_TYPE_1D },
}};
m_vs = m_device->createShader(
VK_SHADER_STAGE_VERTEX_BIT,
0, nullptr, { 0u, 1u },
vsCode);
DxvkShaderCreateInfo vsInfo;
vsInfo.stage = VK_SHADER_STAGE_VERTEX_BIT;
vsInfo.outputMask = 0x1;
m_vs = new DxvkShader(vsInfo, std::move(vsCode));
m_fsBlit = m_device->createShader(
VK_SHADER_STAGE_FRAGMENT_BIT,
fsResourceSlots.size(),
fsResourceSlots.data(),
{ 1u, 1u, 0u, sizeof(PresenterArgs) },
fsCodeBlit);
DxvkShaderCreateInfo fsInfo;
fsInfo.stage = VK_SHADER_STAGE_FRAGMENT_BIT;
fsInfo.resourceSlotCount = fsResourceSlots.size();
fsInfo.resourceSlots = fsResourceSlots.data();
fsInfo.pushConstSize = sizeof(PresenterArgs);
fsInfo.inputMask = 0x1;
fsInfo.outputMask = 0x1;
m_fsBlit = new DxvkShader(fsInfo, std::move(fsCodeBlit));
m_fsCopy = m_device->createShader(
VK_SHADER_STAGE_FRAGMENT_BIT,
fsResourceSlots.size(),
fsResourceSlots.data(),
{ 0u, 1u, 0u, sizeof(PresenterArgs) },
fsCodeCopy);
m_fsResolve = m_device->createShader(
VK_SHADER_STAGE_FRAGMENT_BIT,
fsResourceSlots.size(),
fsResourceSlots.data(),
{ 0u, 1u, 0u, sizeof(PresenterArgs) },
m_device->extensions().amdShaderFragmentMask
? fsCodeResolveAmd : fsCodeResolve);
fsInfo.inputMask = 0;
m_fsCopy = new DxvkShader(fsInfo, std::move(fsCodeCopy));
m_fsResolve = new DxvkShader(fsInfo, m_device->extensions().amdShaderFragmentMask
? std::move(fsCodeResolveAmd)
: std::move(fsCodeResolve));
}
void DxvkSwapchainBlitter::createResolveImage(const DxvkImageCreateInfo& info) {

View file

@ -156,8 +156,8 @@ namespace dxvk::hud {
HudRenderer::ShaderPair HudRenderer::createTextShaders() {
ShaderPair result;
const SpirvCodeBuffer vsCode(hud_text_vert);
const SpirvCodeBuffer fsCode(hud_text_frag);
SpirvCodeBuffer vsCode(hud_text_vert);
SpirvCodeBuffer fsCode(hud_text_frag);
const std::array<DxvkResourceSlot, 3> vsResources = {{
{ 0, VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, VK_IMAGE_VIEW_TYPE_MAX_ENUM },
@ -168,20 +168,22 @@ namespace dxvk::hud {
{ 2, VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, VK_IMAGE_VIEW_TYPE_2D },
}};
result.vert = m_device->createShader(
VK_SHADER_STAGE_VERTEX_BIT,
vsResources.size(),
vsResources.data(),
{ 0x0, 0x3, 0, sizeof(HudTextPushConstants) },
vsCode);
result.frag = m_device->createShader(
VK_SHADER_STAGE_FRAGMENT_BIT,
fsResources.size(),
fsResources.data(),
{ 0x3, 0x1 },
fsCode);
DxvkShaderCreateInfo vsInfo;
vsInfo.stage = VK_SHADER_STAGE_VERTEX_BIT;
vsInfo.resourceSlotCount = vsResources.size();
vsInfo.resourceSlots = vsResources.data();
vsInfo.outputMask = 0x3;
vsInfo.pushConstSize = sizeof(HudTextPushConstants);
result.vert = new DxvkShader(vsInfo, std::move(vsCode));
DxvkShaderCreateInfo fsInfo;
fsInfo.stage = VK_SHADER_STAGE_FRAGMENT_BIT;
fsInfo.resourceSlotCount = fsResources.size();
fsInfo.resourceSlots = fsResources.data();
fsInfo.inputMask = 0x3;
fsInfo.outputMask = 0x1;
result.frag = new DxvkShader(fsInfo, std::move(fsCode));
return result;
}
@ -189,24 +191,27 @@ namespace dxvk::hud {
HudRenderer::ShaderPair HudRenderer::createGraphShaders() {
ShaderPair result;
const SpirvCodeBuffer vsCode(hud_graph_vert);
const SpirvCodeBuffer fsCode(hud_graph_frag);
SpirvCodeBuffer vsCode(hud_graph_vert);
SpirvCodeBuffer fsCode(hud_graph_frag);
const std::array<DxvkResourceSlot, 1> fsResources = {{
{ 0, VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, VK_IMAGE_VIEW_TYPE_MAX_ENUM },
}};
result.vert = m_device->createShader(
VK_SHADER_STAGE_VERTEX_BIT, 0, nullptr,
{ 0x3, 0x1, 0, sizeof(HudGraphPushConstants) },
vsCode);
DxvkShaderCreateInfo vsInfo;
vsInfo.stage = VK_SHADER_STAGE_VERTEX_BIT;
vsInfo.outputMask = 0x1;
vsInfo.pushConstSize = sizeof(HudGraphPushConstants);
result.vert = new DxvkShader(vsInfo, std::move(vsCode));
result.frag = m_device->createShader(
VK_SHADER_STAGE_FRAGMENT_BIT,
fsResources.size(),
fsResources.data(),
{ 0x1, 0x1, 0, sizeof(HudGraphPushConstants) },
fsCode);
DxvkShaderCreateInfo fsInfo;
fsInfo.stage = VK_SHADER_STAGE_FRAGMENT_BIT;
fsInfo.resourceSlotCount = fsResources.size();
fsInfo.resourceSlots = fsResources.data();
fsInfo.inputMask = 0x1;
fsInfo.outputMask = 0x1;
fsInfo.pushConstSize = sizeof(HudGraphPushConstants);
result.frag = new DxvkShader(fsInfo, std::move(fsCode));
return result;
}

View file

@ -8,58 +8,76 @@ namespace dxvk {
}
SpirvCompressedBuffer::SpirvCompressedBuffer(
const SpirvCodeBuffer& code)
SpirvCompressedBuffer::SpirvCompressedBuffer(SpirvCodeBuffer& code)
: m_size(code.dwords()) {
// The compression (detailed below) achieves roughly 55% of the
// original size on average and is very consistent, so an initial
// estimate of roughly 58% will be accurate most of the time.
const uint32_t* data = code.data();
m_code.reserve((m_size * 75) / 128);
// The compression works by eliminating leading null bytes
// from DWORDs, exploiting that SPIR-V IDs are consecutive
// integers that usually fall into the 16-bit range. For
// each DWORD, a two-bit integer is stored which indicates
// the number of bytes it takes in the compressed buffer.
// This way, it can achieve a compression ratio of ~50%.
m_mask.reserve((m_size + NumMaskWords - 1) / NumMaskWords);
m_code.reserve((m_size + 1) / 2);
std::array<uint32_t, 16> block;
uint32_t blockMask = 0;
uint32_t blockOffset = 0;
uint64_t dstWord = 0;
uint32_t dstShift = 0;
// The algorithm used is a simple variable-to-fixed compression that
// encodes up to two consecutive SPIR-V tokens into one DWORD using
// a small number of different encodings. While not achieving great
// compression ratios, the main goal is to allow decompression code
// to be fast, with short dependency chains.
// Compressed tokens are stored in blocks of 16 DWORDs, each preceded
// by a single DWORD which stores the layout for each DWORD, two bits
// each. The supported layouts are as follows:
// 0x0: 1x 32-bit; 0x1: 1x 20-bit + 1x 12-bit
// 0x2: 2x 16-bit; 0x3: 1x 12-bit + 1x 20-bit
// These layouts are chosen to allow reasonably efficient encoding of
// opcode tokens, which usually fit into 20 bits, followed by type IDs,
// which tend to be low as well since most types are defined early.
for (size_t i = 0; i < m_size; ) {
if (likely(i + 1 < m_size)) {
uint32_t a = data[i];
uint32_t b = data[i + 1];
uint32_t schema;
uint32_t encode;
for (uint32_t i = 0; i < m_size; i += NumMaskWords) {
uint64_t byteCounts = 0;
for (uint32_t w = 0; w < NumMaskWords && i + w < m_size; w++) {
uint64_t word = data[i + w];
uint64_t bytes = 0;
if (word < (1 << 8)) bytes = 0;
else if (word < (1 << 16)) bytes = 1;
else if (word < (1 << 24)) bytes = 2;
else bytes = 3;
byteCounts |= bytes << (2 * w);
uint32_t bits = 8 * bytes + 8;
uint32_t rem = bit::pack(dstWord, dstShift, word, bits);
if (unlikely(rem != 0)) {
m_code.push_back(dstWord);
dstWord = 0;
dstShift = 0;
bit::pack(dstWord, dstShift, word >> (bits - rem), rem);
if (std::max(a, b) < (1u << 16)) {
schema = 0x2;
encode = a | (b << 16);
} else if (a < (1u << 20) && b < (1u << 12)) {
schema = 0x1;
encode = a | (b << 20);
} else if (a < (1u << 12) && b < (1u << 20)) {
schema = 0x3;
encode = a | (b << 12);
} else {
schema = 0x0;
encode = a;
}
block[blockOffset] = encode;
blockMask |= schema << (blockOffset << 1);
blockOffset += 1;
i += schema ? 2 : 1;
} else {
block[blockOffset] = data[i++];
blockOffset += 1;
}
m_mask.push_back(byteCounts);
if (unlikely(blockOffset == 16) || unlikely(i == m_size)) {
m_code.insert(m_code.end(), blockMask);
m_code.insert(m_code.end(), block.begin(), block.begin() + blockOffset);
blockMask = 0;
blockOffset = 0;
}
}
if (dstShift)
m_code.push_back(dstWord);
m_mask.shrink_to_fit();
m_code.shrink_to_fit();
// Only shrink the array if we have lots of overhead for some reason.
// This should only happen on shaders where our initial estimate was
// too small. In general, we want to avoid reallocation here.
if (m_code.capacity() > (m_code.size() * 10) / 9)
m_code.shrink_to_fit();
}
@ -72,36 +90,31 @@ namespace dxvk {
SpirvCodeBuffer code(m_size);
uint32_t* data = code.data();
if (m_size == 0)
return code;
uint32_t srcOffset = 0;
uint32_t dstOffset = 0;
uint32_t maskIdx = 0;
uint32_t codeIdx = 0;
constexpr uint32_t shiftAmounts = 0x0c101420;
uint64_t srcWord = m_code[codeIdx++];
uint32_t srcShift = 0;
while (dstOffset < m_size) {
uint32_t blockMask = m_code[srcOffset];
for (uint32_t i = 0; i < m_size; i += NumMaskWords) {
uint64_t srcMask = m_mask[maskIdx++];
for (uint32_t i = 0; i < 16 && dstOffset < m_size; i++) {
// Use 64-bit integers for some of the operands so we can
// shift by 32 bits without handling it as a special case
uint32_t schema = (blockMask >> (i << 1)) & 0x3;
uint32_t shift = (shiftAmounts >> (schema << 3)) & 0xff;
uint64_t mask = ~(~0ull << shift);
uint64_t encode = m_code[srcOffset + i + 1];
for (uint32_t w = 0; w < NumMaskWords && i + w < m_size; w++) {
uint32_t bits = 8 * ((srcMask & 3) + 1);
data[dstOffset] = encode & mask;
uint64_t word = 0;
uint32_t rem = bit::unpack(word, srcWord, srcShift, bits);
if (likely(schema))
data[dstOffset + 1] = encode >> shift;
if (unlikely(rem != 0)) {
srcWord = m_code[codeIdx++];
srcShift = 0;
uint64_t tmp = 0;
bit::unpack(tmp, srcWord, srcShift, rem);
word |= tmp << (bits - rem);
}
data[i + w] = word;
srcMask >>= 2;
dstOffset += schema ? 2 : 1;
}
srcOffset += 17;
}
return code;

View file

@ -13,13 +13,12 @@ namespace dxvk {
* to keep memory footprint low.
*/
class SpirvCompressedBuffer {
constexpr static uint32_t NumMaskWords = 32;
public:
SpirvCompressedBuffer();
SpirvCompressedBuffer(
const SpirvCodeBuffer& code);
SpirvCompressedBuffer(SpirvCodeBuffer& code);
~SpirvCompressedBuffer();
@ -27,9 +26,12 @@ namespace dxvk {
private:
uint32_t m_size;
std::vector<uint64_t> m_mask;
std::vector<uint64_t> m_code;
size_t m_size;
std::vector<uint32_t> m_code;
void encodeDword(uint32_t dw);
uint32_t decodeDword(size_t& offset) const;
};

View file

@ -230,6 +230,10 @@ namespace dxvk {
{ R"(\\SGWContracts\.exe$)", {{
{ "d3d11.cachedDynamicResources", "a" },
}} },
/* Armored Warfare */
{ R"(\\armoredwarfare\.exe$)", {{
{ "d3d11.cachedDynamicResources", "c" },
}} },
/* Shadow of the Tomb Raider - invariant *
* position breaks character rendering on NV */
{ R"(\\SOTTR\.exe$)", {{
@ -311,10 +315,6 @@ namespace dxvk {
{ R"(\\Gothic(3|3Final| III Forsaken Gods)\.exe$)", {{
{ "d3d9.supportDFFormats", "False" },
}} },
/* Risen */
{ R"(\\Risen[23]?\.exe$)", {{
{ "d3d9.invariantPosition", "True" },
}} },
/* Sonic Adventure 2 */
{ R"(\\Sonic Adventure 2\\(launcher|sonic2app)\.exe$)", {{
{ "d3d9.floatEmulation", "False" },
@ -338,13 +338,8 @@ namespace dxvk {
{ R"(\\Dead Space\.exe$)", {{
{ "d3d9.supportDFFormats", "False" },
}} },
/* Halo 2 */
{ R"(\\halo2\.exe$)", {{
{ "d3d9.invariantPosition", "True" },
}} },
/* Halo CE/HaloPC */
{ R"(\\halo(ce)?\.exe$)", {{
{ "d3d9.invariantPosition", "True" },
// Game enables minor decal layering fixes
// specifically when it detects AMD.
// Avoids chip being detected as unsupported
@ -417,7 +412,6 @@ namespace dxvk {
/* Battlefield 2 (bad z-pass) */
{ R"(\\BF2\.exe$)", {{
{ "d3d9.longMad", "True" },
{ "d3d9.invariantPosition", "True" },
}} },
/* SpellForce 2 Series */
{ R"(\\SpellForce2.*\.exe$)", {{
@ -518,10 +512,6 @@ namespace dxvk {
{ R"(\\BBCF\.exe$)", {{
{ "d3d9.floatEmulation", "Strict" },
}} },
/* James Cameron's Avatar needs invariantPosition to fix black flickering vegetation */
{ R"(\\Avatar\.exe$)", {{
{ "d3d9.invariantPosition", "True" },
}} },
/* Resident Evil games */
{ R"(\\(rerev|rerev2|re0hd|bhd|re5dx9|BH6)\.exe$)", {{
{ "d3d9.allowDirectBufferMapping", "False" },

View file

@ -8,6 +8,8 @@ util_src = files([
'util_monitor.cpp',
'util_shared_res.cpp',
'thread.cpp',
'com/com_guid.cpp',
'com/com_private_data.cpp',

View file

@ -17,7 +17,7 @@ namespace dxvk::sync {
bool RecursiveSpinlock::try_lock() {
uint32_t threadId = GetCurrentThreadId();
uint32_t threadId = dxvk::this_thread::get_id();
uint32_t expected = 0;
bool status = m_owner.compare_exchange_weak(

27
src/util/thread.cpp Normal file
View file

@ -0,0 +1,27 @@
#include "thread.h"
#include "util_likely.h"
#include <atomic>
#ifndef _WIN32
namespace dxvk::this_thread {

  // Counter that new thread IDs are drawn from
  static std::atomic<uint32_t> g_threadCtr = { 0u };
  // ID cached for the calling thread, 0 while none is assigned
  static thread_local uint32_t g_threadId = 0u;

  // The IDs returned here are only unique within the current
  // instance, ie. when this is used across multiple .so's,
  // the IDs handed out by each of them may conflict.
  //
  // We only use this for the spinlock implementation, where
  // that is not a problem, but it may be one for other users.
  uint32_t get_id() {
    uint32_t id = g_threadId;

    if (unlikely(id == 0u)) {
      // First call on this thread, draw the next counter value
      id = ++g_threadCtr;
      g_threadId = id;
    }

    return id;
  }

}
#endif

View file

@ -4,6 +4,7 @@
#include <condition_variable>
#include <functional>
#include <mutex>
#include <thread>
#include "util_error.h"
@ -14,6 +15,7 @@
namespace dxvk {
#ifdef _WIN32
/**
* \brief Thread priority
*/
@ -147,6 +149,10 @@ namespace dxvk {
/**
 * \brief Yields the calling thread
 *
 * Thin wrapper around \c SwitchToThread.
 */
inline void yield() {
SwitchToThread();
}
/**
 * \brief Queries the ID of the calling thread
 * \returns Result of \c GetCurrentThreadId, converted to \c uint32_t
 */
inline uint32_t get_id() {
return uint32_t(GetCurrentThreadId());
}
}
@ -323,4 +329,19 @@ namespace dxvk {
};
#else
using mutex = std::mutex;
using thread = std::thread;
using recursive_mutex = std::recursive_mutex;
using condition_variable = std::condition_variable;
namespace this_thread {
// Forwards to the standard library implementation
inline void yield() {
std::this_thread::yield();
}
// Declaration only, no definition is provided at this point
uint32_t get_id();
}
#endif
}

View file

@ -1,5 +1,13 @@
#include <array>
#include <cstdlib>
#include <filesystem>
#include <numeric>
#include <system_error>
#ifdef __linux__
#include <unistd.h>
#include <limits.h>
#endif
#include "util_env.h"
#include "./com/com_include.h"
@ -7,6 +15,7 @@
namespace dxvk::env {
std::string getEnvVar(const char* name) {
#ifdef _WIN32
std::vector<WCHAR> result;
result.resize(MAX_PATH + 1);
@ -14,6 +23,10 @@ namespace dxvk::env {
result.resize(len);
return str::fromws(result.data());
#else
const char* result = std::getenv(name);
return result ? result : "";
#endif
}
@ -36,7 +49,7 @@ namespace dxvk::env {
std::string getExeName() {
std::string fullPath = getExePath();
auto n = fullPath.find_last_of('\\');
auto n = fullPath.find_last_of(env::PlatformDirSlash);
return (n != std::string::npos)
? fullPath.substr(n + 1)
@ -46,16 +59,19 @@ namespace dxvk::env {
std::string getExeBaseName() {
auto exeName = getExeName();
#ifdef _WIN32
auto extp = matchFileExtension(exeName, "exe");
if (extp != std::string::npos)
exeName.erase(extp);
#endif
return exeName;
}
std::string getExePath() {
#if defined(_WIN32)
std::vector<WCHAR> exePath;
exePath.resize(MAX_PATH + 1);
@ -63,10 +79,18 @@ namespace dxvk::env {
exePath.resize(len);
return str::fromws(exePath.data());
#elif defined(__linux__)
std::array<char, PATH_MAX> exePath = {};
size_t count = readlink("/proc/self/exe", exePath.data(), exePath.size());
return std::string(exePath.begin(), exePath.begin() + count);
#endif
}
void setThreadName(const std::string& name) {
#ifdef _WIN32
using SetThreadDescriptionProc = HRESULT (WINAPI *) (HANDLE, PCWSTR);
static auto proc = reinterpret_cast<SetThreadDescriptionProc>(
@ -77,13 +101,22 @@ namespace dxvk::env {
str::tows(name.c_str(), wideName.data(), wideName.size());
(*proc)(::GetCurrentThread(), wideName.data());
}
#else
std::array<char, 16> posixName = {};
dxvk::str::strlcpy(posixName.data(), name.c_str(), 16);
::pthread_setname_np(pthread_self(), posixName.data());
#endif
}
/**
 * \brief Creates a directory
 *
 * On non-Windows platforms, missing parent directories are
 * created as well. Failure to create the directory is always
 * reported through the return value, not through an exception.
 * \param [in] path Path of the directory to create
 * \returns \c true if a directory was created, \c false if it
 *    could not be created or, on non-Windows platforms, if
 *    the directory already existed
 */
bool createDirectory(const std::string& path) {
#ifdef _WIN32
  WCHAR widePath[MAX_PATH];
  str::tows(path.c_str(), widePath);
  return !!CreateDirectoryW(widePath, nullptr);
#else
  // Use the non-throwing overload so that a failure results in
  // a return value of false, same as in the Windows code path
  std::error_code ec;
  return std::filesystem::create_directories(path, ec);
#endif
}
}

View file

@ -4,6 +4,12 @@
namespace dxvk::env {
#ifdef _WIN32
constexpr char PlatformDirSlash = '\\';
#else
constexpr char PlatformDirSlash = '/';
#endif
/**
* \brief Checks whether the host platform is 32-bit
*/

View file

@ -1,5 +1,6 @@
#pragma once
#include <cstring>
#include <string>
#include <sstream>
#include <vector>
@ -39,5 +40,11 @@ namespace dxvk::str {
format1(stream, args...);
return stream.str();
}
/**
 * \brief Copies a string into a fixed-size buffer
 *
 * Writes at most \c count bytes to \c dst. If \c count is
 * non-zero, the result is always null-terminated, and any
 * bytes following the copied characters are set to zero.
 * \param [out] dst Destination buffer
 * \param [in] src Null-terminated source string
 * \param [in] count Size of the destination buffer, in bytes
 */
inline void strlcpy(char* dst, const char* src, size_t count) {
  // A buffer without any storage cannot even hold the terminator
  if (!count)
    return;

  // Fill everything except for the last byte, strncpy zero-pads
  // the unused part. The last byte is always the terminator.
  const size_t last = count - 1;
  std::strncpy(dst, src, last);
  dst[last] = '\0';
}
}