From e83c08c2db69e8db60e2095fef2b676775ec4b43 Mon Sep 17 00:00:00 2001 From: Philip Rebohle Date: Mon, 3 Mar 2025 12:22:59 +0100 Subject: [PATCH 01/12] [dxvk] Add binding flag for multisampled images --- src/d3d11/d3d11_video.cpp | 2 +- src/dxbc/dxbc_compiler.cpp | 3 ++- src/dxvk/dxvk_pipelayout.cpp | 6 +++++- src/dxvk/dxvk_pipelayout.h | 15 ++++++++------- 4 files changed, 16 insertions(+), 10 deletions(-) diff --git a/src/d3d11/d3d11_video.cpp b/src/d3d11/d3d11_video.cpp index e76063111..7522b2df7 100644 --- a/src/d3d11/d3d11_video.cpp +++ b/src/d3d11/d3d11_video.cpp @@ -1333,7 +1333,7 @@ namespace dxvk { SpirvCodeBuffer fsCode(d3d11_video_blit_frag); const std::array fsBindings = {{ - { VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER, 0, VK_IMAGE_VIEW_TYPE_MAX_ENUM, VK_SHADER_STAGE_FRAGMENT_BIT, VK_ACCESS_UNIFORM_READ_BIT, VK_TRUE }, + { VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER, 0, VK_IMAGE_VIEW_TYPE_MAX_ENUM, VK_SHADER_STAGE_FRAGMENT_BIT, VK_ACCESS_UNIFORM_READ_BIT, DxvkAccessOp::None, true }, { VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE, 1, VK_IMAGE_VIEW_TYPE_2D, VK_SHADER_STAGE_FRAGMENT_BIT, VK_ACCESS_SHADER_READ_BIT }, { VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE, 2, VK_IMAGE_VIEW_TYPE_2D, VK_SHADER_STAGE_FRAGMENT_BIT, VK_ACCESS_SHADER_READ_BIT }, }}; diff --git a/src/dxbc/dxbc_compiler.cpp b/src/dxbc/dxbc_compiler.cpp index f823f84db..184d15748 100644 --- a/src/dxbc/dxbc_compiler.cpp +++ b/src/dxbc/dxbc_compiler.cpp @@ -871,7 +871,7 @@ namespace dxvk { binding.viewType = VK_IMAGE_VIEW_TYPE_MAX_ENUM; binding.access = VK_ACCESS_UNIFORM_READ_BIT; binding.resourceBinding = bindingId; - binding.uboSet = VK_TRUE; + binding.uboSet = true; m_bindings.push_back(binding); } @@ -1080,6 +1080,7 @@ namespace dxvk { DxvkBindingInfo binding = { }; binding.viewType = typeInfo.vtype; binding.resourceBinding = bindingId; + binding.isMultisampled = typeInfo.ms; if (isUav) { binding.descriptorType = resourceType == DxbcResourceDim::Buffer diff --git a/src/dxvk/dxvk_pipelayout.cpp b/src/dxvk/dxvk_pipelayout.cpp index 
cf3f69553..e3da3e1b4 100644 --- a/src/dxvk/dxvk_pipelayout.cpp +++ b/src/dxvk/dxvk_pipelayout.cpp @@ -37,7 +37,9 @@ namespace dxvk { && viewType == other.viewType && stage == other.stage && access == other.access - && uboSet == other.uboSet; + && accessOp == other.accessOp + && uboSet == other.uboSet + && isMultisampled == other.isMultisampled; } @@ -48,7 +50,9 @@ namespace dxvk { hash.add(uint32_t(viewType)); hash.add(uint32_t(stage)); hash.add(access); + hash.add(uint32_t(accessOp)); hash.add(uint32_t(uboSet)); + hash.add(uint32_t(isMultisampled)); return hash; } diff --git a/src/dxvk/dxvk_pipelayout.h b/src/dxvk/dxvk_pipelayout.h index 5a3beacfa..fc5485e5c 100644 --- a/src/dxvk/dxvk_pipelayout.h +++ b/src/dxvk/dxvk_pipelayout.h @@ -50,13 +50,14 @@ namespace dxvk { * a given shader, or for the whole pipeline. */ struct DxvkBindingInfo { - VkDescriptorType descriptorType; ///< Vulkan descriptor type - uint32_t resourceBinding; ///< API binding slot for the resource - VkImageViewType viewType; ///< Image view type - VkShaderStageFlagBits stage; ///< Shader stage - VkAccessFlags access; ///< Access mask for the resource - VkBool32 uboSet; ///< Whether to include this in the UBO set - DxvkAccessOp accessOp; ///< Order-invariant store type, if any + VkDescriptorType descriptorType = VK_DESCRIPTOR_TYPE_MAX_ENUM; ///< Vulkan descriptor type + uint32_t resourceBinding = 0u; ///< API binding slot for the resource + VkImageViewType viewType = VK_IMAGE_VIEW_TYPE_MAX_ENUM; ///< Image view type + VkShaderStageFlagBits stage = VK_SHADER_STAGE_FLAG_BITS_MAX_ENUM; ///< Shader stage + VkAccessFlags access = 0u; ///< Access mask for the resource + DxvkAccessOp accessOp = DxvkAccessOp::None; ///< Order-invariant store type, if any + bool uboSet = false; ///< Whether to include this in the UBO set + bool isMultisampled = false; ///< Multisampled binding /** * \brief Computes descriptor set index for the given binding From 5a6aa1ace0eecd6619dc1a34ee80cf8fb8afc49a Mon Sep 17 00:00:00 
2001 From: Philip Rebohle Date: Mon, 3 Mar 2025 23:16:04 +0100 Subject: [PATCH 02/12] [dxvk] Store various image properties inside the view --- src/dxvk/dxvk_image.cpp | 32 +++++++++++++++++++++++----- src/dxvk/dxvk_image.h | 46 +++++++++++++++++++++++++++++++++++++++++ 2 files changed, 73 insertions(+), 5 deletions(-) diff --git a/src/dxvk/dxvk_image.cpp b/src/dxvk/dxvk_image.cpp index c8cb51ff6..e085ff570 100644 --- a/src/dxvk/dxvk_image.cpp +++ b/src/dxvk/dxvk_image.cpp @@ -234,23 +234,30 @@ namespace dxvk { // Self-assignment is possible here if we // just update the image properties + bool invalidateViews = false; m_storage = std::move(resource); if (m_storage != old) { m_imageInfo = m_storage->getImageInfo(); - m_version += 1u; if (unlikely(m_info.debugName)) updateDebugName(); + + invalidateViews = true; } + if ((m_info.access | usageInfo.access) != m_info.access) + invalidateViews = true; + m_info.flags |= usageInfo.flags; m_info.usage |= usageInfo.usage; m_info.stages |= usageInfo.stages; m_info.access |= usageInfo.access; - if (usageInfo.layout != VK_IMAGE_LAYOUT_UNDEFINED) + if (usageInfo.layout != VK_IMAGE_LAYOUT_UNDEFINED) { m_info.layout = usageInfo.layout; + invalidateViews = true; + } if (usageInfo.colorSpace != VK_COLOR_SPACE_MAX_ENUM_KHR) m_info.colorSpace = usageInfo.colorSpace; @@ -266,6 +273,10 @@ namespace dxvk { } m_stableAddress |= usageInfo.stableGpuAddress; + + if (invalidateViews) + m_version += 1u; + return old; } @@ -423,8 +434,9 @@ namespace dxvk { DxvkImageView::DxvkImageView( DxvkImage* image, const DxvkImageViewKey& key) - : m_image(image), m_key(key) { - + : m_image (image), + m_key (key) { + updateProperties(); } @@ -509,6 +521,9 @@ namespace dxvk { void DxvkImageView::updateViews() { + // Latch updated image properties + updateProperties(); + // Update all views that are not currently null for (uint32_t i = 0; i < m_views.size(); i++) { if (m_views[i]) @@ -517,5 +532,12 @@ namespace dxvk { m_version = m_image->m_version; } - + + 
+ void DxvkImageView::updateProperties() { + m_properties.layout = m_image->info().layout; + m_properties.samples = m_image->info().sampleCount; + m_properties.access = m_image->info().access; // read by hasGfxStores() + } + } diff --git a/src/dxvk/dxvk_image.h b/src/dxvk/dxvk_image.h index b303218d6..78b863868 100644 --- a/src/dxvk/dxvk_image.h +++ b/src/dxvk/dxvk_image.h @@ -104,6 +104,18 @@ namespace dxvk { }; + /** + * \brief Image properties stored in the view + * + * Used to reduce some pointer chasing. + */ + struct DxvkImageViewImageProperties { + VkImageLayout layout = VK_IMAGE_LAYOUT_UNDEFINED; + VkSampleCountFlagBits samples = VK_SAMPLE_COUNT_FLAG_BITS_MAX_ENUM; + VkAccessFlags access = 0u; + }; + + /** * \brief Virtual image view * @@ -272,6 +284,30 @@ namespace dxvk { view->imageSubresources()); } + /** + * \brief Queries the default image layout + * + * Used when binding the view as a descriptor. + * \returns Default image layout + */ + VkImageLayout defaultLayout() const { + return m_properties.layout; + } + + /** + * \brief Checks whether the image is multisampled + * \returns \c true if the image is multisampled + */ + bool isMultisampled() const { + return m_properties.samples > VK_SAMPLE_COUNT_1_BIT; + } + + /** + * \brief Checks whether the image has graphics stores + * \returns \c true if the image has graphics pipeline stores + */ + bool hasGfxStores() const; + private: DxvkImage* m_image = nullptr; @@ -279,12 +315,16 @@ namespace dxvk { uint32_t m_version = 0u; + DxvkImageViewImageProperties m_properties = { }; + std::array m_views = { }; VkImageView createView(VkImageViewType type) const; void updateViews(); + void updateProperties(); + }; @@ -760,4 +800,10 @@ namespace dxvk { return m_views[viewType]; } + + inline bool DxvkImageView::hasGfxStores() const { + return (m_properties.access & VK_ACCESS_SHADER_WRITE_BIT) + && (m_image->hasGfxStores()); + } + } From 5887d6bb90cd4b01642991500a79cd0b4791995b Mon Sep 17 00:00:00 2001 From: Philip Rebohle Date: Tue, 4 Mar 2025 
02:05:34 +0100 Subject: [PATCH 03/12] [dxvk] Add flag to avoid creating dedicated allocations --- src/dxvk/dxvk_memory.cpp | 3 +++ src/dxvk/dxvk_memory.h | 2 ++ 2 files changed, 5 insertions(+) diff --git a/src/dxvk/dxvk_memory.cpp b/src/dxvk/dxvk_memory.cpp index c31dafb83..b6a61c6d5 100644 --- a/src/dxvk/dxvk_memory.cpp +++ b/src/dxvk/dxvk_memory.cpp @@ -932,6 +932,9 @@ namespace dxvk { dedicatedRequirements.prefersDedicatedAllocation = VK_TRUE; } + if (!dedicatedRequirements.requiresDedicatedAllocation && allocationInfo.mode.test(DxvkAllocationMode::NoDedicated)) + dedicatedRequirements.prefersDedicatedAllocation = VK_FALSE; + Rc allocation; if (!(createInfo.flags & VK_IMAGE_CREATE_SPARSE_BINDING_BIT)) { diff --git a/src/dxvk/dxvk_memory.h b/src/dxvk/dxvk_memory.h index bd7f685d1..c432df0f2 100644 --- a/src/dxvk/dxvk_memory.h +++ b/src/dxvk/dxvk_memory.h @@ -959,6 +959,8 @@ namespace dxvk { /// If set, the allocation will only succeed if it /// can be suballocated from an existing chunk. 
NoAllocation = 1, + /// Avoid using a dedicated allocation for this resource + NoDedicated = 2, eFlagEnum }; From 8578538622cda0a30daf521d3db87c81baf36299 Mon Sep 17 00:00:00 2001 From: Philip Rebohle Date: Tue, 4 Mar 2025 02:09:26 +0100 Subject: [PATCH 04/12] [dxvk] Add flag to avoid dedicated allocations for short-lived images --- src/dxvk/dxvk_image.cpp | 3 +++ src/dxvk/dxvk_image.h | 3 +++ 2 files changed, 6 insertions(+) diff --git a/src/dxvk/dxvk_image.cpp b/src/dxvk/dxvk_image.cpp index e085ff570..25a330b2d 100644 --- a/src/dxvk/dxvk_image.cpp +++ b/src/dxvk/dxvk_image.cpp @@ -216,6 +216,9 @@ namespace dxvk { allocationInfo.properties = m_properties; allocationInfo.mode = mode; + if (m_info.transient) + allocationInfo.mode.set(DxvkAllocationMode::NoDedicated); + return m_allocator->createImageResource(imageInfo, allocationInfo, sharedMemoryInfo); } diff --git a/src/dxvk/dxvk_image.h b/src/dxvk/dxvk_image.h index 78b863868..b6126bdc7 100644 --- a/src/dxvk/dxvk_image.h +++ b/src/dxvk/dxvk_image.h @@ -63,6 +63,9 @@ namespace dxvk { // to be in its default layout after each submission VkBool32 shared = VK_FALSE; + // Image is likely to have a short lifetime + VkBool32 transient = VK_FALSE; + // Image view formats that can // be used with this image uint32_t viewFormatCount = 0; From 236a8b63292bffafcd3d4b1e91881a2022eb57d5 Mon Sep 17 00:00:00 2001 From: Philip Rebohle Date: Tue, 4 Mar 2025 02:11:15 +0100 Subject: [PATCH 05/12] [dxvk] Refactor depth-stencil resolve to also work on color images --- src/dxvk/dxvk_context.cpp | 112 ++++++++++++++++++++++---------------- src/dxvk/dxvk_context.h | 7 ++- 2 files changed, 68 insertions(+), 51 deletions(-) diff --git a/src/dxvk/dxvk_context.cpp b/src/dxvk/dxvk_context.cpp index ff6c75294..cd41af95a 100644 --- a/src/dxvk/dxvk_context.cpp +++ b/src/dxvk/dxvk_context.cpp @@ -1999,13 +1999,11 @@ namespace dxvk { this->prepareImage(srcImage, vk::makeSubresourceRange(region.srcSubresource)); if (useFb) { - 
this->resolveImageFb( - dstImage, srcImage, region, VK_FORMAT_UNDEFINED, - depthMode, stencilMode); + this->resolveImageFb(dstImage, srcImage, region, + VK_FORMAT_UNDEFINED, depthMode, stencilMode); } else { - this->resolveImageDs( - dstImage, srcImage, region, - depthMode, stencilMode); + this->resolveImageRp(dstImage, srcImage, region, + VK_FORMAT_UNDEFINED, depthMode, stencilMode); } } @@ -4693,21 +4691,31 @@ namespace dxvk { } - void DxvkContext::resolveImageDs( + void DxvkContext::resolveImageRp( const Rc& dstImage, const Rc& srcImage, const VkImageResolve& region, - VkResolveModeFlagBits depthMode, + VkFormat format, + VkResolveModeFlagBits mode, VkResolveModeFlagBits stencilMode) { auto dstSubresourceRange = vk::makeSubresourceRange(region.dstSubresource); auto srcSubresourceRange = vk::makeSubresourceRange(region.srcSubresource); + bool isDepthStencil = (dstImage->formatInfo()->aspectMask & (VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT)); + DxvkImageUsageInfo usageInfo = { }; - usageInfo.usage = VK_IMAGE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT; + usageInfo.usage = isDepthStencil + ? VK_IMAGE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT + : VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT; + + if (format) { + usageInfo.viewFormatCount = 1u; + usageInfo.viewFormats = &format; + } if (!ensureImageCompatibility(dstImage, usageInfo) || !ensureImageCompatibility(srcImage, usageInfo)) { - Logger::err(str::format("DxvkContext: resolveImageDs: Unsupported images:" + Logger::err(str::format("DxvkContext: resolveImageRp: Unsupported images:" "\n dst format: ", dstImage->info().format, "\n src format: ", srcImage->info().format)); } @@ -4720,42 +4728,54 @@ namespace dxvk { const char* srcName = srcImage->info().debugName; m_cmd->cmdBeginDebugUtilsLabel(DxvkCmdBuffer::ExecBuffer, - vk::makeLabel(0xf0dcdc, str::format("Resolve DS (", + vk::makeLabel(0xf0dcdc, str::format("Resolve pass (", dstName ? dstName : "unknown", ", ", srcName ? 
srcName : "unknown", ")").c_str())); } - // Transition both images to usable layouts if necessary. For the source image we - // can be fairly leniet since writable layouts are allowed for resolve attachments. - VkImageLayout dstLayout = dstImage->pickLayout(VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL); + // Transition both images to usable layouts if necessary. For the source image + // we can be fairly lenient when dealing with writable depth-stencil layouts. + VkImageLayout writableLayout = isDepthStencil + ? VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL + : VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL; + + VkImageLayout dstLayout = dstImage->pickLayout(writableLayout); VkImageLayout srcLayout = srcImage->info().layout; if (srcLayout != VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL && srcLayout != VK_IMAGE_LAYOUT_DEPTH_STENCIL_READ_ONLY_OPTIMAL) - srcLayout = srcImage->pickLayout(VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL); + srcLayout = srcImage->pickLayout(writableLayout); - addImageLayoutTransition(*srcImage, srcSubresourceRange, srcLayout, - VK_PIPELINE_STAGE_2_EARLY_FRAGMENT_TESTS_BIT | VK_PIPELINE_STAGE_2_LATE_FRAGMENT_TESTS_BIT, - VK_ACCESS_2_DEPTH_STENCIL_ATTACHMENT_READ_BIT, false); - addImageLayoutTransition(*dstImage, dstSubresourceRange, dstLayout, - VK_PIPELINE_STAGE_2_EARLY_FRAGMENT_TESTS_BIT | VK_PIPELINE_STAGE_2_LATE_FRAGMENT_TESTS_BIT, - VK_ACCESS_2_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT, true); + VkPipelineStageFlags2 stages = isDepthStencil + ? VK_PIPELINE_STAGE_2_EARLY_FRAGMENT_TESTS_BIT | VK_PIPELINE_STAGE_2_LATE_FRAGMENT_TESTS_BIT + : VK_PIPELINE_STAGE_2_COLOR_ATTACHMENT_OUTPUT_BIT; + + VkAccessFlags2 srcAccess = isDepthStencil + ? VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_READ_BIT + : VK_ACCESS_COLOR_ATTACHMENT_READ_BIT; + + VkAccessFlags2 dstAccess = isDepthStencil + ? 
VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT + : VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT; + + addImageLayoutTransition(*srcImage, srcSubresourceRange, srcLayout, stages, srcAccess, false); + addImageLayoutTransition(*dstImage, dstSubresourceRange, dstLayout, stages, dstAccess, true); flushImageLayoutTransitions(DxvkCmdBuffer::ExecBuffer); // Create a pair of views for the attachment resolve DxvkMetaResolveViews views(dstImage, region.dstSubresource, srcImage, region.srcSubresource, dstImage->info().format); - VkRenderingAttachmentInfo depthAttachment = { VK_STRUCTURE_TYPE_RENDERING_ATTACHMENT_INFO }; - depthAttachment.imageView = views.srcView->handle(); - depthAttachment.imageLayout = srcLayout; - depthAttachment.resolveMode = depthMode; - depthAttachment.resolveImageView = views.dstView->handle(); - depthAttachment.resolveImageLayout = dstLayout; - depthAttachment.loadOp = VK_ATTACHMENT_LOAD_OP_LOAD; - depthAttachment.storeOp = VK_ATTACHMENT_STORE_OP_STORE; + VkRenderingAttachmentInfo attachment = { VK_STRUCTURE_TYPE_RENDERING_ATTACHMENT_INFO }; + attachment.imageView = views.srcView->handle(); + attachment.imageLayout = srcLayout; + attachment.resolveMode = mode; + attachment.resolveImageView = views.dstView->handle(); + attachment.resolveImageLayout = dstLayout; + attachment.loadOp = VK_ATTACHMENT_LOAD_OP_LOAD; + attachment.storeOp = VK_ATTACHMENT_STORE_OP_STORE; - VkRenderingAttachmentInfo stencilAttachment = depthAttachment; + VkRenderingAttachmentInfo stencilAttachment = attachment; stencilAttachment.resolveMode = stencilMode; VkExtent3D extent = dstImage->mipLevelExtent(region.dstSubresource.mipLevel); @@ -4765,29 +4785,25 @@ namespace dxvk { renderingInfo.renderArea.extent = VkExtent2D { extent.width, extent.height }; renderingInfo.layerCount = region.dstSubresource.layerCount; - if (dstImage->formatInfo()->aspectMask & VK_IMAGE_ASPECT_DEPTH_BIT) - renderingInfo.pDepthAttachment = &depthAttachment; + if (isDepthStencil) { + if 
(dstImage->formatInfo()->aspectMask & VK_IMAGE_ASPECT_DEPTH_BIT) + renderingInfo.pDepthAttachment = &attachment; - if (dstImage->formatInfo()->aspectMask & VK_IMAGE_ASPECT_STENCIL_BIT) - renderingInfo.pStencilAttachment = &stencilAttachment; + if (dstImage->formatInfo()->aspectMask & VK_IMAGE_ASPECT_STENCIL_BIT) + renderingInfo.pStencilAttachment = &stencilAttachment; + } else { + renderingInfo.colorAttachmentCount = 1u; + renderingInfo.pColorAttachments = &attachment; + } m_cmd->cmdBeginRendering(&renderingInfo); m_cmd->cmdEndRendering(); - // Add barriers for the resolve operation - accessImage(DxvkCmdBuffer::ExecBuffer, - *srcImage, srcSubresourceRange, srcLayout, - VK_PIPELINE_STAGE_2_EARLY_FRAGMENT_TESTS_BIT | - VK_PIPELINE_STAGE_2_LATE_FRAGMENT_TESTS_BIT, - VK_ACCESS_2_DEPTH_STENCIL_ATTACHMENT_READ_BIT, - DxvkAccessOp::None); - - accessImage(DxvkCmdBuffer::ExecBuffer, - *dstImage, dstSubresourceRange, dstLayout, - VK_PIPELINE_STAGE_2_EARLY_FRAGMENT_TESTS_BIT | - VK_PIPELINE_STAGE_2_LATE_FRAGMENT_TESTS_BIT, - VK_ACCESS_2_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT, - DxvkAccessOp::None); + // Add barriers for the render pass resolve + accessImage(DxvkCmdBuffer::ExecBuffer, *srcImage, srcSubresourceRange, + srcLayout, stages, srcAccess, DxvkAccessOp::None); + accessImage(DxvkCmdBuffer::ExecBuffer, *dstImage, dstSubresourceRange, + dstLayout, stages, dstAccess, DxvkAccessOp::None); if (unlikely(m_features.test(DxvkContextFeature::DebugUtils))) m_cmd->cmdEndDebugUtilsLabel(DxvkCmdBuffer::ExecBuffer); diff --git a/src/dxvk/dxvk_context.h b/src/dxvk/dxvk_context.h index 81cb14bf7..c8bec80ef 100644 --- a/src/dxvk/dxvk_context.h +++ b/src/dxvk/dxvk_context.h @@ -1616,13 +1616,14 @@ namespace dxvk { const Rc& srcImage, const VkImageResolve& region); - void resolveImageDs( + void resolveImageRp( const Rc& dstImage, const Rc& srcImage, const VkImageResolve& region, - VkResolveModeFlagBits depthMode, + VkFormat format, + VkResolveModeFlagBits mode, VkResolveModeFlagBits 
stencilMode); - + void resolveImageFb( const Rc& dstImage, const Rc& srcImage, From ad777223526ca86dda042e2cc1c62721c61622c7 Mon Sep 17 00:00:00 2001 From: Philip Rebohle Date: Tue, 4 Mar 2025 03:15:15 +0100 Subject: [PATCH 06/12] [dxvk] Add implicit resolve when app tries to sample multisampled image --- src/dxvk/dxvk_context.cpp | 128 ++++++++++++++++---- src/dxvk/dxvk_context.h | 8 +- src/dxvk/dxvk_implicit_resolve.cpp | 182 +++++++++++++++++++++++++++++ src/dxvk/dxvk_implicit_resolve.h | 108 +++++++++++++++++ src/dxvk/dxvk_sparse.h | 10 ++ src/dxvk/meson.build | 1 + 6 files changed, 415 insertions(+), 22 deletions(-) create mode 100644 src/dxvk/dxvk_implicit_resolve.cpp create mode 100644 src/dxvk/dxvk_implicit_resolve.h diff --git a/src/dxvk/dxvk_context.cpp b/src/dxvk/dxvk_context.cpp index cd41af95a..3f4544237 100644 --- a/src/dxvk/dxvk_context.cpp +++ b/src/dxvk/dxvk_context.cpp @@ -1,4 +1,5 @@ #include +#include #include #include @@ -15,7 +16,8 @@ namespace dxvk { m_initAcquires(DxvkCmdBuffer::InitBarriers), m_initBarriers(DxvkCmdBuffer::InitBuffer), m_execBarriers(DxvkCmdBuffer::ExecBuffer), - m_queryManager(m_common->queryPool()) { + m_queryManager(m_common->queryPool()), + m_implicitResolves(device) { // Init framebuffer info with default render pass in case // the app does not explicitly bind any render targets m_state.om.framebufferInfo = makeFramebufferInfo(m_state.om.renderTargets); @@ -81,6 +83,8 @@ namespace dxvk { this->endCurrentCommands(); this->relocateQueuedResources(); + m_implicitResolves.cleanup(m_trackingId); + if (m_descriptorPool->shouldSubmit(false)) { m_cmd->trackDescriptorPool(m_descriptorPool, m_descriptorManager); m_descriptorPool = m_descriptorManager->getDescriptorPool(); @@ -418,8 +422,16 @@ namespace dxvk { clearRect.layerCount = imageView->info().layerCount; m_cmd->cmdClearAttachments(1, &clearInfo, 1, &clearRect); - } else + } else { this->deferClear(imageView, clearAspects, clearValue); + } + + if 
(imageView->isMultisampled()) { + auto subresources = imageView->imageSubresources(); + subresources.aspectMask = clearAspects; + + m_implicitResolves.invalidate(*imageView->image(), subresources); + } } @@ -440,6 +452,13 @@ namespace dxvk { this->clearImageViewFb(imageView, offset, extent, aspect, value); else if (viewUsage & VK_IMAGE_USAGE_STORAGE_BIT) this->clearImageViewCs(imageView, offset, extent, value); + + if (imageView->isMultisampled()) { + auto subresources = imageView->imageSubresources(); + subresources.aspectMask = aspect; + + m_implicitResolves.invalidate(*imageView->image(), subresources); + } } @@ -579,6 +598,9 @@ namespace dxvk { srcImage, srcSubresource, srcOffset, extent); } + + if (dstImage->info().sampleCount > VK_SAMPLE_COUNT_1_BIT) + m_implicitResolves.invalidate(*dstImage, vk::makeSubresourceRange(dstSubresource)); } @@ -5659,8 +5681,19 @@ namespace dxvk { m_cmd->cmdClearAttachments(lateClearCount, lateClears.data(), 1, &clearRect); } - for (uint32_t i = 0; i < framebufferInfo.numAttachments(); i++) - m_cmd->track(framebufferInfo.getAttachment(i).view->image(), DxvkAccess::Write); + for (uint32_t i = 0; i < framebufferInfo.numAttachments(); i++) { + const auto& attachment = framebufferInfo.getAttachment(i); + m_cmd->track(attachment.view->image(), DxvkAccess::Write); + + if (attachment.view->isMultisampled()) { + VkImageSubresourceRange subresources = attachment.view->imageSubresources(); + + if (subresources.aspectMask & (VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT)) + subresources.aspectMask = vk::getWritableAspectsForLayout(attachment.layout); + + m_implicitResolves.invalidate(*attachment.view->image(), subresources); + } + } m_cmd->addStatCtr(DxvkStatCounter::CmdRenderPassCount, 1u); } @@ -6108,14 +6141,24 @@ namespace dxvk { viewHandle = res.imageView->handle(binding.viewType); if (viewHandle) { - descriptorInfo.image.sampler = VK_NULL_HANDLE; - descriptorInfo.image.imageView = viewHandle; - 
descriptorInfo.image.imageLayout = res.imageView->image()->info().layout; + if (likely(!res.imageView->isMultisampled() || binding.isMultisampled)) { + descriptorInfo.image.sampler = VK_NULL_HANDLE; + descriptorInfo.image.imageView = viewHandle; + descriptorInfo.image.imageLayout = res.imageView->image()->info().layout; - if (BindPoint == VK_PIPELINE_BIND_POINT_COMPUTE || unlikely(res.imageView->image()->hasGfxStores())) - accessImage(DxvkCmdBuffer::ExecBuffer, *res.imageView, util::pipelineStages(binding.stage), binding.access, DxvkAccessOp::None); + if (BindPoint == VK_PIPELINE_BIND_POINT_COMPUTE || unlikely(res.imageView->image()->hasGfxStores())) + accessImage(DxvkCmdBuffer::ExecBuffer, *res.imageView, util::pipelineStages(binding.stage), binding.access, DxvkAccessOp::None); - m_cmd->track(res.imageView->image(), DxvkAccess::Read); + m_cmd->track(res.imageView->image(), DxvkAccess::Read); + } else { + auto view = m_implicitResolves.getResolveView(*res.imageView, m_trackingId); + + descriptorInfo.image.sampler = VK_NULL_HANDLE; + descriptorInfo.image.imageView = view->handle(binding.viewType); + descriptorInfo.image.imageLayout = view->image()->info().layout; + + m_cmd->track(view->image(), DxvkAccess::Read); + } } else { descriptorInfo.image.sampler = VK_NULL_HANDLE; descriptorInfo.image.imageView = VK_NULL_HANDLE; @@ -6157,15 +6200,26 @@ namespace dxvk { viewHandle = res.imageView->handle(binding.viewType); if (viewHandle) { - descriptorInfo.image.sampler = res.sampler->handle(); - descriptorInfo.image.imageView = viewHandle; - descriptorInfo.image.imageLayout = res.imageView->image()->info().layout; + if (likely(!res.imageView->isMultisampled() || binding.isMultisampled)) { + descriptorInfo.image.sampler = res.sampler->handle(); + descriptorInfo.image.imageView = viewHandle; + descriptorInfo.image.imageLayout = res.imageView->image()->info().layout; - if (BindPoint == VK_PIPELINE_BIND_POINT_COMPUTE || unlikely(res.imageView->image()->hasGfxStores())) - 
accessImage(DxvkCmdBuffer::ExecBuffer, *res.imageView, util::pipelineStages(binding.stage), binding.access, DxvkAccessOp::None); + if (BindPoint == VK_PIPELINE_BIND_POINT_COMPUTE || unlikely(res.imageView->image()->hasGfxStores())) + accessImage(DxvkCmdBuffer::ExecBuffer, *res.imageView, util::pipelineStages(binding.stage), binding.access, DxvkAccessOp::None); - m_cmd->track(res.sampler); - m_cmd->track(res.imageView->image(), DxvkAccess::Read); + m_cmd->track(res.imageView->image(), DxvkAccess::Read); + m_cmd->track(res.sampler); + } else { + auto view = m_implicitResolves.getResolveView(*res.imageView, m_trackingId); + + descriptorInfo.image.sampler = res.sampler->handle(); + descriptorInfo.image.imageView = view->handle(binding.viewType); + descriptorInfo.image.imageLayout = view->image()->info().layout; + + m_cmd->track(view->image(), DxvkAccess::Read); + m_cmd->track(res.sampler); + } } else { descriptorInfo.image.sampler = m_common->dummyResources().samplerHandle(); descriptorInfo.image.imageView = VK_NULL_HANDLE; @@ -6818,7 +6872,8 @@ namespace dxvk { &m_state.pc.data[pushConstRange.offset]); } - + + template bool DxvkContext::commitComputeState() { this->spillRenderPass(false); @@ -6840,9 +6895,15 @@ namespace dxvk { if (unlikely(m_features.test(DxvkContextFeature::DebugUtils))) this->beginBarrierControlDebugRegion(); - if (m_descriptorState.hasDirtyComputeSets()) + if (m_descriptorState.hasDirtyComputeSets()) { this->updateComputeShaderResources(); + if (unlikely(Resolve && m_implicitResolves.hasPendingResolves())) { + this->flushImplicitResolves(); + return this->commitComputeState(); + } + } + if (m_flags.test(DxvkContextFlag::DirtyPushConstants)) this->updatePushConstants(); @@ -6850,7 +6911,7 @@ namespace dxvk { } - template + template bool DxvkContext::commitGraphicsState() { if (m_flags.test(DxvkContextFlag::GpDirtyPipeline)) { if (unlikely(!this->updateGraphicsPipeline())) @@ -6910,8 +6971,18 @@ namespace dxvk { return false; } - if 
(m_descriptorState.hasDirtyGraphicsSets()) + if (m_descriptorState.hasDirtyGraphicsSets()) { this->updateGraphicsShaderResources(); + + if (unlikely(Resolve && m_implicitResolves.hasPendingResolves())) { + // If implicit resolves are required for any of the shader bindings, we need + // to discard all the state setup that we've done so far and try again + this->spillRenderPass(true); + this->flushImplicitResolves(); + + return this->commitGraphicsState(); + } + } if (m_state.gp.flags.test(DxvkGraphicsPipelineFlag::HasTransformFeedback)) this->updateTransformFeedbackState(); @@ -7665,6 +7736,21 @@ namespace dxvk { } + void DxvkContext::flushImplicitResolves() { + spillRenderPass(true); + + DxvkImplicitResolveOp op; + + while (m_implicitResolves.extractResolve(op)) { + prepareImage(op.inputImage, vk::makeSubresourceRange(op.resolveRegion.srcSubresource)); + prepareImage(op.resolveImage, vk::makeSubresourceRange(op.resolveRegion.dstSubresource)); + + resolveImageRp(op.resolveImage, op.inputImage, op.resolveRegion, + op.resolveFormat, op.resolveMode, op.resolveMode); + } + } + + void DxvkContext::beginCurrentCommands() { beginActiveDebugRegions(); diff --git a/src/dxvk/dxvk_context.h b/src/dxvk/dxvk_context.h index c8bec80ef..86fe31bdc 100644 --- a/src/dxvk/dxvk_context.h +++ b/src/dxvk/dxvk_context.h @@ -4,6 +4,7 @@ #include "dxvk_bind_mask.h" #include "dxvk_cmdlist.h" #include "dxvk_context_state.h" +#include "dxvk_implicit_resolve.h" #include "dxvk_latency.h" #include "dxvk_objects.h" #include "dxvk_queue.h" @@ -1461,6 +1462,8 @@ namespace dxvk { uint64_t m_latencyFrameId = 0u; bool m_endLatencyTracking = false; + DxvkImplicitResolveTracker m_implicitResolves; + void blitImageFb( Rc dstView, const VkOffset3D* dstOffsets, @@ -1776,9 +1779,10 @@ namespace dxvk { template void updatePushConstants(); + template bool commitComputeState(); - template + template bool commitGraphicsState(); template @@ -1876,6 +1880,8 @@ namespace dxvk { void resizeDescriptorArrays( 
uint32_t bindingCount); + void flushImplicitResolves(); + void beginCurrentCommands(); void endCurrentCommands(); diff --git a/src/dxvk/dxvk_implicit_resolve.cpp b/src/dxvk/dxvk_implicit_resolve.cpp new file mode 100644 index 000000000..abaa92d12 --- /dev/null +++ b/src/dxvk/dxvk_implicit_resolve.cpp @@ -0,0 +1,182 @@ +#include + +#include "dxvk_device.h" +#include "dxvk_implicit_resolve.h" + +namespace dxvk { + + DxvkImplicitResolveTracker::DxvkImplicitResolveTracker(Rc device) + : m_device(std::move(device)) { + + } + + + DxvkImplicitResolveTracker::~DxvkImplicitResolveTracker() { + + } + + + Rc DxvkImplicitResolveTracker::getResolveView( + DxvkImageView& view, + uint64_t trackingId) { + // We generally only expect to have one or two views at most in games + // that hit this path at all, so iterating over the array is fine + for (auto& v : m_resolveViews) { + if (v.inputView == &view) { + addResolveOp(v); + return v.resolveView; + } + } + + // Create a new resolve image with only the array layers covered by the + // input view. We expect resolve images to be somewhat short-lived. 
+ DxvkImageCreateInfo imageInfo = view.image()->info(); + + DxvkImageCreateInfo resolveInfo = { }; + resolveInfo.type = imageInfo.type; + resolveInfo.format = view.info().format; + resolveInfo.sampleCount = VK_SAMPLE_COUNT_1_BIT; + resolveInfo.extent = imageInfo.extent; + resolveInfo.numLayers = view.info().layerCount; + resolveInfo.mipLevels = 1u; + resolveInfo.usage = VK_IMAGE_USAGE_SAMPLED_BIT; + resolveInfo.stages = m_device->getShaderPipelineStages(); + resolveInfo.access = VK_ACCESS_SHADER_READ_BIT; + resolveInfo.tiling = VK_IMAGE_TILING_OPTIMAL; + resolveInfo.layout = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL; + resolveInfo.transient = VK_TRUE; + resolveInfo.debugName = "Resolve image"; + + if (view.info().aspects & (VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT)) { + resolveInfo.usage |= VK_IMAGE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT; + resolveInfo.stages |= VK_PIPELINE_STAGE_LATE_FRAGMENT_TESTS_BIT; + resolveInfo.access |= VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT; + } else { + resolveInfo.usage |= VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT; + resolveInfo.stages |= VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT; + resolveInfo.access |= VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT; + } + + Rc image = m_device->createImage(resolveInfo, VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT); + + cleanup(image->getMemoryInfo().size, trackingId); + + DxvkImageViewKey viewKey = view.info(); + viewKey.usage = VK_IMAGE_USAGE_SAMPLED_BIT; + viewKey.layerIndex = 0u; + + auto& resolveView = m_resolveViews.emplace_back(); + resolveView.inputView = &view; + resolveView.resolveView = image->createView(viewKey); + + addResolveOp(resolveView); + + return resolveView.resolveView; + } + + + bool DxvkImplicitResolveTracker::extractResolve( + DxvkImplicitResolveOp& resolve) { + if (m_resolveOps.empty()) { + resolve = DxvkImplicitResolveOp(); + return false; + } + + resolve = std::move(m_resolveOps.back()); + m_resolveOps.pop_back(); + return true; + } + + + void 
DxvkImplicitResolveTracker::invalidate( + const DxvkImage& image, + const VkImageSubresourceRange& subresources) { + for (auto& v : m_resolveViews) { + if (v.resolveDone && v.inputView->image() == &image) { + auto viewSubresource = v.inputView->imageSubresources(); + + if ((subresources.aspectMask & viewSubresource.aspectMask) + && vk::checkSubresourceRangeOverlap(viewSubresource, subresources)) + v.resolveDone = false; + } + } + } + + + void DxvkImplicitResolveTracker::cleanup( + uint64_t trackingId) { + cleanup(0u, trackingId); + } + + + void DxvkImplicitResolveTracker::addResolveOp( + DxvkImplicitResolveView& view) { + if (view.resolveDone) + return; + + // Determine resolve parameters based on the view format rather than the + // image format, since this will more likely represent what the app is + // trying to do + auto format = view.inputView->formatInfo(); + + auto& op = m_resolveOps.emplace_back(); + op.inputImage = view.inputView->image(); + op.resolveImage = view.resolveView->image(); + op.resolveRegion.srcSubresource = vk::pickSubresourceLayers(view.inputView->imageSubresources(), 0u); + op.resolveRegion.srcSubresource.aspectMask = format->aspectMask; + op.resolveRegion.dstSubresource = vk::pickSubresourceLayers(view.resolveView->imageSubresources(), 0u); + op.resolveRegion.dstSubresource.aspectMask = format->aspectMask; + op.resolveRegion.dstSubresource.baseArrayLayer = 0u; + op.resolveRegion.extent = view.resolveView->mipLevelExtent(0u); + op.resolveFormat = view.inputView->info().format; + op.resolveMode = VK_RESOLVE_MODE_AVERAGE_BIT; + + if ((format->flags.any(DxvkFormatFlag::SampledSInt, DxvkFormatFlag::SampledUInt) + || (format->aspectMask & (VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT)))) + op.resolveMode = VK_RESOLVE_MODE_SAMPLE_ZERO_BIT; + + view.resolveDone = true; + } + + + void DxvkImplicitResolveTracker::cleanup( + VkDeviceSize allocationSize, + uint64_t trackingId) { + constexpr VkDeviceSize MaxMemory = 64ull << 20u; + + 
constexpr uint64_t MaxLifetime = 256u; + constexpr uint64_t MinLifetime = 16u; + + // Eliminate images that haven't been used in a long time + for (auto i = m_resolveViews.begin(); i != m_resolveViews.end(); ) { + if (i->resolveView->image()->getTrackId() + MaxLifetime < trackingId) { + i = m_resolveViews.erase(i); + } else { + allocationSize += i->resolveView->image()->getMemoryInfo().size; + i++; + } + } + + // If we're using a large amount of memory for resolve images, eliminate + // the least recently used resolve images until we drop below the size + // threshold again. + while (allocationSize > MaxMemory) { + auto lr = m_resolveViews.end(); + + for (auto i = m_resolveViews.begin(); i != m_resolveViews.end(); i++) { + if (i->resolveView->image()->getTrackId() + MinLifetime < trackingId) { + if (lr == m_resolveViews.end() + || lr->resolveView->image()->getTrackId() > i->resolveView->image()->getTrackId()) + lr = i; + } + } + + if (lr == m_resolveViews.end()) + break; + + allocationSize -= lr->resolveView->image()->getMemoryInfo().size; + m_resolveViews.erase(lr); + } + } + +} diff --git a/src/dxvk/dxvk_implicit_resolve.h b/src/dxvk/dxvk_implicit_resolve.h new file mode 100644 index 000000000..1f9f695b2 --- /dev/null +++ b/src/dxvk/dxvk_implicit_resolve.h @@ -0,0 +1,108 @@ +#pragma once + +#include + +#include "dxvk_image.h" + +#include "../util/util_small_vector.h" + +namespace dxvk { + + struct DxvkImplicitResolveView { + Rc inputView = nullptr; + Rc resolveView = nullptr; + bool resolveDone = false; + }; + + + struct DxvkImplicitResolveOp { + Rc inputImage = nullptr; + Rc resolveImage = nullptr; + VkImageResolve resolveRegion = { }; + VkFormat resolveFormat = VK_FORMAT_UNDEFINED; + VkResolveModeFlagBits resolveMode = VK_RESOLVE_MODE_NONE; + }; + + + class DxvkDevice; + + class DxvkImplicitResolveTracker { + + public: + + DxvkImplicitResolveTracker(Rc device); + + ~DxvkImplicitResolveTracker(); + + /** + * \brief Checks whether there are pending resolves + * + 
* \returns \c true if there are any resolves that must + * be executed prior to submitting the current draw. + */ + bool hasPendingResolves() const { + return !m_resolveOps.empty(); + } + + /** + * \brief Retrieves resolve image view for a given input view + * + * \param [in] view Multisampled view bound to the context + * \returns Non-multisampled view to replace the bound view with + */ + Rc getResolveView( + DxvkImageView& view, + uint64_t trackingId); + + /** + * \brief Extracts a resolve operation to execute + * + * \param [out] resolve Extracted resolve parameters + * \returns \c true if a resolve was extracted, \c false + * if all resolves have already been processed. + */ + bool extractResolve( + DxvkImplicitResolveOp& resolve); + + /** + * \brief Invalidates resolve cache for a given set of image subresources + * + * Must be called any time the given set of subresources of this + * resource is written, so that the corresponding resolve image + * can get updated the next time it is read. Must not be called + * for any subresource that is only being read, since that may + * cause problems with read-only depth-stencil access. + * \param [in] image The multisampled image + * \param [in] subresources Image subresources written + */ + void invalidate( + const DxvkImage& image, + const VkImageSubresourceRange& subresources); + + /** + * \brief Cleans up resolve image cache + * + * Destroys resolve images that have not been used in a while + * in order to reduce memory wasted on unused images. 
+ * \param [in] trackingId Current context command list ID + */ + void cleanup( + uint64_t trackingId); + + private: + + Rc m_device; + + std::vector m_resolveViews; + std::vector m_resolveOps; + + void addResolveOp( + DxvkImplicitResolveView& view); + + void cleanup( + VkDeviceSize allocationSize, + uint64_t trackingId); + + }; + +} diff --git a/src/dxvk/dxvk_sparse.h b/src/dxvk/dxvk_sparse.h index e9f6f919f..6897ea9dd 100644 --- a/src/dxvk/dxvk_sparse.h +++ b/src/dxvk/dxvk_sparse.h @@ -535,6 +535,16 @@ namespace dxvk { return Rc::unsafeCreate(this); } + /** + * \brief Queries tracking ID + * + * Used to determine when a resource has last been used. + * \returns Tracking ID + */ + uint64_t getTrackId() const { + return m_trackId >> 1u; + } + /** * \brief Sets tracked command list ID * diff --git a/src/dxvk/meson.build b/src/dxvk/meson.build index 9b2b07356..31ca70406 100644 --- a/src/dxvk/meson.build +++ b/src/dxvk/meson.build @@ -89,6 +89,7 @@ dxvk_src = [ 'dxvk_gpu_query.cpp', 'dxvk_graphics.cpp', 'dxvk_image.cpp', + 'dxvk_implicit_resolve.cpp', 'dxvk_instance.cpp', 'dxvk_latency_builtin.cpp', 'dxvk_latency_reflex.cpp', From 72d67435fb3dd1750f19f8b21c4beffd356142e6 Mon Sep 17 00:00:00 2001 From: Philip Rebohle Date: Tue, 4 Mar 2025 18:43:31 +0100 Subject: [PATCH 07/12] [dxvk] Use latched image view properties when possible Reduces pointer chasing. 
--- src/dxvk/dxvk_context.cpp | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/src/dxvk/dxvk_context.cpp b/src/dxvk/dxvk_context.cpp index 3f4544237..005c22733 100644 --- a/src/dxvk/dxvk_context.cpp +++ b/src/dxvk/dxvk_context.cpp @@ -6144,9 +6144,9 @@ namespace dxvk { if (likely(!res.imageView->isMultisampled() || binding.isMultisampled)) { descriptorInfo.image.sampler = VK_NULL_HANDLE; descriptorInfo.image.imageView = viewHandle; - descriptorInfo.image.imageLayout = res.imageView->image()->info().layout; + descriptorInfo.image.imageLayout = res.imageView->defaultLayout(); - if (BindPoint == VK_PIPELINE_BIND_POINT_COMPUTE || unlikely(res.imageView->image()->hasGfxStores())) + if (BindPoint == VK_PIPELINE_BIND_POINT_COMPUTE || unlikely(res.imageView->hasGfxStores())) accessImage(DxvkCmdBuffer::ExecBuffer, *res.imageView, util::pipelineStages(binding.stage), binding.access, DxvkAccessOp::None); m_cmd->track(res.imageView->image(), DxvkAccess::Read); @@ -6155,7 +6155,7 @@ namespace dxvk { descriptorInfo.image.sampler = VK_NULL_HANDLE; descriptorInfo.image.imageView = view->handle(binding.viewType); - descriptorInfo.image.imageLayout = view->image()->info().layout; + descriptorInfo.image.imageLayout = view->defaultLayout(); m_cmd->track(view->image(), DxvkAccess::Read); } @@ -6177,9 +6177,9 @@ namespace dxvk { if (viewHandle) { descriptorInfo.image.sampler = VK_NULL_HANDLE; descriptorInfo.image.imageView = viewHandle; - descriptorInfo.image.imageLayout = res.imageView->image()->info().layout; + descriptorInfo.image.imageLayout = VK_IMAGE_LAYOUT_GENERAL; - if (BindPoint == VK_PIPELINE_BIND_POINT_COMPUTE || res.imageView->image()->hasGfxStores()) + if (BindPoint == VK_PIPELINE_BIND_POINT_COMPUTE || res.imageView->hasGfxStores()) accessImage(DxvkCmdBuffer::ExecBuffer, *res.imageView, util::pipelineStages(binding.stage), binding.access, binding.accessOp); m_cmd->track(res.imageView->image(), (binding.access & vk::AccessWriteMask) @@ 
-6203,9 +6203,9 @@ namespace dxvk { if (likely(!res.imageView->isMultisampled() || binding.isMultisampled)) { descriptorInfo.image.sampler = res.sampler->handle(); descriptorInfo.image.imageView = viewHandle; - descriptorInfo.image.imageLayout = res.imageView->image()->info().layout; + descriptorInfo.image.imageLayout = res.imageView->defaultLayout(); - if (BindPoint == VK_PIPELINE_BIND_POINT_COMPUTE || unlikely(res.imageView->image()->hasGfxStores())) + if (BindPoint == VK_PIPELINE_BIND_POINT_COMPUTE || unlikely(res.imageView->hasGfxStores())) accessImage(DxvkCmdBuffer::ExecBuffer, *res.imageView, util::pipelineStages(binding.stage), binding.access, DxvkAccessOp::None); m_cmd->track(res.imageView->image(), DxvkAccess::Read); @@ -6215,7 +6215,7 @@ namespace dxvk { descriptorInfo.image.sampler = res.sampler->handle(); descriptorInfo.image.imageView = view->handle(binding.viewType); - descriptorInfo.image.imageLayout = view->image()->info().layout; + descriptorInfo.image.imageLayout = view->defaultLayout(); m_cmd->track(view->image(), DxvkAccess::Read); m_cmd->track(res.sampler); @@ -7052,7 +7052,7 @@ namespace dxvk { case VK_DESCRIPTOR_TYPE_STORAGE_IMAGE: { if (slot.imageView) { - if (!IsGraphics || slot.imageView->image()->hasGfxStores()) + if (!IsGraphics || slot.imageView->hasGfxStores()) requiresBarrier |= checkImageViewBarrier(slot.imageView, binding.access, binding.accessOp); else if (binding.access & vk::AccessWriteMask) requiresBarrier |= !slot.imageView->image()->trackGfxStores(); @@ -7061,7 +7061,7 @@ namespace dxvk { case VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE: case VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER: { - if (slot.imageView && (!IsGraphics || slot.imageView->image()->hasGfxStores())) + if (slot.imageView && (!IsGraphics || slot.imageView->hasGfxStores())) requiresBarrier |= checkImageViewBarrier(slot.imageView, binding.access, DxvkAccessOp::None); } break; From 71a85d26612096b599b5d5398e42796fef34cc19 Mon Sep 17 00:00:00 2001 From: Philip Rebohle Date: 
Wed, 5 Mar 2025 00:07:09 +0100 Subject: [PATCH 08/12] [dxvk] Always prefer render pass resolves Current AMDVLK has faster render pass resolves than what we can do with fmask, and our render pass resolve path can deal with all types of color images now if we pass the correct resolve mode. Keep the fb path for non-standard depth resolve modes as well as swapchain blits with composition, but otherwise there is no reason to use this path by default anymore. --- src/dxvk/dxvk_context.cpp | 29 +++++++++++++++++------------ src/dxvk/dxvk_device.cpp | 3 --- src/dxvk/dxvk_device.h | 1 - 3 files changed, 17 insertions(+), 16 deletions(-) diff --git a/src/dxvk/dxvk_context.cpp b/src/dxvk/dxvk_context.cpp index 005c22733..210526438 100644 --- a/src/dxvk/dxvk_context.cpp +++ b/src/dxvk/dxvk_context.cpp @@ -1950,22 +1950,27 @@ namespace dxvk { this->prepareImage(dstImage, vk::makeSubresourceRange(region.dstSubresource)); this->prepareImage(srcImage, vk::makeSubresourceRange(region.srcSubresource)); - bool useFb = srcImage->info().format != format + auto formatInfo = lookupFormatInfo(format); + + bool useRp = srcImage->info().format != format || dstImage->info().format != format; - if (m_device->perfHints().preferFbResolve) { - useFb |= (dstImage->info().usage & VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT) - && (srcImage->info().usage & VK_IMAGE_USAGE_SAMPLED_BIT); - } + useRp |= (srcImage->info().usage & VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT) + && (dstImage->info().usage & VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT); - if (!useFb) { - this->resolveImageHw( - dstImage, srcImage, region); + if (useRp) { + // Work out resolve mode based on format properties. For color images, + // we must use AVERAGE unless the resolve uses an integer format. 
+ VkResolveModeFlagBits mode = VK_RESOLVE_MODE_AVERAGE_BIT; + + if (formatInfo->flags.any(DxvkFormatFlag::SampledSInt, DxvkFormatFlag::SampledUInt) + || (formatInfo->aspectMask & (VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT))) + mode = VK_RESOLVE_MODE_SAMPLE_ZERO_BIT; + + this->resolveImageRp(dstImage, srcImage, region, + format, mode, mode); } else { - this->resolveImageFb( - dstImage, srcImage, region, format, - VK_RESOLVE_MODE_NONE, - VK_RESOLVE_MODE_NONE); + this->resolveImageHw(dstImage, srcImage, region); } } diff --git a/src/dxvk/dxvk_device.cpp b/src/dxvk/dxvk_device.cpp index 5a4999942..e0f04e74d 100644 --- a/src/dxvk/dxvk_device.cpp +++ b/src/dxvk/dxvk_device.cpp @@ -427,9 +427,6 @@ namespace dxvk { && (m_adapter->matchesDriver(VK_DRIVER_ID_MESA_RADV_KHR) || m_adapter->matchesDriver(VK_DRIVER_ID_AMD_OPEN_SOURCE_KHR) || m_adapter->matchesDriver(VK_DRIVER_ID_AMD_PROPRIETARY_KHR)); - hints.preferFbResolve = m_features.amdShaderFragmentMask - && (m_adapter->matchesDriver(VK_DRIVER_ID_AMD_OPEN_SOURCE_KHR) - || m_adapter->matchesDriver(VK_DRIVER_ID_AMD_PROPRIETARY_KHR)); // Older Nvidia drivers sometimes use the wrong format // to interpret the clear color in render pass clears. diff --git a/src/dxvk/dxvk_device.h b/src/dxvk/dxvk_device.h index 9142a739d..0a1e622e5 100644 --- a/src/dxvk/dxvk_device.h +++ b/src/dxvk/dxvk_device.h @@ -36,7 +36,6 @@ namespace dxvk { */ struct DxvkDevicePerfHints { VkBool32 preferFbDepthStencilCopy : 1; - VkBool32 preferFbResolve : 1; VkBool32 renderPassClearFormatBug : 1; VkBool32 preferRenderPassOps : 1; VkBool32 preferPrimaryCmdBufs : 1; From 1e4b39ad4d0ecaa86803f7dedad2530e46543ace Mon Sep 17 00:00:00 2001 From: Philip Rebohle Date: Wed, 5 Mar 2025 11:05:21 +0100 Subject: [PATCH 09/12] [dxvk] Refactor resolve shaders Removes some code duplication while adding support for non-AVERAGE resolves for plain color images. Also nuke the AMD path since it is no longer used. 
--- src/dxvk/dxvk_meta_resolve.cpp | 5 +- src/dxvk/meson.build | 1 - src/dxvk/shaders/dxvk_resolve_common.glsl | 35 ++++++++ src/dxvk/shaders/dxvk_resolve_frag_d.frag | 50 +++--------- src/dxvk/shaders/dxvk_resolve_frag_ds.frag | 79 ++++--------------- src/dxvk/shaders/dxvk_resolve_frag_f.frag | 17 ++-- src/dxvk/shaders/dxvk_resolve_frag_f_amd.frag | 56 ------------- src/dxvk/shaders/dxvk_resolve_frag_i.frag | 16 +++- src/dxvk/shaders/dxvk_resolve_frag_u.frag | 18 ++++- 9 files changed, 104 insertions(+), 173 deletions(-) create mode 100644 src/dxvk/shaders/dxvk_resolve_common.glsl delete mode 100644 src/dxvk/shaders/dxvk_resolve_frag_f_amd.frag diff --git a/src/dxvk/dxvk_meta_resolve.cpp b/src/dxvk/dxvk_meta_resolve.cpp index 7362dc4c4..be8b0be6f 100644 --- a/src/dxvk/dxvk_meta_resolve.cpp +++ b/src/dxvk/dxvk_meta_resolve.cpp @@ -8,7 +8,6 @@ #include #include #include -#include #include #include @@ -52,9 +51,7 @@ namespace dxvk { DxvkMetaResolveObjects::DxvkMetaResolveObjects(const DxvkDevice* device) : m_vkd (device->vkd()), - m_shaderFragF (device->features().amdShaderFragmentMask - ? 
createShaderModule(dxvk_resolve_frag_f_amd) - : createShaderModule(dxvk_resolve_frag_f)), + m_shaderFragF (createShaderModule(dxvk_resolve_frag_f)), m_shaderFragU (createShaderModule(dxvk_resolve_frag_u)), m_shaderFragI (createShaderModule(dxvk_resolve_frag_i)), m_shaderFragD (createShaderModule(dxvk_resolve_frag_d)) { diff --git a/src/dxvk/meson.build b/src/dxvk/meson.build index 31ca70406..b6222ddd3 100644 --- a/src/dxvk/meson.build +++ b/src/dxvk/meson.build @@ -50,7 +50,6 @@ dxvk_shaders = files([ 'shaders/dxvk_resolve_frag_d.frag', 'shaders/dxvk_resolve_frag_ds.frag', 'shaders/dxvk_resolve_frag_f.frag', - 'shaders/dxvk_resolve_frag_f_amd.frag', 'shaders/dxvk_resolve_frag_i.frag', 'shaders/dxvk_resolve_frag_u.frag', diff --git a/src/dxvk/shaders/dxvk_resolve_common.glsl b/src/dxvk/shaders/dxvk_resolve_common.glsl new file mode 100644 index 000000000..3183c9337 --- /dev/null +++ b/src/dxvk/shaders/dxvk_resolve_common.glsl @@ -0,0 +1,35 @@ +#define VK_RESOLVE_MODE_NONE (0) +#define VK_RESOLVE_MODE_SAMPLE_ZERO_BIT (1 << 0) +#define VK_RESOLVE_MODE_AVERAGE_BIT (1 << 1) +#define VK_RESOLVE_MODE_MIN_BIT (1 << 2) +#define VK_RESOLVE_MODE_MAX_BIT (1 << 3) + +#define resolve_fn(name, type, load_fn) \ +type name(ivec3 coord, int samples, uint mode) { \ + if (mode == VK_RESOLVE_MODE_NONE) \ + return type(0); \ + type value = load_fn(coord, 0); \ + \ + switch (mode) { \ + case VK_RESOLVE_MODE_SAMPLE_ZERO_BIT: \ + return value; \ + \ + case VK_RESOLVE_MODE_AVERAGE_BIT: \ + for (int i = 1; i < samples; i++) \ + value += load_fn(coord, i); \ + value /= type(samples); \ + break; \ + \ + case VK_RESOLVE_MODE_MIN_BIT: \ + for (int i = 1; i < samples; i++) \ + value = min(value, load_fn(coord, i)); \ + break; \ + \ + case VK_RESOLVE_MODE_MAX_BIT: \ + for (int i = 1; i < samples; i++) \ + value = max(value, load_fn(coord, i)); \ + break; \ + } \ + \ + return value; \ +} diff --git a/src/dxvk/shaders/dxvk_resolve_frag_d.frag b/src/dxvk/shaders/dxvk_resolve_frag_d.frag index 
70152d014..a0b91ec6d 100644 --- a/src/dxvk/shaders/dxvk_resolve_frag_d.frag +++ b/src/dxvk/shaders/dxvk_resolve_frag_d.frag @@ -1,56 +1,28 @@ #version 450 +#extension GL_GOOGLE_include_directive : enable #extension GL_EXT_samplerless_texture_functions : enable -#define VK_RESOLVE_MODE_NONE (0) -#define VK_RESOLVE_MODE_SAMPLE_ZERO_BIT (1 << 0) -#define VK_RESOLVE_MODE_AVERAGE_BIT (1 << 1) -#define VK_RESOLVE_MODE_MIN_BIT (1 << 2) -#define VK_RESOLVE_MODE_MAX_BIT (1 << 3) +#include "dxvk_resolve_common.glsl" layout(constant_id = 0) const int c_samples = 1; -layout(constant_id = 1) const int c_mode_d = VK_RESOLVE_MODE_SAMPLE_ZERO_BIT; +layout(constant_id = 1) const int c_mode = VK_RESOLVE_MODE_SAMPLE_ZERO_BIT; layout(binding = 0) uniform texture2DMSArray s_depth; +float load_depth(ivec3 coord, int s) { + return texelFetch(s_depth, coord, s).r; +} + +resolve_fn(resolve_depth, float, load_depth) + layout(push_constant) uniform u_info_t { ivec2 offset; } u_info; -float resolve_depth(ivec3 coord) { - float depth = 0.0f; - - switch (c_mode_d) { - case VK_RESOLVE_MODE_SAMPLE_ZERO_BIT: - depth = texelFetch(s_depth, coord, 0).r; - break; - - case VK_RESOLVE_MODE_AVERAGE_BIT: - depth = texelFetch(s_depth, coord, 0).r; - for (int i = 1; i < c_samples; i++) - depth += texelFetch(s_depth, coord, i).r; - depth /= float(c_samples); - break; - - case VK_RESOLVE_MODE_MIN_BIT: - depth = texelFetch(s_depth, coord, 0).r; - for (int i = 1; i < c_samples; i++) - depth = min(depth, texelFetch(s_depth, coord, i).r); - break; - - case VK_RESOLVE_MODE_MAX_BIT: - depth = texelFetch(s_depth, coord, 0).r; - for (int i = 1; i < c_samples; i++) - depth = max(depth, texelFetch(s_depth, coord, i).r); - break; - } - - return depth; -} - void main() { ivec3 coord = ivec3(gl_FragCoord.xy + u_info.offset, gl_Layer); - gl_FragDepth = resolve_depth(coord); -} \ No newline at end of file + gl_FragDepth = resolve_depth(coord, c_samples, c_mode); +} diff --git a/src/dxvk/shaders/dxvk_resolve_frag_ds.frag 
b/src/dxvk/shaders/dxvk_resolve_frag_ds.frag index b92a03d71..a7bda7c93 100644 --- a/src/dxvk/shaders/dxvk_resolve_frag_ds.frag +++ b/src/dxvk/shaders/dxvk_resolve_frag_ds.frag @@ -1,13 +1,10 @@ #version 450 +#extension GL_GOOGLE_include_directive : enable #extension GL_ARB_shader_stencil_export : enable #extension GL_EXT_samplerless_texture_functions : enable -#define VK_RESOLVE_MODE_NONE (0) -#define VK_RESOLVE_MODE_SAMPLE_ZERO_BIT (1 << 0) -#define VK_RESOLVE_MODE_AVERAGE_BIT (1 << 1) -#define VK_RESOLVE_MODE_MIN_BIT (1 << 2) -#define VK_RESOLVE_MODE_MAX_BIT (1 << 3) +#include "dxvk_resolve_common.glsl" layout(constant_id = 0) const int c_samples = 1; layout(constant_id = 1) const int c_mode_d = VK_RESOLVE_MODE_SAMPLE_ZERO_BIT; @@ -16,69 +13,25 @@ layout(constant_id = 2) const int c_mode_s = VK_RESOLVE_MODE_SAMPLE_ZERO_BIT; layout(binding = 0) uniform texture2DMSArray s_depth; layout(binding = 1) uniform utexture2DMSArray s_stencil; +float load_depth(ivec3 coord, int s) { + return texelFetch(s_depth, coord, s).r; +} + +uint load_stencil(ivec3 coord, int s) { + return texelFetch(s_stencil, coord, s).r; +} + +resolve_fn(resolve_depth, float, load_depth) +resolve_fn(resolve_stencil, uint, load_stencil) + layout(push_constant) uniform u_info_t { ivec2 offset; } u_info; -float resolve_depth(ivec3 coord) { - float depth = 0.0f; - - switch (c_mode_d) { - case VK_RESOLVE_MODE_SAMPLE_ZERO_BIT: - depth = texelFetch(s_depth, coord, 0).r; - break; - - case VK_RESOLVE_MODE_AVERAGE_BIT: - depth = texelFetch(s_depth, coord, 0).r; - for (int i = 1; i < c_samples; i++) - depth += texelFetch(s_depth, coord, i).r; - depth /= float(c_samples); - break; - - case VK_RESOLVE_MODE_MIN_BIT: - depth = texelFetch(s_depth, coord, 0).r; - for (int i = 1; i < c_samples; i++) - depth = min(depth, texelFetch(s_depth, coord, i).r); - break; - - case VK_RESOLVE_MODE_MAX_BIT: - depth = texelFetch(s_depth, coord, 0).r; - for (int i = 1; i < c_samples; i++) - depth = max(depth, texelFetch(s_depth, 
coord, i).r); - break; - } - - return depth; -} - -int resolve_stencil(ivec3 coord) { - uint stencil = 0u; - - switch (c_mode_s) { - case VK_RESOLVE_MODE_SAMPLE_ZERO_BIT: - stencil = texelFetch(s_stencil, coord, 0).r; - break; - - case VK_RESOLVE_MODE_MIN_BIT: - stencil = texelFetch(s_stencil, coord, 0).r; - for (int i = 1; i < c_samples; i++) - stencil = min(stencil, texelFetch(s_stencil, coord, i).r); - break; - - case VK_RESOLVE_MODE_MAX_BIT: - stencil = texelFetch(s_stencil, coord, 0).r; - for (int i = 1; i < c_samples; i++) - stencil = max(stencil, texelFetch(s_stencil, coord, i).r); - break; - } - - return int(stencil); -} - void main() { ivec3 coord = ivec3(gl_FragCoord.xy + u_info.offset, gl_Layer); - gl_FragDepth = resolve_depth(coord); - gl_FragStencilRefARB = resolve_stencil(coord); -} \ No newline at end of file + gl_FragDepth = resolve_depth(coord, c_samples, c_mode_d); + gl_FragStencilRefARB = int(resolve_stencil(coord, c_samples, c_mode_s)); +} diff --git a/src/dxvk/shaders/dxvk_resolve_frag_f.frag b/src/dxvk/shaders/dxvk_resolve_frag_f.frag index 2c7442c14..8fbd642de 100644 --- a/src/dxvk/shaders/dxvk_resolve_frag_f.frag +++ b/src/dxvk/shaders/dxvk_resolve_frag_f.frag @@ -1,13 +1,23 @@ #version 450 +#extension GL_GOOGLE_include_directive : enable #extension GL_EXT_samplerless_texture_functions : enable +#include "dxvk_resolve_common.glsl" + layout(constant_id = 0) const int c_samples = 1; +layout(constant_id = 1) const int c_mode = VK_RESOLVE_MODE_SAMPLE_ZERO_BIT; layout(binding = 0) uniform texture2DMSArray s_image; layout(location = 0) out vec4 o_color; +vec4 load_color(ivec3 coord, int s) { + return texelFetch(s_image, coord, s); +} + +resolve_fn(resolve_color, vec4, load_color) + layout(push_constant) uniform u_info_t { ivec2 offset; @@ -15,8 +25,5 @@ uniform u_info_t { void main() { ivec3 coord = ivec3(gl_FragCoord.xy + u_info.offset, gl_Layer); - vec4 color = vec4(0.0f); - for (int i = 0; i < c_samples; i++) - color += texelFetch(s_image, 
coord, i); - o_color = color / float(c_samples); -} \ No newline at end of file + o_color = resolve_color(coord, c_samples, c_mode); +} diff --git a/src/dxvk/shaders/dxvk_resolve_frag_f_amd.frag b/src/dxvk/shaders/dxvk_resolve_frag_f_amd.frag deleted file mode 100644 index a550e3507..000000000 --- a/src/dxvk/shaders/dxvk_resolve_frag_f_amd.frag +++ /dev/null @@ -1,56 +0,0 @@ -#version 450 - -#extension GL_EXT_samplerless_texture_functions : enable -#extension GL_EXT_spirv_intrinsics : enable - -// GL_AMD_shader_fragment_mask was never updated to support -// sampler-less functions, so we have to define these manually -spirv_instruction(extensions = ["SPV_AMD_shader_fragment_mask"], capabilities = [5010], id = 5011) -uint fragment_mask_fetch(texture2DMSArray tex, ivec3 coord); - -spirv_instruction(extensions = ["SPV_AMD_shader_fragment_mask"], capabilities = [5010], id = 5012) -vec4 fragment_fetch(texture2DMSArray tex, ivec3 coord, uint index); - -layout(constant_id = 0) const int c_samples = 1; - -layout(set = 0, binding = 0) -uniform texture2DMSArray s_image; - -layout(location = 0) out vec4 o_color; - -layout(push_constant) -uniform u_info_t { - ivec2 offset; -} u_info; - -void main() { - ivec3 coord = ivec3(gl_FragCoord.xy + u_info.offset, gl_Layer); - - // get a four-bit fragment index for each sample - uint fragMask = fragment_mask_fetch(s_image, coord); - - // count number of occurences of each fragment - // index in one four-bit counter for each sample - uint fragCount = 0u; - - for (int i = 0; i < 4 * c_samples; i += 4) { - uint fragIndex = bitfieldExtract(fragMask, i, 4); - fragCount += 1u << (fragIndex << 2); - } - - // perform necessary texture lookups to compute - // final fragment color - o_color = vec4(0.0f); - - while (fragCount != 0) { - int fragIndex = findLSB(fragCount) >> 2; - int fragShift = fragIndex << 2; - - o_color += fragment_fetch(s_image, coord, fragIndex) - * float(bitfieldExtract(fragCount, fragShift, 4)); - - fragCount = 
bitfieldInsert(fragCount, 0, fragShift, 4); - } - - o_color /= float(c_samples); -} diff --git a/src/dxvk/shaders/dxvk_resolve_frag_i.frag b/src/dxvk/shaders/dxvk_resolve_frag_i.frag index a721897a5..094954457 100644 --- a/src/dxvk/shaders/dxvk_resolve_frag_i.frag +++ b/src/dxvk/shaders/dxvk_resolve_frag_i.frag @@ -1,11 +1,23 @@ #version 450 +#extension GL_GOOGLE_include_directive : enable #extension GL_EXT_samplerless_texture_functions : enable +#include "dxvk_resolve_common.glsl" + +layout(constant_id = 0) const int c_samples = 1; +layout(constant_id = 1) const int c_mode = VK_RESOLVE_MODE_SAMPLE_ZERO_BIT; + layout(binding = 0) uniform itexture2DMSArray s_image; layout(location = 0) out ivec4 o_color; +ivec4 load_color(ivec3 coord, int s) { + return texelFetch(s_image, coord, s); +} + +resolve_fn(resolve_color, ivec4, load_color) + layout(push_constant) uniform u_info_t { ivec2 offset; @@ -13,5 +25,5 @@ uniform u_info_t { void main() { ivec3 coord = ivec3(gl_FragCoord.xy + u_info.offset, gl_Layer); - o_color = texelFetch(s_image, coord, 0); -} \ No newline at end of file + o_color = resolve_color(coord, c_samples, c_mode); +} diff --git a/src/dxvk/shaders/dxvk_resolve_frag_u.frag b/src/dxvk/shaders/dxvk_resolve_frag_u.frag index f7b4e73da..ab8b3d18a 100644 --- a/src/dxvk/shaders/dxvk_resolve_frag_u.frag +++ b/src/dxvk/shaders/dxvk_resolve_frag_u.frag @@ -1,10 +1,22 @@ #version 450 +#extension GL_GOOGLE_include_directive : enable #extension GL_EXT_samplerless_texture_functions : enable +#include "dxvk_resolve_common.glsl" + +layout(constant_id = 0) const int c_samples = 1; +layout(constant_id = 1) const int c_mode = VK_RESOLVE_MODE_SAMPLE_ZERO_BIT; + layout(binding = 0) uniform utexture2DMSArray s_image; -layout(location = 0) out uvec4 o_color; +layout(location = 0) out uvec4 o_color; + +uvec4 load_color(ivec3 coord, int s) { + return texelFetch(s_image, coord, s); +} + +resolve_fn(resolve_color, uvec4, load_color) layout(push_constant) uniform u_info_t { @@ -13,5 
+25,5 @@ uniform u_info_t { void main() { ivec3 coord = ivec3(gl_FragCoord.xy + u_info.offset, gl_Layer); - o_color = texelFetch(s_image, coord, 0); -} \ No newline at end of file + o_color = resolve_color(coord, c_samples, c_mode); +} From e4812a1177551a105fe29abdfc34a2fd1a24407c Mon Sep 17 00:00:00 2001 From: Philip Rebohle Date: Wed, 5 Mar 2025 11:34:47 +0100 Subject: [PATCH 10/12] [dxvk] Always use SAMPLE_ZERO for implicit resolves --- src/dxvk/dxvk_context.cpp | 17 +++++++++++++++-- src/dxvk/dxvk_implicit_resolve.cpp | 5 ----- src/dxvk/dxvk_implicit_resolve.h | 1 - 3 files changed, 15 insertions(+), 8 deletions(-) diff --git a/src/dxvk/dxvk_context.cpp b/src/dxvk/dxvk_context.cpp index 210526438..e574015cf 100644 --- a/src/dxvk/dxvk_context.cpp +++ b/src/dxvk/dxvk_context.cpp @@ -7750,8 +7750,21 @@ namespace dxvk { prepareImage(op.inputImage, vk::makeSubresourceRange(op.resolveRegion.srcSubresource)); prepareImage(op.resolveImage, vk::makeSubresourceRange(op.resolveRegion.dstSubresource)); - resolveImageRp(op.resolveImage, op.inputImage, op.resolveRegion, - op.resolveFormat, op.resolveMode, op.resolveMode); + // Always do a SAMPLE_ZERO resolve here since that's less expensive and closer to what + // happens on native AMD anyway. Need to use a shader in case we are dealing with a + // non-integer color image since render pass resolves only support AVERAGE. 
+ auto formatInfo = lookupFormatInfo(op.resolveFormat); + + bool useRp = (formatInfo->flags.any(DxvkFormatFlag::SampledSInt, DxvkFormatFlag::SampledUInt)) + || (formatInfo->aspectMask & (VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT)); + + if (useRp) { + resolveImageRp(op.resolveImage, op.inputImage, op.resolveRegion, + op.resolveFormat, VK_RESOLVE_MODE_SAMPLE_ZERO_BIT, VK_RESOLVE_MODE_SAMPLE_ZERO_BIT); + } else { + resolveImageFb(op.resolveImage, op.inputImage, op.resolveRegion, + op.resolveFormat, VK_RESOLVE_MODE_SAMPLE_ZERO_BIT, VK_RESOLVE_MODE_NONE); + } } } diff --git a/src/dxvk/dxvk_implicit_resolve.cpp b/src/dxvk/dxvk_implicit_resolve.cpp index abaa92d12..e1c284a99 100644 --- a/src/dxvk/dxvk_implicit_resolve.cpp +++ b/src/dxvk/dxvk_implicit_resolve.cpp @@ -129,11 +129,6 @@ namespace dxvk { op.resolveRegion.dstSubresource.baseArrayLayer = 0u; op.resolveRegion.extent = view.resolveView->mipLevelExtent(0u); op.resolveFormat = view.inputView->info().format; - op.resolveMode = VK_RESOLVE_MODE_AVERAGE_BIT; - - if ((format->flags.any(DxvkFormatFlag::SampledSInt, DxvkFormatFlag::SampledUInt) - || (format->aspectMask & (VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT)))) - op.resolveMode = VK_RESOLVE_MODE_SAMPLE_ZERO_BIT; view.resolveDone = true; } diff --git a/src/dxvk/dxvk_implicit_resolve.h b/src/dxvk/dxvk_implicit_resolve.h index 1f9f695b2..d75681976 100644 --- a/src/dxvk/dxvk_implicit_resolve.h +++ b/src/dxvk/dxvk_implicit_resolve.h @@ -20,7 +20,6 @@ namespace dxvk { Rc resolveImage = nullptr; VkImageResolve resolveRegion = { }; VkFormat resolveFormat = VK_FORMAT_UNDEFINED; - VkResolveModeFlagBits resolveMode = VK_RESOLVE_MODE_NONE; }; From 285f1f24c5dbe530fc327f8d5d1c374af2d8e467 Mon Sep 17 00:00:00 2001 From: Philip Rebohle Date: Wed, 5 Mar 2025 12:33:59 +0100 Subject: [PATCH 11/12] [dxvk] Also try to use render pass resolves on desktop drivers Elides redundant back-to-back resolves in some games, and reduces some barrier spam when the app 
resolves multiple images. --- src/dxvk/dxvk_context.cpp | 12 ++++++------ src/dxvk/dxvk_device.cpp | 3 +-- 2 files changed, 7 insertions(+), 8 deletions(-) diff --git a/src/dxvk/dxvk_context.cpp b/src/dxvk/dxvk_context.cpp index e574015cf..2835f03bf 100644 --- a/src/dxvk/dxvk_context.cpp +++ b/src/dxvk/dxvk_context.cpp @@ -2458,9 +2458,6 @@ namespace dxvk { void DxvkContext::flushResolves() { - if (!m_device->perfHints().preferRenderPassOps) - return; - for (size_t i = 0; i < m_state.om.framebufferInfo.numAttachments(); i++) { auto& resolve = m_deferredResolves.at(i); @@ -5659,10 +5656,13 @@ namespace dxvk { // On drivers that don't natively support secondary command buffers, only // use them to enable MSAA resolve attachments. Also ignore color-only // render passes here since we almost certainly need the output anyway. - bool useSecondaryCmdBuffer = m_device->perfHints().preferRenderPassOps; + bool useSecondaryCmdBuffer = !m_device->perfHints().preferPrimaryCmdBufs + && renderingInheritance.rasterizationSamples > VK_SAMPLE_COUNT_1_BIT; - if (useSecondaryCmdBuffer && (m_device->perfHints().preferPrimaryCmdBufs || !depthStencilAspects)) - useSecondaryCmdBuffer = renderingInheritance.rasterizationSamples > VK_SAMPLE_COUNT_1_BIT; + if (m_device->perfHints().preferRenderPassOps) { + useSecondaryCmdBuffer = renderingInheritance.rasterizationSamples > VK_SAMPLE_COUNT_1_BIT + || (!m_device->perfHints().preferPrimaryCmdBufs && depthStencilAspects); + } if (useSecondaryCmdBuffer) { // Begin secondary command buffer on tiling GPUs so that subsequent diff --git a/src/dxvk/dxvk_device.cpp b/src/dxvk/dxvk_device.cpp index e0f04e74d..7b6aafb3a 100644 --- a/src/dxvk/dxvk_device.cpp +++ b/src/dxvk/dxvk_device.cpp @@ -451,8 +451,7 @@ namespace dxvk { // Be less aggressive on secondary command buffer usage on // drivers that do not natively support them - hints.preferPrimaryCmdBufs = !hints.preferRenderPassOps - || m_adapter->matchesDriver(VK_DRIVER_ID_MESA_HONEYKRISP); + 
hints.preferPrimaryCmdBufs = m_adapter->matchesDriver(VK_DRIVER_ID_MESA_HONEYKRISP); return hints; } From 023238e4d1f57d4bb72342782ceda26e59a4848f Mon Sep 17 00:00:00 2001 From: Philip Rebohle Date: Wed, 5 Mar 2025 14:34:01 +0100 Subject: [PATCH 12/12] [dxvk] Highlight multisampled render passes when debugging --- src/dxvk/dxvk_context.cpp | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/src/dxvk/dxvk_context.cpp b/src/dxvk/dxvk_context.cpp index 2835f03bf..14f6edbe1 100644 --- a/src/dxvk/dxvk_context.cpp +++ b/src/dxvk/dxvk_context.cpp @@ -3008,6 +3008,8 @@ namespace dxvk { void DxvkContext::beginRenderPassDebugRegion() { + VkSampleCountFlagBits sampleCount = VK_SAMPLE_COUNT_1_BIT; + bool hasColorAttachments = false; bool hasDepthAttachment = m_state.om.renderTargets.depth.view != nullptr; @@ -3033,6 +3035,7 @@ namespace dxvk { label << (hasColorAttachments ? ", " : "") << i << ": " << (imageName ? imageName : "unknown"); hasColorAttachments = true; + sampleCount = m_state.om.renderTargets.color[i].view->image()->info().sampleCount; } } @@ -3042,14 +3045,21 @@ namespace dxvk { const char* imageName = m_state.om.renderTargets.depth.view->image()->info().debugName; label << "DS:" << (imageName ? imageName : "unknown"); + + sampleCount = m_state.om.renderTargets.depth.view->image()->info().sampleCount; } if (!hasColorAttachments && !hasDepthAttachment) label << "No attachments"; + if (sampleCount > VK_SAMPLE_COUNT_1_BIT) + label << ", " << uint32_t(sampleCount) << "x MSAA"; + label << ")"; - pushDebugRegion(vk::makeLabel(0xf0e6dc, label.str().c_str()), + uint32_t color = sampleCount > VK_SAMPLE_COUNT_1_BIT ? 0xf0dcf0 : 0xf0e6dc; + + pushDebugRegion(vk::makeLabel(color, label.str().c_str()), util::DxvkDebugLabelType::InternalRenderPass); }