diff --git a/src/d3d11/d3d11_video.cpp b/src/d3d11/d3d11_video.cpp index e76063111..7522b2df7 100644 --- a/src/d3d11/d3d11_video.cpp +++ b/src/d3d11/d3d11_video.cpp @@ -1333,7 +1333,7 @@ namespace dxvk { SpirvCodeBuffer fsCode(d3d11_video_blit_frag); const std::array fsBindings = {{ - { VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER, 0, VK_IMAGE_VIEW_TYPE_MAX_ENUM, VK_SHADER_STAGE_FRAGMENT_BIT, VK_ACCESS_UNIFORM_READ_BIT, VK_TRUE }, + { VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER, 0, VK_IMAGE_VIEW_TYPE_MAX_ENUM, VK_SHADER_STAGE_FRAGMENT_BIT, VK_ACCESS_UNIFORM_READ_BIT, DxvkAccessOp::None, true }, { VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE, 1, VK_IMAGE_VIEW_TYPE_2D, VK_SHADER_STAGE_FRAGMENT_BIT, VK_ACCESS_SHADER_READ_BIT }, { VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE, 2, VK_IMAGE_VIEW_TYPE_2D, VK_SHADER_STAGE_FRAGMENT_BIT, VK_ACCESS_SHADER_READ_BIT }, }}; diff --git a/src/dxbc/dxbc_compiler.cpp b/src/dxbc/dxbc_compiler.cpp index f823f84db..184d15748 100644 --- a/src/dxbc/dxbc_compiler.cpp +++ b/src/dxbc/dxbc_compiler.cpp @@ -871,7 +871,7 @@ namespace dxvk { binding.viewType = VK_IMAGE_VIEW_TYPE_MAX_ENUM; binding.access = VK_ACCESS_UNIFORM_READ_BIT; binding.resourceBinding = bindingId; - binding.uboSet = VK_TRUE; + binding.uboSet = true; m_bindings.push_back(binding); } @@ -1080,6 +1080,7 @@ namespace dxvk { DxvkBindingInfo binding = { }; binding.viewType = typeInfo.vtype; binding.resourceBinding = bindingId; + binding.isMultisampled = typeInfo.ms; if (isUav) { binding.descriptorType = resourceType == DxbcResourceDim::Buffer diff --git a/src/dxvk/dxvk_context.cpp b/src/dxvk/dxvk_context.cpp index ff6c75294..14f6edbe1 100644 --- a/src/dxvk/dxvk_context.cpp +++ b/src/dxvk/dxvk_context.cpp @@ -1,4 +1,5 @@ #include +#include #include #include @@ -15,7 +16,8 @@ namespace dxvk { m_initAcquires(DxvkCmdBuffer::InitBarriers), m_initBarriers(DxvkCmdBuffer::InitBuffer), m_execBarriers(DxvkCmdBuffer::ExecBuffer), - m_queryManager(m_common->queryPool()) { + m_queryManager(m_common->queryPool()), + 
m_implicitResolves(device) { // Init framebuffer info with default render pass in case // the app does not explicitly bind any render targets m_state.om.framebufferInfo = makeFramebufferInfo(m_state.om.renderTargets); @@ -81,6 +83,8 @@ namespace dxvk { this->endCurrentCommands(); this->relocateQueuedResources(); + m_implicitResolves.cleanup(m_trackingId); + if (m_descriptorPool->shouldSubmit(false)) { m_cmd->trackDescriptorPool(m_descriptorPool, m_descriptorManager); m_descriptorPool = m_descriptorManager->getDescriptorPool(); @@ -418,8 +422,16 @@ namespace dxvk { clearRect.layerCount = imageView->info().layerCount; m_cmd->cmdClearAttachments(1, &clearInfo, 1, &clearRect); - } else + } else { this->deferClear(imageView, clearAspects, clearValue); + } + + if (imageView->isMultisampled()) { + auto subresources = imageView->imageSubresources(); + subresources.aspectMask = clearAspects; + + m_implicitResolves.invalidate(*imageView->image(), subresources); + } } @@ -440,6 +452,13 @@ namespace dxvk { this->clearImageViewFb(imageView, offset, extent, aspect, value); else if (viewUsage & VK_IMAGE_USAGE_STORAGE_BIT) this->clearImageViewCs(imageView, offset, extent, value); + + if (imageView->isMultisampled()) { + auto subresources = imageView->imageSubresources(); + subresources.aspectMask = aspect; + + m_implicitResolves.invalidate(*imageView->image(), subresources); + } } @@ -579,6 +598,9 @@ namespace dxvk { srcImage, srcSubresource, srcOffset, extent); } + + if (dstImage->info().sampleCount > VK_SAMPLE_COUNT_1_BIT) + m_implicitResolves.invalidate(*dstImage, vk::makeSubresourceRange(dstSubresource)); } @@ -1928,22 +1950,27 @@ namespace dxvk { this->prepareImage(dstImage, vk::makeSubresourceRange(region.dstSubresource)); this->prepareImage(srcImage, vk::makeSubresourceRange(region.srcSubresource)); - bool useFb = srcImage->info().format != format + auto formatInfo = lookupFormatInfo(format); + + bool useRp = srcImage->info().format != format || dstImage->info().format != 
format; - if (m_device->perfHints().preferFbResolve) { - useFb |= (dstImage->info().usage & VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT) - && (srcImage->info().usage & VK_IMAGE_USAGE_SAMPLED_BIT); - } + useRp |= (srcImage->info().usage & VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT) + && (dstImage->info().usage & VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT); - if (!useFb) { - this->resolveImageHw( - dstImage, srcImage, region); + if (useRp) { + // Work out resolve mode based on format properties. For color images, + // we must use AVERAGE unless the resolve uses an integer format. + VkResolveModeFlagBits mode = VK_RESOLVE_MODE_AVERAGE_BIT; + + if (formatInfo->flags.any(DxvkFormatFlag::SampledSInt, DxvkFormatFlag::SampledUInt) + || (formatInfo->aspectMask & (VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT))) + mode = VK_RESOLVE_MODE_SAMPLE_ZERO_BIT; + + this->resolveImageRp(dstImage, srcImage, region, + format, mode, mode); } else { - this->resolveImageFb( - dstImage, srcImage, region, format, - VK_RESOLVE_MODE_NONE, - VK_RESOLVE_MODE_NONE); + this->resolveImageHw(dstImage, srcImage, region); } } @@ -1999,13 +2026,11 @@ namespace dxvk { this->prepareImage(srcImage, vk::makeSubresourceRange(region.srcSubresource)); if (useFb) { - this->resolveImageFb( - dstImage, srcImage, region, VK_FORMAT_UNDEFINED, - depthMode, stencilMode); + this->resolveImageFb(dstImage, srcImage, region, + VK_FORMAT_UNDEFINED, depthMode, stencilMode); } else { - this->resolveImageDs( - dstImage, srcImage, region, - depthMode, stencilMode); + this->resolveImageRp(dstImage, srcImage, region, + VK_FORMAT_UNDEFINED, depthMode, stencilMode); } } @@ -2433,9 +2458,6 @@ namespace dxvk { void DxvkContext::flushResolves() { - if (!m_device->perfHints().preferRenderPassOps) - return; - for (size_t i = 0; i < m_state.om.framebufferInfo.numAttachments(); i++) { auto& resolve = m_deferredResolves.at(i); @@ -2986,6 +3008,8 @@ namespace dxvk { void DxvkContext::beginRenderPassDebugRegion() { + VkSampleCountFlagBits 
sampleCount = VK_SAMPLE_COUNT_1_BIT; + bool hasColorAttachments = false; bool hasDepthAttachment = m_state.om.renderTargets.depth.view != nullptr; @@ -3011,6 +3035,7 @@ namespace dxvk { label << (hasColorAttachments ? ", " : "") << i << ": " << (imageName ? imageName : "unknown"); hasColorAttachments = true; + sampleCount = m_state.om.renderTargets.color[i].view->image()->info().sampleCount; } } @@ -3020,14 +3045,21 @@ namespace dxvk { const char* imageName = m_state.om.renderTargets.depth.view->image()->info().debugName; label << "DS:" << (imageName ? imageName : "unknown"); + + sampleCount = m_state.om.renderTargets.depth.view->image()->info().sampleCount; } if (!hasColorAttachments && !hasDepthAttachment) label << "No attachments"; + if (sampleCount > VK_SAMPLE_COUNT_1_BIT) + label << ", " << uint32_t(sampleCount) << "x MSAA"; + label << ")"; - pushDebugRegion(vk::makeLabel(0xf0e6dc, label.str().c_str()), + uint32_t color = sampleCount > VK_SAMPLE_COUNT_1_BIT ? 0xf0dcf0 : 0xf0e6dc; + + pushDebugRegion(vk::makeLabel(color, label.str().c_str()), util::DxvkDebugLabelType::InternalRenderPass); } @@ -4693,21 +4725,31 @@ namespace dxvk { } - void DxvkContext::resolveImageDs( + void DxvkContext::resolveImageRp( const Rc& dstImage, const Rc& srcImage, const VkImageResolve& region, - VkResolveModeFlagBits depthMode, + VkFormat format, + VkResolveModeFlagBits mode, VkResolveModeFlagBits stencilMode) { auto dstSubresourceRange = vk::makeSubresourceRange(region.dstSubresource); auto srcSubresourceRange = vk::makeSubresourceRange(region.srcSubresource); + bool isDepthStencil = (dstImage->formatInfo()->aspectMask & (VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT)); + DxvkImageUsageInfo usageInfo = { }; - usageInfo.usage = VK_IMAGE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT; + usageInfo.usage = isDepthStencil + ? 
VK_IMAGE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT + : VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT; + + if (format) { + usageInfo.viewFormatCount = 1u; + usageInfo.viewFormats = &format; + } if (!ensureImageCompatibility(dstImage, usageInfo) || !ensureImageCompatibility(srcImage, usageInfo)) { - Logger::err(str::format("DxvkContext: resolveImageDs: Unsupported images:" + Logger::err(str::format("DxvkContext: resolveImageRp: Unsupported images:" "\n dst format: ", dstImage->info().format, "\n src format: ", srcImage->info().format)); } @@ -4720,42 +4762,54 @@ namespace dxvk { const char* srcName = srcImage->info().debugName; m_cmd->cmdBeginDebugUtilsLabel(DxvkCmdBuffer::ExecBuffer, - vk::makeLabel(0xf0dcdc, str::format("Resolve DS (", + vk::makeLabel(0xf0dcdc, str::format("Resolve pass (", dstName ? dstName : "unknown", ", ", srcName ? srcName : "unknown", ")").c_str())); } - // Transition both images to usable layouts if necessary. For the source image we - // can be fairly leniet since writable layouts are allowed for resolve attachments. - VkImageLayout dstLayout = dstImage->pickLayout(VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL); + // Transition both images to usable layouts if necessary. For the source image + // we can be fairly lenient when dealing with writable depth-stencil layouts. + VkImageLayout writableLayout = isDepthStencil + ? 
VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL + : VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL; + + VkImageLayout dstLayout = dstImage->pickLayout(writableLayout); VkImageLayout srcLayout = srcImage->info().layout; if (srcLayout != VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL && srcLayout != VK_IMAGE_LAYOUT_DEPTH_STENCIL_READ_ONLY_OPTIMAL) - srcLayout = srcImage->pickLayout(VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL); + srcLayout = srcImage->pickLayout(writableLayout); - addImageLayoutTransition(*srcImage, srcSubresourceRange, srcLayout, - VK_PIPELINE_STAGE_2_EARLY_FRAGMENT_TESTS_BIT | VK_PIPELINE_STAGE_2_LATE_FRAGMENT_TESTS_BIT, - VK_ACCESS_2_DEPTH_STENCIL_ATTACHMENT_READ_BIT, false); - addImageLayoutTransition(*dstImage, dstSubresourceRange, dstLayout, - VK_PIPELINE_STAGE_2_EARLY_FRAGMENT_TESTS_BIT | VK_PIPELINE_STAGE_2_LATE_FRAGMENT_TESTS_BIT, - VK_ACCESS_2_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT, true); + VkPipelineStageFlags2 stages = isDepthStencil + ? VK_PIPELINE_STAGE_2_EARLY_FRAGMENT_TESTS_BIT | VK_PIPELINE_STAGE_2_LATE_FRAGMENT_TESTS_BIT + : VK_PIPELINE_STAGE_2_COLOR_ATTACHMENT_OUTPUT_BIT; + + VkAccessFlags2 srcAccess = isDepthStencil + ? VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_READ_BIT + : VK_ACCESS_COLOR_ATTACHMENT_READ_BIT; + + VkAccessFlags2 dstAccess = isDepthStencil + ? 
VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT + : VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT; + + addImageLayoutTransition(*srcImage, srcSubresourceRange, srcLayout, stages, srcAccess, false); + addImageLayoutTransition(*dstImage, dstSubresourceRange, dstLayout, stages, dstAccess, true); flushImageLayoutTransitions(DxvkCmdBuffer::ExecBuffer); // Create a pair of views for the attachment resolve DxvkMetaResolveViews views(dstImage, region.dstSubresource, srcImage, region.srcSubresource, dstImage->info().format); - VkRenderingAttachmentInfo depthAttachment = { VK_STRUCTURE_TYPE_RENDERING_ATTACHMENT_INFO }; - depthAttachment.imageView = views.srcView->handle(); - depthAttachment.imageLayout = srcLayout; - depthAttachment.resolveMode = depthMode; - depthAttachment.resolveImageView = views.dstView->handle(); - depthAttachment.resolveImageLayout = dstLayout; - depthAttachment.loadOp = VK_ATTACHMENT_LOAD_OP_LOAD; - depthAttachment.storeOp = VK_ATTACHMENT_STORE_OP_STORE; + VkRenderingAttachmentInfo attachment = { VK_STRUCTURE_TYPE_RENDERING_ATTACHMENT_INFO }; + attachment.imageView = views.srcView->handle(); + attachment.imageLayout = srcLayout; + attachment.resolveMode = mode; + attachment.resolveImageView = views.dstView->handle(); + attachment.resolveImageLayout = dstLayout; + attachment.loadOp = VK_ATTACHMENT_LOAD_OP_LOAD; + attachment.storeOp = VK_ATTACHMENT_STORE_OP_STORE; - VkRenderingAttachmentInfo stencilAttachment = depthAttachment; + VkRenderingAttachmentInfo stencilAttachment = attachment; stencilAttachment.resolveMode = stencilMode; VkExtent3D extent = dstImage->mipLevelExtent(region.dstSubresource.mipLevel); @@ -4765,29 +4819,25 @@ namespace dxvk { renderingInfo.renderArea.extent = VkExtent2D { extent.width, extent.height }; renderingInfo.layerCount = region.dstSubresource.layerCount; - if (dstImage->formatInfo()->aspectMask & VK_IMAGE_ASPECT_DEPTH_BIT) - renderingInfo.pDepthAttachment = &depthAttachment; + if (isDepthStencil) { + if 
(dstImage->formatInfo()->aspectMask & VK_IMAGE_ASPECT_DEPTH_BIT) + renderingInfo.pDepthAttachment = &attachment; - if (dstImage->formatInfo()->aspectMask & VK_IMAGE_ASPECT_STENCIL_BIT) - renderingInfo.pStencilAttachment = &stencilAttachment; + if (dstImage->formatInfo()->aspectMask & VK_IMAGE_ASPECT_STENCIL_BIT) + renderingInfo.pStencilAttachment = &stencilAttachment; + } else { + renderingInfo.colorAttachmentCount = 1u; + renderingInfo.pColorAttachments = &attachment; + } m_cmd->cmdBeginRendering(&renderingInfo); m_cmd->cmdEndRendering(); - // Add barriers for the resolve operation - accessImage(DxvkCmdBuffer::ExecBuffer, - *srcImage, srcSubresourceRange, srcLayout, - VK_PIPELINE_STAGE_2_EARLY_FRAGMENT_TESTS_BIT | - VK_PIPELINE_STAGE_2_LATE_FRAGMENT_TESTS_BIT, - VK_ACCESS_2_DEPTH_STENCIL_ATTACHMENT_READ_BIT, - DxvkAccessOp::None); - - accessImage(DxvkCmdBuffer::ExecBuffer, - *dstImage, dstSubresourceRange, dstLayout, - VK_PIPELINE_STAGE_2_EARLY_FRAGMENT_TESTS_BIT | - VK_PIPELINE_STAGE_2_LATE_FRAGMENT_TESTS_BIT, - VK_ACCESS_2_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT, - DxvkAccessOp::None); + // Add barriers for the render pass resolve + accessImage(DxvkCmdBuffer::ExecBuffer, *srcImage, srcSubresourceRange, + srcLayout, stages, srcAccess, DxvkAccessOp::None); + accessImage(DxvkCmdBuffer::ExecBuffer, *dstImage, dstSubresourceRange, + dstLayout, stages, dstAccess, DxvkAccessOp::None); if (unlikely(m_features.test(DxvkContextFeature::DebugUtils))) m_cmd->cmdEndDebugUtilsLabel(DxvkCmdBuffer::ExecBuffer); @@ -5616,10 +5666,13 @@ namespace dxvk { // On drivers that don't natively support secondary command buffers, only // use them to enable MSAA resolve attachments. Also ignore color-only // render passes here since we almost certainly need the output anyway. 
- bool useSecondaryCmdBuffer = m_device->perfHints().preferRenderPassOps; + bool useSecondaryCmdBuffer = !m_device->perfHints().preferPrimaryCmdBufs + && renderingInheritance.rasterizationSamples > VK_SAMPLE_COUNT_1_BIT; - if (useSecondaryCmdBuffer && (m_device->perfHints().preferPrimaryCmdBufs || !depthStencilAspects)) - useSecondaryCmdBuffer = renderingInheritance.rasterizationSamples > VK_SAMPLE_COUNT_1_BIT; + if (m_device->perfHints().preferRenderPassOps) { + useSecondaryCmdBuffer = renderingInheritance.rasterizationSamples > VK_SAMPLE_COUNT_1_BIT + || (!m_device->perfHints().preferPrimaryCmdBufs && depthStencilAspects); + } if (useSecondaryCmdBuffer) { // Begin secondary command buffer on tiling GPUs so that subsequent @@ -5643,8 +5696,19 @@ namespace dxvk { m_cmd->cmdClearAttachments(lateClearCount, lateClears.data(), 1, &clearRect); } - for (uint32_t i = 0; i < framebufferInfo.numAttachments(); i++) - m_cmd->track(framebufferInfo.getAttachment(i).view->image(), DxvkAccess::Write); + for (uint32_t i = 0; i < framebufferInfo.numAttachments(); i++) { + const auto& attachment = framebufferInfo.getAttachment(i); + m_cmd->track(attachment.view->image(), DxvkAccess::Write); + + if (attachment.view->isMultisampled()) { + VkImageSubresourceRange subresources = attachment.view->imageSubresources(); + + if (subresources.aspectMask & (VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT)) + subresources.aspectMask = vk::getWritableAspectsForLayout(attachment.layout); + + m_implicitResolves.invalidate(*attachment.view->image(), subresources); + } + } m_cmd->addStatCtr(DxvkStatCounter::CmdRenderPassCount, 1u); } @@ -6092,14 +6156,24 @@ namespace dxvk { viewHandle = res.imageView->handle(binding.viewType); if (viewHandle) { - descriptorInfo.image.sampler = VK_NULL_HANDLE; - descriptorInfo.image.imageView = viewHandle; - descriptorInfo.image.imageLayout = res.imageView->image()->info().layout; + if (likely(!res.imageView->isMultisampled() || binding.isMultisampled)) { + 
descriptorInfo.image.sampler = VK_NULL_HANDLE; + descriptorInfo.image.imageView = viewHandle; + descriptorInfo.image.imageLayout = res.imageView->defaultLayout(); - if (BindPoint == VK_PIPELINE_BIND_POINT_COMPUTE || unlikely(res.imageView->image()->hasGfxStores())) - accessImage(DxvkCmdBuffer::ExecBuffer, *res.imageView, util::pipelineStages(binding.stage), binding.access, DxvkAccessOp::None); + if (BindPoint == VK_PIPELINE_BIND_POINT_COMPUTE || unlikely(res.imageView->hasGfxStores())) + accessImage(DxvkCmdBuffer::ExecBuffer, *res.imageView, util::pipelineStages(binding.stage), binding.access, DxvkAccessOp::None); - m_cmd->track(res.imageView->image(), DxvkAccess::Read); + m_cmd->track(res.imageView->image(), DxvkAccess::Read); + } else { + auto view = m_implicitResolves.getResolveView(*res.imageView, m_trackingId); + + descriptorInfo.image.sampler = VK_NULL_HANDLE; + descriptorInfo.image.imageView = view->handle(binding.viewType); + descriptorInfo.image.imageLayout = view->defaultLayout(); + + m_cmd->track(view->image(), DxvkAccess::Read); + } } else { descriptorInfo.image.sampler = VK_NULL_HANDLE; descriptorInfo.image.imageView = VK_NULL_HANDLE; @@ -6118,9 +6192,9 @@ namespace dxvk { if (viewHandle) { descriptorInfo.image.sampler = VK_NULL_HANDLE; descriptorInfo.image.imageView = viewHandle; - descriptorInfo.image.imageLayout = res.imageView->image()->info().layout; + descriptorInfo.image.imageLayout = VK_IMAGE_LAYOUT_GENERAL; - if (BindPoint == VK_PIPELINE_BIND_POINT_COMPUTE || res.imageView->image()->hasGfxStores()) + if (BindPoint == VK_PIPELINE_BIND_POINT_COMPUTE || res.imageView->hasGfxStores()) accessImage(DxvkCmdBuffer::ExecBuffer, *res.imageView, util::pipelineStages(binding.stage), binding.access, binding.accessOp); m_cmd->track(res.imageView->image(), (binding.access & vk::AccessWriteMask) @@ -6141,15 +6215,26 @@ namespace dxvk { viewHandle = res.imageView->handle(binding.viewType); if (viewHandle) { - descriptorInfo.image.sampler = 
res.sampler->handle(); - descriptorInfo.image.imageView = viewHandle; - descriptorInfo.image.imageLayout = res.imageView->image()->info().layout; + if (likely(!res.imageView->isMultisampled() || binding.isMultisampled)) { + descriptorInfo.image.sampler = res.sampler->handle(); + descriptorInfo.image.imageView = viewHandle; + descriptorInfo.image.imageLayout = res.imageView->defaultLayout(); - if (BindPoint == VK_PIPELINE_BIND_POINT_COMPUTE || unlikely(res.imageView->image()->hasGfxStores())) - accessImage(DxvkCmdBuffer::ExecBuffer, *res.imageView, util::pipelineStages(binding.stage), binding.access, DxvkAccessOp::None); + if (BindPoint == VK_PIPELINE_BIND_POINT_COMPUTE || unlikely(res.imageView->hasGfxStores())) + accessImage(DxvkCmdBuffer::ExecBuffer, *res.imageView, util::pipelineStages(binding.stage), binding.access, DxvkAccessOp::None); - m_cmd->track(res.sampler); - m_cmd->track(res.imageView->image(), DxvkAccess::Read); + m_cmd->track(res.imageView->image(), DxvkAccess::Read); + m_cmd->track(res.sampler); + } else { + auto view = m_implicitResolves.getResolveView(*res.imageView, m_trackingId); + + descriptorInfo.image.sampler = res.sampler->handle(); + descriptorInfo.image.imageView = view->handle(binding.viewType); + descriptorInfo.image.imageLayout = view->defaultLayout(); + + m_cmd->track(view->image(), DxvkAccess::Read); + m_cmd->track(res.sampler); + } } else { descriptorInfo.image.sampler = m_common->dummyResources().samplerHandle(); descriptorInfo.image.imageView = VK_NULL_HANDLE; @@ -6802,7 +6887,8 @@ namespace dxvk { &m_state.pc.data[pushConstRange.offset]); } - + + template bool DxvkContext::commitComputeState() { this->spillRenderPass(false); @@ -6824,9 +6910,15 @@ namespace dxvk { if (unlikely(m_features.test(DxvkContextFeature::DebugUtils))) this->beginBarrierControlDebugRegion(); - if (m_descriptorState.hasDirtyComputeSets()) + if (m_descriptorState.hasDirtyComputeSets()) { this->updateComputeShaderResources(); + if (unlikely(Resolve && 
m_implicitResolves.hasPendingResolves())) { + this->flushImplicitResolves(); + return this->commitComputeState(); + } + } + if (m_flags.test(DxvkContextFlag::DirtyPushConstants)) this->updatePushConstants(); @@ -6834,7 +6926,7 @@ namespace dxvk { } - template + template bool DxvkContext::commitGraphicsState() { if (m_flags.test(DxvkContextFlag::GpDirtyPipeline)) { if (unlikely(!this->updateGraphicsPipeline())) @@ -6894,8 +6986,18 @@ namespace dxvk { return false; } - if (m_descriptorState.hasDirtyGraphicsSets()) + if (m_descriptorState.hasDirtyGraphicsSets()) { this->updateGraphicsShaderResources(); + + if (unlikely(Resolve && m_implicitResolves.hasPendingResolves())) { + // If implicit resolves are required for any of the shader bindings, we need + // to discard all the state setup that we've done so far and try again + this->spillRenderPass(true); + this->flushImplicitResolves(); + + return this->commitGraphicsState(); + } + } if (m_state.gp.flags.test(DxvkGraphicsPipelineFlag::HasTransformFeedback)) this->updateTransformFeedbackState(); @@ -6965,7 +7067,7 @@ namespace dxvk { case VK_DESCRIPTOR_TYPE_STORAGE_IMAGE: { if (slot.imageView) { - if (!IsGraphics || slot.imageView->image()->hasGfxStores()) + if (!IsGraphics || slot.imageView->hasGfxStores()) requiresBarrier |= checkImageViewBarrier(slot.imageView, binding.access, binding.accessOp); else if (binding.access & vk::AccessWriteMask) requiresBarrier |= !slot.imageView->image()->trackGfxStores(); @@ -6974,7 +7076,7 @@ namespace dxvk { case VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE: case VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER: { - if (slot.imageView && (!IsGraphics || slot.imageView->image()->hasGfxStores())) + if (slot.imageView && (!IsGraphics || slot.imageView->hasGfxStores())) requiresBarrier |= checkImageViewBarrier(slot.imageView, binding.access, DxvkAccessOp::None); } break; @@ -7649,6 +7751,34 @@ namespace dxvk { } + void DxvkContext::flushImplicitResolves() { + spillRenderPass(true); + + DxvkImplicitResolveOp 
op; + + while (m_implicitResolves.extractResolve(op)) { + prepareImage(op.inputImage, vk::makeSubresourceRange(op.resolveRegion.srcSubresource)); + prepareImage(op.resolveImage, vk::makeSubresourceRange(op.resolveRegion.dstSubresource)); + + // Always do a SAMPLE_ZERO resolve here since that's less expensive and closer to what + // happens on native AMD anyway. Need to use a shader in case we are dealing with a + // non-integer color image since render pass resolves only support AVERAGE. + auto formatInfo = lookupFormatInfo(op.resolveFormat); + + bool useRp = (formatInfo->flags.any(DxvkFormatFlag::SampledSInt, DxvkFormatFlag::SampledUInt)) + || (formatInfo->aspectMask & (VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT)); + + if (useRp) { + resolveImageRp(op.resolveImage, op.inputImage, op.resolveRegion, + op.resolveFormat, VK_RESOLVE_MODE_SAMPLE_ZERO_BIT, VK_RESOLVE_MODE_SAMPLE_ZERO_BIT); + } else { + resolveImageFb(op.resolveImage, op.inputImage, op.resolveRegion, + op.resolveFormat, VK_RESOLVE_MODE_SAMPLE_ZERO_BIT, VK_RESOLVE_MODE_NONE); + } + } + } + + void DxvkContext::beginCurrentCommands() { beginActiveDebugRegions(); diff --git a/src/dxvk/dxvk_context.h b/src/dxvk/dxvk_context.h index 81cb14bf7..86fe31bdc 100644 --- a/src/dxvk/dxvk_context.h +++ b/src/dxvk/dxvk_context.h @@ -4,6 +4,7 @@ #include "dxvk_bind_mask.h" #include "dxvk_cmdlist.h" #include "dxvk_context_state.h" +#include "dxvk_implicit_resolve.h" #include "dxvk_latency.h" #include "dxvk_objects.h" #include "dxvk_queue.h" @@ -1461,6 +1462,8 @@ namespace dxvk { uint64_t m_latencyFrameId = 0u; bool m_endLatencyTracking = false; + DxvkImplicitResolveTracker m_implicitResolves; + void blitImageFb( Rc dstView, const VkOffset3D* dstOffsets, @@ -1616,13 +1619,14 @@ namespace dxvk { const Rc& srcImage, const VkImageResolve& region); - void resolveImageDs( + void resolveImageRp( const Rc& dstImage, const Rc& srcImage, const VkImageResolve& region, - VkResolveModeFlagBits depthMode, + VkFormat format, 
+ VkResolveModeFlagBits mode, VkResolveModeFlagBits stencilMode); - + void resolveImageFb( const Rc& dstImage, const Rc& srcImage, @@ -1775,9 +1779,10 @@ namespace dxvk { template void updatePushConstants(); + template bool commitComputeState(); - template + template bool commitGraphicsState(); template @@ -1875,6 +1880,8 @@ namespace dxvk { void resizeDescriptorArrays( uint32_t bindingCount); + void flushImplicitResolves(); + void beginCurrentCommands(); void endCurrentCommands(); diff --git a/src/dxvk/dxvk_device.cpp b/src/dxvk/dxvk_device.cpp index 5a4999942..7b6aafb3a 100644 --- a/src/dxvk/dxvk_device.cpp +++ b/src/dxvk/dxvk_device.cpp @@ -427,9 +427,6 @@ namespace dxvk { && (m_adapter->matchesDriver(VK_DRIVER_ID_MESA_RADV_KHR) || m_adapter->matchesDriver(VK_DRIVER_ID_AMD_OPEN_SOURCE_KHR) || m_adapter->matchesDriver(VK_DRIVER_ID_AMD_PROPRIETARY_KHR)); - hints.preferFbResolve = m_features.amdShaderFragmentMask - && (m_adapter->matchesDriver(VK_DRIVER_ID_AMD_OPEN_SOURCE_KHR) - || m_adapter->matchesDriver(VK_DRIVER_ID_AMD_PROPRIETARY_KHR)); // Older Nvidia drivers sometimes use the wrong format // to interpret the clear color in render pass clears. 
@@ -454,8 +451,7 @@ namespace dxvk { // Be less aggressive on secondary command buffer usage on // drivers that do not natively support them - hints.preferPrimaryCmdBufs = !hints.preferRenderPassOps - || m_adapter->matchesDriver(VK_DRIVER_ID_MESA_HONEYKRISP); + hints.preferPrimaryCmdBufs = m_adapter->matchesDriver(VK_DRIVER_ID_MESA_HONEYKRISP); return hints; } diff --git a/src/dxvk/dxvk_device.h b/src/dxvk/dxvk_device.h index 9142a739d..0a1e622e5 100644 --- a/src/dxvk/dxvk_device.h +++ b/src/dxvk/dxvk_device.h @@ -36,7 +36,6 @@ namespace dxvk { */ struct DxvkDevicePerfHints { VkBool32 preferFbDepthStencilCopy : 1; - VkBool32 preferFbResolve : 1; VkBool32 renderPassClearFormatBug : 1; VkBool32 preferRenderPassOps : 1; VkBool32 preferPrimaryCmdBufs : 1; diff --git a/src/dxvk/dxvk_image.cpp b/src/dxvk/dxvk_image.cpp index c8cb51ff6..25a330b2d 100644 --- a/src/dxvk/dxvk_image.cpp +++ b/src/dxvk/dxvk_image.cpp @@ -216,6 +216,9 @@ namespace dxvk { allocationInfo.properties = m_properties; allocationInfo.mode = mode; + if (m_info.transient) + allocationInfo.mode.set(DxvkAllocationMode::NoDedicated); + return m_allocator->createImageResource(imageInfo, allocationInfo, sharedMemoryInfo); } @@ -234,23 +237,30 @@ namespace dxvk { // Self-assignment is possible here if we // just update the image properties + bool invalidateViews = false; m_storage = std::move(resource); if (m_storage != old) { m_imageInfo = m_storage->getImageInfo(); - m_version += 1u; if (unlikely(m_info.debugName)) updateDebugName(); + + invalidateViews = true; } + if ((m_info.access | usageInfo.access) != m_info.access) + invalidateViews = true; + m_info.flags |= usageInfo.flags; m_info.usage |= usageInfo.usage; m_info.stages |= usageInfo.stages; m_info.access |= usageInfo.access; - if (usageInfo.layout != VK_IMAGE_LAYOUT_UNDEFINED) + if (usageInfo.layout != VK_IMAGE_LAYOUT_UNDEFINED) { m_info.layout = usageInfo.layout; + invalidateViews = true; + } if (usageInfo.colorSpace != VK_COLOR_SPACE_MAX_ENUM_KHR) 
m_info.colorSpace = usageInfo.colorSpace; @@ -266,6 +276,10 @@ namespace dxvk { } m_stableAddress |= usageInfo.stableGpuAddress; + + if (invalidateViews) + m_version += 1u; + return old; } @@ -423,8 +437,9 @@ namespace dxvk { DxvkImageView::DxvkImageView( DxvkImage* image, const DxvkImageViewKey& key) - : m_image(image), m_key(key) { - + : m_image (image), + m_key (key) { + updateProperties(); } @@ -509,6 +524,9 @@ namespace dxvk { void DxvkImageView::updateViews() { + // Latch updated image properties + updateProperties(); + // Update all views that are not currently null for (uint32_t i = 0; i < m_views.size(); i++) { if (m_views[i]) @@ -517,5 +535,12 @@ namespace dxvk { m_version = m_image->m_version; } - + + + void DxvkImageView::updateProperties() { + m_properties.layout = m_image->info().layout; + m_properties.samples = m_image->info().sampleCount; + m_properties.access = m_image->info().access; + } + } diff --git a/src/dxvk/dxvk_image.h b/src/dxvk/dxvk_image.h index b303218d6..b6126bdc7 100644 --- a/src/dxvk/dxvk_image.h +++ b/src/dxvk/dxvk_image.h @@ -63,6 +63,9 @@ namespace dxvk { // to be in its default layout after each submission VkBool32 shared = VK_FALSE; + // Image is likely to have a short lifetime + VkBool32 transient = VK_FALSE; + // Image view formats that can // be used with this image uint32_t viewFormatCount = 0; @@ -104,6 +107,18 @@ namespace dxvk { }; + /** + * \brief Image properties stored in the view + * + * Used to reduce some pointer chasing. + */ + struct DxvkImageViewImageProperties { + VkImageLayout layout = VK_IMAGE_LAYOUT_UNDEFINED; + VkSampleCountFlagBits samples = VK_SAMPLE_COUNT_FLAG_BITS_MAX_ENUM; + VkAccessFlags access = 0u; + }; + + /** * \brief Virtual image view * @@ -272,6 +287,30 @@ namespace dxvk { view->imageSubresources()); } + /** + * \brief Queries the default image layout + * + * Used when binding the view as a descriptor. 
+ * \returns Default image layout + */ + VkImageLayout defaultLayout() const { + return m_properties.layout; + } + + /** + * \brief Checks whether the image is multisampled + * \returns \c true if the image is multisampled + */ + bool isMultisampled() const { + return m_properties.samples > VK_SAMPLE_COUNT_1_BIT; + } + + /** + * \brief Checks whether the image has graphics stores + * \returns \c true if the image has graphics pipeline stores + */ + bool hasGfxStores() const; + private: DxvkImage* m_image = nullptr; @@ -279,12 +318,16 @@ namespace dxvk { uint32_t m_version = 0u; + DxvkImageViewImageProperties m_properties = { }; + std::array m_views = { }; VkImageView createView(VkImageViewType type) const; void updateViews(); + void updateProperties(); + }; @@ -760,4 +803,10 @@ namespace dxvk { return m_views[viewType]; } + + inline bool DxvkImageView::hasGfxStores() const { + return (m_properties.access & VK_ACCESS_SHADER_WRITE_BIT) + && (m_image->hasGfxStores()); + } + } diff --git a/src/dxvk/dxvk_implicit_resolve.cpp b/src/dxvk/dxvk_implicit_resolve.cpp new file mode 100644 index 000000000..e1c284a99 --- /dev/null +++ b/src/dxvk/dxvk_implicit_resolve.cpp @@ -0,0 +1,177 @@ +#include + +#include "dxvk_device.h" +#include "dxvk_implicit_resolve.h" + +namespace dxvk { + + DxvkImplicitResolveTracker::DxvkImplicitResolveTracker(Rc device) + : m_device(std::move(device)) { + + } + + + DxvkImplicitResolveTracker::~DxvkImplicitResolveTracker() { + + } + + + Rc DxvkImplicitResolveTracker::getResolveView( + DxvkImageView& view, + uint64_t trackingId) { + // We generally only expect to have one or two views at most in games + // that hit this path at all, so iterating over the array is fine + for (auto& v : m_resolveViews) { + if (v.inputView == &view) { + addResolveOp(v); + return v.resolveView; + } + } + + // Create a new resolve image with only the array layers covered by the + // input view. We expect resolve images to be somewhat short-lived. 
+ DxvkImageCreateInfo imageInfo = view.image()->info(); + + DxvkImageCreateInfo resolveInfo = { }; + resolveInfo.type = imageInfo.type; + resolveInfo.format = view.info().format; + resolveInfo.sampleCount = VK_SAMPLE_COUNT_1_BIT; + resolveInfo.extent = imageInfo.extent; + resolveInfo.numLayers = view.info().layerCount; + resolveInfo.mipLevels = 1u; + resolveInfo.usage = VK_IMAGE_USAGE_SAMPLED_BIT; + resolveInfo.stages = m_device->getShaderPipelineStages(); + resolveInfo.access = VK_ACCESS_SHADER_READ_BIT; + resolveInfo.tiling = VK_IMAGE_TILING_OPTIMAL; + resolveInfo.layout = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL; + resolveInfo.transient = VK_TRUE; + resolveInfo.debugName = "Resolve image"; + + if (view.info().aspects & (VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT)) { + resolveInfo.usage |= VK_IMAGE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT; + resolveInfo.stages |= VK_PIPELINE_STAGE_LATE_FRAGMENT_TESTS_BIT; + resolveInfo.access |= VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT; + } else { + resolveInfo.usage |= VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT; + resolveInfo.stages |= VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT; + resolveInfo.access |= VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT; + } + + Rc image = m_device->createImage(resolveInfo, VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT); + + cleanup(image->getMemoryInfo().size, trackingId); + + DxvkImageViewKey viewKey = view.info(); + viewKey.usage = VK_IMAGE_USAGE_SAMPLED_BIT; + viewKey.layerIndex = 0u; + + auto& resolveView = m_resolveViews.emplace_back(); + resolveView.inputView = &view; + resolveView.resolveView = image->createView(viewKey); + + addResolveOp(resolveView); + + return resolveView.resolveView; + } + + + bool DxvkImplicitResolveTracker::extractResolve( + DxvkImplicitResolveOp& resolve) { + if (m_resolveOps.empty()) { + resolve = DxvkImplicitResolveOp(); + return false; + } + + resolve = std::move(m_resolveOps.back()); + m_resolveOps.pop_back(); + return true; + } + + + void 
DxvkImplicitResolveTracker::invalidate( + const DxvkImage& image, + const VkImageSubresourceRange& subresources) { + for (auto& v : m_resolveViews) { + if (v.resolveDone && v.inputView->image() == &image) { + auto viewSubresource = v.inputView->imageSubresources(); + + if ((subresources.aspectMask & viewSubresource.aspectMask) + && vk::checkSubresourceRangeOverlap(viewSubresource, subresources)) + v.resolveDone = false; + } + } + } + + + void DxvkImplicitResolveTracker::cleanup( + uint64_t trackingId) { + cleanup(0u, trackingId); + } + + + void DxvkImplicitResolveTracker::addResolveOp( + DxvkImplicitResolveView& view) { + if (view.resolveDone) + return; + + // Determine resolve parameters based on the view format rather than the + // image format, since this will more likely represent what the app is + // trying to do + auto format = view.inputView->formatInfo(); + + auto& op = m_resolveOps.emplace_back(); + op.inputImage = view.inputView->image(); + op.resolveImage = view.resolveView->image(); + op.resolveRegion.srcSubresource = vk::pickSubresourceLayers(view.inputView->imageSubresources(), 0u); + op.resolveRegion.srcSubresource.aspectMask = format->aspectMask; + op.resolveRegion.dstSubresource = vk::pickSubresourceLayers(view.resolveView->imageSubresources(), 0u); + op.resolveRegion.dstSubresource.aspectMask = format->aspectMask; + op.resolveRegion.dstSubresource.baseArrayLayer = 0u; + op.resolveRegion.extent = view.resolveView->mipLevelExtent(0u); + op.resolveFormat = view.inputView->info().format; + + view.resolveDone = true; + } + + + void DxvkImplicitResolveTracker::cleanup( + VkDeviceSize allocationSize, + uint64_t trackingId) { + constexpr VkDeviceSize MaxMemory = 64ull << 20u; + + constexpr uint64_t MaxLifetime = 256u; + constexpr uint64_t MinLifetime = 16u; + + // Eliminate images that haven't been used in a long time + for (auto i = m_resolveViews.begin(); i != m_resolveViews.end(); ) { + if (i->resolveView->image()->getTrackId() + MaxLifetime < 
trackingId) { + i = m_resolveViews.erase(i); + } else { + allocationSize += i->resolveView->image()->getMemoryInfo().size; + i++; + } + } + + // If we're using a large amount of memory for resolve images, eliminate + // the least recently used resolve images until we drop below the size + // threshold again. + while (allocationSize > MaxMemory) { + auto lr = m_resolveViews.end(); + + for (auto i = m_resolveViews.begin(); i != m_resolveViews.end(); i++) { + if (i->resolveView->image()->getTrackId() + MinLifetime < trackingId) { + if (lr == m_resolveViews.end() + || lr->resolveView->image()->getTrackId() > i->resolveView->image()->getTrackId()) + lr = i; + } + } + + if (lr == m_resolveViews.end()) + break; + + allocationSize -= lr->resolveView->image()->getMemoryInfo().size; + m_resolveViews.erase(lr); + } + } + +} diff --git a/src/dxvk/dxvk_implicit_resolve.h b/src/dxvk/dxvk_implicit_resolve.h new file mode 100644 index 000000000..d75681976 --- /dev/null +++ b/src/dxvk/dxvk_implicit_resolve.h @@ -0,0 +1,107 @@ +#pragma once + +#include + +#include "dxvk_image.h" + +#include "../util/util_small_vector.h" + +namespace dxvk { + + struct DxvkImplicitResolveView { + Rc inputView = nullptr; + Rc resolveView = nullptr; + bool resolveDone = false; + }; + + + struct DxvkImplicitResolveOp { + Rc inputImage = nullptr; + Rc resolveImage = nullptr; + VkImageResolve resolveRegion = { }; + VkFormat resolveFormat = VK_FORMAT_UNDEFINED; + }; + + + class DxvkDevice; + + class DxvkImplicitResolveTracker { + + public: + + DxvkImplicitResolveTracker(Rc device); + + ~DxvkImplicitResolveTracker(); + + /** + * \brief Checks whether there are pending resolves + * + * \returns \c true if there are any resolves that must
+ */ + bool hasPendingResolves() const { + return !m_resolveOps.empty(); + } + + /** + * \brief Retrieves resolve image view for a given input view + * + * \param [in] view Multisampled view bound to the context + * \returns Non-multisampled view to replace the bound view with + */ + Rc getResolveView( + DxvkImageView& view, + uint64_t trackingId); + + /** + * \brief Extracts a resolve operation to execute + * + * \param [out] resolve Extracted resolve parameters + * \returns \c true if a resolve was extracted, \c false + * if all resolves have already been processed. + */ + bool extractResolve( + DxvkImplicitResolveOp& resolve); + + /** + * \brief Invalidates resolve cache for a given set of image subresources + * + * Must be called any time the given set of subresources of this + * resource is written, so that the corresponding resolve image + * can get updated the next time it is read. Must not be called + * for any subresource that is only being read, since that may + * cause problems with read-only depth-stencil access. + * \param [in] image The multisampled image + * \param [in] subresources Image subresources written + */ + void invalidate( + const DxvkImage& image, + const VkImageSubresourceRange& subresources); + + /** + * \brief Cleans up resolve image cache + * + * Destroys resolve images that have not been used in a while + * in order to reduce memory wasted on unused images. 
+ * \param [in] trackingId Current context command list ID + */ + void cleanup( + uint64_t trackingId); + + private: + + Rc m_device; + + std::vector m_resolveViews; + std::vector m_resolveOps; + + void addResolveOp( + DxvkImplicitResolveView& view); + + void cleanup( + VkDeviceSize allocationSize, + uint64_t trackingId); + + }; + +} diff --git a/src/dxvk/dxvk_memory.cpp b/src/dxvk/dxvk_memory.cpp index c31dafb83..b6a61c6d5 100644 --- a/src/dxvk/dxvk_memory.cpp +++ b/src/dxvk/dxvk_memory.cpp @@ -932,6 +932,9 @@ namespace dxvk { dedicatedRequirements.prefersDedicatedAllocation = VK_TRUE; } + if (!dedicatedRequirements.requiresDedicatedAllocation && allocationInfo.mode.test(DxvkAllocationMode::NoDedicated)) + dedicatedRequirements.prefersDedicatedAllocation = VK_FALSE; + Rc allocation; if (!(createInfo.flags & VK_IMAGE_CREATE_SPARSE_BINDING_BIT)) { diff --git a/src/dxvk/dxvk_memory.h b/src/dxvk/dxvk_memory.h index bd7f685d1..c432df0f2 100644 --- a/src/dxvk/dxvk_memory.h +++ b/src/dxvk/dxvk_memory.h @@ -959,6 +959,8 @@ namespace dxvk { /// If set, the allocation will only succeed if it /// can be suballocated from an existing chunk. NoAllocation = 1, + /// Avoid using a dedicated allocation for this resource + NoDedicated = 2, eFlagEnum }; diff --git a/src/dxvk/dxvk_meta_resolve.cpp b/src/dxvk/dxvk_meta_resolve.cpp index 7362dc4c4..be8b0be6f 100644 --- a/src/dxvk/dxvk_meta_resolve.cpp +++ b/src/dxvk/dxvk_meta_resolve.cpp @@ -8,7 +8,6 @@ #include #include #include -#include #include #include @@ -52,9 +51,7 @@ namespace dxvk { DxvkMetaResolveObjects::DxvkMetaResolveObjects(const DxvkDevice* device) : m_vkd (device->vkd()), - m_shaderFragF (device->features().amdShaderFragmentMask - ? 
createShaderModule(dxvk_resolve_frag_f_amd) - : createShaderModule(dxvk_resolve_frag_f)), + m_shaderFragF (createShaderModule(dxvk_resolve_frag_f)), m_shaderFragU (createShaderModule(dxvk_resolve_frag_u)), m_shaderFragI (createShaderModule(dxvk_resolve_frag_i)), m_shaderFragD (createShaderModule(dxvk_resolve_frag_d)) { diff --git a/src/dxvk/dxvk_pipelayout.cpp b/src/dxvk/dxvk_pipelayout.cpp index cf3f69553..e3da3e1b4 100644 --- a/src/dxvk/dxvk_pipelayout.cpp +++ b/src/dxvk/dxvk_pipelayout.cpp @@ -37,7 +37,9 @@ namespace dxvk { && viewType == other.viewType && stage == other.stage && access == other.access - && uboSet == other.uboSet; + && accessOp == other.accessOp + && uboSet == other.uboSet + && isMultisampled == other.isMultisampled; } @@ -48,7 +50,9 @@ namespace dxvk { hash.add(uint32_t(viewType)); hash.add(uint32_t(stage)); hash.add(access); + hash.add(uint32_t(accessOp)); hash.add(uint32_t(uboSet)); + hash.add(uint32_t(isMultisampled)); return hash; } diff --git a/src/dxvk/dxvk_pipelayout.h b/src/dxvk/dxvk_pipelayout.h index 5a3beacfa..fc5485e5c 100644 --- a/src/dxvk/dxvk_pipelayout.h +++ b/src/dxvk/dxvk_pipelayout.h @@ -50,13 +50,14 @@ namespace dxvk { * a given shader, or for the whole pipeline. 
*/ struct DxvkBindingInfo { - VkDescriptorType descriptorType; ///< Vulkan descriptor type - uint32_t resourceBinding; ///< API binding slot for the resource - VkImageViewType viewType; ///< Image view type - VkShaderStageFlagBits stage; ///< Shader stage - VkAccessFlags access; ///< Access mask for the resource - VkBool32 uboSet; ///< Whether to include this in the UBO set - DxvkAccessOp accessOp; ///< Order-invariant store type, if any + VkDescriptorType descriptorType = VK_DESCRIPTOR_TYPE_MAX_ENUM; ///< Vulkan descriptor type + uint32_t resourceBinding = 0u; ///< API binding slot for the resource + VkImageViewType viewType = VK_IMAGE_VIEW_TYPE_MAX_ENUM; ///< Image view type + VkShaderStageFlagBits stage = VK_SHADER_STAGE_FLAG_BITS_MAX_ENUM; ///< Shader stage + VkAccessFlags access = 0u; ///< Access mask for the resource + DxvkAccessOp accessOp = DxvkAccessOp::None; ///< Order-invariant store type, if any + bool uboSet = false; ///< Whether to include this in the UBO set + bool isMultisampled = false; ///< Multisampled binding /** * \brief Computes descriptor set index for the given binding diff --git a/src/dxvk/dxvk_sparse.h b/src/dxvk/dxvk_sparse.h index e9f6f919f..6897ea9dd 100644 --- a/src/dxvk/dxvk_sparse.h +++ b/src/dxvk/dxvk_sparse.h @@ -535,6 +535,16 @@ namespace dxvk { return Rc::unsafeCreate(this); } + /** + * \brief Queries tracking ID + * + * Used to determine when a resource has last been used. 
+ * \returns Tracking ID + */ + uint64_t getTrackId() const { + return m_trackId >> 1u; + } + /** * \brief Sets tracked command list ID * diff --git a/src/dxvk/meson.build b/src/dxvk/meson.build index 9b2b07356..b6222ddd3 100644 --- a/src/dxvk/meson.build +++ b/src/dxvk/meson.build @@ -50,7 +50,6 @@ dxvk_shaders = files([ 'shaders/dxvk_resolve_frag_d.frag', 'shaders/dxvk_resolve_frag_ds.frag', 'shaders/dxvk_resolve_frag_f.frag', - 'shaders/dxvk_resolve_frag_f_amd.frag', 'shaders/dxvk_resolve_frag_i.frag', 'shaders/dxvk_resolve_frag_u.frag', @@ -89,6 +88,7 @@ dxvk_src = [ 'dxvk_gpu_query.cpp', 'dxvk_graphics.cpp', 'dxvk_image.cpp', + 'dxvk_implicit_resolve.cpp', 'dxvk_instance.cpp', 'dxvk_latency_builtin.cpp', 'dxvk_latency_reflex.cpp', diff --git a/src/dxvk/shaders/dxvk_resolve_common.glsl b/src/dxvk/shaders/dxvk_resolve_common.glsl new file mode 100644 index 000000000..3183c9337 --- /dev/null +++ b/src/dxvk/shaders/dxvk_resolve_common.glsl @@ -0,0 +1,35 @@ +#define VK_RESOLVE_MODE_NONE (0) +#define VK_RESOLVE_MODE_SAMPLE_ZERO_BIT (1 << 0) +#define VK_RESOLVE_MODE_AVERAGE_BIT (1 << 1) +#define VK_RESOLVE_MODE_MIN_BIT (1 << 2) +#define VK_RESOLVE_MODE_MAX_BIT (1 << 3) + +#define resolve_fn(name, type, load_fn) \ +type name(ivec3 coord, int samples, uint mode) { \ + if (mode == VK_RESOLVE_MODE_NONE) \ + return type(0); \ + type value = load_fn(coord, 0); \ + \ + switch (mode) { \ + case VK_RESOLVE_MODE_SAMPLE_ZERO_BIT: \ + return value; \ + \ + case VK_RESOLVE_MODE_AVERAGE_BIT: \ + for (int i = 1; i < samples; i++) \ + value += load_fn(coord, i); \ + value /= type(samples); \ + break; \ + \ + case VK_RESOLVE_MODE_MIN_BIT: \ + for (int i = 1; i < samples; i++) \ + value = min(value, load_fn(coord, i)); \ + break; \ + \ + case VK_RESOLVE_MODE_MAX_BIT: \ + for (int i = 1; i < samples; i++) \ + value = max(value, load_fn(coord, i)); \ + break; \ + } \ + \ + return value; \ +} diff --git a/src/dxvk/shaders/dxvk_resolve_frag_d.frag
b/src/dxvk/shaders/dxvk_resolve_frag_d.frag index 70152d014..a0b91ec6d 100644 --- a/src/dxvk/shaders/dxvk_resolve_frag_d.frag +++ b/src/dxvk/shaders/dxvk_resolve_frag_d.frag @@ -1,56 +1,28 @@ #version 450 +#extension GL_GOOGLE_include_directive : enable #extension GL_EXT_samplerless_texture_functions : enable -#define VK_RESOLVE_MODE_NONE (0) -#define VK_RESOLVE_MODE_SAMPLE_ZERO_BIT (1 << 0) -#define VK_RESOLVE_MODE_AVERAGE_BIT (1 << 1) -#define VK_RESOLVE_MODE_MIN_BIT (1 << 2) -#define VK_RESOLVE_MODE_MAX_BIT (1 << 3) +#include "dxvk_resolve_common.glsl" layout(constant_id = 0) const int c_samples = 1; -layout(constant_id = 1) const int c_mode_d = VK_RESOLVE_MODE_SAMPLE_ZERO_BIT; +layout(constant_id = 1) const int c_mode = VK_RESOLVE_MODE_SAMPLE_ZERO_BIT; layout(binding = 0) uniform texture2DMSArray s_depth; +float load_depth(ivec3 coord, int s) { + return texelFetch(s_depth, coord, s).r; +} + +resolve_fn(resolve_depth, float, load_depth) + layout(push_constant) uniform u_info_t { ivec2 offset; } u_info; -float resolve_depth(ivec3 coord) { - float depth = 0.0f; - - switch (c_mode_d) { - case VK_RESOLVE_MODE_SAMPLE_ZERO_BIT: - depth = texelFetch(s_depth, coord, 0).r; - break; - - case VK_RESOLVE_MODE_AVERAGE_BIT: - depth = texelFetch(s_depth, coord, 0).r; - for (int i = 1; i < c_samples; i++) - depth += texelFetch(s_depth, coord, i).r; - depth /= float(c_samples); - break; - - case VK_RESOLVE_MODE_MIN_BIT: - depth = texelFetch(s_depth, coord, 0).r; - for (int i = 1; i < c_samples; i++) - depth = min(depth, texelFetch(s_depth, coord, i).r); - break; - - case VK_RESOLVE_MODE_MAX_BIT: - depth = texelFetch(s_depth, coord, 0).r; - for (int i = 1; i < c_samples; i++) - depth = max(depth, texelFetch(s_depth, coord, i).r); - break; - } - - return depth; -} - void main() { ivec3 coord = ivec3(gl_FragCoord.xy + u_info.offset, gl_Layer); - gl_FragDepth = resolve_depth(coord); -} \ No newline at end of file + gl_FragDepth = resolve_depth(coord, c_samples, c_mode); +} diff 
--git a/src/dxvk/shaders/dxvk_resolve_frag_ds.frag b/src/dxvk/shaders/dxvk_resolve_frag_ds.frag index b92a03d71..a7bda7c93 100644 --- a/src/dxvk/shaders/dxvk_resolve_frag_ds.frag +++ b/src/dxvk/shaders/dxvk_resolve_frag_ds.frag @@ -1,13 +1,10 @@ #version 450 +#extension GL_GOOGLE_include_directive : enable #extension GL_ARB_shader_stencil_export : enable #extension GL_EXT_samplerless_texture_functions : enable -#define VK_RESOLVE_MODE_NONE (0) -#define VK_RESOLVE_MODE_SAMPLE_ZERO_BIT (1 << 0) -#define VK_RESOLVE_MODE_AVERAGE_BIT (1 << 1) -#define VK_RESOLVE_MODE_MIN_BIT (1 << 2) -#define VK_RESOLVE_MODE_MAX_BIT (1 << 3) +#include "dxvk_resolve_common.glsl" layout(constant_id = 0) const int c_samples = 1; layout(constant_id = 1) const int c_mode_d = VK_RESOLVE_MODE_SAMPLE_ZERO_BIT; @@ -16,69 +13,25 @@ layout(constant_id = 2) const int c_mode_s = VK_RESOLVE_MODE_SAMPLE_ZERO_BIT; layout(binding = 0) uniform texture2DMSArray s_depth; layout(binding = 1) uniform utexture2DMSArray s_stencil; +float load_depth(ivec3 coord, int s) { + return texelFetch(s_depth, coord, s).r; +} + +uint load_stencil(ivec3 coord, int s) { + return texelFetch(s_stencil, coord, s).r; +} + +resolve_fn(resolve_depth, float, load_depth) +resolve_fn(resolve_stencil, uint, load_stencil) + layout(push_constant) uniform u_info_t { ivec2 offset; } u_info; -float resolve_depth(ivec3 coord) { - float depth = 0.0f; - - switch (c_mode_d) { - case VK_RESOLVE_MODE_SAMPLE_ZERO_BIT: - depth = texelFetch(s_depth, coord, 0).r; - break; - - case VK_RESOLVE_MODE_AVERAGE_BIT: - depth = texelFetch(s_depth, coord, 0).r; - for (int i = 1; i < c_samples; i++) - depth += texelFetch(s_depth, coord, i).r; - depth /= float(c_samples); - break; - - case VK_RESOLVE_MODE_MIN_BIT: - depth = texelFetch(s_depth, coord, 0).r; - for (int i = 1; i < c_samples; i++) - depth = min(depth, texelFetch(s_depth, coord, i).r); - break; - - case VK_RESOLVE_MODE_MAX_BIT: - depth = texelFetch(s_depth, coord, 0).r; - for (int i = 1; i < 
c_samples; i++) - depth = max(depth, texelFetch(s_depth, coord, i).r); - break; - } - - return depth; -} - -int resolve_stencil(ivec3 coord) { - uint stencil = 0u; - - switch (c_mode_s) { - case VK_RESOLVE_MODE_SAMPLE_ZERO_BIT: - stencil = texelFetch(s_stencil, coord, 0).r; - break; - - case VK_RESOLVE_MODE_MIN_BIT: - stencil = texelFetch(s_stencil, coord, 0).r; - for (int i = 1; i < c_samples; i++) - stencil = min(stencil, texelFetch(s_stencil, coord, i).r); - break; - - case VK_RESOLVE_MODE_MAX_BIT: - stencil = texelFetch(s_stencil, coord, 0).r; - for (int i = 1; i < c_samples; i++) - stencil = max(stencil, texelFetch(s_stencil, coord, i).r); - break; - } - - return int(stencil); -} - void main() { ivec3 coord = ivec3(gl_FragCoord.xy + u_info.offset, gl_Layer); - gl_FragDepth = resolve_depth(coord); - gl_FragStencilRefARB = resolve_stencil(coord); -} \ No newline at end of file + gl_FragDepth = resolve_depth(coord, c_samples, c_mode_d); + gl_FragStencilRefARB = int(resolve_stencil(coord, c_samples, c_mode_s)); +} diff --git a/src/dxvk/shaders/dxvk_resolve_frag_f.frag b/src/dxvk/shaders/dxvk_resolve_frag_f.frag index 2c7442c14..8fbd642de 100644 --- a/src/dxvk/shaders/dxvk_resolve_frag_f.frag +++ b/src/dxvk/shaders/dxvk_resolve_frag_f.frag @@ -1,13 +1,23 @@ #version 450 +#extension GL_GOOGLE_include_directive : enable #extension GL_EXT_samplerless_texture_functions : enable +#include "dxvk_resolve_common.glsl" + layout(constant_id = 0) const int c_samples = 1; +layout(constant_id = 1) const int c_mode = VK_RESOLVE_MODE_SAMPLE_ZERO_BIT; layout(binding = 0) uniform texture2DMSArray s_image; layout(location = 0) out vec4 o_color; +vec4 load_color(ivec3 coord, int s) { + return texelFetch(s_image, coord, s); +} + +resolve_fn(resolve_color, vec4, load_color) + layout(push_constant) uniform u_info_t { ivec2 offset; @@ -15,8 +25,5 @@ uniform u_info_t { void main() { ivec3 coord = ivec3(gl_FragCoord.xy + u_info.offset, gl_Layer); - vec4 color = vec4(0.0f); - for (int i = 
0; i < c_samples; i++) - color += texelFetch(s_image, coord, i); - o_color = color / float(c_samples); -} \ No newline at end of file + o_color = resolve_color(coord, c_samples, c_mode); +} diff --git a/src/dxvk/shaders/dxvk_resolve_frag_f_amd.frag b/src/dxvk/shaders/dxvk_resolve_frag_f_amd.frag deleted file mode 100644 index a550e3507..000000000 --- a/src/dxvk/shaders/dxvk_resolve_frag_f_amd.frag +++ /dev/null @@ -1,56 +0,0 @@ -#version 450 - -#extension GL_EXT_samplerless_texture_functions : enable -#extension GL_EXT_spirv_intrinsics : enable - -// GL_AMD_shader_fragment_mask was never updated to support -// sampler-less functions, so we have to define these manually -spirv_instruction(extensions = ["SPV_AMD_shader_fragment_mask"], capabilities = [5010], id = 5011) -uint fragment_mask_fetch(texture2DMSArray tex, ivec3 coord); - -spirv_instruction(extensions = ["SPV_AMD_shader_fragment_mask"], capabilities = [5010], id = 5012) -vec4 fragment_fetch(texture2DMSArray tex, ivec3 coord, uint index); - -layout(constant_id = 0) const int c_samples = 1; - -layout(set = 0, binding = 0) -uniform texture2DMSArray s_image; - -layout(location = 0) out vec4 o_color; - -layout(push_constant) -uniform u_info_t { - ivec2 offset; -} u_info; - -void main() { - ivec3 coord = ivec3(gl_FragCoord.xy + u_info.offset, gl_Layer); - - // get a four-bit fragment index for each sample - uint fragMask = fragment_mask_fetch(s_image, coord); - - // count number of occurences of each fragment - // index in one four-bit counter for each sample - uint fragCount = 0u; - - for (int i = 0; i < 4 * c_samples; i += 4) { - uint fragIndex = bitfieldExtract(fragMask, i, 4); - fragCount += 1u << (fragIndex << 2); - } - - // perform necessary texture lookups to compute - // final fragment color - o_color = vec4(0.0f); - - while (fragCount != 0) { - int fragIndex = findLSB(fragCount) >> 2; - int fragShift = fragIndex << 2; - - o_color += fragment_fetch(s_image, coord, fragIndex) - * 
float(bitfieldExtract(fragCount, fragShift, 4)); - - fragCount = bitfieldInsert(fragCount, 0, fragShift, 4); - } - - o_color /= float(c_samples); -} diff --git a/src/dxvk/shaders/dxvk_resolve_frag_i.frag b/src/dxvk/shaders/dxvk_resolve_frag_i.frag index a721897a5..094954457 100644 --- a/src/dxvk/shaders/dxvk_resolve_frag_i.frag +++ b/src/dxvk/shaders/dxvk_resolve_frag_i.frag @@ -1,11 +1,23 @@ #version 450 +#extension GL_GOOGLE_include_directive : enable #extension GL_EXT_samplerless_texture_functions : enable +#include "dxvk_resolve_common.glsl" + +layout(constant_id = 0) const int c_samples = 1; +layout(constant_id = 1) const int c_mode = VK_RESOLVE_MODE_SAMPLE_ZERO_BIT; + layout(binding = 0) uniform itexture2DMSArray s_image; layout(location = 0) out ivec4 o_color; +ivec4 load_color(ivec3 coord, int s) { + return texelFetch(s_image, coord, s); +} + +resolve_fn(resolve_color, ivec4, load_color) + layout(push_constant) uniform u_info_t { ivec2 offset; @@ -13,5 +25,5 @@ uniform u_info_t { void main() { ivec3 coord = ivec3(gl_FragCoord.xy + u_info.offset, gl_Layer); - o_color = texelFetch(s_image, coord, 0); -} \ No newline at end of file + o_color = resolve_color(coord, c_samples, c_mode); +} diff --git a/src/dxvk/shaders/dxvk_resolve_frag_u.frag b/src/dxvk/shaders/dxvk_resolve_frag_u.frag index f7b4e73da..ab8b3d18a 100644 --- a/src/dxvk/shaders/dxvk_resolve_frag_u.frag +++ b/src/dxvk/shaders/dxvk_resolve_frag_u.frag @@ -1,10 +1,22 @@ #version 450 +#extension GL_GOOGLE_include_directive : enable #extension GL_EXT_samplerless_texture_functions : enable +#include "dxvk_resolve_common.glsl" + +layout(constant_id = 0) const int c_samples = 1; +layout(constant_id = 1) const int c_mode = VK_RESOLVE_MODE_SAMPLE_ZERO_BIT; + layout(binding = 0) uniform utexture2DMSArray s_image; -layout(location = 0) out uvec4 o_color; +layout(location = 0) out uvec4 o_color; + +uvec4 load_color(ivec3 coord, int s) { + return texelFetch(s_image, coord, s); +} + +resolve_fn(resolve_color,
uvec4, load_color) layout(push_constant) uniform u_info_t { @@ -13,5 +25,5 @@ uniform u_info_t { void main() { ivec3 coord = ivec3(gl_FragCoord.xy + u_info.offset, gl_Layer); - o_color = texelFetch(s_image, coord, 0); -} \ No newline at end of file + o_color = resolve_color(coord, c_samples, c_mode); +}