[dxvk] Optimize flush heuristic for low-latency

This can possibly be optimized further, but just changing these numbers already has a huge effect, especially for games that have a small number of submissions to begin with.
This commit is contained in:
netborg 2025-02-28 08:05:46 +01:00
parent bee72c27c8
commit 0d018451fd
4 changed files with 19 additions and 9 deletions

View file

@ -2,6 +2,7 @@
#include "dxvk_framepacer_mode_low_latency.h" #include "dxvk_framepacer_mode_low_latency.h"
#include "dxvk_framepacer_mode_min_latency.h" #include "dxvk_framepacer_mode_min_latency.h"
#include "dxvk_options.h" #include "dxvk_options.h"
#include "../../util/util_flush.h"
#include "../../util/util_env.h" #include "../../util/util_env.h"
#include "../../util/log/log.h" #include "../../util/log/log.h"
@ -40,11 +41,15 @@ namespace dxvk {
case FramePacerMode::LOW_LATENCY: case FramePacerMode::LOW_LATENCY:
Logger::info( "Frame pace: low-latency" ); Logger::info( "Frame pace: low-latency" );
GpuFlushTracker::m_minPendingSubmissions = 1;
GpuFlushTracker::m_minChunkCount = 1;
m_mode = std::make_unique<LowLatencyMode>(mode, &m_latencyMarkersStorage, options); m_mode = std::make_unique<LowLatencyMode>(mode, &m_latencyMarkersStorage, options);
break; break;
case FramePacerMode::MIN_LATENCY: case FramePacerMode::MIN_LATENCY:
Logger::info( "Frame pace: min-latency" ); Logger::info( "Frame pace: min-latency" );
GpuFlushTracker::m_minPendingSubmissions = 1;
GpuFlushTracker::m_minChunkCount = 1;
m_mode = std::make_unique<MinLatencyMode>(mode, &m_latencyMarkersStorage); m_mode = std::make_unique<MinLatencyMode>(mode, &m_latencyMarkersStorage);
break; break;
} }

View file

@ -136,7 +136,7 @@ namespace dxvk {
// not implemented methods // not implemented methods
void notifyCpuPresentBegin( uint64_t frameId) override { } void notifyCpuPresentBegin( uint64_t frameId ) override { }
void notifyCpuPresentEnd( uint64_t frameId ) override { } void notifyCpuPresentEnd( uint64_t frameId ) override { }
void notifyQueuePresentEnd( uint64_t frameId, VkResult status) override { } void notifyQueuePresentEnd( uint64_t frameId, VkResult status) override { }
void notifyGpuExecutionBegin( uint64_t frameId ) override { } void notifyGpuExecutionBegin( uint64_t frameId ) override { }

View file

@ -2,6 +2,10 @@
namespace dxvk { namespace dxvk {
std::atomic<uint32_t> GpuFlushTracker::m_minPendingSubmissions = { 2 };
std::atomic<uint32_t> GpuFlushTracker::m_minChunkCount = { 3 };
std::atomic<uint32_t> GpuFlushTracker::m_maxChunkCount = { 20 };
GpuFlushTracker::GpuFlushTracker(GpuFlushType maxType) GpuFlushTracker::GpuFlushTracker(GpuFlushType maxType)
: m_maxType(maxType) { : m_maxType(maxType) {
@ -11,10 +15,6 @@ namespace dxvk {
GpuFlushType flushType, GpuFlushType flushType,
uint64_t chunkId, uint64_t chunkId,
uint32_t lastCompleteSubmissionId) { uint32_t lastCompleteSubmissionId) {
constexpr uint32_t minPendingSubmissions = 2;
constexpr uint32_t minChunkCount = 3u;
constexpr uint32_t maxChunkCount = 20u;
// Do not flush if there is nothing to flush // Do not flush if there is nothing to flush
uint32_t chunkCount = uint32_t(chunkId - m_lastFlushChunkId); uint32_t chunkCount = uint32_t(chunkId - m_lastFlushChunkId);
@ -42,14 +42,14 @@ namespace dxvk {
case GpuFlushType::ImplicitStrongHint: { case GpuFlushType::ImplicitStrongHint: {
// Flush aggressively with a strong hint to reduce readback latency. // Flush aggressively with a strong hint to reduce readback latency.
return chunkCount >= minChunkCount; return chunkCount >= m_minChunkCount;
} }
case GpuFlushType::ImplicitMediumHint: case GpuFlushType::ImplicitMediumHint:
case GpuFlushType::ImplicitWeakHint: { case GpuFlushType::ImplicitWeakHint: {
// Aim for a higher number of chunks per submission with // Aim for a higher number of chunks per submission with
// a weak hint in order to avoid submitting too often. // a weak hint in order to avoid submitting too often.
if (chunkCount < 2 * minChunkCount) if (chunkCount < 2 * m_minChunkCount)
return false; return false;
// Actual heuristic is shared with synchronization commands // Actual heuristic is shared with synchronization commands
@ -60,13 +60,13 @@ namespace dxvk {
// required if the application is spinning on a query or resource. // required if the application is spinning on a query or resource.
uint32_t pendingSubmissions = uint32_t(m_lastFlushSubmissionId - lastCompleteSubmissionId); uint32_t pendingSubmissions = uint32_t(m_lastFlushSubmissionId - lastCompleteSubmissionId);
if (pendingSubmissions < minPendingSubmissions) if (pendingSubmissions < m_minPendingSubmissions)
return true; return true;
// Use the number of pending submissions to decide whether to flush. Other // Use the number of pending submissions to decide whether to flush. Other
// than ignoring the minimum chunk count condition, we should treat this // than ignoring the minimum chunk count condition, we should treat this
// the same as weak hints to avoid unnecessary synchronization. // the same as weak hints to avoid unnecessary synchronization.
uint32_t threshold = std::min(maxChunkCount, pendingSubmissions * minChunkCount); uint32_t threshold = std::min(m_maxChunkCount.load(), pendingSubmissions * m_minChunkCount.load());
return chunkCount >= threshold; return chunkCount >= threshold;
} }
} }

View file

@ -3,6 +3,7 @@
#include <cstddef> #include <cstddef>
#include <cstdint> #include <cstdint>
#include <vector> #include <vector>
#include <atomic>
namespace dxvk { namespace dxvk {
@ -64,6 +65,10 @@ namespace dxvk {
uint64_t chunkId, uint64_t chunkId,
uint64_t submissionId); uint64_t submissionId);
static std::atomic<uint32_t> m_minPendingSubmissions;
static std::atomic<uint32_t> m_minChunkCount;
static std::atomic<uint32_t> m_maxChunkCount;
private: private:
GpuFlushType m_maxType = GpuFlushType::ImplicitWeakHint; GpuFlushType m_maxType = GpuFlushType::ImplicitWeakHint;