[dxvk] Optimize flush heuristic for low-latency

This can possibly be optimized further, but just changing these numbers already had a huge effect, especially for games that have a small number of submissions to begin with.
This commit is contained in:
netborg 2025-02-28 08:05:46 +01:00
parent bee72c27c8
commit 0d018451fd
4 changed files with 19 additions and 9 deletions

View file

@ -2,6 +2,7 @@
#include "dxvk_framepacer_mode_low_latency.h"
#include "dxvk_framepacer_mode_min_latency.h"
#include "dxvk_options.h"
#include "../../util/util_flush.h"
#include "../../util/util_env.h"
#include "../../util/log/log.h"
@ -40,11 +41,15 @@ namespace dxvk {
case FramePacerMode::LOW_LATENCY:
Logger::info( "Frame pace: low-latency" );
GpuFlushTracker::m_minPendingSubmissions = 1;
GpuFlushTracker::m_minChunkCount = 1;
m_mode = std::make_unique<LowLatencyMode>(mode, &m_latencyMarkersStorage, options);
break;
case FramePacerMode::MIN_LATENCY:
Logger::info( "Frame pace: min-latency" );
GpuFlushTracker::m_minPendingSubmissions = 1;
GpuFlushTracker::m_minChunkCount = 1;
m_mode = std::make_unique<MinLatencyMode>(mode, &m_latencyMarkersStorage);
break;
}

View file

@ -136,7 +136,7 @@ namespace dxvk {
// not implemented methods
void notifyCpuPresentBegin( uint64_t frameId) override { }
void notifyCpuPresentBegin( uint64_t frameId ) override { }
void notifyCpuPresentEnd( uint64_t frameId ) override { }
void notifyQueuePresentEnd( uint64_t frameId, VkResult status) override { }
void notifyGpuExecutionBegin( uint64_t frameId ) override { }

View file

@ -2,6 +2,10 @@
namespace dxvk {
// Tunable thresholds for the flush heuristic. They are static members, so a
// change applies to every GpuFlushTracker instance. The initial values equal
// the constants that used to be hard-coded in the heuristic (2, 3 and 20).

// If fewer submissions than this are pending, the heuristic reports that a
// flush should happen.
std::atomic<uint32_t> GpuFlushTracker::m_minPendingSubmissions = { 2 };
// Strong hints flush once at least this many chunks have accumulated; medium
// and weak hints never flush below twice this value. It is also multiplied
// by the number of pending submissions to form the chunk-count threshold.
std::atomic<uint32_t> GpuFlushTracker::m_minChunkCount = { 3 };
// Upper bound applied to the chunk-count threshold computed above.
std::atomic<uint32_t> GpuFlushTracker::m_maxChunkCount = { 20 };
GpuFlushTracker::GpuFlushTracker(GpuFlushType maxType)
: m_maxType(maxType) {
@ -11,10 +15,6 @@ namespace dxvk {
GpuFlushType flushType,
uint64_t chunkId,
uint32_t lastCompleteSubmissionId) {
constexpr uint32_t minPendingSubmissions = 2;
constexpr uint32_t minChunkCount = 3u;
constexpr uint32_t maxChunkCount = 20u;
// Do not flush if there is nothing to flush
uint32_t chunkCount = uint32_t(chunkId - m_lastFlushChunkId);
@ -42,14 +42,14 @@ namespace dxvk {
case GpuFlushType::ImplicitStrongHint: {
// Flush aggressively with a strong hint to reduce readback latency.
return chunkCount >= minChunkCount;
return chunkCount >= m_minChunkCount;
}
case GpuFlushType::ImplicitMediumHint:
case GpuFlushType::ImplicitWeakHint: {
// Aim for a higher number of chunks per submission with
// a weak hint in order to avoid submitting too often.
if (chunkCount < 2 * minChunkCount)
if (chunkCount < 2 * m_minChunkCount)
return false;
// Actual heuristic is shared with synchronization commands
@ -60,13 +60,13 @@ namespace dxvk {
// required if the application is spinning on a query or resource.
uint32_t pendingSubmissions = uint32_t(m_lastFlushSubmissionId - lastCompleteSubmissionId);
if (pendingSubmissions < minPendingSubmissions)
if (pendingSubmissions < m_minPendingSubmissions)
return true;
// Use the number of pending submissions to decide whether to flush. Other
// than ignoring the minimum chunk count condition, we should treat this
// the same as weak hints to avoid unnecessary synchronization.
uint32_t threshold = std::min(maxChunkCount, pendingSubmissions * minChunkCount);
uint32_t threshold = std::min(m_maxChunkCount.load(), pendingSubmissions * m_minChunkCount.load());
return chunkCount >= threshold;
}
}

View file

@ -3,6 +3,7 @@
#include <cstddef>
#include <cstdint>
#include <vector>
#include <atomic>
namespace dxvk {
@ -64,6 +65,10 @@ namespace dxvk {
uint64_t chunkId,
uint64_t submissionId);
// Flush heuristic thresholds. Declared static, so the values are shared by
// all GpuFlushTracker instances, and atomic, so they can be overwritten at
// runtime from outside the class.
// NOTE(review): presumably written from a different thread than the one
// evaluating the heuristic — confirm against the callers.

// Pending-submission count below which a flush is always requested.
static std::atomic<uint32_t> m_minPendingSubmissions;
// Minimum number of chunks before hint-driven flushes are considered.
static std::atomic<uint32_t> m_minChunkCount;
// Cap on the chunk-count threshold derived from pending submissions.
static std::atomic<uint32_t> m_maxChunkCount;
private:
GpuFlushType m_maxType = GpuFlushType::ImplicitWeakHint;