[dxvk] Optimize GPU start measurement for low-latency frame pacing

In practice, this change affects oversubscribed threading situations, where waking up the "dxvk-queue" thread can potentially cause delays in the hundreds of microseconds. In many situations this change does not affect measurements in a meaningful way. It possibly affects AMD, where the vkQueueSubmit execution time is non-zero.
This commit is contained in:
netborg 2025-02-23 15:09:59 +01:00
parent c802bdf42e
commit 869cf25f7f
2 changed files with 7 additions and 10 deletions

View file

@ -54,8 +54,10 @@ namespace dxvk {
}
// be consistent that every frame has a gpuReady event from the previous frame
LatencyMarkers* m = m_latencyMarkersStorage.getMarkers(DXGI_MAX_SWAP_CHAIN_BUFFERS+1);
uint64_t firstFrameId = DXGI_MAX_SWAP_CHAIN_BUFFERS+1;
LatencyMarkers* m = m_latencyMarkersStorage.getMarkers(firstFrameId);
m->gpuReady.push_back(high_resolution_clock::now());
m_gpuStarts[ firstFrameId % m_gpuStarts.size() ] = gpuReadyBit;
}

View file

@ -34,13 +34,13 @@ namespace dxvk {
// potentially wait some more if the cpu gets too much ahead
m_mode->startFrame(frameId);
m_latencyMarkersStorage.registerFrameStart(frameId);
m_gpuStarts[ frameId % m_gpuStarts.size() ].store(0);
}
// Handles the end of presentation for frame `frameId`:
// records the frame end in m_latencyMarkersStorage, forwards the event to
// m_mode, and resets the m_gpuStarts slot belonging to the preceding frame.
void notifyGpuPresentEnd( uint64_t frameId ) override {
// the frame has been displayed to the screen
m_latencyMarkersStorage.registerFrameEnd(frameId);
m_mode->endFrame(frameId);
// clear the slot of frame (frameId-1); slots are addressed modulo the array size
m_gpuStarts[ (frameId-1) % m_gpuStarts.size() ].store(0);
}
void notifyCsRenderBegin( uint64_t frameId ) override {
@ -95,12 +95,6 @@ namespace dxvk {
}
}
// Presumably invoked when the GPU starts executing work for frame `frameId`
// (inferred from the name; confirm against the caller).
// Asserts that `frameId` directly follows the last finished frame, then passes
// that frame's markers and the current timestamp to gpuExecutionCheckGpuStart.
void notifyGpuExecutionBegin( uint64_t frameId ) override {
assert( frameId == m_lastFinishedFrameId+1 );
// looked up via m_lastFinishedFrameId+1, which the assert above equates with frameId
LatencyMarkers* m = m_latencyMarkersStorage.getMarkers(m_lastFinishedFrameId+1);
gpuExecutionCheckGpuStart(frameId, m, high_resolution_clock::now());
}
void notifyGpuExecutionEnd( uint64_t frameId ) override {
auto now = high_resolution_clock::now();
LatencyMarkers* m = m_latencyMarkersStorage.getMarkers(m_lastFinishedFrameId+1);
@ -120,7 +114,7 @@ namespace dxvk {
next->gpuReady.clear();
next->gpuReady.push_back(now);
gpuExecutionCheckGpuStart(frameId, m, now);
gpuExecutionCheckGpuStart(frameId+1, next, now);
m_latencyMarkersStorage.m_timeline.gpuFinished.store(frameId);
m_mode->finishRender(frameId);
@ -149,6 +143,7 @@ namespace dxvk {
// The overrides below have empty bodies: these notifications are accepted
// and ignored by this class.
void notifyCpuPresentBegin( uint64_t frameId) override { }
void notifyCpuPresentEnd( uint64_t frameId ) override { }
void notifyQueuePresentEnd( uint64_t frameId, VkResult status) override { }
void notifyGpuExecutionBegin( uint64_t frameId ) override { }
void discardTimings() override { }
// Returns a default-constructed DxvkLatencyStats; `frameId` is not used.
DxvkLatencyStats getStatistics( uint64_t frameId ) override
{ return DxvkLatencyStats(); }
@ -181,7 +176,7 @@ namespace dxvk {
uint64_t m_lastQueueSubmitFrameId = { DXGI_MAX_SWAP_CHAIN_BUFFERS };
uint64_t m_lastFinishedFrameId = { DXGI_MAX_SWAP_CHAIN_BUFFERS };
std::array< std::atomic< uint16_t >, 16 > m_gpuStarts = { };
std::array< std::atomic< uint16_t >, 8 > m_gpuStarts = { };
static constexpr uint16_t queueSubmitBit = 1;
static constexpr uint16_t gpuReadyBit = 2;