[dxvk] Optimize GPU start measurement for low-latency frame pacing

In practice, this change affects oversubscribed threading situations, where waking up the "dxvk-queue" thread can potentially cause delays of hundreds of microseconds. In many situations, this change does not affect measurements in a meaningful way. It possibly affects AMD, where vkQueueSubmit execution time is non-zero.
This commit is contained in:
netborg 2025-02-23 15:09:59 +01:00
parent c802bdf42e
commit 869cf25f7f
2 changed files with 7 additions and 10 deletions

View file

@ -54,8 +54,10 @@ namespace dxvk {
} }
// be consistent that every frame has a gpuReady event from the previous frame // be consistent that every frame has a gpuReady event from the previous frame
LatencyMarkers* m = m_latencyMarkersStorage.getMarkers(DXGI_MAX_SWAP_CHAIN_BUFFERS+1); uint64_t firstFrameId = DXGI_MAX_SWAP_CHAIN_BUFFERS+1;
LatencyMarkers* m = m_latencyMarkersStorage.getMarkers(firstFrameId);
m->gpuReady.push_back(high_resolution_clock::now()); m->gpuReady.push_back(high_resolution_clock::now());
m_gpuStarts[ firstFrameId % m_gpuStarts.size() ] = gpuReadyBit;
} }

View file

@ -34,13 +34,13 @@ namespace dxvk {
// potentially wait some more if the cpu gets too much ahead // potentially wait some more if the cpu gets too much ahead
m_mode->startFrame(frameId); m_mode->startFrame(frameId);
m_latencyMarkersStorage.registerFrameStart(frameId); m_latencyMarkersStorage.registerFrameStart(frameId);
m_gpuStarts[ frameId % m_gpuStarts.size() ].store(0);
} }
void notifyGpuPresentEnd( uint64_t frameId ) override { void notifyGpuPresentEnd( uint64_t frameId ) override {
// the frame has been displayed to the screen // the frame has been displayed to the screen
m_latencyMarkersStorage.registerFrameEnd(frameId); m_latencyMarkersStorage.registerFrameEnd(frameId);
m_mode->endFrame(frameId); m_mode->endFrame(frameId);
m_gpuStarts[ (frameId-1) % m_gpuStarts.size() ].store(0);
} }
void notifyCsRenderBegin( uint64_t frameId ) override { void notifyCsRenderBegin( uint64_t frameId ) override {
@ -95,12 +95,6 @@ namespace dxvk {
} }
} }
void notifyGpuExecutionBegin( uint64_t frameId ) override {
assert( frameId == m_lastFinishedFrameId+1 );
LatencyMarkers* m = m_latencyMarkersStorage.getMarkers(m_lastFinishedFrameId+1);
gpuExecutionCheckGpuStart(frameId, m, high_resolution_clock::now());
}
void notifyGpuExecutionEnd( uint64_t frameId ) override { void notifyGpuExecutionEnd( uint64_t frameId ) override {
auto now = high_resolution_clock::now(); auto now = high_resolution_clock::now();
LatencyMarkers* m = m_latencyMarkersStorage.getMarkers(m_lastFinishedFrameId+1); LatencyMarkers* m = m_latencyMarkersStorage.getMarkers(m_lastFinishedFrameId+1);
@ -120,7 +114,7 @@ namespace dxvk {
next->gpuReady.clear(); next->gpuReady.clear();
next->gpuReady.push_back(now); next->gpuReady.push_back(now);
gpuExecutionCheckGpuStart(frameId, m, now); gpuExecutionCheckGpuStart(frameId+1, next, now);
m_latencyMarkersStorage.m_timeline.gpuFinished.store(frameId); m_latencyMarkersStorage.m_timeline.gpuFinished.store(frameId);
m_mode->finishRender(frameId); m_mode->finishRender(frameId);
@ -149,6 +143,7 @@ namespace dxvk {
void notifyCpuPresentBegin( uint64_t frameId) override { } void notifyCpuPresentBegin( uint64_t frameId) override { }
void notifyCpuPresentEnd( uint64_t frameId ) override { } void notifyCpuPresentEnd( uint64_t frameId ) override { }
void notifyQueuePresentEnd( uint64_t frameId, VkResult status) override { } void notifyQueuePresentEnd( uint64_t frameId, VkResult status) override { }
void notifyGpuExecutionBegin( uint64_t frameId ) override { }
void discardTimings() override { } void discardTimings() override { }
DxvkLatencyStats getStatistics( uint64_t frameId ) override DxvkLatencyStats getStatistics( uint64_t frameId ) override
{ return DxvkLatencyStats(); } { return DxvkLatencyStats(); }
@ -181,7 +176,7 @@ namespace dxvk {
uint64_t m_lastQueueSubmitFrameId = { DXGI_MAX_SWAP_CHAIN_BUFFERS }; uint64_t m_lastQueueSubmitFrameId = { DXGI_MAX_SWAP_CHAIN_BUFFERS };
uint64_t m_lastFinishedFrameId = { DXGI_MAX_SWAP_CHAIN_BUFFERS }; uint64_t m_lastFinishedFrameId = { DXGI_MAX_SWAP_CHAIN_BUFFERS };
std::array< std::atomic< uint16_t >, 16 > m_gpuStarts = { }; std::array< std::atomic< uint16_t >, 8 > m_gpuStarts = { };
static constexpr uint16_t queueSubmitBit = 1; static constexpr uint16_t queueSubmitBit = 1;
static constexpr uint16_t gpuReadyBit = 2; static constexpr uint16_t gpuReadyBit = 2;