mirror of
https://github.com/doitsujin/dxvk.git
synced 2025-03-06 20:58:37 +01:00
[dxvk] Optimize GPU start measurement for low-latency frame pacing
In practice, this change affects oversubscribed threading situations, where waking up the "dxvk-queue" thread can potentially cause delays of hundreds of microseconds. In many situations this change does not affect measurements in a meaningful way. It possibly affects AMD, where vkQueueSubmit execution time is non-zero.
This commit is contained in:
parent
c802bdf42e
commit
869cf25f7f
2 changed files with 7 additions and 10 deletions
|
@ -54,8 +54,10 @@ namespace dxvk {
|
|||
}
|
||||
|
||||
// be consistent that every frame has a gpuReady event from the previous frame
|
||||
LatencyMarkers* m = m_latencyMarkersStorage.getMarkers(DXGI_MAX_SWAP_CHAIN_BUFFERS+1);
|
||||
uint64_t firstFrameId = DXGI_MAX_SWAP_CHAIN_BUFFERS+1;
|
||||
LatencyMarkers* m = m_latencyMarkersStorage.getMarkers(firstFrameId);
|
||||
m->gpuReady.push_back(high_resolution_clock::now());
|
||||
m_gpuStarts[ firstFrameId % m_gpuStarts.size() ] = gpuReadyBit;
|
||||
}
|
||||
|
||||
|
||||
|
|
|
@ -34,13 +34,13 @@ namespace dxvk {
|
|||
// potentially wait some more if the cpu gets too much ahead
|
||||
m_mode->startFrame(frameId);
|
||||
m_latencyMarkersStorage.registerFrameStart(frameId);
|
||||
m_gpuStarts[ frameId % m_gpuStarts.size() ].store(0);
|
||||
}
|
||||
|
||||
void notifyGpuPresentEnd( uint64_t frameId ) override {
|
||||
// the frame has been displayed to the screen
|
||||
m_latencyMarkersStorage.registerFrameEnd(frameId);
|
||||
m_mode->endFrame(frameId);
|
||||
m_gpuStarts[ (frameId-1) % m_gpuStarts.size() ].store(0);
|
||||
}
|
||||
|
||||
void notifyCsRenderBegin( uint64_t frameId ) override {
|
||||
|
@ -95,12 +95,6 @@ namespace dxvk {
|
|||
}
|
||||
}
|
||||
|
||||
void notifyGpuExecutionBegin( uint64_t frameId ) override {
|
||||
assert( frameId == m_lastFinishedFrameId+1 );
|
||||
LatencyMarkers* m = m_latencyMarkersStorage.getMarkers(m_lastFinishedFrameId+1);
|
||||
gpuExecutionCheckGpuStart(frameId, m, high_resolution_clock::now());
|
||||
}
|
||||
|
||||
void notifyGpuExecutionEnd( uint64_t frameId ) override {
|
||||
auto now = high_resolution_clock::now();
|
||||
LatencyMarkers* m = m_latencyMarkersStorage.getMarkers(m_lastFinishedFrameId+1);
|
||||
|
@ -120,7 +114,7 @@ namespace dxvk {
|
|||
next->gpuReady.clear();
|
||||
next->gpuReady.push_back(now);
|
||||
|
||||
gpuExecutionCheckGpuStart(frameId, m, now);
|
||||
gpuExecutionCheckGpuStart(frameId+1, next, now);
|
||||
|
||||
m_latencyMarkersStorage.m_timeline.gpuFinished.store(frameId);
|
||||
m_mode->finishRender(frameId);
|
||||
|
@ -149,6 +143,7 @@ namespace dxvk {
|
|||
void notifyCpuPresentBegin( uint64_t frameId) override { }
|
||||
void notifyCpuPresentEnd( uint64_t frameId ) override { }
|
||||
void notifyQueuePresentEnd( uint64_t frameId, VkResult status) override { }
|
||||
void notifyGpuExecutionBegin( uint64_t frameId ) override { }
|
||||
void discardTimings() override { }
|
||||
DxvkLatencyStats getStatistics( uint64_t frameId ) override
|
||||
{ return DxvkLatencyStats(); }
|
||||
|
@ -181,7 +176,7 @@ namespace dxvk {
|
|||
uint64_t m_lastQueueSubmitFrameId = { DXGI_MAX_SWAP_CHAIN_BUFFERS };
|
||||
uint64_t m_lastFinishedFrameId = { DXGI_MAX_SWAP_CHAIN_BUFFERS };
|
||||
|
||||
std::array< std::atomic< uint16_t >, 16 > m_gpuStarts = { };
|
||||
std::array< std::atomic< uint16_t >, 8 > m_gpuStarts = { };
|
||||
static constexpr uint16_t queueSubmitBit = 1;
|
||||
static constexpr uint16_t gpuReadyBit = 2;
|
||||
|
||||
|
|
Loading…
Add table
Reference in a new issue