mirror of
https://github.com/doitsujin/dxvk.git
synced 2025-03-06 20:58:37 +01:00
[dxvk] Optimize GPU start measurement for low-latency frame pacing
In practice, this change affects oversubscribed threading situations, where waking up the "dxvk-queue" thread can potentially cause delays of hundreds of microseconds. In many situations this change does not affect measurements in a meaningful way. It possibly affects AMD, where the vkQueueSubmit execution time is non-zero.
This commit is contained in:
parent
c802bdf42e
commit
869cf25f7f
2 changed files with 7 additions and 10 deletions
|
@ -54,8 +54,10 @@ namespace dxvk {
|
||||||
}
|
}
|
||||||
|
|
||||||
// be consistent that every frame has a gpuReady event from the previous frame
|
// be consistent that every frame has a gpuReady event from the previous frame
|
||||||
LatencyMarkers* m = m_latencyMarkersStorage.getMarkers(DXGI_MAX_SWAP_CHAIN_BUFFERS+1);
|
uint64_t firstFrameId = DXGI_MAX_SWAP_CHAIN_BUFFERS+1;
|
||||||
|
LatencyMarkers* m = m_latencyMarkersStorage.getMarkers(firstFrameId);
|
||||||
m->gpuReady.push_back(high_resolution_clock::now());
|
m->gpuReady.push_back(high_resolution_clock::now());
|
||||||
|
m_gpuStarts[ firstFrameId % m_gpuStarts.size() ] = gpuReadyBit;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
|
@ -34,13 +34,13 @@ namespace dxvk {
|
||||||
// potentially wait some more if the cpu gets too much ahead
|
// potentially wait some more if the cpu gets too much ahead
|
||||||
m_mode->startFrame(frameId);
|
m_mode->startFrame(frameId);
|
||||||
m_latencyMarkersStorage.registerFrameStart(frameId);
|
m_latencyMarkersStorage.registerFrameStart(frameId);
|
||||||
m_gpuStarts[ frameId % m_gpuStarts.size() ].store(0);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
void notifyGpuPresentEnd( uint64_t frameId ) override {
|
void notifyGpuPresentEnd( uint64_t frameId ) override {
|
||||||
// the frame has been displayed to the screen
|
// the frame has been displayed to the screen
|
||||||
m_latencyMarkersStorage.registerFrameEnd(frameId);
|
m_latencyMarkersStorage.registerFrameEnd(frameId);
|
||||||
m_mode->endFrame(frameId);
|
m_mode->endFrame(frameId);
|
||||||
|
m_gpuStarts[ (frameId-1) % m_gpuStarts.size() ].store(0);
|
||||||
}
|
}
|
||||||
|
|
||||||
void notifyCsRenderBegin( uint64_t frameId ) override {
|
void notifyCsRenderBegin( uint64_t frameId ) override {
|
||||||
|
@ -95,12 +95,6 @@ namespace dxvk {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
void notifyGpuExecutionBegin( uint64_t frameId ) override {
|
|
||||||
assert( frameId == m_lastFinishedFrameId+1 );
|
|
||||||
LatencyMarkers* m = m_latencyMarkersStorage.getMarkers(m_lastFinishedFrameId+1);
|
|
||||||
gpuExecutionCheckGpuStart(frameId, m, high_resolution_clock::now());
|
|
||||||
}
|
|
||||||
|
|
||||||
void notifyGpuExecutionEnd( uint64_t frameId ) override {
|
void notifyGpuExecutionEnd( uint64_t frameId ) override {
|
||||||
auto now = high_resolution_clock::now();
|
auto now = high_resolution_clock::now();
|
||||||
LatencyMarkers* m = m_latencyMarkersStorage.getMarkers(m_lastFinishedFrameId+1);
|
LatencyMarkers* m = m_latencyMarkersStorage.getMarkers(m_lastFinishedFrameId+1);
|
||||||
|
@ -120,7 +114,7 @@ namespace dxvk {
|
||||||
next->gpuReady.clear();
|
next->gpuReady.clear();
|
||||||
next->gpuReady.push_back(now);
|
next->gpuReady.push_back(now);
|
||||||
|
|
||||||
gpuExecutionCheckGpuStart(frameId, m, now);
|
gpuExecutionCheckGpuStart(frameId+1, next, now);
|
||||||
|
|
||||||
m_latencyMarkersStorage.m_timeline.gpuFinished.store(frameId);
|
m_latencyMarkersStorage.m_timeline.gpuFinished.store(frameId);
|
||||||
m_mode->finishRender(frameId);
|
m_mode->finishRender(frameId);
|
||||||
|
@ -149,6 +143,7 @@ namespace dxvk {
|
||||||
void notifyCpuPresentBegin( uint64_t frameId) override { }
|
void notifyCpuPresentBegin( uint64_t frameId) override { }
|
||||||
void notifyCpuPresentEnd( uint64_t frameId ) override { }
|
void notifyCpuPresentEnd( uint64_t frameId ) override { }
|
||||||
void notifyQueuePresentEnd( uint64_t frameId, VkResult status) override { }
|
void notifyQueuePresentEnd( uint64_t frameId, VkResult status) override { }
|
||||||
|
void notifyGpuExecutionBegin( uint64_t frameId ) override { }
|
||||||
void discardTimings() override { }
|
void discardTimings() override { }
|
||||||
DxvkLatencyStats getStatistics( uint64_t frameId ) override
|
DxvkLatencyStats getStatistics( uint64_t frameId ) override
|
||||||
{ return DxvkLatencyStats(); }
|
{ return DxvkLatencyStats(); }
|
||||||
|
@ -181,7 +176,7 @@ namespace dxvk {
|
||||||
uint64_t m_lastQueueSubmitFrameId = { DXGI_MAX_SWAP_CHAIN_BUFFERS };
|
uint64_t m_lastQueueSubmitFrameId = { DXGI_MAX_SWAP_CHAIN_BUFFERS };
|
||||||
uint64_t m_lastFinishedFrameId = { DXGI_MAX_SWAP_CHAIN_BUFFERS };
|
uint64_t m_lastFinishedFrameId = { DXGI_MAX_SWAP_CHAIN_BUFFERS };
|
||||||
|
|
||||||
std::array< std::atomic< uint16_t >, 16 > m_gpuStarts = { };
|
std::array< std::atomic< uint16_t >, 8 > m_gpuStarts = { };
|
||||||
static constexpr uint16_t queueSubmitBit = 1;
|
static constexpr uint16_t queueSubmitBit = 1;
|
||||||
static constexpr uint16_t gpuReadyBit = 2;
|
static constexpr uint16_t gpuReadyBit = 2;
|
||||||
|
|
||||||
|
|
Loading…
Add table
Reference in a new issue