From 196644c7a5bff38d1be9a8ffadebf30829613d86 Mon Sep 17 00:00:00 2001
From: Philip Rebohle <philip.rebohle@tu-dortmund.de>
Date: Sun, 19 Jan 2025 02:46:24 +0100
Subject: [PATCH] [dxvk] Add latency tracker based on NV_low_latency2

---
 src/dxvk/dxvk_device.cpp             |   4 +
 src/dxvk/dxvk_latency_builtin_nv.cpp | 213 +++++++++++++++++++++++++++
 src/dxvk/dxvk_latency_builtin_nv.h   | 114 ++++++++++++++
 src/dxvk/meson.build                 |   1 +
 4 files changed, 332 insertions(+)
 create mode 100644 src/dxvk/dxvk_latency_builtin_nv.cpp
 create mode 100644 src/dxvk/dxvk_latency_builtin_nv.h

diff --git a/src/dxvk/dxvk_device.cpp b/src/dxvk/dxvk_device.cpp
index e0df92ee7..2efa8ab1f 100644
--- a/src/dxvk/dxvk_device.cpp
+++ b/src/dxvk/dxvk_device.cpp
@@ -1,6 +1,7 @@
 #include "dxvk_device.h"
 #include "dxvk_instance.h"
 #include "dxvk_latency_builtin.h"
+#include "dxvk_latency_builtin_nv.h"
 
 namespace dxvk {
   
@@ -311,6 +312,9 @@ namespace dxvk {
     if (m_options.latencySleep != Tristate::True)
       return nullptr;
 
+    if (m_features.nvLowLatency2)
+      return new DxvkBuiltInLatencyTrackerNv(presenter);
+
     return new DxvkBuiltInLatencyTracker(
       m_options.latencyTolerance);
   }
diff --git a/src/dxvk/dxvk_latency_builtin_nv.cpp b/src/dxvk/dxvk_latency_builtin_nv.cpp
new file mode 100644
index 000000000..06d322a3b
--- /dev/null
+++ b/src/dxvk/dxvk_latency_builtin_nv.cpp
@@ -0,0 +1,213 @@
+#include "dxvk_latency_builtin_nv.h"
+
+namespace dxvk {
+
+  DxvkBuiltInLatencyTrackerNv::DxvkBuiltInLatencyTrackerNv(
+    const Rc<Presenter>&            presenter)
+  : m_presenter(presenter) {
+    Logger::info("Latency control enabled, using VK_NV_low_latency2");
+
+    // A positive environment limit replaces sleepAndBeginFrame's maxFrameRate
+    if (auto envLimit = FpsLimiter::getEnvironmentOverride())
+      m_envFpsLimit = *envLimit;
+  }
+
+
+  DxvkBuiltInLatencyTrackerNv::~DxvkBuiltInLatencyTrackerNv() {
+    // Hand the presenter a sleep mode with low latency mode and boost disabled
+    VkLatencySleepModeInfoNV sleepMode = { VK_STRUCTURE_TYPE_LATENCY_SLEEP_MODE_INFO_NV };
+    sleepMode.minimumIntervalUs = 0;
+    sleepMode.lowLatencyBoost = VK_FALSE;
+    sleepMode.lowLatencyMode = VK_FALSE;
+    m_presenter->setLatencySleepModeNv(sleepMode);
+  }
+
+
+  void DxvkBuiltInLatencyTrackerNv::notifyCpuPresentBegin(
+          uint64_t                  frameId) {
+    // Nothing to track here, only the end of the CPU-side present is recorded
+  }
+
+
+  void DxvkBuiltInLatencyTrackerNv::notifyCpuPresentEnd(
+          uint64_t                  frameId) {
+    std::unique_lock lock(m_mutex);
+
+    // Flag the frame so the next sleepAndBeginFrame waits for its present result
+    if (auto entry = getFrame(frameId))
+      entry->presentPending = VK_TRUE;
+  }
+
+
+  void DxvkBuiltInLatencyTrackerNv::notifyCsRenderBegin(
+          uint64_t                  frameId) {
+    // The start of CS rendering is reported as the end of simulation ...
+    m_presenter->setLatencyMarkerNv(frameId, VK_LATENCY_MARKER_SIMULATION_END_NV);
+    // ... and, immediately after, as the start of render submission
+    m_presenter->setLatencyMarkerNv(frameId, VK_LATENCY_MARKER_RENDERSUBMIT_START_NV);
+  }
+
+
+  void DxvkBuiltInLatencyTrackerNv::notifyCsRenderEnd(
+          uint64_t                  frameId) {
+    // The end of CS rendering is reported as the end of render submission
+    m_presenter->setLatencyMarkerNv(frameId, VK_LATENCY_MARKER_RENDERSUBMIT_END_NV);
+  }
+
+
+  void DxvkBuiltInLatencyTrackerNv::notifyQueueSubmit(
+          uint64_t                  frameId) {
+    // No marker is set for queue submissions, this is handled by the driver
+  }
+
+
+  void DxvkBuiltInLatencyTrackerNv::notifyQueuePresentBegin(
+          uint64_t                  frameId) {
+    // The start of the queue present is reported through the NV present marker
+    m_presenter->setLatencyMarkerNv(frameId, VK_LATENCY_MARKER_PRESENT_START_NV);
+  }
+
+
+  void DxvkBuiltInLatencyTrackerNv::notifyQueuePresentEnd(
+          uint64_t                  frameId,
+          VkResult                  status) {
+    m_presenter->setLatencyMarkerNv(frameId, VK_LATENCY_MARKER_PRESENT_END_NV);
+
+    std::unique_lock lock(m_mutex);
+    // Publish the present result for the frame, provided it is still tracked
+    if (auto entry = getFrame(frameId))
+      entry->presentResult = status;
+
+    // Wake up sleepAndBeginFrame in case it is
+    // blocked waiting for this present to return
+    m_cond.notify_one();
+  }
+
+
+  void DxvkBuiltInLatencyTrackerNv::notifyGpuExecutionBegin(
+          uint64_t                  frameId) {
+    // No marker is set for the start of GPU execution, handled by the driver
+  }
+
+
+  void DxvkBuiltInLatencyTrackerNv::notifyGpuExecutionEnd(
+          uint64_t                  frameId) {
+    // No marker is set for the end of GPU execution, handled by the driver
+  }
+
+
+  void DxvkBuiltInLatencyTrackerNv::notifyGpuPresentEnd(
+          uint64_t                  frameId) {
+    std::unique_lock lock(m_mutex);
+
+    // getStatistics measures the frame latency up to this time stamp
+    if (auto entry = getFrame(frameId))
+      entry->frameEnd = dxvk::high_resolution_clock::now();
+  }
+
+
+  void DxvkBuiltInLatencyTrackerNv::sleepAndBeginFrame(
+          uint64_t                  frameId,
+          double                    maxFrameRate) {
+    bool presentSuccessful = false;
+    // Time spent in the NV sleep call, stays zero if the sleep is skipped
+    duration sleepDuration(0u);
+
+    { std::unique_lock lock(m_mutex);
+
+      // Don't try to sleep if we haven't set up
+      // low latency mode for the swapchain yet
+      if (m_lowLatencyEnabled) {
+        auto curr = getFrame(frameId - 1u);
+        // Wait for the queue present of the previous frame to return
+        if (curr && curr->presentPending) {
+          m_cond.wait(lock, [curr] {
+            return curr->presentResult != VK_NOT_READY;
+          });
+          // Negative results are errors, in which case the sleep is skipped
+          presentSuccessful = curr->presentResult >= 0;
+        }
+      }
+    }
+    // Sleep without holding the lock, and measure how long it took
+    if (presentSuccessful) {
+      auto t0 = dxvk::high_resolution_clock::now();
+      m_presenter->latencySleepNv();
+
+      sleepDuration += dxvk::high_resolution_clock::now() - t0;
+    }
+
+    { std::unique_lock lock(m_mutex);
+      // Set up low latency mode for subsequent frames
+      VkLatencySleepModeInfoNV latencyMode = { VK_STRUCTURE_TYPE_LATENCY_SLEEP_MODE_INFO_NV };
+      latencyMode.lowLatencyMode = VK_TRUE;
+      latencyMode.lowLatencyBoost = VK_TRUE;
+      latencyMode.minimumIntervalUs = 0;
+      // A positive environment frame rate limit takes precedence
+      if (m_envFpsLimit > 0.0)
+        maxFrameRate = m_envFpsLimit;
+      // Convert the frame rate limit to a minimum frame interval
+      if (maxFrameRate > 0.0)
+        latencyMode.minimumIntervalUs = uint64_t(1'000'000.0 / maxFrameRate);
+
+      m_presenter->setLatencySleepModeNv(latencyMode);
+      m_presenter->setLatencyMarkerNv(frameId,
+        VK_LATENCY_MARKER_INPUT_SAMPLE_NV);
+      m_presenter->setLatencyMarkerNv(frameId,
+        VK_LATENCY_MARKER_SIMULATION_START_NV);
+      // Start tracking the new frame in its ring buffer slot
+      auto next = initFrame(frameId);
+      next->frameStart = dxvk::high_resolution_clock::now();
+      next->sleepDuration = sleepDuration;
+
+      m_lowLatencyEnabled = true;
+    }
+  }
+
+
+  void DxvkBuiltInLatencyTrackerNv::discardTimings() {
+    std::unique_lock lock(m_mutex);
+    m_lastDiscard = m_lastFrameId; // getFrame rejects every frame up to this ID
+  }
+
+
+  DxvkLatencyStats DxvkBuiltInLatencyTrackerNv::getStatistics(
+          uint64_t                  frameId) {
+    std::unique_lock lock(m_mutex);
+
+    auto frame = getFrame(frameId);
+    // Fall back to older frames until one has its end time stamp set
+    while (frame && frame->frameEnd == time_point())
+      frame = getFrame(--frameId);
+    // No suitable frame is being tracked, return default statistics
+    if (!frame)
+      return DxvkLatencyStats();
+
+    DxvkLatencyStats stats = { };
+    stats.frameLatency = std::chrono::duration_cast<std::chrono::microseconds>(frame->frameEnd - frame->frameStart);
+    stats.sleepDuration = std::chrono::duration_cast<std::chrono::microseconds>(frame->sleepDuration);
+    return stats;
+  }
+
+
+  DxvkLatencyFrameDataNv* DxvkBuiltInLatencyTrackerNv::initFrame(uint64_t frameId) {
+    // Recycle the ring buffer slot for this frame, resetting all of its data
+    DxvkLatencyFrameDataNv* slot = &m_frames[frameId % FrameCount];
+    *slot = DxvkLatencyFrameDataNv();
+    slot->frameId = frameId;
+
+    m_lastFrameId = frameId;
+    return slot;
+  }
+
+
+  DxvkLatencyFrameDataNv* DxvkBuiltInLatencyTrackerNv::getFrame(uint64_t frameId) {
+    // A slot is only usable if it still holds this frame and was not discarded
+    DxvkLatencyFrameDataNv* slot = &m_frames[frameId % FrameCount];
+    bool discarded = frameId <= m_lastDiscard;
+    bool recycled = slot->frameId != frameId;
+
+    return (discarded || recycled) ? nullptr : slot;
+  }
+
+}
diff --git a/src/dxvk/dxvk_latency_builtin_nv.h b/src/dxvk/dxvk_latency_builtin_nv.h
new file mode 100644
index 000000000..5f107fac0
--- /dev/null
+++ b/src/dxvk/dxvk_latency_builtin_nv.h
@@ -0,0 +1,114 @@
+#pragma once
+
+#include <array>
+
+#include "dxvk_latency.h"
+#include "dxvk_presenter.h"
+
+#include "../util/thread.h"
+
+#include "../util/util_sleep.h"
+#include "../util/util_time.h"
+
+#include "../util/config/config.h"
+
+#include "../util/sync/sync_spinlock.h"
+
+namespace dxvk {
+
+  /**
+   * \brief Internal timers for LL2 timing
+   */
+  struct DxvkLatencyFrameDataNv {
+    using time_point = dxvk::high_resolution_clock::time_point;
+    using duration = dxvk::high_resolution_clock::duration;
+
+    uint64_t    frameId         = 0u;             // Frame currently stored in this slot
+    time_point  frameStart      = time_point();   // Taken at the end of sleepAndBeginFrame
+    time_point  frameEnd        = time_point();   // Taken in notifyGpuPresentEnd, default if unset
+    duration    sleepDuration   = duration(0u);   // Time spent in the NV sleep before the frame
+    VkResult    presentResult   = VK_NOT_READY;   // VK_NOT_READY until notifyQueuePresentEnd
+    VkBool32    presentPending  = VK_FALSE;       // Set to VK_TRUE in notifyCpuPresentEnd
+  };
+
+
+  /**
+   * \brief Built-in latency tracker based on VK_NV_low_latency2
+   *
+   * Forwards latency markers to the presenter and uses its NV sleep
+   * function before each frame. CPU timestamps only feed statistics.
+   */
+  class DxvkBuiltInLatencyTrackerNv : public DxvkLatencyTracker {
+    using time_point = typename DxvkLatencyFrameDataNv::time_point;
+    using duration = typename DxvkLatencyFrameDataNv::duration;
+
+    constexpr static size_t FrameCount = 8u; // Number of slots in m_frames
+  public:
+
+    DxvkBuiltInLatencyTrackerNv(
+      const Rc<Presenter>&            presenter);
+
+    ~DxvkBuiltInLatencyTrackerNv();
+
+    void notifyCpuPresentBegin(
+            uint64_t                  frameId);
+
+    void notifyCpuPresentEnd(
+            uint64_t                  frameId);
+
+    void notifyCsRenderBegin(
+            uint64_t                  frameId);
+
+    void notifyCsRenderEnd(
+            uint64_t                  frameId);
+
+    void notifyQueueSubmit(
+            uint64_t                  frameId);
+
+    void notifyQueuePresentBegin(
+            uint64_t                  frameId);
+
+    void notifyQueuePresentEnd(
+            uint64_t                  frameId,
+            VkResult                  status);
+
+    void notifyGpuExecutionBegin(
+            uint64_t                  frameId);
+
+    void notifyGpuExecutionEnd(
+            uint64_t                  frameId);
+
+    void notifyGpuPresentEnd(
+            uint64_t                  frameId);
+
+    void sleepAndBeginFrame(
+            uint64_t                  frameId,
+            double                    maxFrameRate);
+
+    void discardTimings();
+
+    DxvkLatencyStats getStatistics(
+            uint64_t                  frameId);
+
+  private:
+
+    Rc<Presenter>             m_presenter;          // Receives markers, sleep mode and sleep calls
+    double                    m_envFpsLimit = 0.0;  // Limit from the environment override, 0 if unset
+
+    dxvk::mutex               m_mutex;              // Held around all accesses to the frame data below
+    dxvk::condition_variable  m_cond;               // Signaled by notifyQueuePresentEnd
+
+    uint64_t                  m_lastFrameId = 0u;   // Last frame passed to initFrame
+    uint64_t                  m_lastDiscard = 0u;   // Frames up to this ID are rejected by getFrame
+
+    bool                      m_lowLatencyEnabled = false; // True once sleepAndBeginFrame has run
+
+    std::array<DxvkLatencyFrameDataNv, FrameCount> m_frames = { }; // Indexed by frameId % FrameCount
+    // Resets the slot for the given frame and makes it the last known frame
+    DxvkLatencyFrameDataNv* initFrame(uint64_t frameId);
+    // Returns the slot of the frame, or null if it was discarded or overwritten
+    DxvkLatencyFrameDataNv* getFrame(uint64_t frameId);
+
+  };
+
+}
diff --git a/src/dxvk/meson.build b/src/dxvk/meson.build
index 789b911c8..4880040bc 100644
--- a/src/dxvk/meson.build
+++ b/src/dxvk/meson.build
@@ -91,6 +91,7 @@ dxvk_src = [
   'dxvk_image.cpp',
   'dxvk_instance.cpp',
   'dxvk_latency_builtin.cpp',
+  'dxvk_latency_builtin_nv.cpp',
   'dxvk_memory.cpp',
   'dxvk_meta_blit.cpp',
   'dxvk_meta_clear.cpp',