From d3b6502a17efd1b06e0811eda22a9f5c772f30ca Mon Sep 17 00:00:00 2001
From: Philip Rebohle <philip.rebohle@tu-dortmund.de>
Date: Mon, 22 Aug 2022 20:15:46 +0200
Subject: [PATCH] [dxvk] Introduce DxvkSparseBindSubmission

---
 src/dxvk/dxvk_sparse.cpp | 328 +++++++++++++++++++++++++++++++++++++++
 src/dxvk/dxvk_sparse.h   | 264 ++++++++++++++++++++++++++++++-
 2 files changed, 591 insertions(+), 1 deletion(-)

diff --git a/src/dxvk/dxvk_sparse.cpp b/src/dxvk/dxvk_sparse.cpp
index e521a49f1..c022b8063 100644
--- a/src/dxvk/dxvk_sparse.cpp
+++ b/src/dxvk/dxvk_sparse.cpp
@@ -1,3 +1,5 @@
+#include <sstream>
+
 #include "dxvk_buffer.h"
 #include "dxvk_device.h"
 #include "dxvk_image.h"
@@ -396,4 +398,330 @@ namespace dxvk {
     }
   }
 
+
+  DxvkSparseBindSubmission::DxvkSparseBindSubmission() {
+    // Nothing to do, all member containers are default-constructed empty
+  }
+
+
+  DxvkSparseBindSubmission::~DxvkSparseBindSubmission() {
+    // Nothing to do, the member containers release their own storage
+  }
+
+
+  void DxvkSparseBindSubmission::waitSemaphore(
+          VkSemaphore             semaphore,
+          uint64_t                value) {
+    m_waitSemaphoreValues.emplace_back(value); // index-matched with m_waitSemaphores
+    m_waitSemaphores.emplace_back(semaphore);
+  }
+
+
+  void DxvkSparseBindSubmission::signalSemaphore(
+          VkSemaphore             semaphore,
+          uint64_t                value) {
+    m_signalSemaphoreValues.emplace_back(value); // index-matched with m_signalSemaphores
+    m_signalSemaphores.emplace_back(semaphore);
+  }
+
+
+  void DxvkSparseBindSubmission::bindBufferMemory(
+    const DxvkSparseBufferBindKey& key,
+    const DxvkSparsePageHandle&   memory) {
+    m_bufferBinds.insert_or_assign(key, memory); // overwrites the page stored for an equal key
+  }
+
+
+  void DxvkSparseBindSubmission::bindImageMemory(
+    const DxvkSparseImageBindKey& key,
+    const DxvkSparsePageHandle&   memory) {
+    m_imageBinds.insert_or_assign(key, memory); // overwrites the page stored for an equal key
+  }
+
+
+  void DxvkSparseBindSubmission::bindImageOpaqueMemory(
+    const DxvkSparseImageOpaqueBindKey& key,
+    const DxvkSparsePageHandle&   memory) {
+    m_imageOpaqueBinds.insert_or_assign(key, memory); // NOTE(review): key ordering ignores flags, an equal key keeps its original flags
+  }
+
+
+  VkResult DxvkSparseBindSubmission::submit(
+          DxvkDevice*             device,
+          VkQueue                 queue) {
+    auto vk = device->vkd();
+
+    // Flatten the recorded binds into the arrays that Vulkan consumes
+    DxvkSparseBufferBindArrays bufferArrays;
+    processBufferBinds(bufferArrays);
+    DxvkSparseImageBindArrays imageArrays;
+    processImageBinds(imageArrays);
+    DxvkSparseImageOpaqueBindArrays opaqueArrays;
+    processOpaqueBinds(opaqueArrays);
+
+    // vkQueueBindSparse does not accept the newer semaphore submit structs, so
+    // timeline values are passed via a chained VkTimelineSemaphoreSubmitInfo
+    VkTimelineSemaphoreSubmitInfo semaphoreValues = { VK_STRUCTURE_TYPE_TIMELINE_SEMAPHORE_SUBMIT_INFO };
+    semaphoreValues.waitSemaphoreValueCount = m_waitSemaphoreValues.size();
+    semaphoreValues.pWaitSemaphoreValues = m_waitSemaphoreValues.data();
+    semaphoreValues.signalSemaphoreValueCount = m_signalSemaphoreValues.size();
+    semaphoreValues.pSignalSemaphoreValues = m_signalSemaphoreValues.data();
+
+    VkBindSparseInfo sparseInfo = { VK_STRUCTURE_TYPE_BIND_SPARSE_INFO };
+
+    // Only chain the timeline struct if any semaphore is actually used
+    if (!m_waitSemaphores.empty() || !m_signalSemaphores.empty())
+      sparseInfo.pNext = &semaphoreValues;
+    if (!m_waitSemaphores.empty()) {
+      sparseInfo.waitSemaphoreCount = m_waitSemaphores.size();
+      sparseInfo.pWaitSemaphores = m_waitSemaphores.data();
+    }
+
+    if (!m_signalSemaphores.empty()) {
+      sparseInfo.signalSemaphoreCount = m_signalSemaphores.size();
+      sparseInfo.pSignalSemaphores = m_signalSemaphores.data();
+    }
+
+    if (!bufferArrays.infos.empty()) {
+      sparseInfo.bufferBindCount = bufferArrays.infos.size();
+      sparseInfo.pBufferBinds = bufferArrays.infos.data();
+    }
+
+    if (!opaqueArrays.infos.empty()) {
+      sparseInfo.imageOpaqueBindCount = opaqueArrays.infos.size();
+      sparseInfo.pImageOpaqueBinds = opaqueArrays.infos.data();
+    }
+
+    if (!imageArrays.infos.empty()) {
+      sparseInfo.imageBindCount = imageArrays.infos.size();
+      sparseInfo.pImageBinds = imageArrays.infos.data();
+    }
+
+    VkResult status = vk->vkQueueBindSparse(queue, 1, &sparseInfo, VK_NULL_HANDLE);
+
+    if (status != VK_SUCCESS) {
+      Logger::err(str::format("Sparse binding failed: ", status));
+      logSparseBindingInfo(LogLevel::Error, &sparseInfo);
+    }
+
+    reset();
+    return status;
+  }
+
+
+  void DxvkSparseBindSubmission::reset() {
+    m_bufferBinds.clear();
+    m_imageOpaqueBinds.clear();
+    m_imageBinds.clear();
+    // Semaphore handles and their timeline values are cleared together
+    m_waitSemaphores.clear();
+    m_waitSemaphoreValues.clear();
+    m_signalSemaphores.clear();
+    m_signalSemaphoreValues.clear();
+  }
+
+
+  bool DxvkSparseBindSubmission::tryMergeMemoryBind(
+          VkSparseMemoryBind&               oldBind,
+    const VkSparseMemoryBind&               newBind) {
+    // Both binds must refer to the same memory object and use the same flags
+    const bool sameTarget = oldBind.memory == newBind.memory && oldBind.flags == newBind.flags;
+
+    // The new range must start right where the old one ends within the resource
+    const bool resourceAdjacent = oldBind.resourceOffset + oldBind.size == newBind.resourceOffset;
+
+    // For null binds the memory offset is not checked, otherwise it must be contiguous too
+    const bool memoryAdjacent = !newBind.memory || oldBind.memoryOffset + oldBind.size == newBind.memoryOffset;
+
+    if (!(sameTarget && resourceAdjacent && memoryAdjacent))
+      return false;
+    oldBind.size += newBind.size;
+    return true;
+  }
+
+
+  void DxvkSparseBindSubmission::processBufferBinds(
+          DxvkSparseBufferBindArrays&       buffer) {
+    // The map is ordered by buffer, then offset, so that adjacent
+    // ranges of the same buffer are visited consecutively
+    std::vector<std::pair<VkBuffer, VkSparseMemoryBind>> merged;
+    merged.reserve(m_bufferBinds.size());
+
+    for (const auto& [key, page] : m_bufferBinds) {
+      // Translate key and page handle into a Vulkan bind
+      VkSparseMemoryBind range = { };
+      range.resourceOffset = key.offset;
+      range.size           = key.size;
+      range.memory         = page.memory;
+      range.memoryOffset   = page.offset;
+
+      // Extend the previous range instead if both belong to the same buffer
+      if (!merged.empty() && merged.back().first == key.buffer
+       && tryMergeMemoryBind(merged.back().second, range))
+        continue;
+
+      merged.emplace_back(key.buffer, range);
+    }
+
+    // Group the resulting ranges by buffer for submission
+    populateOutputArrays(buffer.binds, buffer.infos, merged);
+  }
+
+
+  void DxvkSparseBindSubmission::processImageBinds(
+          DxvkSparseImageBindArrays&        image) {
+    // Image regions are never merged, each map entry
+    // is translated to exactly one Vulkan bind struct
+    std::vector<std::pair<VkImage, VkSparseImageMemoryBind>> regions;
+    regions.reserve(m_imageBinds.size());
+
+    for (const auto& [key, page] : m_imageBinds) {
+      VkSparseImageMemoryBind region = { };
+      region.subresource    = key.subresource;
+      region.offset         = key.offset;
+      region.extent         = key.extent;
+      region.memory         = page.memory;
+      region.memoryOffset   = page.offset;
+
+      regions.emplace_back(key.image, region);
+    }
+
+    // Group the regions by image for submission
+    populateOutputArrays(image.binds, image.infos, regions);
+  }
+
+
+  void DxvkSparseBindSubmission::processOpaqueBinds(
+          DxvkSparseImageOpaqueBindArrays&  opaque) {
+    // The map is ordered by image, then offset, so that adjacent
+    // ranges of the same image are visited consecutively
+    std::vector<std::pair<VkImage, VkSparseMemoryBind>> merged;
+    merged.reserve(m_imageOpaqueBinds.size());
+
+    for (const auto& [key, page] : m_imageOpaqueBinds) {
+      // Translate key and page handle into a Vulkan bind
+      VkSparseMemoryBind range = { };
+      range.resourceOffset = key.offset;
+      range.size           = key.size;
+      range.memory         = page.memory;
+      range.memoryOffset   = page.offset;
+      range.flags          = key.flags;
+
+      // Extend the previous range instead if both belong to the same image
+      if (!merged.empty() && merged.back().first == key.image
+       && tryMergeMemoryBind(merged.back().second, range))
+        continue;
+
+      merged.emplace_back(key.image, range);
+    }
+
+    // Group the resulting ranges by image for submission
+    populateOutputArrays(opaque.binds, opaque.infos, merged);
+  }
+
+
+  template<typename HandleType, typename BindType, typename InfoType>
+  void DxvkSparseBindSubmission::populateOutputArrays(
+          std::vector<BindType>&            binds,
+          std::vector<InfoType>&            infos,
+    const std::vector<std::pair<HandleType, BindType>>& input) {
+    // Size the bind array up front so that it never reallocates and the
+    // pointers stored in the info structs stay valid while we iterate
+    binds.resize(input.size());
+
+    for (size_t i = 0; i < input.size(); i++) {
+      binds[i] = input[i].second;
+
+      // Compare against the previous input entry rather than a null-handle
+      // sentinel, so that the first entry always starts a new info struct
+      // and infos.back() is never evaluated on an empty array
+      const bool startsNewInfo = !i || input[i].first != input[i - 1].first;
+      if (startsNewInfo) {
+        // First bind of a resource, info points at its slot in the bind array
+        infos.push_back({ input[i].first, 1u, &binds[i] });
+      } else {
+        // Same resource as the previous entry, extend the current run
+        infos.back().bindCount += 1;
+      }
+    }
+  }
+
+
+  void DxvkSparseBindSubmission::logSparseBindingInfo(
+          LogLevel                          level,
+    const VkBindSparseInfo*                 info) {
+    std::stringstream str;
+    str << "VkBindSparseInfo:" << std::endl;
+    // pNext is only dereferenced below if wait or signal semaphores are present
+    auto timelineInfo = static_cast<const VkTimelineSemaphoreSubmitInfo*>(info->pNext);
+
+    if (info->waitSemaphoreCount) {
+      str << "  Wait semaphores (" << std::dec << info->waitSemaphoreCount << "):" << std::endl;
+
+      for (uint32_t i = 0; i < info->waitSemaphoreCount; i++)
+        str << "    " << info->pWaitSemaphores[i] << " (" << timelineInfo->pWaitSemaphoreValues[i] << ")" << std::endl;
+    }
+
+    if (info->bufferBindCount) {
+      str << "  Buffer binds (" << std::dec << info->bufferBindCount << "):" << std::endl;
+
+      for (uint32_t i = 0; i < info->bufferBindCount; i++) {
+        const auto* bindInfo = &info->pBufferBinds[i];
+        str << "    VkBuffer " << bindInfo->buffer << " (" << bindInfo->bindCount << "):" << std::endl;
+
+        for (uint32_t j = 0; j < bindInfo->bindCount; j++) {
+          const auto* bind = &bindInfo->pBinds[j];
+          str << "        " << bind->resourceOffset << " -> " << bind->memory
+              << " (" << bind->memoryOffset << "," << bind->size << ")" << std::endl;
+        }
+      }
+    }
+
+    if (info->imageOpaqueBindCount) {
+      str << "  Opaque image binds (" << std::dec << info->imageOpaqueBindCount << "):" << std::endl;
+
+      for (uint32_t i = 0; i < info->imageOpaqueBindCount; i++) {
+        const auto* bindInfo = &info->pImageOpaqueBinds[i];
+        str << "    VkImage " << bindInfo->image << " (" << bindInfo->bindCount << "):" << std::endl;
+
+        for (uint32_t j = 0; j < bindInfo->bindCount; j++) {
+          const auto* bind = &bindInfo->pBinds[j];
+          str << "        " << bind->resourceOffset << " -> " << bind->memory
+              << " (" << bind->memoryOffset << "," << bind->size << ")" << std::endl;
+        }
+      }
+    }
+
+    if (info->imageBindCount) {
+      str << "  Image binds (" << std::dec << info->imageBindCount << "):" << std::endl;
+
+      for (uint32_t i = 0; i < info->imageBindCount; i++) {
+        const auto* bindInfo = &info->pImageBinds[i];
+        str << "    VkImage " << bindInfo->image << " (" << bindInfo->bindCount << "):" << std::endl;
+
+        for (uint32_t j = 0; j < bindInfo->bindCount; j++) {
+          const auto* bind = &bindInfo->pBinds[j];
+          // Subresource goes on its own line, followed by region and memory
+          str << "        Aspect 0x" << std::hex << bind->subresource.aspectMask
+              << ", Mip " << std::dec << bind->subresource.mipLevel
+              << ", Layer " << bind->subresource.arrayLayer
+              << ":" << std::endl;
+
+          str << "        " << bind->offset.x << "," << bind->offset.y << "," << bind->offset.z << ":"
+              << bind->extent.width << "x" << bind->extent.height << "x" << bind->extent.depth
+              << " -> " << bind->memory << " (" << bind->memoryOffset << ")" << std::endl;
+        }
+      }
+    }
+
+    if (info->signalSemaphoreCount) {
+      str << "  Signal semaphores (" << std::dec << info->signalSemaphoreCount << "):" << std::endl;
+
+      for (uint32_t i = 0; i < info->signalSemaphoreCount; i++)
+        str << "    " << info->pSignalSemaphores[i] << " (" << timelineInfo->pSignalSemaphoreValues[i] << ")" << std::endl;
+    }
+
+    Logger::log(level, str.str());
+  }
+
 }
diff --git a/src/dxvk/dxvk_sparse.h b/src/dxvk/dxvk_sparse.h
index f68aeb06a..158a2b5a7 100644
--- a/src/dxvk/dxvk_sparse.h
+++ b/src/dxvk/dxvk_sparse.h
@@ -1,5 +1,7 @@
 #pragma once
 
+#include <map>
+
 #include "dxvk_memory.h"
 #include "dxvk_resource.h"
 
@@ -492,4 +494,264 @@ namespace dxvk {
 
   };
 
-}
\ No newline at end of file
+
+  /**
+   * \brief Key for sparse buffer binding entry
+   *
+   * Keys are ordered by buffer first, then by offset within
+   * the buffer, then by size. Iterating in that order makes
+   * it easy to merge adjacent ranges of the same buffer.
+   */
+  struct DxvkSparseBufferBindKey {
+    VkBuffer                    buffer;
+    VkDeviceSize                offset;
+    VkDeviceSize                size;
+
+    bool operator < (const DxvkSparseBufferBindKey& other) const {
+      if (buffer != other.buffer)
+        return buffer < other.buffer;
+
+      if (offset != other.offset)
+        return offset < other.offset;
+
+      return size < other.size;
+    }
+  };
+
+
+  /**
+   * \brief Key for sparse image binding entry
+   *
+   * Keys are ordered by image, then subresource, then offset
+   * (z -> y -> x), and lastly by extent (d -> h -> w). Iterating
+   * in that order makes it easy to merge adjacent regions.
+   */
+  struct DxvkSparseImageBindKey {
+    VkImage                     image;
+    VkImageSubresource          subresource;
+    VkOffset3D                  offset;
+    VkExtent3D                  extent;
+
+    bool operator < (const DxvkSparseImageBindKey& other) const {
+      if (image != other.image)
+        return image < other.image;
+
+      // Remaining fields are compared through their packed 64-bit encodings
+      const uint64_t subresourceA = encodeSubresource();
+      const uint64_t subresourceB = other.encodeSubresource();
+
+      if (subresourceA != subresourceB)
+        return subresourceA < subresourceB;
+
+      const uint64_t offsetA = encodeOffset();
+      const uint64_t offsetB = other.encodeOffset();
+
+      if (offsetA != offsetB)
+        return offsetA < offsetB;
+
+      // Everything else is equal, so the extent
+      // acts as the final tie-breaker
+      return encodeExtent() < other.encodeExtent();
+    }
+    // Packs aspect mask (bit 48), array layer (bit 24) and mip level (bit 0)
+    uint64_t encodeSubresource() const {
+      return (uint64_t(subresource.aspectMask) << 48)
+           | (uint64_t(subresource.arrayLayer) << 24)
+           | (uint64_t(subresource.mipLevel));
+    }
+    // Packs z (bit 48), y (bit 24) and x (bit 0) of the offset
+    uint64_t encodeOffset() const {
+      return (uint64_t(offset.z) << 48)
+           | (uint64_t(offset.y) << 24)
+           | (uint64_t(offset.x));
+    }
+    // Packs depth (bit 48), height (bit 24) and width (bit 0) of the extent
+    uint64_t encodeExtent() const {
+      return (uint64_t(extent.depth) << 48)
+           | (uint64_t(extent.height) << 24)
+           | (uint64_t(extent.width));
+    }
+  };
+
+
+  /**
+   * \brief Key for sparse opaque image binding entry
+   *
+   * Ordered by image, then offset, then size, so that adjacent
+   * ranges of an image can be merged. Note that \c flags is not
+   * compared: keys that differ only in flags are equivalent.
+   */
+  struct DxvkSparseImageOpaqueBindKey {
+    VkImage                     image;
+    VkDeviceSize                offset;
+    VkDeviceSize                size;
+    VkSparseMemoryBindFlags     flags;
+
+    bool operator < (const DxvkSparseImageOpaqueBindKey& other) const {
+      if (image != other.image)
+        return image < other.image;
+
+      if (offset != other.offset)
+        return offset < other.offset;
+
+      return size < other.size;
+    }
+  };
+
+
+  /**
+   * \brief Arrays required for buffer binds
+   */
+  struct DxvkSparseBufferBindArrays {
+    std::vector<VkSparseMemoryBind> binds;            ///< Bind ranges, referenced by pointer from infos
+    std::vector<VkSparseBufferMemoryBindInfo> infos;  ///< One entry per run of binds with the same buffer
+  };
+
+
+  /**
+   * \brief Arrays required for image binds
+   */
+  struct DxvkSparseImageBindArrays {
+    std::vector<VkSparseImageMemoryBind> binds;       ///< Bind regions, referenced by pointer from infos
+    std::vector<VkSparseImageMemoryBindInfo> infos;   ///< One entry per run of binds with the same image
+  };
+
+
+  /**
+   * \brief Arrays required for opaque image binds
+   */
+  struct DxvkSparseImageOpaqueBindArrays {
+    std::vector<VkSparseMemoryBind> binds;                  ///< Bind ranges, referenced by pointer from infos
+    std::vector<VkSparseImageOpaqueMemoryBindInfo> infos;   ///< One entry per run of binds with the same image
+  };
+
+
+  /**
+   * \brief Sparse bind submission
+   *
+   * Stores information for a single sparse binding operation,
+   * and supports submitting that operation to a device queue.
+   *
+   * All methods to add bindings assume that the binding range is
+   * either identical to an existing range, in which case the old
+   * binding will be overwritten, or otherwise, that the range is
+   * disjoint from all existing ranges. Overlapping ranges are not
+   * supported. This condition is trivial to maintain when binding
+   * only one sparse page at a time.
+   */
+  class DxvkSparseBindSubmission {
+
+  public:
+
+    DxvkSparseBindSubmission();
+
+    ~DxvkSparseBindSubmission();
+
+    /**
+     * \brief Waits for a semaphore
+     *
+     * \param [in] semaphore Semaphore to wait for
+     * \param [in] value Semaphore value to wait on
+     */
+    void waitSemaphore(
+            VkSemaphore             semaphore,
+            uint64_t                value);
+
+    /**
+     * \brief Signals a semaphore
+     *
+     * \param [in] semaphore Semaphore to signal
+     * \param [in] value Value to signal semaphore to
+     */
+    void signalSemaphore(
+            VkSemaphore             semaphore,
+            uint64_t                value);
+
+    /**
+     * \brief Adds a buffer memory bind
+     *
+     * \param [in] key Buffer range key
+     * \param [in] memory Page handle
+     */
+    void bindBufferMemory(
+      const DxvkSparseBufferBindKey& key,
+      const DxvkSparsePageHandle&   memory);
+
+    /**
+     * \brief Adds an image memory bind
+     *
+     * \param [in] key Image region key
+     * \param [in] memory Page handle
+     */
+    void bindImageMemory(
+      const DxvkSparseImageBindKey& key,
+      const DxvkSparsePageHandle&   memory);
+
+    /**
+     * \brief Adds an opaque image memory bind
+     *
+     * \param [in] key Opaque region key
+     * \param [in] memory Page handle
+     */
+    void bindImageOpaqueMemory(
+      const DxvkSparseImageOpaqueBindKey& key,
+      const DxvkSparsePageHandle&   memory);
+
+    /**
+     * \brief Submits sparse binding operation
+     *
+     * Generates structures required for the sparse bind, resolving
+     * any conflicts in the process and merging ranges where possible.
+     * Note that this operation is slow. Resets object after the call.
+     * \param [in] device DXVK device
+     * \param [in] queue Queue to perform the operation on
+     * \returns Return value of the sparse bind operation
+     */
+    VkResult submit(
+            DxvkDevice*             device,
+            VkQueue                 queue);
+
+    /**
+     * \brief Resets object
+     *
+     * Clears all internal structures.
+     */
+    void reset();
+
+  private:
+    // Semaphore handles and timeline values are stored as index-matched arrays
+    std::vector<uint64_t>     m_waitSemaphoreValues;
+    std::vector<VkSemaphore>  m_waitSemaphores;
+    std::vector<uint64_t>     m_signalSemaphoreValues;
+    std::vector<VkSemaphore>  m_signalSemaphores;
+    // Recorded binds, kept sorted by key so that adjacent ranges are neighbours
+    std::map<DxvkSparseBufferBindKey,      DxvkSparsePageHandle> m_bufferBinds;
+    std::map<DxvkSparseImageBindKey,       DxvkSparsePageHandle> m_imageBinds;
+    std::map<DxvkSparseImageOpaqueBindKey, DxvkSparsePageHandle> m_imageOpaqueBinds;
+    // Appends newBind to oldBind if memory, flags and ranges line up; returns true on success
+    static bool tryMergeMemoryBind(
+            VkSparseMemoryBind&               oldBind,
+      const VkSparseMemoryBind&               newBind);
+    // Builds merged Vulkan bind arrays from the recorded buffer binds
+    void processBufferBinds(
+            DxvkSparseBufferBindArrays&       buffer);
+    // Builds Vulkan bind arrays from the recorded image binds (no merging)
+    void processImageBinds(
+            DxvkSparseImageBindArrays&        image);
+    // Builds merged Vulkan bind arrays from the recorded opaque image binds
+    void processOpaqueBinds(
+            DxvkSparseImageOpaqueBindArrays&  opaque);
+    // Copies binds to the output and emits one info struct per run of equal handles
+    template<typename HandleType, typename BindType, typename InfoType>
+    void populateOutputArrays(
+            std::vector<BindType>&            binds,
+            std::vector<InfoType>&            infos,
+      const std::vector<std::pair<HandleType, BindType>>& input);
+    // Writes a human-readable dump of the given bind info to the log
+    void logSparseBindingInfo(
+            LogLevel                          level,
+      const VkBindSparseInfo*                 info);
+
+  };
+
+}