[dxvk,d3d11] Refactor CS command data allocation

Allows us to allocate a (potentially growing) array of
arbitrary data structures for a CS command.
This commit is contained in:
Philip Rebohle 2025-02-21 13:48:03 +01:00 committed by Philip Rebohle
parent 20dc389ab7
commit fc3d3ae331
4 changed files with 169 additions and 96 deletions

View file

@ -10,23 +10,13 @@ namespace dxvk {
* Used to identify the type of command * Used to identify the type of command
* data most recently added to a CS chunk. * data most recently added to a CS chunk.
*/ */
enum class D3D11CmdType { enum class D3D11CmdType : uint32_t {
None,
DrawIndirect, DrawIndirect,
DrawIndirectIndexed, DrawIndirectIndexed,
}; };
/**
* \brief Command data header
*
* Stores the command type. All command
* data structs must inherit this struct.
*/
struct D3D11CmdData {
D3D11CmdType type;
};
/** /**
* \brief Indirect draw command data * \brief Indirect draw command data
* *
@ -34,7 +24,7 @@ namespace dxvk {
* the first draw, as well as the number of * the first draw, as well as the number of
* draws to execute. * draws to execute.
*/ */
struct D3D11CmdDrawIndirectData : public D3D11CmdData { struct D3D11CmdDrawIndirectData {
uint32_t offset; uint32_t offset;
uint32_t count; uint32_t count;
uint32_t stride; uint32_t stride;

View file

@ -19,8 +19,7 @@ namespace dxvk {
m_flags (ContextFlags), m_flags (ContextFlags),
m_staging (Device, StagingBufferSize), m_staging (Device, StagingBufferSize),
m_csFlags (CsFlags), m_csFlags (CsFlags),
m_csChunk (AllocCsChunk()), m_csChunk (AllocCsChunk()) {
m_cmdData (nullptr) {
// Create local allocation cache with the same properties // Create local allocation cache with the same properties
// that we will use for common dynamic buffer types // that we will use for common dynamic buffer types
uint32_t cachedDynamic = pParent->GetOptions()->cachedDynamicResources; uint32_t cachedDynamic = pParent->GetOptions()->cachedDynamicResources;
@ -1125,28 +1124,28 @@ namespace dxvk {
if (unlikely(HasDirtyGraphicsBindings())) if (unlikely(HasDirtyGraphicsBindings()))
ApplyDirtyGraphicsBindings(); ApplyDirtyGraphicsBindings();
// If possible, batch up multiple indirect draw calls of // If possible, batch multiple indirect draw calls into one single multidraw call
// the same type into one single multiDrawIndirect call if (m_csDataType == D3D11CmdType::DrawIndirectIndexed) {
auto cmdData = static_cast<D3D11CmdDrawIndirectData*>(m_cmdData); auto cmdData = static_cast<D3D11CmdDrawIndirectData*>(m_csData->first());
auto stride = 0u; auto stride = GetIndirectCommandStride(cmdData, AlignedByteOffsetForArgs, sizeof(VkDrawIndexedIndirectCommand));
if (cmdData && cmdData->type == D3D11CmdType::DrawIndirectIndexed) if (stride) {
stride = GetIndirectCommandStride(cmdData, AlignedByteOffsetForArgs, sizeof(VkDrawIndexedIndirectCommand)); cmdData->count += 1;
cmdData->stride = stride;
if (stride) { return;
cmdData->count += 1; }
cmdData->stride = stride;
} else {
cmdData = EmitCsCmd<D3D11CmdDrawIndirectData>(
[] (DxvkContext* ctx, const D3D11CmdDrawIndirectData* data) {
ctx->drawIndexedIndirect(data->offset, data->count, data->stride, true);
});
cmdData->type = D3D11CmdType::DrawIndirectIndexed;
cmdData->offset = AlignedByteOffsetForArgs;
cmdData->count = 1;
cmdData->stride = 0;
} }
// Need to start a new draw sequence
EmitCsCmd<D3D11CmdDrawIndirectData>(D3D11CmdType::DrawIndirectIndexed, 1u,
[] (DxvkContext* ctx, const D3D11CmdDrawIndirectData* data, size_t) {
ctx->drawIndexedIndirect(data->offset, data->count, data->stride, true);
});
auto cmdData = new (m_csData->first()) D3D11CmdDrawIndirectData();
cmdData->offset = AlignedByteOffsetForArgs;
cmdData->count = 1;
cmdData->stride = 0;
} }
@ -1163,28 +1162,28 @@ namespace dxvk {
if (unlikely(HasDirtyGraphicsBindings())) if (unlikely(HasDirtyGraphicsBindings()))
ApplyDirtyGraphicsBindings(); ApplyDirtyGraphicsBindings();
// If possible, batch up multiple indirect draw calls of // If possible, batch multiple indirect draw calls into one single multidraw call
// the same type into one single multiDrawIndirect call if (m_csDataType == D3D11CmdType::DrawIndirect) {
auto cmdData = static_cast<D3D11CmdDrawIndirectData*>(m_cmdData); auto cmdData = static_cast<D3D11CmdDrawIndirectData*>(m_csData->first());
auto stride = 0u; auto stride = GetIndirectCommandStride(cmdData, AlignedByteOffsetForArgs, sizeof(VkDrawIndirectCommand));
if (cmdData && cmdData->type == D3D11CmdType::DrawIndirect) if (stride) {
stride = GetIndirectCommandStride(cmdData, AlignedByteOffsetForArgs, sizeof(VkDrawIndirectCommand)); cmdData->count += 1;
cmdData->stride = stride;
if (stride) { return;
cmdData->count += 1; }
cmdData->stride = stride;
} else {
cmdData = EmitCsCmd<D3D11CmdDrawIndirectData>(
[] (DxvkContext* ctx, const D3D11CmdDrawIndirectData* data) {
ctx->drawIndirect(data->offset, data->count, data->stride, true);
});
cmdData->type = D3D11CmdType::DrawIndirect;
cmdData->offset = AlignedByteOffsetForArgs;
cmdData->count = 1;
cmdData->stride = 0;
} }
// Need to start a new draw sequence
EmitCsCmd<D3D11CmdDrawIndirectData>(D3D11CmdType::DrawIndirect, 1u,
[] (DxvkContext* ctx, const D3D11CmdDrawIndirectData* data, size_t) {
ctx->drawIndirect(data->offset, data->count, data->stride, true);
});
auto cmdData = new (m_csData->first()) D3D11CmdDrawIndirectData();
cmdData->offset = AlignedByteOffsetForArgs;
cmdData->count = 1;
cmdData->stride = 0;
} }

View file

@ -793,9 +793,11 @@ namespace dxvk {
DxvkStagingBuffer m_staging; DxvkStagingBuffer m_staging;
D3D11CmdType m_csDataType = D3D11CmdType::None;
DxvkCsChunkFlags m_csFlags; DxvkCsChunkFlags m_csFlags;
DxvkCsChunkRef m_csChunk; DxvkCsChunkRef m_csChunk;
D3D11CmdData* m_cmdData; DxvkCsDataBlock* m_csData = nullptr;
DxvkLocalAllocationCache m_allocationCache; DxvkLocalAllocationCache m_allocationCache;
@ -1152,7 +1154,10 @@ namespace dxvk {
template<bool AllowFlush = true, typename Cmd> template<bool AllowFlush = true, typename Cmd>
void EmitCs(Cmd&& command) { void EmitCs(Cmd&& command) {
m_cmdData = nullptr; if (unlikely(m_csDataType != D3D11CmdType::None)) {
m_csData = nullptr;
m_csDataType = D3D11CmdType::None;
}
if (unlikely(!m_csChunk->push(command))) { if (unlikely(!m_csChunk->push(command))) {
GetTypedContext()->EmitCsChunk(std::move(m_csChunk)); GetTypedContext()->EmitCsChunk(std::move(m_csChunk));
@ -1165,12 +1170,12 @@ namespace dxvk {
} }
} }
template<typename M, bool AllowFlush = true, typename Cmd, typename... Args> template<typename M, bool AllowFlush = true, typename Cmd>
M* EmitCsCmd(Cmd&& command, Args&&... args) { void EmitCsCmd(D3D11CmdType type, size_t count, Cmd&& command) {
M* data = m_csChunk->pushCmd<M, Cmd, Args...>( m_csDataType = type;
command, std::forward<Args>(args)...); m_csData = m_csChunk->pushCmd<M, Cmd>(command, count);
if (unlikely(!data)) { if (unlikely(!m_csData)) {
GetTypedContext()->EmitCsChunk(std::move(m_csChunk)); GetTypedContext()->EmitCsChunk(std::move(m_csChunk));
m_csChunk = AllocCsChunk(); m_csChunk = AllocCsChunk();
@ -1179,19 +1184,17 @@ namespace dxvk {
// We must record this command after the potential // We must record this command after the potential
// flush since the caller may still access the data // flush since the caller may still access the data
data = m_csChunk->pushCmd<M, Cmd, Args...>( m_csData = m_csChunk->pushCmd<M, Cmd>(command, count);
command, std::forward<Args>(args)...);
} }
m_cmdData = data;
return data;
} }
void FlushCsChunk() { void FlushCsChunk() {
if (likely(!m_csChunk->empty())) { if (likely(!m_csChunk->empty())) {
m_csData = nullptr;
m_csDataType = D3D11CmdType::None;
GetTypedContext()->EmitCsChunk(std::move(m_csChunk)); GetTypedContext()->EmitCsChunk(std::move(m_csChunk));
m_csChunk = AllocCsChunk(); m_csChunk = AllocCsChunk();
m_cmdData = nullptr;
} }
} }

View file

@ -12,6 +12,8 @@
namespace dxvk { namespace dxvk {
constexpr static size_t DxvkCsChunkSize = 16384;
/** /**
* \brief Command stream operation * \brief Command stream operation
* *
@ -86,6 +88,41 @@ namespace dxvk {
}; };
/**
* \brief Command data block
*
* Bookkeeping header for a potentially growing array of
* structures that a command traverses. The array location
* is stored as a byte offset relative to this object's own
* address rather than as a pointer.
*/
class DxvkCsDataBlock {
  friend class DxvkCsChunk;
public:

  /**
  * \brief Queries structure count
  * \returns Number of structures currently allocated
  */
  size_t count() const {
    return size_t(m_structCount);
  }

  /**
  * \brief Queries start of the structure array
  *
  * The address is computed by adding the stored
  * byte offset to the address of this block.
  * \returns Untyped pointer to first structure
  */
  void* first() {
    auto base = reinterpret_cast<char*>(this);
    return base + m_dataOffset;
  }

private:

  // Byte offset from this object to the first structure
  uint32_t m_dataOffset = 0u;
  // Size of a single structure, in bytes
  uint16_t m_structSize = 0u;
  // Number of structures in the array
  uint16_t m_structCount = 0u;

};
/** /**
* \brief Typed command with metadata * \brief Typed command with metadata
* *
@ -98,26 +135,33 @@ namespace dxvk {
public: public:
template<typename... Args> DxvkCsDataCmd(T&& cmd)
DxvkCsDataCmd(T&& cmd, Args&&... args) : m_command(std::move(cmd)) { }
: m_command (std::move(cmd)),
m_data (std::forward<Args>(args)...) { } ~DxvkCsDataCmd() {
auto data = reinterpret_cast<M*>(m_data.first());
for (size_t i = 0; i < m_data.count(); i++)
data[i].~M();
}
DxvkCsDataCmd (DxvkCsDataCmd&&) = delete; DxvkCsDataCmd (DxvkCsDataCmd&&) = delete;
DxvkCsDataCmd& operator = (DxvkCsDataCmd&&) = delete; DxvkCsDataCmd& operator = (DxvkCsDataCmd&&) = delete;
void exec(DxvkContext* ctx) { void exec(DxvkContext* ctx) {
m_command(ctx, &m_data); // No const here so that the function can move objects efficiently
m_command(ctx, reinterpret_cast<M*>(m_data.first()), m_data.count());
} }
M* data() { DxvkCsDataBlock* data() {
return &m_data; return &m_data;
} }
private: private:
T m_command; alignas(M)
M m_data; T m_command;
DxvkCsDataBlock m_data;
}; };
@ -140,7 +184,7 @@ namespace dxvk {
* Stores a list of commands. * Stores a list of commands.
*/ */
class DxvkCsChunk : public RcObject { class DxvkCsChunk : public RcObject {
constexpr static size_t MaxBlockSize = 16384;
public: public:
DxvkCsChunk(); DxvkCsChunk();
@ -167,7 +211,7 @@ namespace dxvk {
template<typename T> template<typename T>
bool push(T& command) { bool push(T& command) {
using FuncType = DxvkCsTypedCmd<T>; using FuncType = DxvkCsTypedCmd<T>;
void* ptr = alloc<FuncType>(); void* ptr = alloc<FuncType>(0u);
if (unlikely(!ptr)) if (unlikely(!ptr))
return false; return false;
@ -186,21 +230,58 @@ namespace dxvk {
* \brief Adds a command with data to the chunk * \brief Adds a command with data to the chunk
* *
* \param [in] command The command to add * \param [in] command The command to add
* \param [in] args Constructor args for the data object * \param [in] count Number of items to allocate. Should be at least
* 1 in order to avoid the possibility of an empty command. Note
* that all allocated structures \e must be initialized before
* handing off the command to the worker thread.
* \returns Pointer to the data object, or \c nullptr * \returns Pointer to the data object, or \c nullptr
*/ */
template<typename M, typename T, typename... Args> template<typename M, typename T>
M* pushCmd(T& command, Args&&... args) { DxvkCsDataBlock* pushCmd(T& command, size_t count) {
size_t dataSize = count * sizeof(M);
// DxvkCsDataCmd is aligned to M
using FuncType = DxvkCsDataCmd<T, M>; using FuncType = DxvkCsDataCmd<T, M>;
void* ptr = alloc<FuncType>(); void* ptr = alloc<FuncType>(dataSize);
if (unlikely(!ptr)) if (unlikely(!ptr))
return nullptr; return nullptr;
auto next = new (ptr) FuncType(std::move(command), std::forward<Args>(args)...); // Command data is always packed tightly after the function object
auto next = new (ptr) FuncType(std::move(command));
append(next); append(next);
return next->data(); // Do some cursed pointer math here so that the block can figure out
// where its data is stored based on its own address. This saves a
// decent amount of CS chunk memory compared to storing a pointer.
auto block = next->data();
block->m_dataOffset = reinterpret_cast<uintptr_t>(&m_data[m_commandOffset - dataSize])
- reinterpret_cast<uintptr_t>(block);
block->m_structSize = sizeof(M);
block->m_structCount = count;
return block;
}
/**
* \brief Allocates more storage for a data block
*
* The data block \e must be owned by the last command added to
* the CS chunk, or this may overwrite subsequent command data.
* Fails without modifying the chunk or the block if the chunk
* does not have enough space left, or if the resulting structure
* count would not fit into the block's 16-bit counter.
* \param [in] block Data block
* \param [in] count Number of structures to allocate
* \returns Pointer to first allocated structure, or \c nullptr
*/
void* pushData(DxvkCsDataBlock* block, uint32_t count) {
  // Do the size math in 64 bits. The product of the 16-bit struct
  // size and a 32-bit count can wrap around in 32-bit arithmetic,
  // which would let an oversized request pass the bounds check.
  const uint64_t dataSize = uint64_t(block->m_structSize) * uint64_t(count);
  const uint64_t newCount = uint64_t(block->m_structCount) + uint64_t(count);

  if (unlikely(uint64_t(m_commandOffset) + dataSize > DxvkCsChunkSize))
    return nullptr;

  // m_structCount is a uint16_t, refuse to silently truncate it
  if (unlikely(newCount > 0xffffu))
    return nullptr;

  // New structures are placed directly at the current write offset
  void* ptr = &m_data[m_commandOffset];

  // dataSize is known to be no larger than DxvkCsChunkSize here
  m_commandOffset += uint32_t(dataSize);
  block->m_structCount = uint16_t(newCount);
  return ptr;
}
/** /**
@ -237,18 +318,18 @@ namespace dxvk {
DxvkCsChunkFlags m_flags; DxvkCsChunkFlags m_flags;
alignas(64) alignas(64)
char m_data[MaxBlockSize]; char m_data[DxvkCsChunkSize];
template<typename T> template<typename T>
void* alloc() { void* alloc(size_t extra) {
if (alignof(T) > alignof(DxvkCsCmd)) if (alignof(T) > alignof(DxvkCsCmd))
m_commandOffset = dxvk::align(m_commandOffset, alignof(T)); m_commandOffset = dxvk::align(m_commandOffset, alignof(T));
if (unlikely(m_commandOffset + sizeof(T) > MaxBlockSize)) if (unlikely(m_commandOffset + sizeof(T) + extra > DxvkCsChunkSize))
return nullptr; return nullptr;
void* result = &m_data[m_commandOffset]; void* result = &m_data[m_commandOffset];
m_commandOffset += sizeof(T); m_commandOffset += sizeof(T) + extra;
return result; return result;
} }