From 416f9c5a4abbd392dcca69b1aa1b4712b2df34ce Mon Sep 17 00:00:00 2001 From: Philip Rebohle Date: Sun, 2 Mar 2025 13:07:21 +0100 Subject: [PATCH] [d3d11] Embed UpdateBuffer data in CS chunk Tiny optimization that gets rid of a copy and also lets us use chunk memory more efficiently. --- src/d3d11/d3d11_context.cpp | 20 +++++++++++++------- 1 file changed, 13 insertions(+), 7 deletions(-) diff --git a/src/d3d11/d3d11_context.cpp b/src/d3d11/d3d11_context.cpp index 9ba60418d..cad645607 100644 --- a/src/d3d11/d3d11_context.cpp +++ b/src/d3d11/d3d11_context.cpp @@ -5507,19 +5507,25 @@ namespace dxvk { if (Length <= MaxDirectUpdateSize && !((Offset | Length) & 0x3)) { // The backend has special code paths for small buffer updates, // however both offset and size must be aligned to four bytes. - std::array<char, MaxDirectUpdateSize> data; - std::memcpy(data.data(), pSrcData, Length); + // Write the data directly to the CS chunk. + uint32_t dwordCount = Length / sizeof(uint32_t); - EmitCs([ - cBufferData = data, + EmitCsCmd<uint32_t>(D3D11CmdType::None, dwordCount, [ cBufferSlice = std::move(bufferSlice) - ] (DxvkContext* ctx) { + ] (DxvkContext* ctx, const uint32_t* data, size_t) { ctx->updateBuffer( cBufferSlice.buffer(), cBufferSlice.offset(), - cBufferSlice.length(), - cBufferData.data()); + cBufferSlice.length(), data); }); + + // Compiler should be able to vectorize here, but GCC only does + // if we cast the destination pointer to the correct type first + auto src = reinterpret_cast<const uint32_t*>(pSrcData); + auto dst = reinterpret_cast<uint32_t*>(m_csData->first()); + + for (uint32_t i = 0; i < dwordCount; i++) + new (dst + i) uint32_t(src[i]); } else { // Write directly to a staging buffer and dispatch a copy DxvkBufferSlice stagingSlice = AllocStagingBuffer(Length);