1
0
Fork 0
mirror of https://github.com/melonDS-emu/melonDS.git synced 2025-03-06 21:00:31 +01:00
This commit is contained in:
FireNX70 2025-02-21 11:53:02 +01:00 committed by GitHub
commit 81ccd8c17f
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
4 changed files with 56 additions and 13 deletions

View file

@ -19,6 +19,9 @@
#include "GPU3D_Compute.h" #include "GPU3D_Compute.h"
#include <assert.h> #include <assert.h>
#include <algorithm>
#include "Utils.h"
#include "OpenGLSupport.h" #include "OpenGLSupport.h"
@ -50,6 +53,14 @@ bool ComputeRenderer::CompileShader(GLuint& shader, const std::string& source, c
shaderSource += std::to_string(ScreenHeight); shaderSource += std::to_string(ScreenHeight);
shaderSource += "\n#define MaxWorkTiles "; shaderSource += "\n#define MaxWorkTiles ";
shaderSource += std::to_string(MaxWorkTiles); shaderSource += std::to_string(MaxWorkTiles);
shaderSource += "\n#define TileSize ";
shaderSource += std::to_string(TileSize);
shaderSource += "\nconst int CoarseTileCountY = ";
shaderSource += std::to_string(CoarseTileCountY) + ";";
shaderSource += "\n#define CoarseTileArea ";
shaderSource += std::to_string(CoarseTileArea);
shaderSource += "\n#define ClearCoarseBinMaskLocalSize ";
shaderSource += std::to_string(ClearCoarseBinMaskLocalSize);
shaderSource += ComputeRendererShaders::Common; shaderSource += ComputeRendererShaders::Common;
shaderSource += source; shaderSource += source;
@ -297,6 +308,8 @@ void ComputeRenderer::Reset(GPU& gpu)
void ComputeRenderer::SetRenderSettings(int scale, bool highResolutionCoordinates) void ComputeRenderer::SetRenderSettings(int scale, bool highResolutionCoordinates)
{ {
u8 TileScale;
CurGLCompositor.SetScaleFactor(scale); CurGLCompositor.SetScaleFactor(scale);
if (ScaleFactor != -1) if (ScaleFactor != -1)
@ -310,6 +323,22 @@ void ComputeRenderer::SetRenderSettings(int scale, bool highResolutionCoordinate
ScreenWidth = 256 * ScaleFactor; ScreenWidth = 256 * ScaleFactor;
ScreenHeight = 192 * ScaleFactor; ScreenHeight = 192 * ScaleFactor;
//Starting at 4.5x we want to double TileSize every time scale doubles
TileScale = 2 * ScaleFactor / 9;
TileScale = GetMSBit(TileScale);
TileScale <<= 1;
TileScale += TileScale == 0;
std::printf("Scale: %d\n", ScaleFactor);
std::printf("TileScale: %d\n", TileScale);
TileSize = std::min(8 * TileScale, 32);
CoarseTileCountY = TileSize < 32 ? 4 : 6;
ClearCoarseBinMaskLocalSize = TileSize < 32 ? 64 : 48;
CoarseTileArea = CoarseTileCountX * CoarseTileCountY;
CoarseTileW = CoarseTileCountX * TileSize;
CoarseTileH = CoarseTileCountY * TileSize;
TilesPerLine = ScreenWidth/TileSize; TilesPerLine = ScreenWidth/TileSize;
TileLines = ScreenHeight/TileSize; TileLines = ScreenHeight/TileSize;
@ -918,7 +947,7 @@ void ComputeRenderer::RenderFrame(GPU& gpu)
glBindBufferBase(GL_UNIFORM_BUFFER, 0, MetaUniformMemory); glBindBufferBase(GL_UNIFORM_BUFFER, 0, MetaUniformMemory);
glUseProgram(ShaderClearCoarseBinMask); glUseProgram(ShaderClearCoarseBinMask);
glDispatchCompute(TilesPerLine*TileLines/32, 1, 1); glDispatchCompute(TilesPerLine*TileLines/ClearCoarseBinMaskLocalSize, 1, 1);
bool wbuffer = false; bool wbuffer = false;
if (numYSpans > 0) if (numYSpans > 0)
@ -932,23 +961,23 @@ void ComputeRenderer::RenderFrame(GPU& gpu)
glBindImageTexture(0, YSpanIndicesTexture, 0, GL_FALSE, 0, GL_READ_ONLY, GL_RGBA16UI); glBindImageTexture(0, YSpanIndicesTexture, 0, GL_FALSE, 0, GL_READ_ONLY, GL_RGBA16UI);
glUseProgram(ShaderInterpXSpans[wbuffer]); glUseProgram(ShaderInterpXSpans[wbuffer]);
glDispatchCompute((numSetupIndices + 31) / 32, 1, 1); glDispatchCompute((numSetupIndices + 31) / 32, 1, 1);
glMemoryBarrier(GL_SHADER_STORAGE_BUFFER); glMemoryBarrier(GL_SHADER_STORAGE_BARRIER_BIT);
// bin polygons // bin polygons
glUseProgram(ShaderBinCombined); glUseProgram(ShaderBinCombined);
glDispatchCompute(((gpu.GPU3D.RenderNumPolygons + 31) / 32), ScreenWidth/CoarseTileW, ScreenHeight/CoarseTileH); glDispatchCompute(((gpu.GPU3D.RenderNumPolygons + 31) / 32), ScreenWidth/CoarseTileW, ScreenHeight/CoarseTileH);
glMemoryBarrier(GL_SHADER_STORAGE_BUFFER); glMemoryBarrier(GL_SHADER_STORAGE_BARRIER_BIT);
// calculate list offsets // calculate list offsets
glUseProgram(ShaderCalculateWorkListOffset); glUseProgram(ShaderCalculateWorkListOffset);
glDispatchCompute((numVariants + 31) / 32, 1, 1); glDispatchCompute((numVariants + 31) / 32, 1, 1);
glMemoryBarrier(GL_SHADER_STORAGE_BUFFER); glMemoryBarrier(GL_SHADER_STORAGE_BARRIER_BIT);
// sort shader work // sort shader work
glUseProgram(ShaderSortWork); glUseProgram(ShaderSortWork);
glBindBuffer(GL_DISPATCH_INDIRECT_BUFFER, BinResultMemory); glBindBuffer(GL_DISPATCH_INDIRECT_BUFFER, BinResultMemory);
glDispatchComputeIndirect(offsetof(BinResultHeader, SortWorkWorkCount)); glDispatchComputeIndirect(offsetof(BinResultHeader, SortWorkWorkCount));
glMemoryBarrier(GL_SHADER_STORAGE_BUFFER); glMemoryBarrier(GL_SHADER_STORAGE_BARRIER_BIT);
glActiveTexture(GL_TEXTURE0); glActiveTexture(GL_TEXTURE0);

View file

@ -163,11 +163,13 @@ private:
float TextureLayer; float TextureLayer;
}; };
static constexpr int TileSize = 8; int TileSize;
static constexpr int CoarseTileCountX = 8; static constexpr int CoarseTileCountX = 8;
static constexpr int CoarseTileCountY = 4; int CoarseTileCountY;
static constexpr int CoarseTileW = CoarseTileCountX * TileSize; int CoarseTileArea;
static constexpr int CoarseTileH = CoarseTileCountY * TileSize; int CoarseTileW;
int CoarseTileH;
int ClearCoarseBinMaskLocalSize;
static constexpr int BinStride = 2048/32; static constexpr int BinStride = 2048/32;
static constexpr int CoarseBinStride = BinStride/32; static constexpr int CoarseBinStride = BinStride/32;

View file

@ -339,9 +339,7 @@ const uint ResultAttrStart = ResultDepthStart+ScreenWidth*ScreenHeight*2;
const char* Common = R"( const char* Common = R"(
#define TileSize 8
const int CoarseTileCountX = 8; const int CoarseTileCountX = 8;
const int CoarseTileCountY = 4;
const int CoarseTileW = (CoarseTileCountX * TileSize); const int CoarseTileW = (CoarseTileCountX * TileSize);
const int CoarseTileH = (CoarseTileCountY * TileSize); const int CoarseTileH = (CoarseTileCountY * TileSize);
@ -848,7 +846,7 @@ void main()
const std::string ClearCoarseBinMask = const std::string ClearCoarseBinMask =
BinningBuffer + R"( BinningBuffer + R"(
layout (local_size_x = 32) in; layout (local_size_x = ClearCoarseBinMaskLocalSize) in;
void main() void main()
{ {
@ -864,7 +862,7 @@ const std::string BinCombined =
XSpanSetupBuffer + XSpanSetupBuffer +
WorkDescBuffer + R"( WorkDescBuffer + R"(
layout (local_size_x = 32) in; layout (local_size_x = CoarseTileArea) in;
bool BinPolygon(Polygon polygon, ivec2 topLeft, ivec2 botRight) bool BinPolygon(Polygon polygon, ivec2 topLeft, ivec2 botRight)
{ {

View file

@ -38,6 +38,20 @@ std::pair<std::unique_ptr<u8[]>, u32> PadToPowerOf2(const u8* data, u32 len) noe
std::unique_ptr<u8[]> CopyToUnique(const u8* data, u32 len) noexcept; std::unique_ptr<u8[]> CopyToUnique(const u8* data, u32 len) noexcept;
// Returns a value in which only the most significant set bit of val is kept
// (e.g. 200 -> 128, 12 -> 8), or 0 when val is 0.
//
// The computation is done on an unsigned copy of val's bit pattern. Shifting a
// negative signed value right sign-extends, which would smear ones across the
// whole word and make the final subtraction yield 0 instead of the top bit.
// Integer types of up to 64 bits are supported; 8-bit bytes are assumed.
template <typename T>
T GetMSBit(T val)
{
    using Wide = unsigned long long;
    static_assert(sizeof(T) <= sizeof(Wide), "GetMSBit supports integer types of up to 64 bits");

    constexpr unsigned valBits = sizeof(T) * 8;
    constexpr unsigned wideBits = sizeof(Wide) * 8;

    // Widening a negative value fills the upper bits with ones; mask them off
    // so only the bits that actually belong to T take part.
    Wide bits = static_cast<Wide>(val) & (~Wide{0} >> (wideBits - valBits));

    // Smear the highest set bit into every position below it...
    for (unsigned shift = 1; shift < valBits; shift <<= 1)
        bits |= bits >> shift;

    // ...then strip everything underneath it, leaving just that one bit.
    return static_cast<T>(bits - (bits >> 1));
}
} }
#endif // MELONDS_UTILS_H #endif // MELONDS_UTILS_H