mirror of
https://github.com/melonDS-emu/melonDS.git
synced 2025-03-06 21:00:31 +01:00
Merge 6f8ce9fe89
into 63b468927e
This commit is contained in:
commit
81ccd8c17f
4 changed files with 56 additions and 13 deletions
|
@ -19,6 +19,9 @@
|
||||||
#include "GPU3D_Compute.h"
|
#include "GPU3D_Compute.h"
|
||||||
|
|
||||||
#include <assert.h>
|
#include <assert.h>
|
||||||
|
#include <algorithm>
|
||||||
|
|
||||||
|
#include "Utils.h"
|
||||||
|
|
||||||
#include "OpenGLSupport.h"
|
#include "OpenGLSupport.h"
|
||||||
|
|
||||||
|
@ -50,6 +53,14 @@ bool ComputeRenderer::CompileShader(GLuint& shader, const std::string& source, c
|
||||||
shaderSource += std::to_string(ScreenHeight);
|
shaderSource += std::to_string(ScreenHeight);
|
||||||
shaderSource += "\n#define MaxWorkTiles ";
|
shaderSource += "\n#define MaxWorkTiles ";
|
||||||
shaderSource += std::to_string(MaxWorkTiles);
|
shaderSource += std::to_string(MaxWorkTiles);
|
||||||
|
shaderSource += "\n#define TileSize ";
|
||||||
|
shaderSource += std::to_string(TileSize);
|
||||||
|
shaderSource += "\nconst int CoarseTileCountY = ";
|
||||||
|
shaderSource += std::to_string(CoarseTileCountY) + ";";
|
||||||
|
shaderSource += "\n#define CoarseTileArea ";
|
||||||
|
shaderSource += std::to_string(CoarseTileArea);
|
||||||
|
shaderSource += "\n#define ClearCoarseBinMaskLocalSize ";
|
||||||
|
shaderSource += std::to_string(ClearCoarseBinMaskLocalSize);
|
||||||
|
|
||||||
shaderSource += ComputeRendererShaders::Common;
|
shaderSource += ComputeRendererShaders::Common;
|
||||||
shaderSource += source;
|
shaderSource += source;
|
||||||
|
@ -297,6 +308,8 @@ void ComputeRenderer::Reset(GPU& gpu)
|
||||||
|
|
||||||
void ComputeRenderer::SetRenderSettings(int scale, bool highResolutionCoordinates)
|
void ComputeRenderer::SetRenderSettings(int scale, bool highResolutionCoordinates)
|
||||||
{
|
{
|
||||||
|
u8 TileScale;
|
||||||
|
|
||||||
CurGLCompositor.SetScaleFactor(scale);
|
CurGLCompositor.SetScaleFactor(scale);
|
||||||
|
|
||||||
if (ScaleFactor != -1)
|
if (ScaleFactor != -1)
|
||||||
|
@ -310,6 +323,22 @@ void ComputeRenderer::SetRenderSettings(int scale, bool highResolutionCoordinate
|
||||||
ScreenWidth = 256 * ScaleFactor;
|
ScreenWidth = 256 * ScaleFactor;
|
||||||
ScreenHeight = 192 * ScaleFactor;
|
ScreenHeight = 192 * ScaleFactor;
|
||||||
|
|
||||||
|
//Starting at 4.5x we want to double TileSize every time scale doubles
|
||||||
|
TileScale = 2 * ScaleFactor / 9;
|
||||||
|
TileScale = GetMSBit(TileScale);
|
||||||
|
TileScale <<= 1;
|
||||||
|
TileScale += TileScale == 0;
|
||||||
|
|
||||||
|
std::printf("Scale: %d\n", ScaleFactor);
|
||||||
|
std::printf("TileScale: %d\n", TileScale);
|
||||||
|
|
||||||
|
TileSize = std::min(8 * TileScale, 32);
|
||||||
|
CoarseTileCountY = TileSize < 32 ? 4 : 6;
|
||||||
|
ClearCoarseBinMaskLocalSize = TileSize < 32 ? 64 : 48;
|
||||||
|
CoarseTileArea = CoarseTileCountX * CoarseTileCountY;
|
||||||
|
CoarseTileW = CoarseTileCountX * TileSize;
|
||||||
|
CoarseTileH = CoarseTileCountY * TileSize;
|
||||||
|
|
||||||
TilesPerLine = ScreenWidth/TileSize;
|
TilesPerLine = ScreenWidth/TileSize;
|
||||||
TileLines = ScreenHeight/TileSize;
|
TileLines = ScreenHeight/TileSize;
|
||||||
|
|
||||||
|
@ -918,7 +947,7 @@ void ComputeRenderer::RenderFrame(GPU& gpu)
|
||||||
glBindBufferBase(GL_UNIFORM_BUFFER, 0, MetaUniformMemory);
|
glBindBufferBase(GL_UNIFORM_BUFFER, 0, MetaUniformMemory);
|
||||||
|
|
||||||
glUseProgram(ShaderClearCoarseBinMask);
|
glUseProgram(ShaderClearCoarseBinMask);
|
||||||
glDispatchCompute(TilesPerLine*TileLines/32, 1, 1);
|
glDispatchCompute(TilesPerLine*TileLines/ClearCoarseBinMaskLocalSize, 1, 1);
|
||||||
|
|
||||||
bool wbuffer = false;
|
bool wbuffer = false;
|
||||||
if (numYSpans > 0)
|
if (numYSpans > 0)
|
||||||
|
@ -932,23 +961,23 @@ void ComputeRenderer::RenderFrame(GPU& gpu)
|
||||||
glBindImageTexture(0, YSpanIndicesTexture, 0, GL_FALSE, 0, GL_READ_ONLY, GL_RGBA16UI);
|
glBindImageTexture(0, YSpanIndicesTexture, 0, GL_FALSE, 0, GL_READ_ONLY, GL_RGBA16UI);
|
||||||
glUseProgram(ShaderInterpXSpans[wbuffer]);
|
glUseProgram(ShaderInterpXSpans[wbuffer]);
|
||||||
glDispatchCompute((numSetupIndices + 31) / 32, 1, 1);
|
glDispatchCompute((numSetupIndices + 31) / 32, 1, 1);
|
||||||
glMemoryBarrier(GL_SHADER_STORAGE_BUFFER);
|
glMemoryBarrier(GL_SHADER_STORAGE_BARRIER_BIT);
|
||||||
|
|
||||||
// bin polygons
|
// bin polygons
|
||||||
glUseProgram(ShaderBinCombined);
|
glUseProgram(ShaderBinCombined);
|
||||||
glDispatchCompute(((gpu.GPU3D.RenderNumPolygons + 31) / 32), ScreenWidth/CoarseTileW, ScreenHeight/CoarseTileH);
|
glDispatchCompute(((gpu.GPU3D.RenderNumPolygons + 31) / 32), ScreenWidth/CoarseTileW, ScreenHeight/CoarseTileH);
|
||||||
glMemoryBarrier(GL_SHADER_STORAGE_BUFFER);
|
glMemoryBarrier(GL_SHADER_STORAGE_BARRIER_BIT);
|
||||||
|
|
||||||
// calculate list offsets
|
// calculate list offsets
|
||||||
glUseProgram(ShaderCalculateWorkListOffset);
|
glUseProgram(ShaderCalculateWorkListOffset);
|
||||||
glDispatchCompute((numVariants + 31) / 32, 1, 1);
|
glDispatchCompute((numVariants + 31) / 32, 1, 1);
|
||||||
glMemoryBarrier(GL_SHADER_STORAGE_BUFFER);
|
glMemoryBarrier(GL_SHADER_STORAGE_BARRIER_BIT);
|
||||||
|
|
||||||
// sort shader work
|
// sort shader work
|
||||||
glUseProgram(ShaderSortWork);
|
glUseProgram(ShaderSortWork);
|
||||||
glBindBuffer(GL_DISPATCH_INDIRECT_BUFFER, BinResultMemory);
|
glBindBuffer(GL_DISPATCH_INDIRECT_BUFFER, BinResultMemory);
|
||||||
glDispatchComputeIndirect(offsetof(BinResultHeader, SortWorkWorkCount));
|
glDispatchComputeIndirect(offsetof(BinResultHeader, SortWorkWorkCount));
|
||||||
glMemoryBarrier(GL_SHADER_STORAGE_BUFFER);
|
glMemoryBarrier(GL_SHADER_STORAGE_BARRIER_BIT);
|
||||||
|
|
||||||
glActiveTexture(GL_TEXTURE0);
|
glActiveTexture(GL_TEXTURE0);
|
||||||
|
|
||||||
|
|
|
@ -163,11 +163,13 @@ private:
|
||||||
float TextureLayer;
|
float TextureLayer;
|
||||||
};
|
};
|
||||||
|
|
||||||
static constexpr int TileSize = 8;
|
int TileSize;
|
||||||
static constexpr int CoarseTileCountX = 8;
|
static constexpr int CoarseTileCountX = 8;
|
||||||
static constexpr int CoarseTileCountY = 4;
|
int CoarseTileCountY;
|
||||||
static constexpr int CoarseTileW = CoarseTileCountX * TileSize;
|
int CoarseTileArea;
|
||||||
static constexpr int CoarseTileH = CoarseTileCountY * TileSize;
|
int CoarseTileW;
|
||||||
|
int CoarseTileH;
|
||||||
|
int ClearCoarseBinMaskLocalSize;
|
||||||
|
|
||||||
static constexpr int BinStride = 2048/32;
|
static constexpr int BinStride = 2048/32;
|
||||||
static constexpr int CoarseBinStride = BinStride/32;
|
static constexpr int CoarseBinStride = BinStride/32;
|
||||||
|
|
|
@ -339,9 +339,7 @@ const uint ResultAttrStart = ResultDepthStart+ScreenWidth*ScreenHeight*2;
|
||||||
|
|
||||||
const char* Common = R"(
|
const char* Common = R"(
|
||||||
|
|
||||||
#define TileSize 8
|
|
||||||
const int CoarseTileCountX = 8;
|
const int CoarseTileCountX = 8;
|
||||||
const int CoarseTileCountY = 4;
|
|
||||||
const int CoarseTileW = (CoarseTileCountX * TileSize);
|
const int CoarseTileW = (CoarseTileCountX * TileSize);
|
||||||
const int CoarseTileH = (CoarseTileCountY * TileSize);
|
const int CoarseTileH = (CoarseTileCountY * TileSize);
|
||||||
|
|
||||||
|
@ -848,7 +846,7 @@ void main()
|
||||||
|
|
||||||
const std::string ClearCoarseBinMask =
|
const std::string ClearCoarseBinMask =
|
||||||
BinningBuffer + R"(
|
BinningBuffer + R"(
|
||||||
layout (local_size_x = 32) in;
|
layout (local_size_x = ClearCoarseBinMaskLocalSize) in;
|
||||||
|
|
||||||
void main()
|
void main()
|
||||||
{
|
{
|
||||||
|
@ -864,7 +862,7 @@ const std::string BinCombined =
|
||||||
XSpanSetupBuffer +
|
XSpanSetupBuffer +
|
||||||
WorkDescBuffer + R"(
|
WorkDescBuffer + R"(
|
||||||
|
|
||||||
layout (local_size_x = 32) in;
|
layout (local_size_x = CoarseTileArea) in;
|
||||||
|
|
||||||
bool BinPolygon(Polygon polygon, ivec2 topLeft, ivec2 botRight)
|
bool BinPolygon(Polygon polygon, ivec2 topLeft, ivec2 botRight)
|
||||||
{
|
{
|
||||||
|
|
14
src/Utils.h
14
src/Utils.h
|
@ -38,6 +38,20 @@ std::pair<std::unique_ptr<u8[]>, u32> PadToPowerOf2(const u8* data, u32 len) noe
|
||||||
|
|
||||||
std::unique_ptr<u8[]> CopyToUnique(const u8* data, u32 len) noexcept;
|
std::unique_ptr<u8[]> CopyToUnique(const u8* data, u32 len) noexcept;
|
||||||
|
|
||||||
|
// Isolates the most significant set bit of an integer value.
//
// The highest set bit is first propagated into every lower bit position by
// OR-ing the value with right-shifted copies of itself (shift distances
// 1, 2, 4, and then 8/16/32 only when T is wide enough to need them).
// Subtracting the half-shifted result from that filled pattern leaves just
// the top bit standing.
//
// Returns a value with at most one bit set; an input of 0 yields 0.
//
// NOTE(review): appears intended for unsigned / non-negative values. For
// negative signed inputs the result depends on how >> treats the sign bit.
// Confirm no caller relies on that.
template <typename T>
T GetMSBit(T val)
{
    T filled = val;

    // Shifts every integer width needs.
    filled |= (filled >> 1);
    filled |= (filled >> 2);
    filled |= (filled >> 4);

    // Wider types need additional, larger shifts. The checks are resolved at
    // compile time, so e.g. the shift by 32 is never instantiated for types
    // of 4 bytes or fewer.
    if constexpr (sizeof(T) > 1)
    {
        filled |= (filled >> 8);
    }
    if constexpr (sizeof(T) > 2)
    {
        filled |= (filled >> 16);
    }
    if constexpr (sizeof(T) > 4)
    {
        filled |= (filled >> 32);
    }

    // `filled` now has every bit at or below the original top bit set;
    // removing the lower bits leaves only the top one.
    const T lowerBits = filled >> 1;
    return filled - lowerBits;
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
#endif // MELONDS_UTILS_H
|
#endif // MELONDS_UTILS_H
|
||||||
|
|
Loading…
Add table
Reference in a new issue