mirror of
https://github.com/melonDS-emu/melonDS.git
synced 2025-03-06 21:00:31 +01:00
Merge 660d30baad
into 63b468927e
This commit is contained in:
commit
7d623c7ca0
4 changed files with 260 additions and 75 deletions
|
@ -451,6 +451,8 @@ void GPU3D::DoSavestate(Savestate* file) noexcept
|
|||
|
||||
file->Var32(&poly->NumVertices);
|
||||
|
||||
file->VarArray(poly->SlopePosition, sizeof(s32)*10*2);
|
||||
|
||||
file->VarArray(poly->FinalZ, sizeof(s32)*10);
|
||||
file->VarArray(poly->FinalW, sizeof(s32)*10);
|
||||
file->Bool32(&poly->WBuffer);
|
||||
|
@ -489,7 +491,7 @@ void GPU3D::DoSavestate(Savestate* file) noexcept
|
|||
poly->Degenerate = true;
|
||||
}
|
||||
|
||||
if (poly->YBottom > 192) poly->Degenerate = true;
|
||||
if (poly->YBottom > 192 && !poly->Translucent) poly->Degenerate = true;
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -1100,8 +1102,10 @@ void GPU3D::SubmitPolygon() noexcept
|
|||
}
|
||||
|
||||
// compute screen coordinates
|
||||
|
||||
for (int i = clipstart; i < nverts; i++)
|
||||
// hardware does this pass for shared vertices in polygon strips, even though it was already done for them last polygon
|
||||
// however it doesn't recalculate all of the previous polygon's internal info (used for determining how to rasterize it)
|
||||
// despite potentially changing their coordinates if a viewport change occurred mid-strip...
|
||||
for (int i = (UpdateLastPoly ? 0 : clipstart); i < nverts; i++)
|
||||
{
|
||||
Vertex* vtx = &clippedvertices[i];
|
||||
|
||||
|
@ -1214,6 +1218,7 @@ void GPU3D::SubmitPolygon() noexcept
|
|||
|
||||
poly->Degenerate = false;
|
||||
poly->Type = 0;
|
||||
poly->OOBRendering = UpdateLastPoly;
|
||||
|
||||
poly->FacingView = facingview;
|
||||
|
||||
|
@ -1234,6 +1239,19 @@ void GPU3D::SubmitPolygon() noexcept
|
|||
{
|
||||
poly->Vertices[0] = reusedvertices[0];
|
||||
poly->Vertices[1] = reusedvertices[1];
|
||||
|
||||
// null vertices poly invalidation:
|
||||
// 1. Start a polygon strip
|
||||
// 2. Submit at least one polygon
|
||||
// 3. Swap buffers
|
||||
// 4. Don't send a begin command
|
||||
// 5. Submit a new polygon (1 vertex for tri, 2 for quad)
|
||||
// 6. If the new polygon reuses vertices, it will be "degenerate" due to them being null pointers (theory)
|
||||
if (NullVertices)
|
||||
{
|
||||
poly->Degenerate = true;
|
||||
NullVertices -= (PolygonMode - 1); // subt. 1 if tri strip, subt. 2 if quad strip.
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
|
@ -1247,6 +1265,31 @@ void GPU3D::SubmitPolygon() noexcept
|
|||
NumVertices += 2;
|
||||
}
|
||||
|
||||
// if a viewport command was submitted mid-polygon strip the "true" coords and sort order of a vertex in the last polygon can be changed retroactively
|
||||
if (UpdateLastPoly)
|
||||
{
|
||||
// update final coords and sortkey to match new vertex coordinates
|
||||
// yes, *only* those values... this causes the polygon to be rasterized in an extremely glitchy manner
|
||||
poly->Vertices[0]->FinalPosition[0] = clippedvertices[0].FinalPosition[0];
|
||||
poly->Vertices[0]->FinalPosition[1] = clippedvertices[0].FinalPosition[1];
|
||||
poly->Vertices[1]->FinalPosition[0] = clippedvertices[1].FinalPosition[0];
|
||||
poly->Vertices[1]->FinalPosition[1] = clippedvertices[1].FinalPosition[1];
|
||||
|
||||
s32 ytop = 192, ybot = 0;
|
||||
Vertex** lastpolyvtx = LastStripPolygon->Vertices;
|
||||
for (int i = 0; i < LastStripPolygon->NumVertices; i++)
|
||||
{
|
||||
if (lastpolyvtx[i]->FinalPosition[1] < ytop)
|
||||
ytop = lastpolyvtx[i]->FinalPosition[1];
|
||||
if (lastpolyvtx[i]->FinalPosition[1] > ybot)
|
||||
ybot = lastpolyvtx[i]->FinalPosition[1];
|
||||
}
|
||||
LastStripPolygon->SortKey = (ybot << 8) | ytop;
|
||||
if (LastStripPolygon->Translucent) LastStripPolygon->SortKey |= 0x10000;
|
||||
|
||||
// clear update flag
|
||||
UpdateLastPoly = false;
|
||||
}
|
||||
poly->NumVertices += 2;
|
||||
}
|
||||
|
||||
|
@ -1268,6 +1311,7 @@ void GPU3D::SubmitPolygon() noexcept
|
|||
}
|
||||
|
||||
// determine bounds of the polygon
|
||||
// including where slopes begin and end
|
||||
// also determine the W shift and normalize W
|
||||
// normalization works both ways
|
||||
// (ie two W's that span 12 bits or less will be brought to 16 bits)
|
||||
|
@ -1294,6 +1338,10 @@ void GPU3D::SubmitPolygon() noexcept
|
|||
vbot = i;
|
||||
}
|
||||
|
||||
// these values are used to determine where to begin/end slopes
|
||||
poly->SlopePosition[i][0] = vtx->FinalPosition[0];
|
||||
poly->SlopePosition[i][1] = vtx->FinalPosition[1];
|
||||
|
||||
u32 w = (u32)vtx->Position[3];
|
||||
if (w == 0) poly->Degenerate = true;
|
||||
|
||||
|
@ -1305,7 +1353,7 @@ void GPU3D::SubmitPolygon() noexcept
|
|||
poly->YTop = ytop; poly->YBottom = ybot;
|
||||
poly->XTop = xtop; poly->XBottom = xbot;
|
||||
|
||||
if (ybot > 192) poly->Degenerate = true;
|
||||
if (ybot > 192 && !poly->Translucent) poly->Degenerate = true;
|
||||
|
||||
poly->SortKey = (ybot << 8) | ytop;
|
||||
if (poly->Translucent) poly->SortKey |= 0x10000;
|
||||
|
@ -2066,7 +2114,9 @@ void GPU3D::ExecuteCommand() noexcept
|
|||
VertexNumInPoly = 0;
|
||||
NumConsecutivePolygons = 0;
|
||||
LastStripPolygon = NULL;
|
||||
UpdateLastPoly = false;
|
||||
CurPolygonAttr = PolygonAttr;
|
||||
NullVertices = 0;
|
||||
break;
|
||||
|
||||
case 0x41: // end polygons
|
||||
|
@ -2089,6 +2139,8 @@ void GPU3D::ExecuteCommand() noexcept
|
|||
PolygonPipeline = 0;
|
||||
VertexSlotCounter = 0;
|
||||
VertexSlotsFree = 1;
|
||||
// previous polygon's vertices will be counted as "null" if a buffer swap occurs
|
||||
if (PolygonMode >= 2) NullVertices = 2;
|
||||
break;
|
||||
|
||||
case 0x60: // viewport x1,y1,x2,y2
|
||||
|
@ -2100,6 +2152,9 @@ void GPU3D::ExecuteCommand() noexcept
|
|||
Viewport[3] = (191 - (entry.Param >> 24)) & 0xFF; // y1
|
||||
Viewport[4] = (Viewport[2] - Viewport[0] + 1) & 0x1FF; // width
|
||||
Viewport[5] = (Viewport[1] - Viewport[3] + 1) & 0xFF; // height
|
||||
|
||||
// set a flag that tells the next polygon to emulate a bug with polygon strips
|
||||
if (LastStripPolygon) UpdateLastPoly = true;
|
||||
break;
|
||||
|
||||
case 0x72: // vec test
|
||||
|
|
17
src/GPU3D.h
17
src/GPU3D.h
|
@ -39,8 +39,13 @@ struct Vertex
|
|||
|
||||
// final vertex attributes.
|
||||
// allows them to be reused in polygon strips.
|
||||
|
||||
|
||||
// with sw renderer FinalPosition is primarily used for calculating the slope of a polygon (not where it begins/ends)
|
||||
// (it does get used to determine where slopes should start and end with the gl renderers)
|
||||
// the initial set of coordinates gets updated by the next polygon in a strip
|
||||
// which can cause rendering issues if they wind up different than their initial value (due to a viewport change)
|
||||
s32 FinalPosition[2];
|
||||
|
||||
s32 FinalColor[3];
|
||||
|
||||
// hi-res position (4-bit fractional part)
|
||||
|
@ -55,6 +60,13 @@ struct Polygon
|
|||
Vertex* Vertices[10];
|
||||
u32 NumVertices;
|
||||
|
||||
// essentially a per-polygon copy of its vertices' coordinates
|
||||
// (not 100% sure why they do it like this? but a glitch requires this for proper behavior, so we gotta do it too)
|
||||
// unlike each vertex's FinalPosition, it is *not* updated by the next polygon in a polygon strip
|
||||
// it is used by the software renderer to determine where to begin/end each slope
|
||||
// TODO: track hires versions of this for the hardware renderers to use?
|
||||
s32 SlopePosition[10][2];
|
||||
|
||||
s32 FinalZ[10];
|
||||
s32 FinalW[10];
|
||||
bool WBuffer;
|
||||
|
@ -72,6 +84,7 @@ struct Polygon
|
|||
bool IsShadow;
|
||||
|
||||
int Type; // 0=regular 1=line
|
||||
bool OOBRendering;
|
||||
|
||||
u32 VTop, VBottom; // vertex indices
|
||||
s32 YTop, YBottom; // Y coords
|
||||
|
@ -272,6 +285,7 @@ public:
|
|||
u32 RenderClearAttr2 = 0;
|
||||
|
||||
bool RenderFrameIdentical = false; // not part of the hardware state, don't serialize
|
||||
bool UpdateLastPoly = false; // used to track whether the next polygon should update the previous one's vtx coordinates (as a small optimization)
|
||||
|
||||
bool AbortFrame = false;
|
||||
|
||||
|
@ -310,6 +324,7 @@ public:
|
|||
u32 VertexNumInPoly = 0;
|
||||
u32 NumConsecutivePolygons = 0;
|
||||
Polygon* LastStripPolygon = nullptr;
|
||||
u32 NullVertices = 0;
|
||||
u32 NumOpaquePolygons = 0;
|
||||
|
||||
Vertex VertexRAM[6144 * 2] {};
|
||||
|
|
|
@ -598,11 +598,12 @@ void SoftRenderer::PlotTranslucentPixel(const GPU3D& gpu3d, u32 pixeladdr, u32 c
|
|||
AttrBuffer[pixeladdr] = attr;
|
||||
}
|
||||
|
||||
template <bool oob>
|
||||
void SoftRenderer::SetupPolygonLeftEdge(SoftRenderer::RendererPolygon* rp, s32 y) const
|
||||
{
|
||||
Polygon* polygon = rp->PolyData;
|
||||
|
||||
while (y >= polygon->Vertices[rp->NextVL]->FinalPosition[1] && rp->CurVL != polygon->VBottom)
|
||||
while ((y >= polygon->SlopePosition[rp->NextVL][1]) && rp->CurVL != polygon->VBottom)
|
||||
{
|
||||
rp->CurVL = rp->NextVL;
|
||||
|
||||
|
@ -620,16 +621,28 @@ void SoftRenderer::SetupPolygonLeftEdge(SoftRenderer::RendererPolygon* rp, s32 y
|
|||
}
|
||||
}
|
||||
|
||||
rp->XL = rp->SlopeL.Setup(polygon->Vertices[rp->CurVL]->FinalPosition[0], polygon->Vertices[rp->NextVL]->FinalPosition[0],
|
||||
polygon->Vertices[rp->CurVL]->FinalPosition[1], polygon->Vertices[rp->NextVL]->FinalPosition[1],
|
||||
polygon->FinalW[rp->CurVL], polygon->FinalW[rp->NextVL], y);
|
||||
// note: if the end or current position in a slope is above the start point
|
||||
// it seems to seek forwards(?) until the value overflows at 256
|
||||
// this can be emulated by just adding 256 to them
|
||||
s32 y1 = polygon->Vertices[rp->NextVL]->FinalPosition[1];
|
||||
if constexpr (oob)
|
||||
{
|
||||
if (y < polygon->Vertices[rp->CurVL]->FinalPosition[1])
|
||||
y += 256;
|
||||
|
||||
if (y1 < polygon->Vertices[rp->CurVL]->FinalPosition[1])
|
||||
y1 += 256;
|
||||
}
|
||||
rp->XL = rp->SlopeL.Setup<oob>(polygon->Vertices[rp->CurVL]->FinalPosition[0], polygon->Vertices[rp->NextVL]->FinalPosition[0],
|
||||
polygon->Vertices[rp->CurVL]->FinalPosition[1], y1, polygon->FinalW[rp->CurVL], polygon->FinalW[rp->NextVL], y);
|
||||
}
|
||||
|
||||
template <bool oob>
|
||||
void SoftRenderer::SetupPolygonRightEdge(SoftRenderer::RendererPolygon* rp, s32 y) const
|
||||
{
|
||||
Polygon* polygon = rp->PolyData;
|
||||
|
||||
while (y >= polygon->Vertices[rp->NextVR]->FinalPosition[1] && rp->CurVR != polygon->VBottom)
|
||||
while ((y >= polygon->SlopePosition[rp->NextVR][1]) && rp->CurVR != polygon->VBottom)
|
||||
{
|
||||
rp->CurVR = rp->NextVR;
|
||||
|
||||
|
@ -647,11 +660,23 @@ void SoftRenderer::SetupPolygonRightEdge(SoftRenderer::RendererPolygon* rp, s32
|
|||
}
|
||||
}
|
||||
|
||||
rp->XR = rp->SlopeR.Setup(polygon->Vertices[rp->CurVR]->FinalPosition[0], polygon->Vertices[rp->NextVR]->FinalPosition[0],
|
||||
polygon->Vertices[rp->CurVR]->FinalPosition[1], polygon->Vertices[rp->NextVR]->FinalPosition[1],
|
||||
polygon->FinalW[rp->CurVR], polygon->FinalW[rp->NextVR], y);
|
||||
// note: if the end or current position in a slope is above the start point
|
||||
// it seems to seek forwards(?) until the value overflows at 256
|
||||
// this can be emulated by just adding 256 to them
|
||||
s32 y1 = polygon->Vertices[rp->NextVR]->FinalPosition[1];
|
||||
if constexpr (oob)
|
||||
{
|
||||
if (y < polygon->Vertices[rp->CurVR]->FinalPosition[1])
|
||||
y += 256;
|
||||
|
||||
if (y1 < polygon->Vertices[rp->CurVR]->FinalPosition[1])
|
||||
y1 += 256;
|
||||
}
|
||||
rp->XR = rp->SlopeR.Setup<oob>(polygon->Vertices[rp->CurVR]->FinalPosition[0], polygon->Vertices[rp->NextVR]->FinalPosition[0],
|
||||
polygon->Vertices[rp->CurVR]->FinalPosition[1], y1, polygon->FinalW[rp->CurVR], polygon->FinalW[rp->NextVR], y);
|
||||
}
|
||||
|
||||
template <bool oob>
|
||||
void SoftRenderer::SetupPolygon(SoftRenderer::RendererPolygon* rp, Polygon* polygon) const
|
||||
{
|
||||
u32 nverts = polygon->NumVertices;
|
||||
|
@ -678,8 +703,10 @@ void SoftRenderer::SetupPolygon(SoftRenderer::RendererPolygon* rp, Polygon* poly
|
|||
rp->NextVR = rp->CurVR + 1;
|
||||
if (rp->NextVR >= nverts) rp->NextVR = 0;
|
||||
}
|
||||
|
||||
if (ybot == ytop)
|
||||
|
||||
// 0px tall line polygons are checked for at rasterization, this matters for when viewports are updated mid-polygon-strip
|
||||
// therefore we need to check if the last two vertices are actually still at the same y coordinate as the others
|
||||
if ((ybot == ytop) && (ybot == polygon->Vertices[nverts-1]->FinalPosition[1]) && (ybot == polygon->Vertices[nverts-2]->FinalPosition[1]) && (ybot == polygon->Vertices[nverts-3]->FinalPosition[1]))
|
||||
{
|
||||
vtop = 0; vbot = 0;
|
||||
int i;
|
||||
|
@ -700,11 +727,38 @@ void SoftRenderer::SetupPolygon(SoftRenderer::RendererPolygon* rp, Polygon* poly
|
|||
}
|
||||
else
|
||||
{
|
||||
SetupPolygonLeftEdge(rp, ytop);
|
||||
SetupPolygonRightEdge(rp, ytop);
|
||||
// note: if the end or current position in a slope is above the start point
|
||||
// it seems to seek forwards(?) until the value overflows at 256
|
||||
// this can be emulated by just adding 256 to them
|
||||
s32 y = ytop;
|
||||
s32 y1 = polygon->Vertices[rp->NextVL]->FinalPosition[1];
|
||||
if (oob)
|
||||
{
|
||||
if (y < polygon->Vertices[rp->CurVL]->FinalPosition[1])
|
||||
y += 256;
|
||||
|
||||
if (y1 < polygon->Vertices[rp->CurVL]->FinalPosition[1])
|
||||
y1 += 256;
|
||||
}
|
||||
rp->XL = rp->SlopeL.Setup<oob>(polygon->Vertices[rp->CurVL]->FinalPosition[0], polygon->Vertices[rp->NextVL]->FinalPosition[0],
|
||||
polygon->Vertices[rp->CurVL]->FinalPosition[1], y1, polygon->FinalW[rp->CurVL], polygon->FinalW[rp->NextVL], y);
|
||||
|
||||
y1 = polygon->Vertices[rp->NextVR]->FinalPosition[1];
|
||||
if constexpr (oob)
|
||||
{
|
||||
y = ytop;
|
||||
if (y < polygon->Vertices[rp->CurVR]->FinalPosition[1])
|
||||
y += 256;
|
||||
|
||||
if (y1 < polygon->Vertices[rp->CurVR]->FinalPosition[1])
|
||||
y1 += 256;
|
||||
}
|
||||
rp->XR = rp->SlopeR.Setup<oob>(polygon->Vertices[rp->CurVR]->FinalPosition[0], polygon->Vertices[rp->NextVR]->FinalPosition[0],
|
||||
polygon->Vertices[rp->CurVR]->FinalPosition[1], y1, polygon->FinalW[rp->CurVR], polygon->FinalW[rp->NextVR], y);
|
||||
}
|
||||
}
|
||||
|
||||
template <bool oob>
|
||||
void SoftRenderer::RenderShadowMaskScanline(const GPU3D& gpu3d, RendererPolygon* rp, s32 y)
|
||||
{
|
||||
Polygon* polygon = rp->PolyData;
|
||||
|
@ -730,14 +784,14 @@ void SoftRenderer::RenderShadowMaskScanline(const GPU3D& gpu3d, RendererPolygon*
|
|||
|
||||
if (polygon->YTop != polygon->YBottom)
|
||||
{
|
||||
if (y >= polygon->Vertices[rp->NextVL]->FinalPosition[1] && rp->CurVL != polygon->VBottom)
|
||||
if ((y >= polygon->SlopePosition[rp->NextVL][1] || (oob && y == polygon->Vertices[rp->CurVL]->FinalPosition[1])) && rp->CurVL != polygon->VBottom)
|
||||
{
|
||||
SetupPolygonLeftEdge(rp, y);
|
||||
SetupPolygonLeftEdge<oob>(rp, y);
|
||||
}
|
||||
|
||||
if (y >= polygon->Vertices[rp->NextVR]->FinalPosition[1] && rp->CurVR != polygon->VBottom)
|
||||
if ((y >= polygon->SlopePosition[rp->NextVR][1] || (oob && y == polygon->Vertices[rp->CurVR]->FinalPosition[1])) && rp->CurVR != polygon->VBottom)
|
||||
{
|
||||
SetupPolygonRightEdge(rp, y);
|
||||
SetupPolygonRightEdge<oob>(rp, y);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -746,8 +800,8 @@ void SoftRenderer::RenderShadowMaskScanline(const GPU3D& gpu3d, RendererPolygon*
|
|||
bool l_filledge, r_filledge;
|
||||
s32 l_edgelen, r_edgelen;
|
||||
s32 l_edgecov, r_edgecov;
|
||||
Interpolator<1>* interp_start;
|
||||
Interpolator<1>* interp_end;
|
||||
Interpolator<1, false>* interp_start;
|
||||
Interpolator<1, false>* interp_end;
|
||||
|
||||
xstart = rp->XL;
|
||||
xend = rp->XR;
|
||||
|
@ -839,11 +893,30 @@ void SoftRenderer::RenderShadowMaskScanline(const GPU3D& gpu3d, RendererPolygon*
|
|||
if (y == polygon->YTop) yedge = 0x4;
|
||||
else if (y == polygon->YBottom-1) yedge = 0x8;
|
||||
int edge;
|
||||
|
||||
|
||||
xend += 1;
|
||||
Interpolator<0, oob> interpX(xstart, xend, wl, wr);
|
||||
|
||||
if constexpr (oob)
|
||||
{
|
||||
// CHECKME: should the unclamped values be used for timings?
|
||||
// negative values are clamped to 0
|
||||
if (xstart < 0)
|
||||
{
|
||||
l_edgelen += xstart;
|
||||
if (l_edgelen < 1) l_edgelen = 1;
|
||||
xstart = 0;
|
||||
if (xend < 1) xend = 1;
|
||||
}
|
||||
// too big values are clamped to 511
|
||||
if (xend > 511)
|
||||
{
|
||||
r_edgelen += 256 - xend;
|
||||
xend = 511;
|
||||
}
|
||||
}
|
||||
|
||||
s32 x = xstart;
|
||||
Interpolator<0> interpX(xstart, xend+1, wl, wr);
|
||||
|
||||
if (x < 0) x = 0;
|
||||
s32 xlimit;
|
||||
|
||||
// for shadow masks: set stencil bits where the depth test fails.
|
||||
|
@ -852,7 +925,7 @@ void SoftRenderer::RenderShadowMaskScanline(const GPU3D& gpu3d, RendererPolygon*
|
|||
// part 1: left edge
|
||||
edge = yedge | 0x1;
|
||||
xlimit = xstart+l_edgelen;
|
||||
if (xlimit > xend+1) xlimit = xend+1;
|
||||
if (xlimit > xend) xlimit = xend;
|
||||
if (xlimit > 256) xlimit = 256;
|
||||
|
||||
if (!l_filledge) x = xlimit;
|
||||
|
@ -879,8 +952,8 @@ void SoftRenderer::RenderShadowMaskScanline(const GPU3D& gpu3d, RendererPolygon*
|
|||
|
||||
// part 2: polygon inside
|
||||
edge = yedge;
|
||||
xlimit = xend-r_edgelen+1;
|
||||
if (xlimit > xend+1) xlimit = xend+1;
|
||||
xlimit = xend-r_edgelen;
|
||||
if (xlimit > xend) xlimit = xend;
|
||||
if (xlimit > 256) xlimit = 256;
|
||||
if (wireframe && !edge) x = std::max(x, xlimit);
|
||||
else for (; x < xlimit; x++)
|
||||
|
@ -905,7 +978,7 @@ void SoftRenderer::RenderShadowMaskScanline(const GPU3D& gpu3d, RendererPolygon*
|
|||
|
||||
// part 3: right edge
|
||||
edge = yedge | 0x2;
|
||||
xlimit = xend+1;
|
||||
xlimit = xend;
|
||||
if (xlimit > 256) xlimit = 256;
|
||||
|
||||
if (r_filledge)
|
||||
|
@ -929,10 +1002,11 @@ void SoftRenderer::RenderShadowMaskScanline(const GPU3D& gpu3d, RendererPolygon*
|
|||
}
|
||||
}
|
||||
|
||||
rp->XL = rp->SlopeL.Step();
|
||||
rp->XR = rp->SlopeR.Step();
|
||||
rp->XL = rp->SlopeL.Step<oob>();
|
||||
rp->XR = rp->SlopeR.Step<oob>();
|
||||
}
|
||||
|
||||
template <bool oob>
|
||||
void SoftRenderer::RenderPolygonScanline(const GPU& gpu, RendererPolygon* rp, s32 y)
|
||||
{
|
||||
Polygon* polygon = rp->PolyData;
|
||||
|
@ -955,14 +1029,14 @@ void SoftRenderer::RenderPolygonScanline(const GPU& gpu, RendererPolygon* rp, s3
|
|||
|
||||
if (polygon->YTop != polygon->YBottom)
|
||||
{
|
||||
if (y >= polygon->Vertices[rp->NextVL]->FinalPosition[1] && rp->CurVL != polygon->VBottom)
|
||||
if ((y >= polygon->SlopePosition[rp->NextVL][1] || (oob && y == polygon->Vertices[rp->CurVL]->FinalPosition[1])) && rp->CurVL != polygon->VBottom)
|
||||
{
|
||||
SetupPolygonLeftEdge(rp, y);
|
||||
SetupPolygonLeftEdge<oob>(rp, y);
|
||||
}
|
||||
|
||||
if (y >= polygon->Vertices[rp->NextVR]->FinalPosition[1] && rp->CurVR != polygon->VBottom)
|
||||
if ((y >= polygon->SlopePosition[rp->NextVR][1] || (oob && y == polygon->Vertices[rp->CurVR]->FinalPosition[1])) && rp->CurVR != polygon->VBottom)
|
||||
{
|
||||
SetupPolygonRightEdge(rp, y);
|
||||
SetupPolygonRightEdge<oob>(rp, y);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -971,8 +1045,8 @@ void SoftRenderer::RenderPolygonScanline(const GPU& gpu, RendererPolygon* rp, s3
|
|||
bool l_filledge, r_filledge;
|
||||
s32 l_edgelen, r_edgelen;
|
||||
s32 l_edgecov, r_edgecov;
|
||||
Interpolator<1>* interp_start;
|
||||
Interpolator<1>* interp_end;
|
||||
Interpolator<1, false>* interp_start;
|
||||
Interpolator<1, false>* interp_end;
|
||||
|
||||
xstart = rp->XL;
|
||||
xend = rp->XR;
|
||||
|
@ -1090,10 +1164,29 @@ void SoftRenderer::RenderPolygonScanline(const GPU& gpu, RendererPolygon* rp, s3
|
|||
else if (y == polygon->YBottom-1) yedge = 0x8;
|
||||
int edge;
|
||||
|
||||
s32 x = xstart;
|
||||
Interpolator<0> interpX(xstart, xend+1, wl, wr);
|
||||
xend+=1;
|
||||
Interpolator<0, oob> interpX(xstart, xend, wl, wr);
|
||||
|
||||
if constexpr (oob)
|
||||
{
|
||||
// CHECKME: should the unclamped values be used for timings?
|
||||
// negative values are clamped to 0
|
||||
if (xstart < 0)
|
||||
{
|
||||
l_edgelen += xstart;
|
||||
if (l_edgelen < 1) l_edgelen = 1;
|
||||
xstart = 0;
|
||||
if (xend < 1) xend = 1;
|
||||
}
|
||||
// too big values are clamped to 511
|
||||
if (xend > 511)
|
||||
{
|
||||
r_edgelen += 256 - xend;
|
||||
xend = 511;
|
||||
}
|
||||
}
|
||||
|
||||
if (x < 0) x = 0;
|
||||
s32 x = xstart;
|
||||
s32 xlimit;
|
||||
|
||||
s32 xcov = 0;
|
||||
|
@ -1101,7 +1194,7 @@ void SoftRenderer::RenderPolygonScanline(const GPU& gpu, RendererPolygon* rp, s3
|
|||
// part 1: left edge
|
||||
edge = yedge | 0x1;
|
||||
xlimit = xstart+l_edgelen;
|
||||
if (xlimit > xend+1) xlimit = xend+1;
|
||||
if (xlimit > xend) xlimit = xend;
|
||||
if (xlimit > 256) xlimit = 256;
|
||||
if (l_edgecov & (1<<31))
|
||||
{
|
||||
|
@ -1201,8 +1294,8 @@ void SoftRenderer::RenderPolygonScanline(const GPU& gpu, RendererPolygon* rp, s3
|
|||
|
||||
// part 2: polygon inside
|
||||
edge = yedge;
|
||||
xlimit = xend-r_edgelen+1;
|
||||
if (xlimit > xend+1) xlimit = xend+1;
|
||||
xlimit = xend-r_edgelen;
|
||||
if (xlimit > xend) xlimit = xend;
|
||||
if (xlimit > 256) xlimit = 256;
|
||||
|
||||
if (wireframe && !edge) x = std::max(x, xlimit);
|
||||
|
@ -1290,7 +1383,7 @@ void SoftRenderer::RenderPolygonScanline(const GPU& gpu, RendererPolygon* rp, s3
|
|||
|
||||
// part 3: right edge
|
||||
edge = yedge | 0x2;
|
||||
xlimit = xend+1;
|
||||
xlimit = xend;
|
||||
if (xlimit > 256) xlimit = 256;
|
||||
if (r_edgecov & (1<<31))
|
||||
{
|
||||
|
@ -1387,8 +1480,8 @@ void SoftRenderer::RenderPolygonScanline(const GPU& gpu, RendererPolygon* rp, s3
|
|||
}
|
||||
}
|
||||
|
||||
rp->XL = rp->SlopeL.Step();
|
||||
rp->XR = rp->SlopeR.Step();
|
||||
rp->XL = rp->SlopeL.Step<oob>();
|
||||
rp->XR = rp->SlopeR.Step<oob>();
|
||||
}
|
||||
|
||||
void SoftRenderer::RenderScanline(const GPU& gpu, s32 y, int npolys)
|
||||
|
@ -1400,10 +1493,20 @@ void SoftRenderer::RenderScanline(const GPU& gpu, s32 y, int npolys)
|
|||
|
||||
if (y >= polygon->YTop && (y < polygon->YBottom || (y == polygon->YTop && polygon->YBottom == polygon->YTop)))
|
||||
{
|
||||
if (polygon->IsShadowMask)
|
||||
RenderShadowMaskScanline(gpu.GPU3D, rp, y);
|
||||
if (polygon->OOBRendering) [[unlikely]] // enable some extra handling for polygons that might render out of ordinary bounds
|
||||
{
|
||||
if (polygon->IsShadowMask)
|
||||
RenderShadowMaskScanline<true>(gpu.GPU3D, rp, y);
|
||||
else
|
||||
RenderPolygonScanline<true>(gpu, rp, y);
|
||||
}
|
||||
else
|
||||
RenderPolygonScanline(gpu, rp, y);
|
||||
{
|
||||
if (polygon->IsShadowMask)
|
||||
RenderShadowMaskScanline<false>(gpu.GPU3D, rp, y);
|
||||
else
|
||||
RenderPolygonScanline<false>(gpu, rp, y);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -1712,7 +1815,11 @@ void SoftRenderer::RenderPolygons(const GPU& gpu, bool threaded, Polygon** polyg
|
|||
for (int i = 0; i < npolys; i++)
|
||||
{
|
||||
if (polygons[i]->Degenerate) continue;
|
||||
SetupPolygon(&PolygonList[j++], polygons[i]);
|
||||
|
||||
if (polygons[i]->OOBRendering) [[unlikely]] // enable some extra handling for polygons that might render out of ordinary bounds
|
||||
SetupPolygon<true>(&PolygonList[j++], polygons[i]);
|
||||
else
|
||||
SetupPolygon<false>(&PolygonList[j++], polygons[i]);
|
||||
}
|
||||
|
||||
RenderScanline(gpu, 0, j);
|
||||
|
|
|
@ -64,7 +64,7 @@ private:
|
|||
// interpolation, avoiding precision loss from the aforementioned approximation.
|
||||
// Which is desirable when using the GPU to draw 2D graphics.
|
||||
|
||||
template<int dir>
|
||||
template<int dir, bool oob>
|
||||
class Interpolator
|
||||
{
|
||||
public:
|
||||
|
@ -78,7 +78,11 @@ private:
|
|||
{
|
||||
this->x0 = x0;
|
||||
this->x1 = x1;
|
||||
this->xdiff = x1 - x0;
|
||||
|
||||
if (oob)
|
||||
this->xdiff = std::min(x1, 511) - std::max(x0, 0);
|
||||
else
|
||||
this->xdiff = x1 - x0;
|
||||
|
||||
// calculate reciprocal for Z interpolation
|
||||
// TODO eventually: use a faster reciprocal function?
|
||||
|
@ -129,6 +133,7 @@ private:
|
|||
constexpr void SetX(s32 x)
|
||||
{
|
||||
x -= x0;
|
||||
//if (x > xdiff) x = xdiff; // may or may not be correct
|
||||
this->x = x;
|
||||
if (xdiff != 0 && !linear)
|
||||
{
|
||||
|
@ -141,10 +146,12 @@ private:
|
|||
else yfactor = (s32)(num / den);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
constexpr s32 Interpolate(s32 y0, s32 y1) const
|
||||
{
|
||||
if (xdiff == 0 || y0 == y1) return y0;
|
||||
if (xdiff == 0 || y0 == y1 || x == 0) return y0;
|
||||
|
||||
if (oob && (x0 <= 0 && x1 > 511)) return y1;
|
||||
|
||||
if (!linear)
|
||||
{
|
||||
|
@ -163,10 +170,12 @@ private:
|
|||
return y1 + (s64)(y0-y1) * (xdiff - x) / xdiff;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
constexpr s32 InterpolateZ(s32 z0, s32 z1, bool wbuffer) const
|
||||
{
|
||||
if (xdiff == 0 || z0 == z1) return z0;
|
||||
if (xdiff == 0 || z0 == z1 || x == 0) return z0;
|
||||
|
||||
if (oob && (x0 <= 0 && x1 > 511)) return z1;
|
||||
|
||||
if (wbuffer)
|
||||
{
|
||||
|
@ -252,6 +261,7 @@ private:
|
|||
return x0;
|
||||
}
|
||||
|
||||
template<bool oob>
|
||||
constexpr s32 Setup(s32 x0, s32 x1, s32 y0, s32 y1, s32 w0, s32 w1, s32 y)
|
||||
{
|
||||
this->x0 = x0;
|
||||
|
@ -284,7 +294,7 @@ private:
|
|||
// instead, 1/y is calculated and then multiplied by x
|
||||
// TODO: this is still not perfect (see for example x=169 y=33)
|
||||
if (ylen == 0)
|
||||
Increment = 0;
|
||||
Increment = xlen << 18;
|
||||
else if (ylen == xlen && xlen != 1)
|
||||
Increment = 0x40000;
|
||||
else
|
||||
|
@ -314,8 +324,7 @@ private:
|
|||
}
|
||||
|
||||
dx += (y - y0) * Increment;
|
||||
|
||||
s32 x = XVal();
|
||||
if (oob) dx &= 0xFFFFFFF;
|
||||
|
||||
int interpoffset = (Increment >= 0x40000) && (side ^ Negative);
|
||||
Interp.Setup(y0-interpoffset, y1-interpoffset, w0, w1);
|
||||
|
@ -324,28 +333,27 @@ private:
|
|||
// used for calculating AA coverage
|
||||
if (XMajor) xcov_incr = (ylen << 10) / xlen;
|
||||
|
||||
return x;
|
||||
return XVal();
|
||||
}
|
||||
|
||||
template<bool oob>
|
||||
constexpr s32 Step()
|
||||
{
|
||||
dx += Increment;
|
||||
dx += Increment; // seems to be a 28 bit integer
|
||||
if (oob) dx &= 0xFFFFFFF;
|
||||
y++;
|
||||
|
||||
s32 x = XVal();
|
||||
Interp.SetX(y);
|
||||
return x;
|
||||
return XVal();
|
||||
}
|
||||
|
||||
constexpr s32 XVal() const
|
||||
{
|
||||
s32 ret = 0;
|
||||
s32 ret;
|
||||
if (Negative) ret = x0 - (dx >> 18);
|
||||
else ret = x0 + (dx >> 18);
|
||||
|
||||
if (ret < xmin) ret = xmin;
|
||||
else if (ret > xmax) ret = xmax;
|
||||
return ret;
|
||||
return ret;// << 21 >> 21; checkme: is this commented bit actually correct?
|
||||
}
|
||||
|
||||
template<bool swapped>
|
||||
|
@ -418,7 +426,7 @@ private:
|
|||
s32 Increment;
|
||||
bool Negative;
|
||||
bool XMajor;
|
||||
Interpolator<1> Interp;
|
||||
Interpolator<1, false> Interp;
|
||||
|
||||
private:
|
||||
s32 x0, xmin, xmax;
|
||||
|
@ -448,11 +456,11 @@ private:
|
|||
void TextureLookup(const GPU& gpu, u32 texparam, u32 texpal, s16 s, s16 t, u16* color, u8* alpha) const;
|
||||
u32 RenderPixel(const GPU& gpu, const Polygon* polygon, u8 vr, u8 vg, u8 vb, s16 s, s16 t) const;
|
||||
void PlotTranslucentPixel(const GPU3D& gpu3d, u32 pixeladdr, u32 color, u32 z, u32 polyattr, u32 shadow);
|
||||
void SetupPolygonLeftEdge(RendererPolygon* rp, s32 y) const;
|
||||
void SetupPolygonRightEdge(RendererPolygon* rp, s32 y) const;
|
||||
void SetupPolygon(RendererPolygon* rp, Polygon* polygon) const;
|
||||
void RenderShadowMaskScanline(const GPU3D& gpu3d, RendererPolygon* rp, s32 y);
|
||||
void RenderPolygonScanline(const GPU& gpu, RendererPolygon* rp, s32 y);
|
||||
template<bool oob> void SetupPolygonLeftEdge(RendererPolygon* rp, s32 y) const;
|
||||
template<bool oob> void SetupPolygonRightEdge(RendererPolygon* rp, s32 y) const;
|
||||
template<bool oob> void SetupPolygon(RendererPolygon* rp, Polygon* polygon) const;
|
||||
template<bool oob> void RenderShadowMaskScanline(const GPU3D& gpu3d, RendererPolygon* rp, s32 y);
|
||||
template<bool oob> void RenderPolygonScanline(const GPU& gpu, RendererPolygon* rp, s32 y);
|
||||
void RenderScanline(const GPU& gpu, s32 y, int npolys);
|
||||
u32 CalculateFogDensity(const GPU3D& gpu3d, u32 pixeladdr) const;
|
||||
void ScanlineFinalPass(const GPU3D& gpu3d, s32 y);
|
||||
|
|
Loading…
Add table
Reference in a new issue