1
0
Fork 0
mirror of https://github.com/melonDS-emu/melonDS.git synced 2025-03-06 21:00:31 +01:00
This commit is contained in:
Jakly 2025-02-25 13:37:11 +07:00 committed by GitHub
commit 7d623c7ca0
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
4 changed files with 260 additions and 75 deletions

View file

@ -451,6 +451,8 @@ void GPU3D::DoSavestate(Savestate* file) noexcept
file->Var32(&poly->NumVertices);
file->VarArray(poly->SlopePosition, sizeof(s32)*10*2);
file->VarArray(poly->FinalZ, sizeof(s32)*10);
file->VarArray(poly->FinalW, sizeof(s32)*10);
file->Bool32(&poly->WBuffer);
@ -489,7 +491,7 @@ void GPU3D::DoSavestate(Savestate* file) noexcept
poly->Degenerate = true;
}
if (poly->YBottom > 192) poly->Degenerate = true;
if (poly->YBottom > 192 && !poly->Translucent) poly->Degenerate = true;
}
}
@ -1100,8 +1102,10 @@ void GPU3D::SubmitPolygon() noexcept
}
// compute screen coordinates
for (int i = clipstart; i < nverts; i++)
// hardware does this pass for shared vertices in polygon strips, even though it was already done for them last polygon
// however it doesn't recalculate all of the previous polygon's internal info (used for determining how to rasterize it)
// despite potentially changing their coordinates if a viewport change occurred mid-strip...
for (int i = (UpdateLastPoly ? 0 : clipstart); i < nverts; i++)
{
Vertex* vtx = &clippedvertices[i];
@ -1214,6 +1218,7 @@ void GPU3D::SubmitPolygon() noexcept
poly->Degenerate = false;
poly->Type = 0;
poly->OOBRendering = UpdateLastPoly;
poly->FacingView = facingview;
@ -1234,6 +1239,19 @@ void GPU3D::SubmitPolygon() noexcept
{
poly->Vertices[0] = reusedvertices[0];
poly->Vertices[1] = reusedvertices[1];
// null vertices poly invalidation:
// 1. Start a polygon strip
// 2. Submit at least one polygon
// 3. Swap buffers
// 4. Don't send a begin command
// 5. Submit a new polygon (1 vertex for tri, 2 for quad)
// 6. If the new polygon reuses vertices, it will be "degenerate" due to them being null pointers (theory)
if (NullVertices)
{
poly->Degenerate = true;
NullVertices -= (PolygonMode - 1); // subt. 1 if tri strip, subt. 2 if quad strip.
}
}
else
{
@ -1247,6 +1265,31 @@ void GPU3D::SubmitPolygon() noexcept
NumVertices += 2;
}
// if a viewport command was submitted mid-polygon strip the "true" coords and sort order of a vertex in the last polygon can be changed retroactively
if (UpdateLastPoly)
{
// update final coords and sortkey to match new vertex coordinates
// yes, *only* those values... this causes the polygon to be rasterized in an extremely glitchy manner
poly->Vertices[0]->FinalPosition[0] = clippedvertices[0].FinalPosition[0];
poly->Vertices[0]->FinalPosition[1] = clippedvertices[0].FinalPosition[1];
poly->Vertices[1]->FinalPosition[0] = clippedvertices[1].FinalPosition[0];
poly->Vertices[1]->FinalPosition[1] = clippedvertices[1].FinalPosition[1];
s32 ytop = 192, ybot = 0;
Vertex** lastpolyvtx = LastStripPolygon->Vertices;
for (int i = 0; i < LastStripPolygon->NumVertices; i++)
{
if (lastpolyvtx[i]->FinalPosition[1] < ytop)
ytop = lastpolyvtx[i]->FinalPosition[1];
if (lastpolyvtx[i]->FinalPosition[1] > ybot)
ybot = lastpolyvtx[i]->FinalPosition[1];
}
LastStripPolygon->SortKey = (ybot << 8) | ytop;
if (LastStripPolygon->Translucent) LastStripPolygon->SortKey |= 0x10000;
// clear update flag
UpdateLastPoly = false;
}
poly->NumVertices += 2;
}
@ -1268,6 +1311,7 @@ void GPU3D::SubmitPolygon() noexcept
}
// determine bounds of the polygon
// including where slopes begin and end
// also determine the W shift and normalize W
// normalization works both ways
// (ie two W's that span 12 bits or less will be brought to 16 bits)
@ -1294,6 +1338,10 @@ void GPU3D::SubmitPolygon() noexcept
vbot = i;
}
// these values are used to determine where to begin/end slopes
poly->SlopePosition[i][0] = vtx->FinalPosition[0];
poly->SlopePosition[i][1] = vtx->FinalPosition[1];
u32 w = (u32)vtx->Position[3];
if (w == 0) poly->Degenerate = true;
@ -1305,7 +1353,7 @@ void GPU3D::SubmitPolygon() noexcept
poly->YTop = ytop; poly->YBottom = ybot;
poly->XTop = xtop; poly->XBottom = xbot;
if (ybot > 192) poly->Degenerate = true;
if (ybot > 192 && !poly->Translucent) poly->Degenerate = true;
poly->SortKey = (ybot << 8) | ytop;
if (poly->Translucent) poly->SortKey |= 0x10000;
@ -2066,7 +2114,9 @@ void GPU3D::ExecuteCommand() noexcept
VertexNumInPoly = 0;
NumConsecutivePolygons = 0;
LastStripPolygon = NULL;
UpdateLastPoly = false;
CurPolygonAttr = PolygonAttr;
NullVertices = 0;
break;
case 0x41: // end polygons
@ -2089,6 +2139,8 @@ void GPU3D::ExecuteCommand() noexcept
PolygonPipeline = 0;
VertexSlotCounter = 0;
VertexSlotsFree = 1;
// previous polygon's vertices will be counted as "null" if a buffer swap occurs
if (PolygonMode >= 2) NullVertices = 2;
break;
case 0x60: // viewport x1,y1,x2,y2
@ -2100,6 +2152,9 @@ void GPU3D::ExecuteCommand() noexcept
Viewport[3] = (191 - (entry.Param >> 24)) & 0xFF; // y1
Viewport[4] = (Viewport[2] - Viewport[0] + 1) & 0x1FF; // width
Viewport[5] = (Viewport[1] - Viewport[3] + 1) & 0xFF; // height
// set a flag that tells the next polygon to emulate a bug with polygon strips
if (LastStripPolygon) UpdateLastPoly = true;
break;
case 0x72: // vec test

View file

@ -39,8 +39,13 @@ struct Vertex
// final vertex attributes.
// allows them to be reused in polygon strips.
// with sw renderer FinalPosition is primarily used for calculating the slope of a polygon (not where it begins/ends)
// (it does get used to determine where slopes should start and end with the gl renderers)
// the initial set of coordinates gets updated by the next polygon in a strip
// which can cause rendering issues if they wind up different than their initial value (due to a viewport change)
s32 FinalPosition[2];
s32 FinalColor[3];
// hi-res position (4-bit fractional part)
@ -55,6 +60,13 @@ struct Polygon
Vertex* Vertices[10];
u32 NumVertices;
// essentially a per-polygon copy of its vertices' coordinates
// (not 100% sure why they do it like this? but a glitch requires this for proper behavior, so we gotta do it too)
// unlike each vertex's final position variable, it is *not* updated by the next polygon in a polygon strip
// it is used by the software renderer to determine where to begin/end each slope
// TODO: track hires versions of this for the hardware renderers to use?
s32 SlopePosition[10][2];
s32 FinalZ[10];
s32 FinalW[10];
bool WBuffer;
@ -72,6 +84,7 @@ struct Polygon
bool IsShadow;
int Type; // 0=regular 1=line
bool OOBRendering;
u32 VTop, VBottom; // vertex indices
s32 YTop, YBottom; // Y coords
@ -272,6 +285,7 @@ public:
u32 RenderClearAttr2 = 0;
bool RenderFrameIdentical = false; // not part of the hardware state, don't serialize
bool UpdateLastPoly = false; // used to track whether the next polygon should update the previous one's vtx coordinates (as a small optimization)
bool AbortFrame = false;
@ -310,6 +324,7 @@ public:
u32 VertexNumInPoly = 0;
u32 NumConsecutivePolygons = 0;
Polygon* LastStripPolygon = nullptr;
u32 NullVertices = 0;
u32 NumOpaquePolygons = 0;
Vertex VertexRAM[6144 * 2] {};

View file

@ -598,11 +598,12 @@ void SoftRenderer::PlotTranslucentPixel(const GPU3D& gpu3d, u32 pixeladdr, u32 c
AttrBuffer[pixeladdr] = attr;
}
template <bool oob>
void SoftRenderer::SetupPolygonLeftEdge(SoftRenderer::RendererPolygon* rp, s32 y) const
{
Polygon* polygon = rp->PolyData;
while (y >= polygon->Vertices[rp->NextVL]->FinalPosition[1] && rp->CurVL != polygon->VBottom)
while ((y >= polygon->SlopePosition[rp->NextVL][1]) && rp->CurVL != polygon->VBottom)
{
rp->CurVL = rp->NextVL;
@ -620,16 +621,28 @@ void SoftRenderer::SetupPolygonLeftEdge(SoftRenderer::RendererPolygon* rp, s32 y
}
}
rp->XL = rp->SlopeL.Setup(polygon->Vertices[rp->CurVL]->FinalPosition[0], polygon->Vertices[rp->NextVL]->FinalPosition[0],
polygon->Vertices[rp->CurVL]->FinalPosition[1], polygon->Vertices[rp->NextVL]->FinalPosition[1],
polygon->FinalW[rp->CurVL], polygon->FinalW[rp->NextVL], y);
// note: if the end or current position in a slope is above the start point
// it seems to seek forwards(?) until the value overflows at 256
// this can be emulated by just adding 256 to them
s32 y1 = polygon->Vertices[rp->NextVL]->FinalPosition[1];
if constexpr (oob)
{
if (y < polygon->Vertices[rp->CurVL]->FinalPosition[1])
y += 256;
if (y1 < polygon->Vertices[rp->CurVL]->FinalPosition[1])
y1 += 256;
}
rp->XL = rp->SlopeL.Setup<oob>(polygon->Vertices[rp->CurVL]->FinalPosition[0], polygon->Vertices[rp->NextVL]->FinalPosition[0],
polygon->Vertices[rp->CurVL]->FinalPosition[1], y1, polygon->FinalW[rp->CurVL], polygon->FinalW[rp->NextVL], y);
}
template <bool oob>
void SoftRenderer::SetupPolygonRightEdge(SoftRenderer::RendererPolygon* rp, s32 y) const
{
Polygon* polygon = rp->PolyData;
while (y >= polygon->Vertices[rp->NextVR]->FinalPosition[1] && rp->CurVR != polygon->VBottom)
while ((y >= polygon->SlopePosition[rp->NextVR][1]) && rp->CurVR != polygon->VBottom)
{
rp->CurVR = rp->NextVR;
@ -647,11 +660,23 @@ void SoftRenderer::SetupPolygonRightEdge(SoftRenderer::RendererPolygon* rp, s32
}
}
rp->XR = rp->SlopeR.Setup(polygon->Vertices[rp->CurVR]->FinalPosition[0], polygon->Vertices[rp->NextVR]->FinalPosition[0],
polygon->Vertices[rp->CurVR]->FinalPosition[1], polygon->Vertices[rp->NextVR]->FinalPosition[1],
polygon->FinalW[rp->CurVR], polygon->FinalW[rp->NextVR], y);
// note: if the end or current position in a slope is above the start point
// it seems to seek forwards(?) until the value overflows at 256
// this can be emulated by just adding 256 to them
s32 y1 = polygon->Vertices[rp->NextVR]->FinalPosition[1];
if constexpr (oob)
{
if (y < polygon->Vertices[rp->CurVR]->FinalPosition[1])
y += 256;
if (y1 < polygon->Vertices[rp->CurVR]->FinalPosition[1])
y1 += 256;
}
rp->XR = rp->SlopeR.Setup<oob>(polygon->Vertices[rp->CurVR]->FinalPosition[0], polygon->Vertices[rp->NextVR]->FinalPosition[0],
polygon->Vertices[rp->CurVR]->FinalPosition[1], y1, polygon->FinalW[rp->CurVR], polygon->FinalW[rp->NextVR], y);
}
template <bool oob>
void SoftRenderer::SetupPolygon(SoftRenderer::RendererPolygon* rp, Polygon* polygon) const
{
u32 nverts = polygon->NumVertices;
@ -678,8 +703,10 @@ void SoftRenderer::SetupPolygon(SoftRenderer::RendererPolygon* rp, Polygon* poly
rp->NextVR = rp->CurVR + 1;
if (rp->NextVR >= nverts) rp->NextVR = 0;
}
if (ybot == ytop)
// 0px tall line polygons are checked for at rasterization, this matters for when viewports are updated mid-polygon-strip
// therefore we need to check if the last two vertices are actually still at the same y coordinate as the others
if ((ybot == ytop) && (ybot == polygon->Vertices[nverts-1]->FinalPosition[1]) && (ybot == polygon->Vertices[nverts-2]->FinalPosition[1]) && (ybot == polygon->Vertices[nverts-3]->FinalPosition[1]))
{
vtop = 0; vbot = 0;
int i;
@ -700,11 +727,38 @@ void SoftRenderer::SetupPolygon(SoftRenderer::RendererPolygon* rp, Polygon* poly
}
else
{
SetupPolygonLeftEdge(rp, ytop);
SetupPolygonRightEdge(rp, ytop);
// note: if the end or current position in a slope is above the start point
// it seems to seek forwards(?) until the value overflows at 256
// this can be emulated by just adding 256 to them
s32 y = ytop;
s32 y1 = polygon->Vertices[rp->NextVL]->FinalPosition[1];
if (oob)
{
if (y < polygon->Vertices[rp->CurVL]->FinalPosition[1])
y += 256;
if (y1 < polygon->Vertices[rp->CurVL]->FinalPosition[1])
y1 += 256;
}
rp->XL = rp->SlopeL.Setup<oob>(polygon->Vertices[rp->CurVL]->FinalPosition[0], polygon->Vertices[rp->NextVL]->FinalPosition[0],
polygon->Vertices[rp->CurVL]->FinalPosition[1], y1, polygon->FinalW[rp->CurVL], polygon->FinalW[rp->NextVL], y);
y1 = polygon->Vertices[rp->NextVR]->FinalPosition[1];
if constexpr (oob)
{
y = ytop;
if (y < polygon->Vertices[rp->CurVR]->FinalPosition[1])
y += 256;
if (y1 < polygon->Vertices[rp->CurVR]->FinalPosition[1])
y1 += 256;
}
rp->XR = rp->SlopeR.Setup<oob>(polygon->Vertices[rp->CurVR]->FinalPosition[0], polygon->Vertices[rp->NextVR]->FinalPosition[0],
polygon->Vertices[rp->CurVR]->FinalPosition[1], y1, polygon->FinalW[rp->CurVR], polygon->FinalW[rp->NextVR], y);
}
}
template <bool oob>
void SoftRenderer::RenderShadowMaskScanline(const GPU3D& gpu3d, RendererPolygon* rp, s32 y)
{
Polygon* polygon = rp->PolyData;
@ -730,14 +784,14 @@ void SoftRenderer::RenderShadowMaskScanline(const GPU3D& gpu3d, RendererPolygon*
if (polygon->YTop != polygon->YBottom)
{
if (y >= polygon->Vertices[rp->NextVL]->FinalPosition[1] && rp->CurVL != polygon->VBottom)
if ((y >= polygon->SlopePosition[rp->NextVL][1] || (oob && y == polygon->Vertices[rp->CurVL]->FinalPosition[1])) && rp->CurVL != polygon->VBottom)
{
SetupPolygonLeftEdge(rp, y);
SetupPolygonLeftEdge<oob>(rp, y);
}
if (y >= polygon->Vertices[rp->NextVR]->FinalPosition[1] && rp->CurVR != polygon->VBottom)
if ((y >= polygon->SlopePosition[rp->NextVR][1] || (oob && y == polygon->Vertices[rp->CurVR]->FinalPosition[1])) && rp->CurVR != polygon->VBottom)
{
SetupPolygonRightEdge(rp, y);
SetupPolygonRightEdge<oob>(rp, y);
}
}
@ -746,8 +800,8 @@ void SoftRenderer::RenderShadowMaskScanline(const GPU3D& gpu3d, RendererPolygon*
bool l_filledge, r_filledge;
s32 l_edgelen, r_edgelen;
s32 l_edgecov, r_edgecov;
Interpolator<1>* interp_start;
Interpolator<1>* interp_end;
Interpolator<1, false>* interp_start;
Interpolator<1, false>* interp_end;
xstart = rp->XL;
xend = rp->XR;
@ -839,11 +893,30 @@ void SoftRenderer::RenderShadowMaskScanline(const GPU3D& gpu3d, RendererPolygon*
if (y == polygon->YTop) yedge = 0x4;
else if (y == polygon->YBottom-1) yedge = 0x8;
int edge;
xend += 1;
Interpolator<0, oob> interpX(xstart, xend, wl, wr);
if constexpr (oob)
{
// CHECKME: should the unclamped values be used for timings?
// negative values are clamped to 0
if (xstart < 0)
{
l_edgelen += xstart;
if (l_edgelen < 1) l_edgelen = 1;
xstart = 0;
if (xend < 1) xend = 1;
}
// too big values are clamped to 511
if (xend > 511)
{
r_edgelen += 256 - xend;
xend = 511;
}
}
s32 x = xstart;
Interpolator<0> interpX(xstart, xend+1, wl, wr);
if (x < 0) x = 0;
s32 xlimit;
// for shadow masks: set stencil bits where the depth test fails.
@ -852,7 +925,7 @@ void SoftRenderer::RenderShadowMaskScanline(const GPU3D& gpu3d, RendererPolygon*
// part 1: left edge
edge = yedge | 0x1;
xlimit = xstart+l_edgelen;
if (xlimit > xend+1) xlimit = xend+1;
if (xlimit > xend) xlimit = xend;
if (xlimit > 256) xlimit = 256;
if (!l_filledge) x = xlimit;
@ -879,8 +952,8 @@ void SoftRenderer::RenderShadowMaskScanline(const GPU3D& gpu3d, RendererPolygon*
// part 2: polygon inside
edge = yedge;
xlimit = xend-r_edgelen+1;
if (xlimit > xend+1) xlimit = xend+1;
xlimit = xend-r_edgelen;
if (xlimit > xend) xlimit = xend;
if (xlimit > 256) xlimit = 256;
if (wireframe && !edge) x = std::max(x, xlimit);
else for (; x < xlimit; x++)
@ -905,7 +978,7 @@ void SoftRenderer::RenderShadowMaskScanline(const GPU3D& gpu3d, RendererPolygon*
// part 3: right edge
edge = yedge | 0x2;
xlimit = xend+1;
xlimit = xend;
if (xlimit > 256) xlimit = 256;
if (r_filledge)
@ -929,10 +1002,11 @@ void SoftRenderer::RenderShadowMaskScanline(const GPU3D& gpu3d, RendererPolygon*
}
}
rp->XL = rp->SlopeL.Step();
rp->XR = rp->SlopeR.Step();
rp->XL = rp->SlopeL.Step<oob>();
rp->XR = rp->SlopeR.Step<oob>();
}
template <bool oob>
void SoftRenderer::RenderPolygonScanline(const GPU& gpu, RendererPolygon* rp, s32 y)
{
Polygon* polygon = rp->PolyData;
@ -955,14 +1029,14 @@ void SoftRenderer::RenderPolygonScanline(const GPU& gpu, RendererPolygon* rp, s3
if (polygon->YTop != polygon->YBottom)
{
if (y >= polygon->Vertices[rp->NextVL]->FinalPosition[1] && rp->CurVL != polygon->VBottom)
if ((y >= polygon->SlopePosition[rp->NextVL][1] || (oob && y == polygon->Vertices[rp->CurVL]->FinalPosition[1])) && rp->CurVL != polygon->VBottom)
{
SetupPolygonLeftEdge(rp, y);
SetupPolygonLeftEdge<oob>(rp, y);
}
if (y >= polygon->Vertices[rp->NextVR]->FinalPosition[1] && rp->CurVR != polygon->VBottom)
if ((y >= polygon->SlopePosition[rp->NextVR][1] || (oob && y == polygon->Vertices[rp->CurVR]->FinalPosition[1])) && rp->CurVR != polygon->VBottom)
{
SetupPolygonRightEdge(rp, y);
SetupPolygonRightEdge<oob>(rp, y);
}
}
@ -971,8 +1045,8 @@ void SoftRenderer::RenderPolygonScanline(const GPU& gpu, RendererPolygon* rp, s3
bool l_filledge, r_filledge;
s32 l_edgelen, r_edgelen;
s32 l_edgecov, r_edgecov;
Interpolator<1>* interp_start;
Interpolator<1>* interp_end;
Interpolator<1, false>* interp_start;
Interpolator<1, false>* interp_end;
xstart = rp->XL;
xend = rp->XR;
@ -1090,10 +1164,29 @@ void SoftRenderer::RenderPolygonScanline(const GPU& gpu, RendererPolygon* rp, s3
else if (y == polygon->YBottom-1) yedge = 0x8;
int edge;
s32 x = xstart;
Interpolator<0> interpX(xstart, xend+1, wl, wr);
xend+=1;
Interpolator<0, oob> interpX(xstart, xend, wl, wr);
if constexpr (oob)
{
// CHECKME: should the unclamped values be used for timings?
// negative values are clamped to 0
if (xstart < 0)
{
l_edgelen += xstart;
if (l_edgelen < 1) l_edgelen = 1;
xstart = 0;
if (xend < 1) xend = 1;
}
// too big values are clamped to 511
if (xend > 511)
{
r_edgelen += 256 - xend;
xend = 511;
}
}
if (x < 0) x = 0;
s32 x = xstart;
s32 xlimit;
s32 xcov = 0;
@ -1101,7 +1194,7 @@ void SoftRenderer::RenderPolygonScanline(const GPU& gpu, RendererPolygon* rp, s3
// part 1: left edge
edge = yedge | 0x1;
xlimit = xstart+l_edgelen;
if (xlimit > xend+1) xlimit = xend+1;
if (xlimit > xend) xlimit = xend;
if (xlimit > 256) xlimit = 256;
if (l_edgecov & (1<<31))
{
@ -1201,8 +1294,8 @@ void SoftRenderer::RenderPolygonScanline(const GPU& gpu, RendererPolygon* rp, s3
// part 2: polygon inside
edge = yedge;
xlimit = xend-r_edgelen+1;
if (xlimit > xend+1) xlimit = xend+1;
xlimit = xend-r_edgelen;
if (xlimit > xend) xlimit = xend;
if (xlimit > 256) xlimit = 256;
if (wireframe && !edge) x = std::max(x, xlimit);
@ -1290,7 +1383,7 @@ void SoftRenderer::RenderPolygonScanline(const GPU& gpu, RendererPolygon* rp, s3
// part 3: right edge
edge = yedge | 0x2;
xlimit = xend+1;
xlimit = xend;
if (xlimit > 256) xlimit = 256;
if (r_edgecov & (1<<31))
{
@ -1387,8 +1480,8 @@ void SoftRenderer::RenderPolygonScanline(const GPU& gpu, RendererPolygon* rp, s3
}
}
rp->XL = rp->SlopeL.Step();
rp->XR = rp->SlopeR.Step();
rp->XL = rp->SlopeL.Step<oob>();
rp->XR = rp->SlopeR.Step<oob>();
}
void SoftRenderer::RenderScanline(const GPU& gpu, s32 y, int npolys)
@ -1400,10 +1493,20 @@ void SoftRenderer::RenderScanline(const GPU& gpu, s32 y, int npolys)
if (y >= polygon->YTop && (y < polygon->YBottom || (y == polygon->YTop && polygon->YBottom == polygon->YTop)))
{
if (polygon->IsShadowMask)
RenderShadowMaskScanline(gpu.GPU3D, rp, y);
if (polygon->OOBRendering) [[unlikely]] // enable some extra handling for polygons that might render out of ordinary bounds
{
if (polygon->IsShadowMask)
RenderShadowMaskScanline<true>(gpu.GPU3D, rp, y);
else
RenderPolygonScanline<true>(gpu, rp, y);
}
else
RenderPolygonScanline(gpu, rp, y);
{
if (polygon->IsShadowMask)
RenderShadowMaskScanline<false>(gpu.GPU3D, rp, y);
else
RenderPolygonScanline<false>(gpu, rp, y);
}
}
}
}
@ -1712,7 +1815,11 @@ void SoftRenderer::RenderPolygons(const GPU& gpu, bool threaded, Polygon** polyg
for (int i = 0; i < npolys; i++)
{
if (polygons[i]->Degenerate) continue;
SetupPolygon(&PolygonList[j++], polygons[i]);
if (polygons[i]->OOBRendering) [[unlikely]] // enable some extra handling for polygons that might render out of ordinary bounds
SetupPolygon<true>(&PolygonList[j++], polygons[i]);
else
SetupPolygon<false>(&PolygonList[j++], polygons[i]);
}
RenderScanline(gpu, 0, j);

View file

@ -64,7 +64,7 @@ private:
// interpolation, avoiding precision loss from the aforementioned approximation.
// Which is desirable when using the GPU to draw 2D graphics.
template<int dir>
template<int dir, bool oob>
class Interpolator
{
public:
@ -78,7 +78,11 @@ private:
{
this->x0 = x0;
this->x1 = x1;
this->xdiff = x1 - x0;
if (oob)
this->xdiff = std::min(x1, 511) - std::max(x0, 0);
else
this->xdiff = x1 - x0;
// calculate reciprocal for Z interpolation
// TODO eventually: use a faster reciprocal function?
@ -129,6 +133,7 @@ private:
constexpr void SetX(s32 x)
{
x -= x0;
//if (x > xdiff) x = xdiff; // may or may not be correct
this->x = x;
if (xdiff != 0 && !linear)
{
@ -141,10 +146,12 @@ private:
else yfactor = (s32)(num / den);
}
}
constexpr s32 Interpolate(s32 y0, s32 y1) const
{
if (xdiff == 0 || y0 == y1) return y0;
if (xdiff == 0 || y0 == y1 || x == 0) return y0;
if (oob && (x0 <= 0 && x1 > 511)) return y1;
if (!linear)
{
@ -163,10 +170,12 @@ private:
return y1 + (s64)(y0-y1) * (xdiff - x) / xdiff;
}
}
constexpr s32 InterpolateZ(s32 z0, s32 z1, bool wbuffer) const
{
if (xdiff == 0 || z0 == z1) return z0;
if (xdiff == 0 || z0 == z1 || x == 0) return z0;
if (oob && (x0 <= 0 && x1 > 511)) return z1;
if (wbuffer)
{
@ -252,6 +261,7 @@ private:
return x0;
}
template<bool oob>
constexpr s32 Setup(s32 x0, s32 x1, s32 y0, s32 y1, s32 w0, s32 w1, s32 y)
{
this->x0 = x0;
@ -284,7 +294,7 @@ private:
// instead, 1/y is calculated and then multiplied by x
// TODO: this is still not perfect (see for example x=169 y=33)
if (ylen == 0)
Increment = 0;
Increment = xlen << 18;
else if (ylen == xlen && xlen != 1)
Increment = 0x40000;
else
@ -314,8 +324,7 @@ private:
}
dx += (y - y0) * Increment;
s32 x = XVal();
if (oob) dx &= 0xFFFFFFF;
int interpoffset = (Increment >= 0x40000) && (side ^ Negative);
Interp.Setup(y0-interpoffset, y1-interpoffset, w0, w1);
@ -324,28 +333,27 @@ private:
// used for calculating AA coverage
if (XMajor) xcov_incr = (ylen << 10) / xlen;
return x;
return XVal();
}
template<bool oob>
constexpr s32 Step()
{
dx += Increment;
dx += Increment; // seems to be a 28 bit integer
if (oob) dx &= 0xFFFFFFF;
y++;
s32 x = XVal();
Interp.SetX(y);
return x;
return XVal();
}
constexpr s32 XVal() const
{
s32 ret = 0;
s32 ret;
if (Negative) ret = x0 - (dx >> 18);
else ret = x0 + (dx >> 18);
if (ret < xmin) ret = xmin;
else if (ret > xmax) ret = xmax;
return ret;
return ret;// << 21 >> 21; checkme: is this commented bit actually correct?
}
template<bool swapped>
@ -418,7 +426,7 @@ private:
s32 Increment;
bool Negative;
bool XMajor;
Interpolator<1> Interp;
Interpolator<1, false> Interp;
private:
s32 x0, xmin, xmax;
@ -448,11 +456,11 @@ private:
void TextureLookup(const GPU& gpu, u32 texparam, u32 texpal, s16 s, s16 t, u16* color, u8* alpha) const;
u32 RenderPixel(const GPU& gpu, const Polygon* polygon, u8 vr, u8 vg, u8 vb, s16 s, s16 t) const;
void PlotTranslucentPixel(const GPU3D& gpu3d, u32 pixeladdr, u32 color, u32 z, u32 polyattr, u32 shadow);
void SetupPolygonLeftEdge(RendererPolygon* rp, s32 y) const;
void SetupPolygonRightEdge(RendererPolygon* rp, s32 y) const;
void SetupPolygon(RendererPolygon* rp, Polygon* polygon) const;
void RenderShadowMaskScanline(const GPU3D& gpu3d, RendererPolygon* rp, s32 y);
void RenderPolygonScanline(const GPU& gpu, RendererPolygon* rp, s32 y);
template<bool oob> void SetupPolygonLeftEdge(RendererPolygon* rp, s32 y) const;
template<bool oob> void SetupPolygonRightEdge(RendererPolygon* rp, s32 y) const;
template<bool oob> void SetupPolygon(RendererPolygon* rp, Polygon* polygon) const;
template<bool oob> void RenderShadowMaskScanline(const GPU3D& gpu3d, RendererPolygon* rp, s32 y);
template<bool oob> void RenderPolygonScanline(const GPU& gpu, RendererPolygon* rp, s32 y);
void RenderScanline(const GPU& gpu, s32 y, int npolys);
u32 CalculateFogDensity(const GPU3D& gpu3d, u32 pixeladdr) const;
void ScanlineFinalPass(const GPU3D& gpu3d, s32 y);