1
0
Fork 0
mirror of https://github.com/melonDS-emu/melonDS.git synced 2025-03-06 21:00:31 +01:00
This commit is contained in:
Jakly 2025-02-25 13:37:11 +07:00 committed by GitHub
commit 7d623c7ca0
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
4 changed files with 260 additions and 75 deletions

View file

@ -451,6 +451,8 @@ void GPU3D::DoSavestate(Savestate* file) noexcept
file->Var32(&poly->NumVertices); file->Var32(&poly->NumVertices);
file->VarArray(poly->SlopePosition, sizeof(s32)*10*2);
file->VarArray(poly->FinalZ, sizeof(s32)*10); file->VarArray(poly->FinalZ, sizeof(s32)*10);
file->VarArray(poly->FinalW, sizeof(s32)*10); file->VarArray(poly->FinalW, sizeof(s32)*10);
file->Bool32(&poly->WBuffer); file->Bool32(&poly->WBuffer);
@ -489,7 +491,7 @@ void GPU3D::DoSavestate(Savestate* file) noexcept
poly->Degenerate = true; poly->Degenerate = true;
} }
if (poly->YBottom > 192) poly->Degenerate = true; if (poly->YBottom > 192 && !poly->Translucent) poly->Degenerate = true;
} }
} }
@ -1100,8 +1102,10 @@ void GPU3D::SubmitPolygon() noexcept
} }
// compute screen coordinates // compute screen coordinates
// hardware does this pass for shared vertices in polygon strips, even though it was already done for them last polygon
for (int i = clipstart; i < nverts; i++) // however it doesn't recalculate all of the previous polygon's internal info (used for determining how to rasterize it)
// despite potentially changing their coordinates if a viewport change occurred mid-strip...
for (int i = (UpdateLastPoly ? 0 : clipstart); i < nverts; i++)
{ {
Vertex* vtx = &clippedvertices[i]; Vertex* vtx = &clippedvertices[i];
@ -1214,6 +1218,7 @@ void GPU3D::SubmitPolygon() noexcept
poly->Degenerate = false; poly->Degenerate = false;
poly->Type = 0; poly->Type = 0;
poly->OOBRendering = UpdateLastPoly;
poly->FacingView = facingview; poly->FacingView = facingview;
@ -1234,6 +1239,19 @@ void GPU3D::SubmitPolygon() noexcept
{ {
poly->Vertices[0] = reusedvertices[0]; poly->Vertices[0] = reusedvertices[0];
poly->Vertices[1] = reusedvertices[1]; poly->Vertices[1] = reusedvertices[1];
// null vertices poly invalidation:
// 1. Start a polygon strip
// 2. Submit at least one polygon
// 3. Swap buffers
// 4. Don't send a begin command
// 5. Submit a new polygon (1 vertex for tri, 2 for quad)
// 6. If the new polygon reuses vertices, it will be "degenerate" due to them being null pointers (theory)
if (NullVertices)
{
poly->Degenerate = true;
NullVertices -= (PolygonMode - 1); // subt. 1 if tri strip, subt. 2 if quad strip.
}
} }
else else
{ {
@ -1247,6 +1265,31 @@ void GPU3D::SubmitPolygon() noexcept
NumVertices += 2; NumVertices += 2;
} }
// if a viewport command was submitted mid-polygon strip the "true" coords and sort order of a vertex in the last polygon can be changed retroactively
if (UpdateLastPoly)
{
// update final coords and sortkey to match new vertex coordinates
// yes, *only* those values... this causes the polygon to be rasterized in an extremely glitchy manner
poly->Vertices[0]->FinalPosition[0] = clippedvertices[0].FinalPosition[0];
poly->Vertices[0]->FinalPosition[1] = clippedvertices[0].FinalPosition[1];
poly->Vertices[1]->FinalPosition[0] = clippedvertices[1].FinalPosition[0];
poly->Vertices[1]->FinalPosition[1] = clippedvertices[1].FinalPosition[1];
s32 ytop = 192, ybot = 0;
Vertex** lastpolyvtx = LastStripPolygon->Vertices;
for (int i = 0; i < LastStripPolygon->NumVertices; i++)
{
if (lastpolyvtx[i]->FinalPosition[1] < ytop)
ytop = lastpolyvtx[i]->FinalPosition[1];
if (lastpolyvtx[i]->FinalPosition[1] > ybot)
ybot = lastpolyvtx[i]->FinalPosition[1];
}
LastStripPolygon->SortKey = (ybot << 8) | ytop;
if (LastStripPolygon->Translucent) LastStripPolygon->SortKey |= 0x10000;
// clear update flag
UpdateLastPoly = false;
}
poly->NumVertices += 2; poly->NumVertices += 2;
} }
@ -1268,6 +1311,7 @@ void GPU3D::SubmitPolygon() noexcept
} }
// determine bounds of the polygon // determine bounds of the polygon
// including where slopes begin and end
// also determine the W shift and normalize W // also determine the W shift and normalize W
// normalization works both ways // normalization works both ways
// (ie two W's that span 12 bits or less will be brought to 16 bits) // (ie two W's that span 12 bits or less will be brought to 16 bits)
@ -1294,6 +1338,10 @@ void GPU3D::SubmitPolygon() noexcept
vbot = i; vbot = i;
} }
// these values are used to determine where to begin/end slopes
poly->SlopePosition[i][0] = vtx->FinalPosition[0];
poly->SlopePosition[i][1] = vtx->FinalPosition[1];
u32 w = (u32)vtx->Position[3]; u32 w = (u32)vtx->Position[3];
if (w == 0) poly->Degenerate = true; if (w == 0) poly->Degenerate = true;
@ -1305,7 +1353,7 @@ void GPU3D::SubmitPolygon() noexcept
poly->YTop = ytop; poly->YBottom = ybot; poly->YTop = ytop; poly->YBottom = ybot;
poly->XTop = xtop; poly->XBottom = xbot; poly->XTop = xtop; poly->XBottom = xbot;
if (ybot > 192) poly->Degenerate = true; if (ybot > 192 && !poly->Translucent) poly->Degenerate = true;
poly->SortKey = (ybot << 8) | ytop; poly->SortKey = (ybot << 8) | ytop;
if (poly->Translucent) poly->SortKey |= 0x10000; if (poly->Translucent) poly->SortKey |= 0x10000;
@ -2066,7 +2114,9 @@ void GPU3D::ExecuteCommand() noexcept
VertexNumInPoly = 0; VertexNumInPoly = 0;
NumConsecutivePolygons = 0; NumConsecutivePolygons = 0;
LastStripPolygon = NULL; LastStripPolygon = NULL;
UpdateLastPoly = false;
CurPolygonAttr = PolygonAttr; CurPolygonAttr = PolygonAttr;
NullVertices = 0;
break; break;
case 0x41: // end polygons case 0x41: // end polygons
@ -2089,6 +2139,8 @@ void GPU3D::ExecuteCommand() noexcept
PolygonPipeline = 0; PolygonPipeline = 0;
VertexSlotCounter = 0; VertexSlotCounter = 0;
VertexSlotsFree = 1; VertexSlotsFree = 1;
// previous polygon's vertices will be counted as "null" if a buffer swap occurs
if (PolygonMode >= 2) NullVertices = 2;
break; break;
case 0x60: // viewport x1,y1,x2,y2 case 0x60: // viewport x1,y1,x2,y2
@ -2100,6 +2152,9 @@ void GPU3D::ExecuteCommand() noexcept
Viewport[3] = (191 - (entry.Param >> 24)) & 0xFF; // y1 Viewport[3] = (191 - (entry.Param >> 24)) & 0xFF; // y1
Viewport[4] = (Viewport[2] - Viewport[0] + 1) & 0x1FF; // width Viewport[4] = (Viewport[2] - Viewport[0] + 1) & 0x1FF; // width
Viewport[5] = (Viewport[1] - Viewport[3] + 1) & 0xFF; // height Viewport[5] = (Viewport[1] - Viewport[3] + 1) & 0xFF; // height
// set a flag that tells the next polygon to emulate a bug with polygon strips
if (LastStripPolygon) UpdateLastPoly = true;
break; break;
case 0x72: // vec test case 0x72: // vec test

View file

@ -39,8 +39,13 @@ struct Vertex
// final vertex attributes. // final vertex attributes.
// allows them to be reused in polygon strips. // allows them to be reused in polygon strips.
// with sw renderer FinalPosition is primarily used for calculating the slope of a polygon (not where it begins/ends)
// (it does get used to determine where slopes should start and end with the gl renderers)
// the initial set of coordinates gets updated by the next polygon in a strip
// which can cause rendering issues if they wind up different than their initial value (due to a viewport change)
s32 FinalPosition[2]; s32 FinalPosition[2];
s32 FinalColor[3]; s32 FinalColor[3];
// hi-res position (4-bit fractional part) // hi-res position (4-bit fractional part)
@ -55,6 +60,13 @@ struct Polygon
Vertex* Vertices[10]; Vertex* Vertices[10];
u32 NumVertices; u32 NumVertices;
// essentially a per-polygon copy of its vertices' coordinates
// (not 100% sure why they do it like this? but a glitch requires this for proper behavior, so we gotta do it too)
// unlike each vertex's final position variable, it is *not* updated by the next polygon in a polygon strip
// it is used by the software renderer to determine where to begin/end each slope
// TODO: track hires versions of this for the hardware renderers to use?
s32 SlopePosition[10][2];
s32 FinalZ[10]; s32 FinalZ[10];
s32 FinalW[10]; s32 FinalW[10];
bool WBuffer; bool WBuffer;
@ -72,6 +84,7 @@ struct Polygon
bool IsShadow; bool IsShadow;
int Type; // 0=regular 1=line int Type; // 0=regular 1=line
bool OOBRendering;
u32 VTop, VBottom; // vertex indices u32 VTop, VBottom; // vertex indices
s32 YTop, YBottom; // Y coords s32 YTop, YBottom; // Y coords
@ -272,6 +285,7 @@ public:
u32 RenderClearAttr2 = 0; u32 RenderClearAttr2 = 0;
bool RenderFrameIdentical = false; // not part of the hardware state, don't serialize bool RenderFrameIdentical = false; // not part of the hardware state, don't serialize
bool UpdateLastPoly = false; // used to track whether the next polygon should update the previous one's vtx coordinates (as a small optimization)
bool AbortFrame = false; bool AbortFrame = false;
@ -310,6 +324,7 @@ public:
u32 VertexNumInPoly = 0; u32 VertexNumInPoly = 0;
u32 NumConsecutivePolygons = 0; u32 NumConsecutivePolygons = 0;
Polygon* LastStripPolygon = nullptr; Polygon* LastStripPolygon = nullptr;
u32 NullVertices = 0;
u32 NumOpaquePolygons = 0; u32 NumOpaquePolygons = 0;
Vertex VertexRAM[6144 * 2] {}; Vertex VertexRAM[6144 * 2] {};

View file

@ -598,11 +598,12 @@ void SoftRenderer::PlotTranslucentPixel(const GPU3D& gpu3d, u32 pixeladdr, u32 c
AttrBuffer[pixeladdr] = attr; AttrBuffer[pixeladdr] = attr;
} }
template <bool oob>
void SoftRenderer::SetupPolygonLeftEdge(SoftRenderer::RendererPolygon* rp, s32 y) const void SoftRenderer::SetupPolygonLeftEdge(SoftRenderer::RendererPolygon* rp, s32 y) const
{ {
Polygon* polygon = rp->PolyData; Polygon* polygon = rp->PolyData;
while (y >= polygon->Vertices[rp->NextVL]->FinalPosition[1] && rp->CurVL != polygon->VBottom) while ((y >= polygon->SlopePosition[rp->NextVL][1]) && rp->CurVL != polygon->VBottom)
{ {
rp->CurVL = rp->NextVL; rp->CurVL = rp->NextVL;
@ -620,16 +621,28 @@ void SoftRenderer::SetupPolygonLeftEdge(SoftRenderer::RendererPolygon* rp, s32 y
} }
} }
rp->XL = rp->SlopeL.Setup(polygon->Vertices[rp->CurVL]->FinalPosition[0], polygon->Vertices[rp->NextVL]->FinalPosition[0], // note: if the end or current position in a slope is above the start point
polygon->Vertices[rp->CurVL]->FinalPosition[1], polygon->Vertices[rp->NextVL]->FinalPosition[1], // it seems to seek forwards(?) until the value overflows at 256
polygon->FinalW[rp->CurVL], polygon->FinalW[rp->NextVL], y); // this can be emulated by just adding 256 to them
s32 y1 = polygon->Vertices[rp->NextVL]->FinalPosition[1];
if constexpr (oob)
{
if (y < polygon->Vertices[rp->CurVL]->FinalPosition[1])
y += 256;
if (y1 < polygon->Vertices[rp->CurVL]->FinalPosition[1])
y1 += 256;
}
rp->XL = rp->SlopeL.Setup<oob>(polygon->Vertices[rp->CurVL]->FinalPosition[0], polygon->Vertices[rp->NextVL]->FinalPosition[0],
polygon->Vertices[rp->CurVL]->FinalPosition[1], y1, polygon->FinalW[rp->CurVL], polygon->FinalW[rp->NextVL], y);
} }
template <bool oob>
void SoftRenderer::SetupPolygonRightEdge(SoftRenderer::RendererPolygon* rp, s32 y) const void SoftRenderer::SetupPolygonRightEdge(SoftRenderer::RendererPolygon* rp, s32 y) const
{ {
Polygon* polygon = rp->PolyData; Polygon* polygon = rp->PolyData;
while (y >= polygon->Vertices[rp->NextVR]->FinalPosition[1] && rp->CurVR != polygon->VBottom) while ((y >= polygon->SlopePosition[rp->NextVR][1]) && rp->CurVR != polygon->VBottom)
{ {
rp->CurVR = rp->NextVR; rp->CurVR = rp->NextVR;
@ -647,11 +660,23 @@ void SoftRenderer::SetupPolygonRightEdge(SoftRenderer::RendererPolygon* rp, s32
} }
} }
rp->XR = rp->SlopeR.Setup(polygon->Vertices[rp->CurVR]->FinalPosition[0], polygon->Vertices[rp->NextVR]->FinalPosition[0], // note: if the end or current position in a slope is above the start point
polygon->Vertices[rp->CurVR]->FinalPosition[1], polygon->Vertices[rp->NextVR]->FinalPosition[1], // it seems to seek forwards(?) until the value overflows at 256
polygon->FinalW[rp->CurVR], polygon->FinalW[rp->NextVR], y); // this can be emulated by just adding 256 to them
s32 y1 = polygon->Vertices[rp->NextVR]->FinalPosition[1];
if constexpr (oob)
{
if (y < polygon->Vertices[rp->CurVR]->FinalPosition[1])
y += 256;
if (y1 < polygon->Vertices[rp->CurVR]->FinalPosition[1])
y1 += 256;
}
rp->XR = rp->SlopeR.Setup<oob>(polygon->Vertices[rp->CurVR]->FinalPosition[0], polygon->Vertices[rp->NextVR]->FinalPosition[0],
polygon->Vertices[rp->CurVR]->FinalPosition[1], y1, polygon->FinalW[rp->CurVR], polygon->FinalW[rp->NextVR], y);
} }
template <bool oob>
void SoftRenderer::SetupPolygon(SoftRenderer::RendererPolygon* rp, Polygon* polygon) const void SoftRenderer::SetupPolygon(SoftRenderer::RendererPolygon* rp, Polygon* polygon) const
{ {
u32 nverts = polygon->NumVertices; u32 nverts = polygon->NumVertices;
@ -678,8 +703,10 @@ void SoftRenderer::SetupPolygon(SoftRenderer::RendererPolygon* rp, Polygon* poly
rp->NextVR = rp->CurVR + 1; rp->NextVR = rp->CurVR + 1;
if (rp->NextVR >= nverts) rp->NextVR = 0; if (rp->NextVR >= nverts) rp->NextVR = 0;
} }
if (ybot == ytop) // 0px tall line polygons are checked for at rasterization, this matters for when viewports are updated mid-polygon-strip
// therefore we need to check if the last two vertices are actually still at the same y axis as the others
if ((ybot == ytop) && (ybot == polygon->Vertices[nverts-1]->FinalPosition[1]) && (ybot == polygon->Vertices[nverts-2]->FinalPosition[1]) && (ybot == polygon->Vertices[nverts-3]->FinalPosition[1]))
{ {
vtop = 0; vbot = 0; vtop = 0; vbot = 0;
int i; int i;
@ -700,11 +727,38 @@ void SoftRenderer::SetupPolygon(SoftRenderer::RendererPolygon* rp, Polygon* poly
} }
else else
{ {
SetupPolygonLeftEdge(rp, ytop); // note: if the end or current position in a slope is above the start point
SetupPolygonRightEdge(rp, ytop); // it seems to seek forwards(?) until the value overflows at 256
// this can be emulated by just adding 256 to them
s32 y = ytop;
s32 y1 = polygon->Vertices[rp->NextVL]->FinalPosition[1];
if (oob)
{
if (y < polygon->Vertices[rp->CurVL]->FinalPosition[1])
y += 256;
if (y1 < polygon->Vertices[rp->CurVL]->FinalPosition[1])
y1 += 256;
}
rp->XL = rp->SlopeL.Setup<oob>(polygon->Vertices[rp->CurVL]->FinalPosition[0], polygon->Vertices[rp->NextVL]->FinalPosition[0],
polygon->Vertices[rp->CurVL]->FinalPosition[1], y1, polygon->FinalW[rp->CurVL], polygon->FinalW[rp->NextVL], y);
y1 = polygon->Vertices[rp->NextVR]->FinalPosition[1];
if constexpr (oob)
{
y = ytop;
if (y < polygon->Vertices[rp->CurVR]->FinalPosition[1])
y += 256;
if (y1 < polygon->Vertices[rp->CurVR]->FinalPosition[1])
y1 += 256;
}
rp->XR = rp->SlopeR.Setup<oob>(polygon->Vertices[rp->CurVR]->FinalPosition[0], polygon->Vertices[rp->NextVR]->FinalPosition[0],
polygon->Vertices[rp->CurVR]->FinalPosition[1], y1, polygon->FinalW[rp->CurVR], polygon->FinalW[rp->NextVR], y);
} }
} }
template <bool oob>
void SoftRenderer::RenderShadowMaskScanline(const GPU3D& gpu3d, RendererPolygon* rp, s32 y) void SoftRenderer::RenderShadowMaskScanline(const GPU3D& gpu3d, RendererPolygon* rp, s32 y)
{ {
Polygon* polygon = rp->PolyData; Polygon* polygon = rp->PolyData;
@ -730,14 +784,14 @@ void SoftRenderer::RenderShadowMaskScanline(const GPU3D& gpu3d, RendererPolygon*
if (polygon->YTop != polygon->YBottom) if (polygon->YTop != polygon->YBottom)
{ {
if (y >= polygon->Vertices[rp->NextVL]->FinalPosition[1] && rp->CurVL != polygon->VBottom) if ((y >= polygon->SlopePosition[rp->NextVL][1] || (oob && y == polygon->Vertices[rp->CurVL]->FinalPosition[1])) && rp->CurVL != polygon->VBottom)
{ {
SetupPolygonLeftEdge(rp, y); SetupPolygonLeftEdge<oob>(rp, y);
} }
if (y >= polygon->Vertices[rp->NextVR]->FinalPosition[1] && rp->CurVR != polygon->VBottom) if ((y >= polygon->SlopePosition[rp->NextVR][1] || (oob && y == polygon->Vertices[rp->CurVR]->FinalPosition[1])) && rp->CurVR != polygon->VBottom)
{ {
SetupPolygonRightEdge(rp, y); SetupPolygonRightEdge<oob>(rp, y);
} }
} }
@ -746,8 +800,8 @@ void SoftRenderer::RenderShadowMaskScanline(const GPU3D& gpu3d, RendererPolygon*
bool l_filledge, r_filledge; bool l_filledge, r_filledge;
s32 l_edgelen, r_edgelen; s32 l_edgelen, r_edgelen;
s32 l_edgecov, r_edgecov; s32 l_edgecov, r_edgecov;
Interpolator<1>* interp_start; Interpolator<1, false>* interp_start;
Interpolator<1>* interp_end; Interpolator<1, false>* interp_end;
xstart = rp->XL; xstart = rp->XL;
xend = rp->XR; xend = rp->XR;
@ -839,11 +893,30 @@ void SoftRenderer::RenderShadowMaskScanline(const GPU3D& gpu3d, RendererPolygon*
if (y == polygon->YTop) yedge = 0x4; if (y == polygon->YTop) yedge = 0x4;
else if (y == polygon->YBottom-1) yedge = 0x8; else if (y == polygon->YBottom-1) yedge = 0x8;
int edge; int edge;
xend += 1;
Interpolator<0, oob> interpX(xstart, xend, wl, wr);
if constexpr (oob)
{
// CHECKME: should the unclamped values be used for timings?
// negative values are clamped to 0
if (xstart < 0)
{
l_edgelen += xstart;
if (l_edgelen < 1) l_edgelen = 1;
xstart = 0;
if (xend < 1) xend = 1;
}
// too big values are clamped to 511
if (xend > 511)
{
r_edgelen += 256 - xend;
xend = 511;
}
}
s32 x = xstart; s32 x = xstart;
Interpolator<0> interpX(xstart, xend+1, wl, wr);
if (x < 0) x = 0;
s32 xlimit; s32 xlimit;
// for shadow masks: set stencil bits where the depth test fails. // for shadow masks: set stencil bits where the depth test fails.
@ -852,7 +925,7 @@ void SoftRenderer::RenderShadowMaskScanline(const GPU3D& gpu3d, RendererPolygon*
// part 1: left edge // part 1: left edge
edge = yedge | 0x1; edge = yedge | 0x1;
xlimit = xstart+l_edgelen; xlimit = xstart+l_edgelen;
if (xlimit > xend+1) xlimit = xend+1; if (xlimit > xend) xlimit = xend;
if (xlimit > 256) xlimit = 256; if (xlimit > 256) xlimit = 256;
if (!l_filledge) x = xlimit; if (!l_filledge) x = xlimit;
@ -879,8 +952,8 @@ void SoftRenderer::RenderShadowMaskScanline(const GPU3D& gpu3d, RendererPolygon*
// part 2: polygon inside // part 2: polygon inside
edge = yedge; edge = yedge;
xlimit = xend-r_edgelen+1; xlimit = xend-r_edgelen;
if (xlimit > xend+1) xlimit = xend+1; if (xlimit > xend) xlimit = xend;
if (xlimit > 256) xlimit = 256; if (xlimit > 256) xlimit = 256;
if (wireframe && !edge) x = std::max(x, xlimit); if (wireframe && !edge) x = std::max(x, xlimit);
else for (; x < xlimit; x++) else for (; x < xlimit; x++)
@ -905,7 +978,7 @@ void SoftRenderer::RenderShadowMaskScanline(const GPU3D& gpu3d, RendererPolygon*
// part 3: right edge // part 3: right edge
edge = yedge | 0x2; edge = yedge | 0x2;
xlimit = xend+1; xlimit = xend;
if (xlimit > 256) xlimit = 256; if (xlimit > 256) xlimit = 256;
if (r_filledge) if (r_filledge)
@ -929,10 +1002,11 @@ void SoftRenderer::RenderShadowMaskScanline(const GPU3D& gpu3d, RendererPolygon*
} }
} }
rp->XL = rp->SlopeL.Step(); rp->XL = rp->SlopeL.Step<oob>();
rp->XR = rp->SlopeR.Step(); rp->XR = rp->SlopeR.Step<oob>();
} }
template <bool oob>
void SoftRenderer::RenderPolygonScanline(const GPU& gpu, RendererPolygon* rp, s32 y) void SoftRenderer::RenderPolygonScanline(const GPU& gpu, RendererPolygon* rp, s32 y)
{ {
Polygon* polygon = rp->PolyData; Polygon* polygon = rp->PolyData;
@ -955,14 +1029,14 @@ void SoftRenderer::RenderPolygonScanline(const GPU& gpu, RendererPolygon* rp, s3
if (polygon->YTop != polygon->YBottom) if (polygon->YTop != polygon->YBottom)
{ {
if (y >= polygon->Vertices[rp->NextVL]->FinalPosition[1] && rp->CurVL != polygon->VBottom) if ((y >= polygon->SlopePosition[rp->NextVL][1] || (oob && y == polygon->Vertices[rp->CurVL]->FinalPosition[1])) && rp->CurVL != polygon->VBottom)
{ {
SetupPolygonLeftEdge(rp, y); SetupPolygonLeftEdge<oob>(rp, y);
} }
if (y >= polygon->Vertices[rp->NextVR]->FinalPosition[1] && rp->CurVR != polygon->VBottom) if ((y >= polygon->SlopePosition[rp->NextVR][1] || (oob && y == polygon->Vertices[rp->CurVR]->FinalPosition[1])) && rp->CurVR != polygon->VBottom)
{ {
SetupPolygonRightEdge(rp, y); SetupPolygonRightEdge<oob>(rp, y);
} }
} }
@ -971,8 +1045,8 @@ void SoftRenderer::RenderPolygonScanline(const GPU& gpu, RendererPolygon* rp, s3
bool l_filledge, r_filledge; bool l_filledge, r_filledge;
s32 l_edgelen, r_edgelen; s32 l_edgelen, r_edgelen;
s32 l_edgecov, r_edgecov; s32 l_edgecov, r_edgecov;
Interpolator<1>* interp_start; Interpolator<1, false>* interp_start;
Interpolator<1>* interp_end; Interpolator<1, false>* interp_end;
xstart = rp->XL; xstart = rp->XL;
xend = rp->XR; xend = rp->XR;
@ -1090,10 +1164,29 @@ void SoftRenderer::RenderPolygonScanline(const GPU& gpu, RendererPolygon* rp, s3
else if (y == polygon->YBottom-1) yedge = 0x8; else if (y == polygon->YBottom-1) yedge = 0x8;
int edge; int edge;
s32 x = xstart; xend+=1;
Interpolator<0> interpX(xstart, xend+1, wl, wr); Interpolator<0, oob> interpX(xstart, xend, wl, wr);
if constexpr (oob)
{
// CHECKME: should the unclamped values be used for timings?
// negative values are clamped to 0
if (xstart < 0)
{
l_edgelen += xstart;
if (l_edgelen < 1) l_edgelen = 1;
xstart = 0;
if (xend < 1) xend = 1;
}
// too big values are clamped to 511
if (xend > 511)
{
r_edgelen += 256 - xend;
xend = 511;
}
}
if (x < 0) x = 0; s32 x = xstart;
s32 xlimit; s32 xlimit;
s32 xcov = 0; s32 xcov = 0;
@ -1101,7 +1194,7 @@ void SoftRenderer::RenderPolygonScanline(const GPU& gpu, RendererPolygon* rp, s3
// part 1: left edge // part 1: left edge
edge = yedge | 0x1; edge = yedge | 0x1;
xlimit = xstart+l_edgelen; xlimit = xstart+l_edgelen;
if (xlimit > xend+1) xlimit = xend+1; if (xlimit > xend) xlimit = xend;
if (xlimit > 256) xlimit = 256; if (xlimit > 256) xlimit = 256;
if (l_edgecov & (1<<31)) if (l_edgecov & (1<<31))
{ {
@ -1201,8 +1294,8 @@ void SoftRenderer::RenderPolygonScanline(const GPU& gpu, RendererPolygon* rp, s3
// part 2: polygon inside // part 2: polygon inside
edge = yedge; edge = yedge;
xlimit = xend-r_edgelen+1; xlimit = xend-r_edgelen;
if (xlimit > xend+1) xlimit = xend+1; if (xlimit > xend) xlimit = xend;
if (xlimit > 256) xlimit = 256; if (xlimit > 256) xlimit = 256;
if (wireframe && !edge) x = std::max(x, xlimit); if (wireframe && !edge) x = std::max(x, xlimit);
@ -1290,7 +1383,7 @@ void SoftRenderer::RenderPolygonScanline(const GPU& gpu, RendererPolygon* rp, s3
// part 3: right edge // part 3: right edge
edge = yedge | 0x2; edge = yedge | 0x2;
xlimit = xend+1; xlimit = xend;
if (xlimit > 256) xlimit = 256; if (xlimit > 256) xlimit = 256;
if (r_edgecov & (1<<31)) if (r_edgecov & (1<<31))
{ {
@ -1387,8 +1480,8 @@ void SoftRenderer::RenderPolygonScanline(const GPU& gpu, RendererPolygon* rp, s3
} }
} }
rp->XL = rp->SlopeL.Step(); rp->XL = rp->SlopeL.Step<oob>();
rp->XR = rp->SlopeR.Step(); rp->XR = rp->SlopeR.Step<oob>();
} }
void SoftRenderer::RenderScanline(const GPU& gpu, s32 y, int npolys) void SoftRenderer::RenderScanline(const GPU& gpu, s32 y, int npolys)
@ -1400,10 +1493,20 @@ void SoftRenderer::RenderScanline(const GPU& gpu, s32 y, int npolys)
if (y >= polygon->YTop && (y < polygon->YBottom || (y == polygon->YTop && polygon->YBottom == polygon->YTop))) if (y >= polygon->YTop && (y < polygon->YBottom || (y == polygon->YTop && polygon->YBottom == polygon->YTop)))
{ {
if (polygon->IsShadowMask) if (polygon->OOBRendering) [[unlikely]] // enable some extra handling for polygons that might render out of ordinary bounds
RenderShadowMaskScanline(gpu.GPU3D, rp, y); {
if (polygon->IsShadowMask)
RenderShadowMaskScanline<true>(gpu.GPU3D, rp, y);
else
RenderPolygonScanline<true>(gpu, rp, y);
}
else else
RenderPolygonScanline(gpu, rp, y); {
if (polygon->IsShadowMask)
RenderShadowMaskScanline<false>(gpu.GPU3D, rp, y);
else
RenderPolygonScanline<false>(gpu, rp, y);
}
} }
} }
} }
@ -1712,7 +1815,11 @@ void SoftRenderer::RenderPolygons(const GPU& gpu, bool threaded, Polygon** polyg
for (int i = 0; i < npolys; i++) for (int i = 0; i < npolys; i++)
{ {
if (polygons[i]->Degenerate) continue; if (polygons[i]->Degenerate) continue;
SetupPolygon(&PolygonList[j++], polygons[i]);
if (polygons[i]->OOBRendering) [[unlikely]] // enable some extra handling for polygons that might render out of ordinary bounds
SetupPolygon<true>(&PolygonList[j++], polygons[i]);
else
SetupPolygon<false>(&PolygonList[j++], polygons[i]);
} }
RenderScanline(gpu, 0, j); RenderScanline(gpu, 0, j);

View file

@ -64,7 +64,7 @@ private:
// interpolation, avoiding precision loss from the aforementioned approximation. // interpolation, avoiding precision loss from the aforementioned approximation.
// Which is desirable when using the GPU to draw 2D graphics. // Which is desirable when using the GPU to draw 2D graphics.
template<int dir> template<int dir, bool oob>
class Interpolator class Interpolator
{ {
public: public:
@ -78,7 +78,11 @@ private:
{ {
this->x0 = x0; this->x0 = x0;
this->x1 = x1; this->x1 = x1;
this->xdiff = x1 - x0;
if (oob)
this->xdiff = std::min(x1, 511) - std::max(x0, 0);
else
this->xdiff = x1 - x0;
// calculate reciprocal for Z interpolation // calculate reciprocal for Z interpolation
// TODO eventually: use a faster reciprocal function? // TODO eventually: use a faster reciprocal function?
@ -129,6 +133,7 @@ private:
constexpr void SetX(s32 x) constexpr void SetX(s32 x)
{ {
x -= x0; x -= x0;
//if (x > xdiff) x = xdiff; // may or may not be correct
this->x = x; this->x = x;
if (xdiff != 0 && !linear) if (xdiff != 0 && !linear)
{ {
@ -141,10 +146,12 @@ private:
else yfactor = (s32)(num / den); else yfactor = (s32)(num / den);
} }
} }
constexpr s32 Interpolate(s32 y0, s32 y1) const constexpr s32 Interpolate(s32 y0, s32 y1) const
{ {
if (xdiff == 0 || y0 == y1) return y0; if (xdiff == 0 || y0 == y1 || x == 0) return y0;
if (oob && (x0 <= 0 && x1 > 511)) return y1;
if (!linear) if (!linear)
{ {
@ -163,10 +170,12 @@ private:
return y1 + (s64)(y0-y1) * (xdiff - x) / xdiff; return y1 + (s64)(y0-y1) * (xdiff - x) / xdiff;
} }
} }
constexpr s32 InterpolateZ(s32 z0, s32 z1, bool wbuffer) const constexpr s32 InterpolateZ(s32 z0, s32 z1, bool wbuffer) const
{ {
if (xdiff == 0 || z0 == z1) return z0; if (xdiff == 0 || z0 == z1 || x == 0) return z0;
if (oob && (x0 <= 0 && x1 > 511)) return z1;
if (wbuffer) if (wbuffer)
{ {
@ -252,6 +261,7 @@ private:
return x0; return x0;
} }
template<bool oob>
constexpr s32 Setup(s32 x0, s32 x1, s32 y0, s32 y1, s32 w0, s32 w1, s32 y) constexpr s32 Setup(s32 x0, s32 x1, s32 y0, s32 y1, s32 w0, s32 w1, s32 y)
{ {
this->x0 = x0; this->x0 = x0;
@ -284,7 +294,7 @@ private:
// instead, 1/y is calculated and then multiplied by x // instead, 1/y is calculated and then multiplied by x
// TODO: this is still not perfect (see for example x=169 y=33) // TODO: this is still not perfect (see for example x=169 y=33)
if (ylen == 0) if (ylen == 0)
Increment = 0; Increment = xlen << 18;
else if (ylen == xlen && xlen != 1) else if (ylen == xlen && xlen != 1)
Increment = 0x40000; Increment = 0x40000;
else else
@ -314,8 +324,7 @@ private:
} }
dx += (y - y0) * Increment; dx += (y - y0) * Increment;
if (oob) dx &= 0xFFFFFFF;
s32 x = XVal();
int interpoffset = (Increment >= 0x40000) && (side ^ Negative); int interpoffset = (Increment >= 0x40000) && (side ^ Negative);
Interp.Setup(y0-interpoffset, y1-interpoffset, w0, w1); Interp.Setup(y0-interpoffset, y1-interpoffset, w0, w1);
@ -324,28 +333,27 @@ private:
// used for calculating AA coverage // used for calculating AA coverage
if (XMajor) xcov_incr = (ylen << 10) / xlen; if (XMajor) xcov_incr = (ylen << 10) / xlen;
return x; return XVal();
} }
template<bool oob>
constexpr s32 Step() constexpr s32 Step()
{ {
dx += Increment; dx += Increment; // seems to be a 28 bit integer
if (oob) dx &= 0xFFFFFFF;
y++; y++;
s32 x = XVal();
Interp.SetX(y); Interp.SetX(y);
return x; return XVal();
} }
constexpr s32 XVal() const constexpr s32 XVal() const
{ {
s32 ret = 0; s32 ret;
if (Negative) ret = x0 - (dx >> 18); if (Negative) ret = x0 - (dx >> 18);
else ret = x0 + (dx >> 18); else ret = x0 + (dx >> 18);
if (ret < xmin) ret = xmin; return ret;// << 21 >> 21; checkme: is this commented bit actually correct?
else if (ret > xmax) ret = xmax;
return ret;
} }
template<bool swapped> template<bool swapped>
@ -418,7 +426,7 @@ private:
s32 Increment; s32 Increment;
bool Negative; bool Negative;
bool XMajor; bool XMajor;
Interpolator<1> Interp; Interpolator<1, false> Interp;
private: private:
s32 x0, xmin, xmax; s32 x0, xmin, xmax;
@ -448,11 +456,11 @@ private:
void TextureLookup(const GPU& gpu, u32 texparam, u32 texpal, s16 s, s16 t, u16* color, u8* alpha) const; void TextureLookup(const GPU& gpu, u32 texparam, u32 texpal, s16 s, s16 t, u16* color, u8* alpha) const;
u32 RenderPixel(const GPU& gpu, const Polygon* polygon, u8 vr, u8 vg, u8 vb, s16 s, s16 t) const; u32 RenderPixel(const GPU& gpu, const Polygon* polygon, u8 vr, u8 vg, u8 vb, s16 s, s16 t) const;
void PlotTranslucentPixel(const GPU3D& gpu3d, u32 pixeladdr, u32 color, u32 z, u32 polyattr, u32 shadow); void PlotTranslucentPixel(const GPU3D& gpu3d, u32 pixeladdr, u32 color, u32 z, u32 polyattr, u32 shadow);
void SetupPolygonLeftEdge(RendererPolygon* rp, s32 y) const; template<bool oob> void SetupPolygonLeftEdge(RendererPolygon* rp, s32 y) const;
void SetupPolygonRightEdge(RendererPolygon* rp, s32 y) const; template<bool oob> void SetupPolygonRightEdge(RendererPolygon* rp, s32 y) const;
void SetupPolygon(RendererPolygon* rp, Polygon* polygon) const; template<bool oob> void SetupPolygon(RendererPolygon* rp, Polygon* polygon) const;
void RenderShadowMaskScanline(const GPU3D& gpu3d, RendererPolygon* rp, s32 y); template<bool oob> void RenderShadowMaskScanline(const GPU3D& gpu3d, RendererPolygon* rp, s32 y);
void RenderPolygonScanline(const GPU& gpu, RendererPolygon* rp, s32 y); template<bool oob> void RenderPolygonScanline(const GPU& gpu, RendererPolygon* rp, s32 y);
void RenderScanline(const GPU& gpu, s32 y, int npolys); void RenderScanline(const GPU& gpu, s32 y, int npolys);
u32 CalculateFogDensity(const GPU3D& gpu3d, u32 pixeladdr) const; u32 CalculateFogDensity(const GPU3D& gpu3d, u32 pixeladdr) const;
void ScanlineFinalPass(const GPU3D& gpu3d, s32 y); void ScanlineFinalPass(const GPU3D& gpu3d, s32 y);