diff --git a/src/GPU3D.cpp b/src/GPU3D.cpp index 4a1426aa..d5492d46 100644 --- a/src/GPU3D.cpp +++ b/src/GPU3D.cpp @@ -451,6 +451,8 @@ void GPU3D::DoSavestate(Savestate* file) noexcept file->Var32(&poly->NumVertices); + file->VarArray(poly->SlopePosition, sizeof(s32)*10*2); + file->VarArray(poly->FinalZ, sizeof(s32)*10); file->VarArray(poly->FinalW, sizeof(s32)*10); file->Bool32(&poly->WBuffer); @@ -489,7 +491,7 @@ void GPU3D::DoSavestate(Savestate* file) noexcept poly->Degenerate = true; } - if (poly->YBottom > 192) poly->Degenerate = true; + if (poly->YBottom > 192 && !poly->Translucent) poly->Degenerate = true; } } @@ -1100,8 +1102,10 @@ void GPU3D::SubmitPolygon() noexcept } // compute screen coordinates - - for (int i = clipstart; i < nverts; i++) + // hardware does this pass for shared vertices in polygon strips, even though it was already done for them last polygon + // however it doesn't recalculate all of the previous polygon's internal info (used for determining how to rasterize it) + // despite potentially changing their coordinates if a viewport change occured mid-strip... + for (int i = (UpdateLastPoly ? 0 : clipstart); i < nverts; i++) { Vertex* vtx = &clippedvertices[i]; @@ -1214,6 +1218,7 @@ void GPU3D::SubmitPolygon() noexcept poly->Degenerate = false; poly->Type = 0; + poly->OOBRendering = UpdateLastPoly; poly->FacingView = facingview; @@ -1234,6 +1239,19 @@ void GPU3D::SubmitPolygon() noexcept { poly->Vertices[0] = reusedvertices[0]; poly->Vertices[1] = reusedvertices[1]; + + // null vertices poly invalidation: + // 1. Start a polygon strip + // 2. Submit at least one polygon + // 3. Swap buffers + // 4. Don't send a begin command + // 4. submit a new polygon (1 vertex for tri, 2 for quad) + // 5. if the new polygon reuses vertices, it will be "degenerate" due to them being null pointers (theory) + if (NullVertices) + { + poly->Degenerate = true; + NullVertices -= (PolygonMode - 1); // subt. 1 if tri strip, subt. 2 if quad strip. 
+ } } else { @@ -1247,6 +1265,31 @@ void GPU3D::SubmitPolygon() noexcept NumVertices += 2; } + // if a viewport command was submitted mid-polygon strip the "true" coords and sort order of a vertex in the last polygon can be changed retroactively + if (UpdateLastPoly) + { + // update final coords and sortkey to match new vertex coordinates + // yes, *only* those values... this causes the polygon to be rasterized in an extremely glitchy manner + poly->Vertices[0]->FinalPosition[0] = clippedvertices[0].FinalPosition[0]; + poly->Vertices[0]->FinalPosition[1] = clippedvertices[0].FinalPosition[1]; + poly->Vertices[1]->FinalPosition[0] = clippedvertices[1].FinalPosition[0]; + poly->Vertices[1]->FinalPosition[1] = clippedvertices[1].FinalPosition[1]; + + s32 ytop = 192, ybot = 0; + Vertex** lastpolyvtx = LastStripPolygon->Vertices; + for (int i = 0; i < LastStripPolygon->NumVertices; i++) + { + if (lastpolyvtx[i]->FinalPosition[1] < ytop) + ytop = lastpolyvtx[i]->FinalPosition[1]; + if (lastpolyvtx[i]->FinalPosition[1] > ybot) + ybot = lastpolyvtx[i]->FinalPosition[1]; + } + LastStripPolygon->SortKey = (ybot << 8) | ytop; + if (LastStripPolygon->Translucent) LastStripPolygon->SortKey |= 0x10000; + + // clear update flag + UpdateLastPoly = false; + } poly->NumVertices += 2; } @@ -1268,6 +1311,7 @@ void GPU3D::SubmitPolygon() noexcept } // determine bounds of the polygon + // including where slopes begin and end // also determine the W shift and normalize W // normalization works both ways // (ie two W's that span 12 bits or less will be brought to 16 bits) @@ -1294,6 +1338,10 @@ void GPU3D::SubmitPolygon() noexcept vbot = i; } + // these values are used to determine where to begin/end slopes + poly->SlopePosition[i][0] = vtx->FinalPosition[0]; + poly->SlopePosition[i][1] = vtx->FinalPosition[1]; + u32 w = (u32)vtx->Position[3]; if (w == 0) poly->Degenerate = true; @@ -1305,7 +1353,7 @@ void GPU3D::SubmitPolygon() noexcept poly->YTop = ytop; poly->YBottom = ybot; poly->XTop 
= xtop; poly->XBottom = xbot; - if (ybot > 192) poly->Degenerate = true; + if (ybot > 192 && !poly->Translucent) poly->Degenerate = true; poly->SortKey = (ybot << 8) | ytop; if (poly->Translucent) poly->SortKey |= 0x10000; @@ -2066,7 +2114,9 @@ void GPU3D::ExecuteCommand() noexcept VertexNumInPoly = 0; NumConsecutivePolygons = 0; LastStripPolygon = NULL; + UpdateLastPoly = false; CurPolygonAttr = PolygonAttr; + NullVertices = 0; break; case 0x41: // end polygons @@ -2089,6 +2139,8 @@ void GPU3D::ExecuteCommand() noexcept PolygonPipeline = 0; VertexSlotCounter = 0; VertexSlotsFree = 1; + // previous polygon's vertices will be counted as "null" if a buffer swap occurs + if (PolygonMode >= 2) NullVertices = 2; break; case 0x60: // viewport x1,y1,x2,y2 @@ -2100,6 +2152,9 @@ void GPU3D::ExecuteCommand() noexcept Viewport[3] = (191 - (entry.Param >> 24)) & 0xFF; // y1 Viewport[4] = (Viewport[2] - Viewport[0] + 1) & 0x1FF; // width Viewport[5] = (Viewport[1] - Viewport[3] + 1) & 0xFF; // height + + // set a flag that tells the next polygon to emulate a bug with polygon strips + if (LastStripPolygon) UpdateLastPoly = true; break; case 0x72: // vec test diff --git a/src/GPU3D.h b/src/GPU3D.h index d10df55f..41a3133b 100644 --- a/src/GPU3D.h +++ b/src/GPU3D.h @@ -39,8 +39,13 @@ struct Vertex // final vertex attributes. // allows them to be reused in polygon strips. 
- + + // with sw renderer FinalPosition is primarily used for calculating the slope of a polygon (not where it begins/ends) + // (it does get used to determine where slopes should start and end with the gl renderers) + // the initial set of coordinates gets updated by the next polygon in a strip + // which can cause rendering issues if they wind up different than their initial value (due to a viewport change) s32 FinalPosition[2]; + s32 FinalColor[3]; // hi-res position (4-bit fractional part) @@ -55,6 +60,13 @@ struct Polygon Vertex* Vertices[10]; u32 NumVertices; + // essentially a per-polygon copy of its vertices' coordinates + // (not 100% sure why they do it like this? but a glitch requires this for proper behavior, so we gotta do it too) + // unlike each vertex's final position variable, it is *not* updated by the next polygon in a polygon strip + // it is used by the software renderer to determine where to begin/end each slope + // TODO: track hires versions of this for the hardware renderers to use? 
+ s32 SlopePosition[10][2]; + s32 FinalZ[10]; s32 FinalW[10]; bool WBuffer; @@ -72,6 +84,7 @@ struct Polygon bool IsShadow; int Type; // 0=regular 1=line + bool OOBRendering; u32 VTop, VBottom; // vertex indices s32 YTop, YBottom; // Y coords @@ -272,6 +285,7 @@ public: u32 RenderClearAttr2 = 0; bool RenderFrameIdentical = false; // not part of the hardware state, don't serialize + bool UpdateLastPoly = false; // used to track whether the next polygon should update the previous one's vtx coordinates (as a small optimization) bool AbortFrame = false; @@ -310,6 +324,7 @@ public: u32 VertexNumInPoly = 0; u32 NumConsecutivePolygons = 0; Polygon* LastStripPolygon = nullptr; + u32 NullVertices = 0; u32 NumOpaquePolygons = 0; Vertex VertexRAM[6144 * 2] {}; diff --git a/src/GPU3D_Soft.cpp b/src/GPU3D_Soft.cpp index a9d0bd64..e90195bd 100644 --- a/src/GPU3D_Soft.cpp +++ b/src/GPU3D_Soft.cpp @@ -598,11 +598,12 @@ void SoftRenderer::PlotTranslucentPixel(const GPU3D& gpu3d, u32 pixeladdr, u32 c AttrBuffer[pixeladdr] = attr; } +template void SoftRenderer::SetupPolygonLeftEdge(SoftRenderer::RendererPolygon* rp, s32 y) const { Polygon* polygon = rp->PolyData; - while (y >= polygon->Vertices[rp->NextVL]->FinalPosition[1] && rp->CurVL != polygon->VBottom) + while ((y >= polygon->SlopePosition[rp->NextVL][1]) && rp->CurVL != polygon->VBottom) { rp->CurVL = rp->NextVL; @@ -620,16 +621,28 @@ void SoftRenderer::SetupPolygonLeftEdge(SoftRenderer::RendererPolygon* rp, s32 y } } - rp->XL = rp->SlopeL.Setup(polygon->Vertices[rp->CurVL]->FinalPosition[0], polygon->Vertices[rp->NextVL]->FinalPosition[0], - polygon->Vertices[rp->CurVL]->FinalPosition[1], polygon->Vertices[rp->NextVL]->FinalPosition[1], - polygon->FinalW[rp->CurVL], polygon->FinalW[rp->NextVL], y); + // note: if the end or current position in a slope is above the start point + // it seems to seek forwards(?) 
until the value overflows at 256 + // this can be emulated by just adding 256 to them + s32 y1 = polygon->Vertices[rp->NextVL]->FinalPosition[1]; + if constexpr (oob) + { + if (y < polygon->Vertices[rp->CurVL]->FinalPosition[1]) + y += 256; + + if (y1 < polygon->Vertices[rp->CurVL]->FinalPosition[1]) + y1 += 256; + } + rp->XL = rp->SlopeL.Setup(polygon->Vertices[rp->CurVL]->FinalPosition[0], polygon->Vertices[rp->NextVL]->FinalPosition[0], + polygon->Vertices[rp->CurVL]->FinalPosition[1], y1, polygon->FinalW[rp->CurVL], polygon->FinalW[rp->NextVL], y); } +template void SoftRenderer::SetupPolygonRightEdge(SoftRenderer::RendererPolygon* rp, s32 y) const { Polygon* polygon = rp->PolyData; - while (y >= polygon->Vertices[rp->NextVR]->FinalPosition[1] && rp->CurVR != polygon->VBottom) + while ((y >= polygon->SlopePosition[rp->NextVR][1]) && rp->CurVR != polygon->VBottom) { rp->CurVR = rp->NextVR; @@ -647,11 +660,23 @@ void SoftRenderer::SetupPolygonRightEdge(SoftRenderer::RendererPolygon* rp, s32 } } - rp->XR = rp->SlopeR.Setup(polygon->Vertices[rp->CurVR]->FinalPosition[0], polygon->Vertices[rp->NextVR]->FinalPosition[0], - polygon->Vertices[rp->CurVR]->FinalPosition[1], polygon->Vertices[rp->NextVR]->FinalPosition[1], - polygon->FinalW[rp->CurVR], polygon->FinalW[rp->NextVR], y); + // note: if the end or current position in a slope is above the start point + // it seems to seek forwards(?) 
until the value overflows at 256 + // this can be emulated by just adding 256 to them + s32 y1 = polygon->Vertices[rp->NextVR]->FinalPosition[1]; + if constexpr (oob) + { + if (y < polygon->Vertices[rp->CurVR]->FinalPosition[1]) + y += 256; + + if (y1 < polygon->Vertices[rp->CurVR]->FinalPosition[1]) + y1 += 256; + } + rp->XR = rp->SlopeR.Setup(polygon->Vertices[rp->CurVR]->FinalPosition[0], polygon->Vertices[rp->NextVR]->FinalPosition[0], + polygon->Vertices[rp->CurVR]->FinalPosition[1], y1, polygon->FinalW[rp->CurVR], polygon->FinalW[rp->NextVR], y); } +template void SoftRenderer::SetupPolygon(SoftRenderer::RendererPolygon* rp, Polygon* polygon) const { u32 nverts = polygon->NumVertices; @@ -678,8 +703,10 @@ void SoftRenderer::SetupPolygon(SoftRenderer::RendererPolygon* rp, Polygon* poly rp->NextVR = rp->CurVR + 1; if (rp->NextVR >= nverts) rp->NextVR = 0; } - - if (ybot == ytop) + + // 0px tall line polygons are checked for at rasterization, this matters for when viewports are updated mid-polygon-strip + // therefore we need to check if the last two vertices are actually still at the same y axis as the others + if ((ybot == ytop) && (ybot == polygon->Vertices[nverts-1]->FinalPosition[1]) && (ybot == polygon->Vertices[nverts-2]->FinalPosition[1]) && (ybot == polygon->Vertices[nverts-3]->FinalPosition[1])) { vtop = 0; vbot = 0; int i; @@ -700,11 +727,38 @@ void SoftRenderer::SetupPolygon(SoftRenderer::RendererPolygon* rp, Polygon* poly } else { - SetupPolygonLeftEdge(rp, ytop); - SetupPolygonRightEdge(rp, ytop); + // note: if the end or current position in a slope is above the start point + // it seems to seek forwards(?) 
until the value overflows at 256 + // this can be emulated by just adding 256 to them + s32 y = ytop; + s32 y1 = polygon->Vertices[rp->NextVL]->FinalPosition[1]; + if (oob) + { + if (y < polygon->Vertices[rp->CurVL]->FinalPosition[1]) + y += 256; + + if (y1 < polygon->Vertices[rp->CurVL]->FinalPosition[1]) + y1 += 256; + } + rp->XL = rp->SlopeL.Setup(polygon->Vertices[rp->CurVL]->FinalPosition[0], polygon->Vertices[rp->NextVL]->FinalPosition[0], + polygon->Vertices[rp->CurVL]->FinalPosition[1], y1, polygon->FinalW[rp->CurVL], polygon->FinalW[rp->NextVL], y); + + y1 = polygon->Vertices[rp->NextVR]->FinalPosition[1]; + if constexpr (oob) + { + y = ytop; + if (y < polygon->Vertices[rp->CurVR]->FinalPosition[1]) + y += 256; + + if (y1 < polygon->Vertices[rp->CurVR]->FinalPosition[1]) + y1 += 256; + } + rp->XR = rp->SlopeR.Setup(polygon->Vertices[rp->CurVR]->FinalPosition[0], polygon->Vertices[rp->NextVR]->FinalPosition[0], + polygon->Vertices[rp->CurVR]->FinalPosition[1], y1, polygon->FinalW[rp->CurVR], polygon->FinalW[rp->NextVR], y); } } +template void SoftRenderer::RenderShadowMaskScanline(const GPU3D& gpu3d, RendererPolygon* rp, s32 y) { Polygon* polygon = rp->PolyData; @@ -730,14 +784,14 @@ void SoftRenderer::RenderShadowMaskScanline(const GPU3D& gpu3d, RendererPolygon* if (polygon->YTop != polygon->YBottom) { - if (y >= polygon->Vertices[rp->NextVL]->FinalPosition[1] && rp->CurVL != polygon->VBottom) + if ((y >= polygon->SlopePosition[rp->NextVL][1] || (oob && y == polygon->Vertices[rp->CurVL]->FinalPosition[1])) && rp->CurVL != polygon->VBottom) { - SetupPolygonLeftEdge(rp, y); + SetupPolygonLeftEdge(rp, y); } - if (y >= polygon->Vertices[rp->NextVR]->FinalPosition[1] && rp->CurVR != polygon->VBottom) + if ((y >= polygon->SlopePosition[rp->NextVR][1] || (oob && y == polygon->Vertices[rp->CurVR]->FinalPosition[1])) && rp->CurVR != polygon->VBottom) { - SetupPolygonRightEdge(rp, y); + SetupPolygonRightEdge(rp, y); } } @@ -746,8 +800,8 @@ void 
SoftRenderer::RenderShadowMaskScanline(const GPU3D& gpu3d, RendererPolygon* bool l_filledge, r_filledge; s32 l_edgelen, r_edgelen; s32 l_edgecov, r_edgecov; - Interpolator<1>* interp_start; - Interpolator<1>* interp_end; + Interpolator<1, false>* interp_start; + Interpolator<1, false>* interp_end; xstart = rp->XL; xend = rp->XR; @@ -839,11 +893,30 @@ void SoftRenderer::RenderShadowMaskScanline(const GPU3D& gpu3d, RendererPolygon* if (y == polygon->YTop) yedge = 0x4; else if (y == polygon->YBottom-1) yedge = 0x8; int edge; - + + xend += 1; + Interpolator<0, oob> interpX(xstart, xend, wl, wr); + + if constexpr (oob) + { + // CHECKME: should the unclamped values be used for timings? + // negative values are clamped to 0 + if (xstart < 0) + { + l_edgelen += xstart; + if (l_edgelen < 1) l_edgelen = 1; + xstart = 0; + if (xend < 1) xend = 1; + } + // too big values are clamped to 511 + if (xend > 511) + { + r_edgelen += 256 - xend; + xend = 511; + } + } + s32 x = xstart; - Interpolator<0> interpX(xstart, xend+1, wl, wr); - - if (x < 0) x = 0; s32 xlimit; // for shadow masks: set stencil bits where the depth test fails. 
@@ -852,7 +925,7 @@ void SoftRenderer::RenderShadowMaskScanline(const GPU3D& gpu3d, RendererPolygon* // part 1: left edge edge = yedge | 0x1; xlimit = xstart+l_edgelen; - if (xlimit > xend+1) xlimit = xend+1; + if (xlimit > xend) xlimit = xend; if (xlimit > 256) xlimit = 256; if (!l_filledge) x = xlimit; @@ -879,8 +952,8 @@ void SoftRenderer::RenderShadowMaskScanline(const GPU3D& gpu3d, RendererPolygon* // part 2: polygon inside edge = yedge; - xlimit = xend-r_edgelen+1; - if (xlimit > xend+1) xlimit = xend+1; + xlimit = xend-r_edgelen; + if (xlimit > xend) xlimit = xend; if (xlimit > 256) xlimit = 256; if (wireframe && !edge) x = std::max(x, xlimit); else for (; x < xlimit; x++) @@ -905,7 +978,7 @@ void SoftRenderer::RenderShadowMaskScanline(const GPU3D& gpu3d, RendererPolygon* // part 3: right edge edge = yedge | 0x2; - xlimit = xend+1; + xlimit = xend; if (xlimit > 256) xlimit = 256; if (r_filledge) @@ -929,10 +1002,11 @@ void SoftRenderer::RenderShadowMaskScanline(const GPU3D& gpu3d, RendererPolygon* } } - rp->XL = rp->SlopeL.Step(); - rp->XR = rp->SlopeR.Step(); + rp->XL = rp->SlopeL.Step(); + rp->XR = rp->SlopeR.Step(); } +template void SoftRenderer::RenderPolygonScanline(const GPU& gpu, RendererPolygon* rp, s32 y) { Polygon* polygon = rp->PolyData; @@ -955,14 +1029,14 @@ void SoftRenderer::RenderPolygonScanline(const GPU& gpu, RendererPolygon* rp, s3 if (polygon->YTop != polygon->YBottom) { - if (y >= polygon->Vertices[rp->NextVL]->FinalPosition[1] && rp->CurVL != polygon->VBottom) + if ((y >= polygon->SlopePosition[rp->NextVL][1] || (oob && y == polygon->Vertices[rp->CurVL]->FinalPosition[1])) && rp->CurVL != polygon->VBottom) { - SetupPolygonLeftEdge(rp, y); + SetupPolygonLeftEdge(rp, y); } - if (y >= polygon->Vertices[rp->NextVR]->FinalPosition[1] && rp->CurVR != polygon->VBottom) + if ((y >= polygon->SlopePosition[rp->NextVR][1] || (oob && y == polygon->Vertices[rp->CurVR]->FinalPosition[1])) && rp->CurVR != polygon->VBottom) { - 
SetupPolygonRightEdge(rp, y); + SetupPolygonRightEdge(rp, y); } } @@ -971,8 +1045,8 @@ void SoftRenderer::RenderPolygonScanline(const GPU& gpu, RendererPolygon* rp, s3 bool l_filledge, r_filledge; s32 l_edgelen, r_edgelen; s32 l_edgecov, r_edgecov; - Interpolator<1>* interp_start; - Interpolator<1>* interp_end; + Interpolator<1, false>* interp_start; + Interpolator<1, false>* interp_end; xstart = rp->XL; xend = rp->XR; @@ -1090,10 +1164,29 @@ void SoftRenderer::RenderPolygonScanline(const GPU& gpu, RendererPolygon* rp, s3 else if (y == polygon->YBottom-1) yedge = 0x8; int edge; - s32 x = xstart; - Interpolator<0> interpX(xstart, xend+1, wl, wr); + xend+=1; + Interpolator<0, oob> interpX(xstart, xend, wl, wr); + + if constexpr (oob) + { + // CHECKME: should the unclamped values be used for timings? + // negative values are clamped to 0 + if (xstart < 0) + { + l_edgelen += xstart; + if (l_edgelen < 1) l_edgelen = 1; + xstart = 0; + if (xend < 1) xend = 1; + } + // too big values are clamped to 511 + if (xend > 511) + { + r_edgelen += 256 - xend; + xend = 511; + } + } - if (x < 0) x = 0; + s32 x = xstart; s32 xlimit; s32 xcov = 0; @@ -1101,7 +1194,7 @@ void SoftRenderer::RenderPolygonScanline(const GPU& gpu, RendererPolygon* rp, s3 // part 1: left edge edge = yedge | 0x1; xlimit = xstart+l_edgelen; - if (xlimit > xend+1) xlimit = xend+1; + if (xlimit > xend) xlimit = xend; if (xlimit > 256) xlimit = 256; if (l_edgecov & (1<<31)) { @@ -1201,8 +1294,8 @@ void SoftRenderer::RenderPolygonScanline(const GPU& gpu, RendererPolygon* rp, s3 // part 2: polygon inside edge = yedge; - xlimit = xend-r_edgelen+1; - if (xlimit > xend+1) xlimit = xend+1; + xlimit = xend-r_edgelen; + if (xlimit > xend) xlimit = xend; if (xlimit > 256) xlimit = 256; if (wireframe && !edge) x = std::max(x, xlimit); @@ -1290,7 +1383,7 @@ void SoftRenderer::RenderPolygonScanline(const GPU& gpu, RendererPolygon* rp, s3 // part 3: right edge edge = yedge | 0x2; - xlimit = xend+1; + xlimit = xend; if (xlimit 
> 256) xlimit = 256; if (r_edgecov & (1<<31)) { @@ -1387,8 +1480,8 @@ void SoftRenderer::RenderPolygonScanline(const GPU& gpu, RendererPolygon* rp, s3 } } - rp->XL = rp->SlopeL.Step(); - rp->XR = rp->SlopeR.Step(); + rp->XL = rp->SlopeL.Step(); + rp->XR = rp->SlopeR.Step(); } void SoftRenderer::RenderScanline(const GPU& gpu, s32 y, int npolys) @@ -1400,10 +1493,20 @@ void SoftRenderer::RenderScanline(const GPU& gpu, s32 y, int npolys) if (y >= polygon->YTop && (y < polygon->YBottom || (y == polygon->YTop && polygon->YBottom == polygon->YTop))) { - if (polygon->IsShadowMask) - RenderShadowMaskScanline(gpu.GPU3D, rp, y); + if (polygon->OOBRendering) [[unlikely]] // enable some extra handling for polygons that might render out of ordinary bounds + { + if (polygon->IsShadowMask) + RenderShadowMaskScanline(gpu.GPU3D, rp, y); + else + RenderPolygonScanline(gpu, rp, y); + } else - RenderPolygonScanline(gpu, rp, y); + { + if (polygon->IsShadowMask) + RenderShadowMaskScanline(gpu.GPU3D, rp, y); + else + RenderPolygonScanline(gpu, rp, y); + } } } } @@ -1712,7 +1815,11 @@ void SoftRenderer::RenderPolygons(const GPU& gpu, bool threaded, Polygon** polyg for (int i = 0; i < npolys; i++) { if (polygons[i]->Degenerate) continue; - SetupPolygon(&PolygonList[j++], polygons[i]); + + if (polygons[i]->OOBRendering) [[unlikely]] // enable some extra handling for polygons that might render out of ordinary bounds + SetupPolygon(&PolygonList[j++], polygons[i]); + else + SetupPolygon(&PolygonList[j++], polygons[i]); } RenderScanline(gpu, 0, j); diff --git a/src/GPU3D_Soft.h b/src/GPU3D_Soft.h index 73d02e4f..45a4de2b 100644 --- a/src/GPU3D_Soft.h +++ b/src/GPU3D_Soft.h @@ -64,7 +64,7 @@ private: // interpolation, avoiding precision loss from the aforementioned approximation. // Which is desirable when using the GPU to draw 2D graphics. 
- template + template class Interpolator { public: @@ -78,7 +78,11 @@ private: { this->x0 = x0; this->x1 = x1; - this->xdiff = x1 - x0; + + if (oob) + this->xdiff = std::min(x1, 511) - std::max(x0, 0); + else + this->xdiff = x1 - x0; // calculate reciprocal for Z interpolation // TODO eventually: use a faster reciprocal function? @@ -129,6 +133,7 @@ private: constexpr void SetX(s32 x) { x -= x0; + //if (x > xdiff) x = xdiff; // may or may not be correct this->x = x; if (xdiff != 0 && !linear) { @@ -141,10 +146,12 @@ private: else yfactor = (s32)(num / den); } } - + constexpr s32 Interpolate(s32 y0, s32 y1) const { - if (xdiff == 0 || y0 == y1) return y0; + if (xdiff == 0 || y0 == y1 || x == 0) return y0; + + if (oob && (x0 <= 0 && x1 > 511)) return y1; if (!linear) { @@ -163,10 +170,12 @@ private: return y1 + (s64)(y0-y1) * (xdiff - x) / xdiff; } } - + constexpr s32 InterpolateZ(s32 z0, s32 z1, bool wbuffer) const { - if (xdiff == 0 || z0 == z1) return z0; + if (xdiff == 0 || z0 == z1 || x == 0) return z0; + + if (oob && (x0 <= 0 && x1 > 511)) return z1; if (wbuffer) { @@ -252,6 +261,7 @@ private: return x0; } + template constexpr s32 Setup(s32 x0, s32 x1, s32 y0, s32 y1, s32 w0, s32 w1, s32 y) { this->x0 = x0; @@ -284,7 +294,7 @@ private: // instead, 1/y is calculated and then multiplied by x // TODO: this is still not perfect (see for example x=169 y=33) if (ylen == 0) - Increment = 0; + Increment = xlen << 18; else if (ylen == xlen && xlen != 1) Increment = 0x40000; else @@ -314,8 +324,7 @@ private: } dx += (y - y0) * Increment; - - s32 x = XVal(); + if (oob) dx &= 0xFFFFFFF; int interpoffset = (Increment >= 0x40000) && (side ^ Negative); Interp.Setup(y0-interpoffset, y1-interpoffset, w0, w1); @@ -324,28 +333,27 @@ private: // used for calculating AA coverage if (XMajor) xcov_incr = (ylen << 10) / xlen; - return x; + return XVal(); } + template constexpr s32 Step() { - dx += Increment; + dx += Increment; // seems to be a 28 bit integer + if (oob) dx &= 
0xFFFFFFF; y++; - s32 x = XVal(); Interp.SetX(y); - return x; + return XVal(); } constexpr s32 XVal() const { - s32 ret = 0; + s32 ret; if (Negative) ret = x0 - (dx >> 18); else ret = x0 + (dx >> 18); - if (ret < xmin) ret = xmin; - else if (ret > xmax) ret = xmax; - return ret; + return ret;// << 21 >> 21; checkme: is this commented bit actually correct? } template @@ -418,7 +426,7 @@ private: s32 Increment; bool Negative; bool XMajor; - Interpolator<1> Interp; + Interpolator<1, false> Interp; private: s32 x0, xmin, xmax; @@ -448,11 +456,11 @@ private: void TextureLookup(const GPU& gpu, u32 texparam, u32 texpal, s16 s, s16 t, u16* color, u8* alpha) const; u32 RenderPixel(const GPU& gpu, const Polygon* polygon, u8 vr, u8 vg, u8 vb, s16 s, s16 t) const; void PlotTranslucentPixel(const GPU3D& gpu3d, u32 pixeladdr, u32 color, u32 z, u32 polyattr, u32 shadow); - void SetupPolygonLeftEdge(RendererPolygon* rp, s32 y) const; - void SetupPolygonRightEdge(RendererPolygon* rp, s32 y) const; - void SetupPolygon(RendererPolygon* rp, Polygon* polygon) const; - void RenderShadowMaskScanline(const GPU3D& gpu3d, RendererPolygon* rp, s32 y); - void RenderPolygonScanline(const GPU& gpu, RendererPolygon* rp, s32 y); + template void SetupPolygonLeftEdge(RendererPolygon* rp, s32 y) const; + template void SetupPolygonRightEdge(RendererPolygon* rp, s32 y) const; + template void SetupPolygon(RendererPolygon* rp, Polygon* polygon) const; + template void RenderShadowMaskScanline(const GPU3D& gpu3d, RendererPolygon* rp, s32 y); + template void RenderPolygonScanline(const GPU& gpu, RendererPolygon* rp, s32 y); void RenderScanline(const GPU& gpu, s32 y, int npolys); u32 CalculateFogDensity(const GPU3D& gpu3d, u32 pixeladdr) const; void ScanlineFinalPass(const GPU3D& gpu3d, s32 y);