From 9dc9e1c61a3d9884add8ac3a3d82e396b17c9624 Mon Sep 17 00:00:00 2001 From: Jaklyy <102590697+Jaklyy@users.noreply.github.com> Date: Thu, 27 Jul 2023 02:23:36 -0400 Subject: [PATCH 1/7] fix certain slopes not having gaps --- src/GPU3D_Soft.h | 44 +++++++++++++++++++++++++++++++++----------- 1 file changed, 33 insertions(+), 11 deletions(-) diff --git a/src/GPU3D_Soft.h b/src/GPU3D_Soft.h index 2f5664e2..fefd84d3 100644 --- a/src/GPU3D_Soft.h +++ b/src/GPU3D_Soft.h @@ -280,7 +280,6 @@ private: // slope increment has a 18-bit fractional part // note: for some reason, x/y isn't calculated directly, // instead, 1/y is calculated and then multiplied by x - // TODO: this is still not perfect (see for example x=169 y=33) if (ylen == 0) Increment = 0; else if (ylen == xlen && xlen != 1) @@ -313,21 +312,41 @@ private: dx += (y - y0) * Increment; + if (XMajor) + { + // used for calculating AA coverage + xcov_incr = (ylen << 10) / xlen; + + if (side ^ Negative) + { + dxold = dx; + // I dont think the first span can have a gap but i think its technically correct to do this calc anyway? + // could probably be removed as a minor optimization + dx &= ~0x1FF; + } + } + s32 x = XVal(); int interpoffset = (Increment >= 0x40000) && (side ^ Negative); Interp.Setup(y0-interpoffset, y1-interpoffset, w0, w1); Interp.SetX(y); - // used for calculating AA coverage - if (XMajor) xcov_incr = (ylen << 10) / xlen; - return x; } constexpr s32 Step() - { - dx += Increment; + { + if (XMajor && (side ^ Negative)) // tl, br (right side start) & tr, bl (left side start) + { + // round dx; required to create gaps in lines like on hw + // increment using dxold to make the line not completely borked after rendering a gap + dx = (dxold & ~0x1FF) + Increment; + dxold += Increment; + } + else + dx += Increment; + y++; s32 x = XVal(); @@ -353,15 +372,18 @@ private: // only needed for aa calcs, as actual line spans are broken if constexpr (!swapped || side) { - if (side ^ Negative) - *length = (dx >> 18) - ((dx-Increment) >> 18); - else - *length = ((dx+Increment) >> 18) - (dx >> 18); + if (side ^ Negative) // tr, bl (left side end) & tl br (right side end) + // dxold used here to avoid rounding the endpoint + *length = (dx >> 18) - (dxold - Increment >> 18); + else // tl, br (left side end) & tr, bl (right side end) + // dx rounded down to create gaps in lines like on hw + *length = ((dx & ~0x1FF) + Increment >> 18) - (dx >> 18); } // for X-major edges, we return the coverage // for the first pixel, and the increment for // further pixels on the same scanline + // TODO: check how coverage interacts with line gaps s32 startx = dx >> 18; if (Negative) startx = xlen - startx; if (side) startx = startx - *length + 1; @@ -421,7 +443,7 @@ private: private: s32 x0, xmin, xmax; s32 xlen, ylen; - s32 dx; + s32 dx, dxold; s32 y; s32 xcov_incr; From 04766585f199d151b40985f1d9fda3c4a315a9ef Mon Sep 17 00:00:00 2001 From: Jaklyy <102590697+Jaklyy@users.noreply.github.com> Date: Sun, 27 Aug 2023 12:14:00 -0400 Subject: [PATCH 2/7] slight tweak to comments --- src/GPU3D_Soft.h | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/src/GPU3D_Soft.h b/src/GPU3D_Soft.h index fefd84d3..0789ec3d 100644 --- a/src/GPU3D_Soft.h +++ b/src/GPU3D_Soft.h @@ -320,7 +320,8 @@ private: if (side ^ Negative) { dxold = dx; - // I dont think the first span can have a gap but i think its technically correct to do this calc anyway? + // I dont think the first span can have a gap but + // I think its technically correct to do this calc anyway? // could probably be removed as a minor optimization dx &= ~0x1FF; } @@ -337,7 +338,7 @@ private: constexpr s32 Step() { - if (XMajor && (side ^ Negative)) // tl, br (right side start) & tr, bl (left side start) + if (XMajor && (side ^ Negative)) // tl, br = \ (right side end) & tr, bl = / (left side start) { // round dx; required to create gaps in lines like on hw // increment using dxold to make the line not completely borked after rendering a gap @@ -372,10 +373,10 @@ private: // only needed for aa calcs, as actual line spans are broken if constexpr (!swapped || side) { - if (side ^ Negative) // tr, bl (left side end) & tl br (right side end) + if (side ^ Negative) // tr, bl = / (left side end) & tl br = \ (right side start) // dxold used here to avoid rounding the endpoint *length = (dx >> 18) - (dxold - Increment >> 18); - else // tl, br (left side end) & tr, bl (right side end) + else // tl, br = \ (left side end) & tr, bl = / (right side start) // dx rounded down to create gaps in lines like on hw *length = ((dx & ~0x1FF) + Increment >> 18) - (dx >> 18); } @@ -383,7 +384,7 @@ private: // for X-major edges, we return the coverage // for the first pixel, and the increment for // further pixels on the same scanline - // TODO: check how coverage interacts with line gaps + // TODO: check how coverage interacts with line gaps, I think it's correct though? s32 startx = dx >> 18; if (Negative) startx = xlen - startx; if (side) startx = startx - *length + 1; From 2ef067b60ce4268ce14228e0b78f6b9574382615 Mon Sep 17 00:00:00 2001 From: Jaklyy <102590697+Jaklyy@users.noreply.github.com> Date: Sun, 26 May 2024 09:43:50 -0400 Subject: [PATCH 3/7] give better credit --- src/GPU3D_Soft.h | 1 + 1 file changed, 1 insertion(+) diff --git a/src/GPU3D_Soft.h b/src/GPU3D_Soft.h index 0789ec3d..72ab8816 100644 --- a/src/GPU3D_Soft.h +++ b/src/GPU3D_Soft.h @@ -369,6 +369,7 @@ private: template constexpr void EdgeParams_XMajor(s32* length, s32* coverage) const { + // credit to StrikerX3 for working out this weird rounding nonsense // only do length calc for right side when swapped as it's // only needed for aa calcs, as actual line spans are broken if constexpr (!swapped || side) From 3785d9943804a9771306db5e910df4b88bd66c44 Mon Sep 17 00:00:00 2001 From: Jaklyy <102590697+Jaklyy@users.noreply.github.com> Date: Sun, 26 May 2024 09:45:53 -0400 Subject: [PATCH 4/7] better spot --- src/GPU3D_Soft.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/GPU3D_Soft.h b/src/GPU3D_Soft.h index 16f2556c..6a6c149c 100644 --- a/src/GPU3D_Soft.h +++ b/src/GPU3D_Soft.h @@ -371,11 +371,11 @@ private: template constexpr void EdgeParams_XMajor(s32* length, s32* coverage) const { - // credit to StrikerX3 for working out this weird rounding nonsense // only do length calc for right side when swapped as it's // only needed for aa calcs, as actual line spans are broken if constexpr (!swapped || side) { + // credit to StrikerX3 for working out this weird rounding nonsense if (side ^ Negative) // tr, bl = / (left side end) & tl br = \ (right side start) // dxold used here to avoid rounding the endpoint *length = (dx >> 18) - (dxold - Increment >> 18); From 5646c02ef37017d863f72d0318b09d03989c874e Mon Sep 17 00:00:00 2001 From: Jaklyy <102590697+Jaklyy@users.noreply.github.com> Date: Sun, 26 May 2024 09:47:42 -0400 Subject: [PATCH 5/7] actually use tab key --- src/GPU3D_Soft.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/GPU3D_Soft.h b/src/GPU3D_Soft.h index 6a6c149c..856d4165 100644 --- a/src/GPU3D_Soft.h +++ b/src/GPU3D_Soft.h @@ -377,11 +377,11 @@ private: { // credit to StrikerX3 for working out this weird rounding nonsense if (side ^ Negative) // tr, bl = / (left side end) & tl br = \ (right side start) - // dxold used here to avoid rounding the endpoint - *length = (dx >> 18) - (dxold - Increment >> 18); + // dxold used here to avoid rounding the endpoint + *length = (dx >> 18) - (dxold - Increment >> 18); else // tl, br = \ (left side end) & tr, bl = / (right side start) - // dx rounded down to create gaps in lines like on hw - *length = ((dx & ~0x1FF) + Increment >> 18) - (dx >> 18); + // dx rounded down to create gaps in lines like on hw + *length = ((dx & ~0x1FF) + Increment >> 18) - (dx >> 18); } // for X-major edges, we return the coverage From 5ca5c1a4a1fdc5e249fc452722176cce3af37645 Mon Sep 17 00:00:00 2001 From: Jaklyy <102590697+Jaklyy@users.noreply.github.com> Date: Fri, 2 Aug 2024 13:34:05 -0400 Subject: [PATCH 6/7] rework entire fix to do things in a (potentially) more hw accurate way also remove some tangentially related but kinda dumb code i wrote --- src/GPU3D_Soft.cpp | 42 ++++++++++++++++-------- src/GPU3D_Soft.h | 79 ++++++++-------------------------------------- 2 files changed, 43 insertions(+), 78 deletions(-) diff --git a/src/GPU3D_Soft.cpp b/src/GPU3D_Soft.cpp index 1221ed59..7f03b3b7 100644 --- a/src/GPU3D_Soft.cpp +++ b/src/GPU3D_Soft.cpp @@ -757,6 +757,12 @@ void SoftRenderer::RenderShadowMaskScanline(const GPU3D& gpu3d, RendererPolygon* s32 zl = rp->SlopeL.Interp.InterpolateZ(polygon->FinalZ[rp->CurVL], polygon->FinalZ[rp->NextVL], polygon->WBuffer); s32 zr = rp->SlopeR.Interp.InterpolateZ(polygon->FinalZ[rp->CurVR], polygon->FinalZ[rp->NextVR], polygon->WBuffer); + + rp->SlopeL.EdgeParams(&l_edgelen, &l_edgecov); + rp->SlopeR.EdgeParams(&r_edgelen, &r_edgecov); + + if (rp->SlopeL.XMajor && rp->SlopeL.Negative) xstart -= l_edgelen; + if (rp->SlopeR.XMajor && !rp->SlopeR.Negative) xend += r_edgelen; // right vertical edges are pushed 1px to the left as long as either: // the left edge slope is not 0, or the span is not 0 pixels wide, and it is not at the leftmost pixel of the screen @@ -773,11 +779,14 @@ void SoftRenderer::RenderShadowMaskScanline(const GPU3D& gpu3d, RendererPolygon* interp_start = &rp->SlopeR.Interp; interp_end = &rp->SlopeL.Interp; - - rp->SlopeR.EdgeParams(&l_edgelen, &l_edgecov); - rp->SlopeL.EdgeParams(&r_edgelen, &r_edgecov); - + std::swap(xstart, xend); + + l_edgelen = 1; + r_edgelen = 1; + + // ignore fixing coverage, shadow masks don't use it + std::swap(wl, wr); std::swap(zl, zr); @@ -806,9 +815,6 @@ void SoftRenderer::RenderShadowMaskScanline(const GPU3D& gpu3d, RendererPolygon* interp_start = &rp->SlopeL.Interp; interp_end = &rp->SlopeR.Interp; - rp->SlopeL.EdgeParams(&l_edgelen, &l_edgecov); - rp->SlopeR.EdgeParams(&r_edgelen, &r_edgecov); - // CHECKME: edge fill rules for unswapped opaque shadow mask polygons if ((gpu3d.RenderDispCnt & ((1<<4)|(1<<5))) || ((polyalpha < 31) && (gpu3d.RenderDispCnt & (1<<3))) || wireframe) { @@ -982,6 +988,12 @@ void SoftRenderer::RenderPolygonScanline(const GPU& gpu, RendererPolygon* rp, s3 s32 zl = rp->SlopeL.Interp.InterpolateZ(polygon->FinalZ[rp->CurVL], polygon->FinalZ[rp->NextVL], polygon->WBuffer); s32 zr = rp->SlopeR.Interp.InterpolateZ(polygon->FinalZ[rp->CurVR], polygon->FinalZ[rp->NextVR], polygon->WBuffer); + + rp->SlopeL.EdgeParams(&l_edgelen, &l_edgecov); + rp->SlopeR.EdgeParams(&r_edgelen, &r_edgecov); + + if (rp->SlopeL.XMajor && rp->SlopeL.Negative) xstart -= l_edgelen; + if (rp->SlopeR.XMajor && !rp->SlopeR.Negative) xend += r_edgelen; // right vertical edges are pushed 1px to the left as long as either: // the left edge slope is not 0, or the span is not 0 pixels wide, and it is not at the leftmost pixel of the screen @@ -1003,10 +1015,17 @@ void SoftRenderer::RenderPolygonScanline(const GPU& gpu, RendererPolygon* rp, s3 interp_start = &rp->SlopeR.Interp; interp_end = &rp->SlopeL.Interp; - rp->SlopeR.EdgeParams(&l_edgelen, &l_edgecov); - rp->SlopeL.EdgeParams(&r_edgelen, &r_edgecov); - std::swap(xstart, xend); + + l_edgelen = 1; + r_edgelen = 1; + + std::swap(l_edgecov, r_edgecov); + + // yes this breaks vertical slopes, blame hw + if (!rp->SlopeR.XMajor) l_edgecov = 31-l_edgecov; + if (!rp->SlopeL.XMajor) r_edgecov = 31-r_edgecov; + std::swap(wl, wr); std::swap(zl, zr); @@ -1041,9 +1060,6 @@ void SoftRenderer::RenderPolygonScanline(const GPU& gpu, RendererPolygon* rp, s3 interp_start = &rp->SlopeL.Interp; interp_end = &rp->SlopeR.Interp; - rp->SlopeL.EdgeParams(&l_edgelen, &l_edgecov); - rp->SlopeR.EdgeParams(&r_edgelen, &r_edgecov); - // edge fill rules for unswapped opaque edges: // * right edge is filled if slope > 1 // * left edge is filled if slope <= 1 diff --git a/src/GPU3D_Soft.h b/src/GPU3D_Soft.h index e42b2ca2..85d500dc 100644 --- a/src/GPU3D_Soft.h +++ b/src/GPU3D_Soft.h @@ -295,11 +295,11 @@ private: XMajor = (Increment > 0x40000); - if constexpr (side) + if (side) { // right - if (XMajor) dx = Negative ? (0x20000 + 0x40000) : (Increment - 0x20000); + if (XMajor) dx = Negative ? (0x20000 + 0x40000) : -0x20000; else if (Increment != 0) dx = Negative ? 0x40000 : 0; else dx = 0; } @@ -307,27 +307,15 @@ private: { // left - if (XMajor) dx = Negative ? ((Increment - 0x20000) + 0x40000) : 0x20000; + if (XMajor) dx = 0x20000; else if (Increment != 0) dx = Negative ? 0x40000 : 0; else dx = 0; } dx += (y - y0) * Increment; - if (XMajor) - { - // used for calculating AA coverage - xcov_incr = (ylen << 10) / xlen; - - if (side ^ Negative) - { - dxold = dx; - // I dont think the first span can have a gap but - // I think its technically correct to do this calc anyway? - // could probably be removed as a minor optimization - dx &= ~0x1FF; - } - } + // used for calculating AA coverage + if (XMajor) xcov_incr = (ylen << 10) / xlen; s32 x = XVal(); @@ -340,15 +328,7 @@ private: constexpr s32 Step() { - if (XMajor && (side ^ Negative)) // tl, br = \ (right side end) & tr, bl = / (left side start) - { - // round dx; required to create gaps in lines like on hw - // increment using dxold to make the line not completely borked after rendering a gap - dx = (dxold & ~0x1FF) + Increment; - dxold += Increment; - } - else - dx += Increment; + dx += Increment; y++; @@ -363,26 +343,13 @@ private: if (Negative) ret = x0 - (dx >> 18); else ret = x0 + (dx >> 18); - if (ret < xmin) ret = xmin; - else if (ret > xmax) ret = xmax; return ret; } - template constexpr void EdgeParams_XMajor(s32* length, s32* coverage) const { - // only do length calc for right side when swapped as it's - // only needed for aa calcs, as actual line spans are broken - if constexpr (!swapped || side) - { - // credit to StrikerX3 for working out this weird rounding nonsense - if (side ^ Negative) // tr, bl = / (left side end) & tl br = \ (right side start) - // dxold used here to avoid rounding the endpoint - *length = (dx >> 18) - (dxold - Increment >> 18); - else // tl, br = \ (left side end) & tr, bl = / (right side start) - // dx rounded down to create gaps in lines like on hw - *length = ((dx & ~0x1FF) + Increment >> 18) - (dx >> 18); - } + // credit to StrikerX3 for their efforts researching the strange rounding behavior of the "length" calculation + *length = (dx & (0x1FF << 9)) + Increment >> 18; // for X-major edges, we return the coverage // for the first pixel, and the increment for @@ -394,49 +361,31 @@ private: s32 startcov = (((startx << 10) + 0x1FF) * ylen) / xlen; *coverage = (1<<31) | ((startcov & 0x3FF) << 12) | (xcov_incr & 0x3FF); - - if constexpr (swapped) *length = 1; } - template constexpr void EdgeParams_YMajor(s32* length, s32* coverage) const { *length = 1; - if (Increment == 0) - { - // for some reason vertical edges' aa values - // are inverted too when the edges are swapped - if constexpr (swapped) - *coverage = 0; - else - *coverage = 31; - } + if (Increment == 0) *coverage = 31; else { s32 cov = ((dx >> 9) + (Increment >> 10)) >> 4; if ((cov >> 5) != (dx >> 18)) cov = 31; cov &= 0x1F; - if constexpr (swapped) - { - if (side ^ Negative) cov = 0x1F - cov; - } - else - { - if (!(side ^ Negative)) cov = 0x1F - cov; - } + + if (!(side ^ Negative)) cov = 0x1F - cov; *coverage = cov; } } - template constexpr void EdgeParams(s32* length, s32* coverage) const { if (XMajor) - return EdgeParams_XMajor(length, coverage); + return EdgeParams_XMajor(length, coverage); else - return EdgeParams_YMajor(length, coverage); + return EdgeParams_YMajor(length, coverage); } s32 Increment; @@ -447,7 +396,7 @@ private: private: s32 x0, xmin, xmax; s32 xlen, ylen; - s32 dx, dxold; + s32 dx; s32 y; s32 xcov_incr; From c2abc1050cc25222f81c2cf1fde3c0d7fb6e3b76 Mon Sep 17 00:00:00 2001 From: Jaklyy <102590697+Jaklyy@users.noreply.github.com> Date: Fri, 2 Aug 2024 14:39:55 -0400 Subject: [PATCH 7/7] fix anti-aliasing kinda lazily though, will improve when it's time to rework aa --- src/GPU3D_Soft.h | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/src/GPU3D_Soft.h b/src/GPU3D_Soft.h index 85d500dc..e1fd4d05 100644 --- a/src/GPU3D_Soft.h +++ b/src/GPU3D_Soft.h @@ -355,7 +355,11 @@ private: // for the first pixel, and the increment for // further pixels on the same scanline // TODO: check how coverage interacts with line gaps, I think it's correct though? - s32 startx = dx >> 18; + s32 startx = dx; + if (side ^ Negative) startx += Increment; + startx >>= 18; + //s32 startcov = (startx * ylen) /9 xlen; + if (Negative) startx = xlen - startx; if (side) startx = startx - *length + 1;