From ed79b1772c5cfff95346b69f8b2ba1ac355b1637 Mon Sep 17 00:00:00 2001
From: Jaklyy <102590697+Jaklyy@users.noreply.github.com>
Date: Tue, 17 Oct 2023 18:53:50 -0400
Subject: [PATCH 1/4] invert coverage to fix xmajor aa

For X-major edges, if the y coordinate computed from the start coverage
differs from the scanline's actual y offset (y - y0), invert the coverage value.
---
 src/GPU3D_Soft.cpp | 12 ++----------
 src/GPU3D_Soft.h   | 12 ++++++++++--
 2 files changed, 12 insertions(+), 12 deletions(-)

diff --git a/src/GPU3D_Soft.cpp b/src/GPU3D_Soft.cpp
index 03c6265e..c131dc0f 100644
--- a/src/GPU3D_Soft.cpp
+++ b/src/GPU3D_Soft.cpp
@@ -1070,11 +1070,7 @@ void SoftRenderer::RenderPolygonScanline(RendererPolygon* rp, s32 y)
     xlimit = xstart+l_edgelen;
     if (xlimit > xend+1) xlimit = xend+1;
     if (xlimit > 256) xlimit = 256;
-    if (l_edgecov & (1<<31))
-    {
-        xcov = (l_edgecov >> 12) & 0x3FF;
-        if (xcov == 0x3FF) xcov = 0;
-    }
+    if (l_edgecov & (1<<31)) xcov = (l_edgecov >> 12) & 0x3FF;
 
     if (!l_filledge) x = xlimit;
     else
@@ -1259,11 +1255,7 @@ void SoftRenderer::RenderPolygonScanline(RendererPolygon* rp, s32 y)
     edge = yedge | 0x2;
     xlimit = xend+1;
     if (xlimit > 256) xlimit = 256;
-    if (r_edgecov & (1<<31))
-    {
-        xcov = (r_edgecov >> 12) & 0x3FF;
-        if (xcov == 0x3FF) xcov = 0;
-    }
+    if (r_edgecov & (1<<31)) xcov = (r_edgecov >> 12) & 0x3FF;
 
     if (r_filledge)
     for (; x < xlimit; x++)
diff --git a/src/GPU3D_Soft.h b/src/GPU3D_Soft.h
index 2f5664e2..ec89cac5 100644
--- a/src/GPU3D_Soft.h
+++ b/src/GPU3D_Soft.h
@@ -236,6 +236,7 @@ private:
             dx = 0;
 
             this->x0 = x0;
+            this->y0 = y0;
             this->xmin = x0;
             this->xmax = x0;
 
@@ -253,6 +254,7 @@ private:
         constexpr s32 Setup(s32 x0, s32 x1, s32 y0, s32 y1, s32 w0, s32 w1, s32 y)
         {
             this->x0 = x0;
+            this->y0 = y0;
             this->y = y;
 
             if (x1 > x0)
@@ -367,7 +369,13 @@ private:
             if (side)     startx = startx - *length + 1;
 
             s32 startcov = (((startx << 10) + 0x1FF) * ylen) / xlen;
-            *coverage = (1<<31) | ((startcov & 0x3FF) << 12) | (xcov_incr & 0x3FF);
+
+            // fix the y value for negative slopes
+            s32 ycoord = Negative ? (ylen << 10) - startcov >> 10 : startcov >> 10;
+            // if yvalue is not equal to actual y value, invert coverage value
+            if (ycoord != y - y0) startcov = 0x3FF - (startcov & 0x3FF);
+
+            *coverage = (1<<31) | (startcov << 12) | (xcov_incr & 0x3FF);
 
             if constexpr (swapped) *length = 1;
         }
@@ -419,7 +427,7 @@ private:
         Interpolator<1> Interp;
 
     private:
-        s32 x0, xmin, xmax;
+        s32 x0, y0, xmin, xmax;
         s32 xlen, ylen;
         s32 dx;
         s32 y;

From 0c8095c1bb0e43ce4db884500b372aeb853d0ca4 Mon Sep 17 00:00:00 2001
From: Jaklyy <102590697+Jaklyy@users.noreply.github.com>
Date: Wed, 18 Oct 2023 00:36:53 -0400
Subject: [PATCH 2/4] rewrite of aa code

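Should behave identically to the previous code. Edge coverage is now returned
as a plain 10-bit value for both X-major and Y-major edges, and the X-major
per-pixel increment is exposed as XCov_Incr.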
---
 src/GPU3D_Soft.cpp | 24 ++++++++----------------
 src/GPU3D_Soft.h   | 31 +++++++++++--------------------
 2 files changed, 19 insertions(+), 36 deletions(-)

diff --git a/src/GPU3D_Soft.cpp b/src/GPU3D_Soft.cpp
index c131dc0f..1871caeb 100644
--- a/src/GPU3D_Soft.cpp
+++ b/src/GPU3D_Soft.cpp
@@ -1063,14 +1063,11 @@ void SoftRenderer::RenderPolygonScanline(RendererPolygon* rp, s32 y)
     if (x < 0) x = 0;
     s32 xlimit;
 
-    s32 xcov = 0;
-
     // part 1: left edge
     edge = yedge | 0x1;
     xlimit = xstart+l_edgelen;
     if (xlimit > xend+1) xlimit = xend+1;
     if (xlimit > 256) xlimit = 256;
-    if (l_edgecov & (1<<31)) xcov = (l_edgecov >> 12) & 0x3FF;
 
     if (!l_filledge) x = xlimit;
     else
@@ -1129,14 +1126,12 @@ void SoftRenderer::RenderPolygonScanline(RendererPolygon* rp, s32 y)
                 // anti-aliasing: all edges are rendered
 
                 // calculate coverage
-                s32 cov = l_edgecov;
-                if (cov & (1<<31))
+                attr |= (l_edgecov >> 5) << 8;
+                if (x < xlimit-1)
                 {
-                    cov = xcov >> 5;
-                    if (cov > 31) cov = 31;
-                    xcov += (l_edgecov & 0x3FF);
+                    l_edgecov += rp->SlopeL.XCov_Incr;
+                    if (l_edgecov > 0x3FF) l_edgecov = 0x3FF;
                 }
-                attr |= (cov << 8);
 
                 // push old pixel down if needed
                 if (pixeladdr < BufferSize)
@@ -1255,7 +1250,6 @@ void SoftRenderer::RenderPolygonScanline(RendererPolygon* rp, s32 y)
     edge = yedge | 0x2;
     xlimit = xend+1;
     if (xlimit > 256) xlimit = 256;
-    if (r_edgecov & (1<<31)) xcov = (r_edgecov >> 12) & 0x3FF;
 
     if (r_filledge)
     for (; x < xlimit; x++)
@@ -1313,14 +1307,12 @@ void SoftRenderer::RenderPolygonScanline(RendererPolygon* rp, s32 y)
                 // anti-aliasing: all edges are rendered
 
                 // calculate coverage
-                s32 cov = r_edgecov;
-                if (cov & (1<<31))
+                attr |= (r_edgecov >> 5) << 8;
+                if (x < xlimit-1)
                 {
-                    cov = 0x1F - (xcov >> 5);
-                    if (cov < 0) cov = 0;
-                    xcov += (r_edgecov & 0x3FF);
+                    r_edgecov -= rp->SlopeR.XCov_Incr;
+                    if (r_edgecov < 0) r_edgecov = 0;
                 }
-                attr |= (cov << 8);
 
                 // push old pixel down if needed
                 if (pixeladdr < BufferSize)
diff --git a/src/GPU3D_Soft.h b/src/GPU3D_Soft.h
index ec89cac5..38de794c 100644
--- a/src/GPU3D_Soft.h
+++ b/src/GPU3D_Soft.h
@@ -246,7 +246,7 @@ private:
             Interp.Setup(0, 0, 0, 0);
             Interp.SetX(0);
 
-            xcov_incr = 0;
+            XCov_Incr = 0;
 
             return x0;
         }
@@ -322,7 +322,7 @@ private:
             Interp.SetX(y);
 
             // used for calculating AA coverage
-            if (XMajor) xcov_incr = (ylen << 10) / xlen;
+            if (XMajor) XCov_Incr = ((ylen << 10) / xlen) & 0x3FF;
 
             return x;
         }
@@ -373,9 +373,11 @@ private:
             // fix the y value for negative slopes
             s32 ycoord = Negative ? (ylen << 10) - startcov >> 10 : startcov >> 10;
             // if yvalue is not equal to actual y value, invert coverage value
-            if (ycoord != y - y0) startcov = 0x3FF - (startcov & 0x3FF);
+            startcov &= 0x3FF;
+            if (ycoord != y - y0) startcov = 0x3FF - startcov;
+            if (side ^ swapped) startcov = 0x3FF - startcov;
 
-            *coverage = (1<<31) | (startcov << 12) | (xcov_incr & 0x3FF);
+            *coverage = startcov;
 
             if constexpr (swapped) *length = 1;
         }
@@ -389,26 +391,17 @@ private:
             {
                 // for some reason vertical edges' aa values
                 // are inverted too when the edges are swapped
-                if constexpr (swapped)
-                    *coverage = 0;
-                else
-                    *coverage = 31;
+                *coverage = swapped ? 0 : 31 << 5;
             }
             else
             {
                 s32 cov = ((dx >> 9) + (Increment >> 10)) >> 4;
                 if ((cov >> 5) != (dx >> 18)) cov = 31;
                 cov &= 0x1F;
-                if constexpr (swapped)
-                {
-                    if (side ^ Negative) cov = 0x1F - cov;
-                }
-                else
-                {
-                    if (!(side ^ Negative)) cov = 0x1F - cov;
-                }
+                if (side ^ !Negative ^ swapped) cov = 0x1F - cov;
 
-                *coverage = cov;
+                // shift left 5 just to make it align with xmajor coverage values
+                *coverage = cov << 5;
             }
         }
 
@@ -425,15 +418,13 @@ private:
         bool Negative;
         bool XMajor;
         Interpolator<1> Interp;
+        s32 XCov_Incr;
 
     private:
         s32 x0, y0, xmin, xmax;
         s32 xlen, ylen;
         s32 dx;
         s32 y;
-
-        s32 xcov_incr;
-        s32 ycoverage, ycov_incr;
     };
 
     template <typename T>

From 3facbe90c981a77d1a4f5d618bdba47d7008adab Mon Sep 17 00:00:00 2001
From: Jaklyy <102590697+Jaklyy@users.noreply.github.com>
Date: Sun, 10 Dec 2023 15:40:58 -0500
Subject: [PATCH 3/4] remove templates

The templates seem to just make things slightly slower on my machine, so
`swapped` is now passed as a regular function argument.
---
 src/GPU3D_Soft.cpp | 18 +++++++++---------
 src/GPU3D_Soft.h   | 21 +++++++++------------
 2 files changed, 18 insertions(+), 21 deletions(-)

diff --git a/src/GPU3D_Soft.cpp b/src/GPU3D_Soft.cpp
index 1871caeb..1166b770 100644
--- a/src/GPU3D_Soft.cpp
+++ b/src/GPU3D_Soft.cpp
@@ -741,8 +741,8 @@ void SoftRenderer::RenderShadowMaskScanline(RendererPolygon* rp, s32 y)
         interp_start = &rp->SlopeR.Interp;
         interp_end = &rp->SlopeL.Interp;
 
-        rp->SlopeR.EdgeParams<true>(&l_edgelen, &l_edgecov);
-        rp->SlopeL.EdgeParams<true>(&r_edgelen, &r_edgecov);
+        rp->SlopeR.EdgeParams(&l_edgelen, &l_edgecov, true);
+        rp->SlopeL.EdgeParams(&r_edgelen, &r_edgecov, true);
 
         std::swap(xstart, xend);
         std::swap(wl, wr);
@@ -773,9 +773,9 @@ void SoftRenderer::RenderShadowMaskScanline(RendererPolygon* rp, s32 y)
         interp_start = &rp->SlopeL.Interp;
         interp_end = &rp->SlopeR.Interp;
 
-        rp->SlopeL.EdgeParams<false>(&l_edgelen, &l_edgecov);
-        rp->SlopeR.EdgeParams<false>(&r_edgelen, &r_edgecov);
-
+        rp->SlopeL.EdgeParams(&l_edgelen, &l_edgecov, false);
+        rp->SlopeR.EdgeParams(&r_edgelen, &r_edgecov, false);
+        
         // CHECKME: edge fill rules for unswapped opaque shadow mask polygons
         if ((GPU.GPU3D.RenderDispCnt & ((1<<4)|(1<<5))) || ((polyalpha < 31) && (GPU.GPU3D.RenderDispCnt & (1<<3))) || wireframe)
         {
@@ -970,8 +970,8 @@ void SoftRenderer::RenderPolygonScanline(RendererPolygon* rp, s32 y)
         interp_start = &rp->SlopeR.Interp;
         interp_end = &rp->SlopeL.Interp;
 
-        rp->SlopeR.EdgeParams<true>(&l_edgelen, &l_edgecov);
-        rp->SlopeL.EdgeParams<true>(&r_edgelen, &r_edgecov);
+        rp->SlopeR.EdgeParams(&l_edgelen, &l_edgecov, true);
+        rp->SlopeL.EdgeParams(&r_edgelen, &r_edgecov, true);
 
         std::swap(xstart, xend);
         std::swap(wl, wr);
@@ -1008,8 +1008,8 @@ void SoftRenderer::RenderPolygonScanline(RendererPolygon* rp, s32 y)
         interp_start = &rp->SlopeL.Interp;
         interp_end = &rp->SlopeR.Interp;
 
-        rp->SlopeL.EdgeParams<false>(&l_edgelen, &l_edgecov);
-        rp->SlopeR.EdgeParams<false>(&r_edgelen, &r_edgecov);
+        rp->SlopeL.EdgeParams(&l_edgelen, &l_edgecov, false);
+        rp->SlopeR.EdgeParams(&r_edgelen, &r_edgecov, false);
 
         // edge fill rules for unswapped opaque edges:
         // * right edge is filled if slope > 1
diff --git a/src/GPU3D_Soft.h b/src/GPU3D_Soft.h
index 38de794c..9b284932 100644
--- a/src/GPU3D_Soft.h
+++ b/src/GPU3D_Soft.h
@@ -347,13 +347,12 @@ private:
             else if (ret > xmax) ret = xmax;
             return ret;
         }
-
-        template<bool swapped>
-        constexpr void EdgeParams_XMajor(s32* length, s32* coverage) const
+        
+        constexpr void EdgeParams_XMajor(s32* length, s32* coverage, bool swapped) const
         {
             // only do length calc for right side when swapped as it's
             // only needed for aa calcs, as actual line spans are broken
-            if constexpr (!swapped || side)
+            if (!swapped || side)
             {
                 if (side ^ Negative)
                     *length = (dx >> 18) - ((dx-Increment) >> 18);
@@ -379,11 +378,10 @@ private:
 
             *coverage = startcov;
 
-            if constexpr (swapped) *length = 1;
+            if (swapped) *length = 1;
         }
 
-        template<bool swapped>
-        constexpr void EdgeParams_YMajor(s32* length, s32* coverage) const
+        constexpr void EdgeParams_YMajor(s32* length, s32* coverage, bool swapped) const
         {
             *length = 1;
 
@@ -404,14 +402,13 @@ private:
                 *coverage = cov << 5;
             }
         }
-
-        template<bool swapped>
-        constexpr void EdgeParams(s32* length, s32* coverage) const
+        
+        constexpr void EdgeParams(s32* length, s32* coverage, bool swapped) const
         {
             if (XMajor)
-                return EdgeParams_XMajor<swapped>(length, coverage);
+                return EdgeParams_XMajor(length, coverage, swapped);
             else
-                return EdgeParams_YMajor<swapped>(length, coverage);
+                return EdgeParams_YMajor(length, coverage, swapped);
         }
 
         s32 Increment;

From 3268f94f4bd9c8d64b9afb71d1f4a611ef5cc70c Mon Sep 17 00:00:00 2001
From: Jaklyy <102590697+Jaklyy@users.noreply.github.com>
Date: Sun, 10 Dec 2023 15:52:28 -0500
Subject: [PATCH 4/4] remove an optimization for swapped polygons

Why did I optimize swapped polygons?
They're not that common, so the X-major edge length is now always computed.
---
 src/GPU3D_Soft.h | 11 ++++-------
 1 file changed, 4 insertions(+), 7 deletions(-)

diff --git a/src/GPU3D_Soft.h b/src/GPU3D_Soft.h
index 9b284932..b1ea641c 100644
--- a/src/GPU3D_Soft.h
+++ b/src/GPU3D_Soft.h
@@ -352,13 +352,10 @@ private:
         {
             // only do length calc for right side when swapped as it's
             // only needed for aa calcs, as actual line spans are broken
-            if (!swapped || side)
-            {
-                if (side ^ Negative)
-                    *length = (dx >> 18) - ((dx-Increment) >> 18);
-                else
-                    *length = ((dx+Increment) >> 18) - (dx >> 18);
-            }
+            if (side ^ Negative)
+                *length = (dx >> 18) - ((dx-Increment) >> 18);
+            else
+                *length = ((dx+Increment) >> 18) - (dx >> 18);
 
             // for X-major edges, we return the coverage
             // for the first pixel, and the increment for