From cc9a6f8ec14059b208621926f345c80261e546bc Mon Sep 17 00:00:00 2001 From: Jaklyy <102590697+Jaklyy@users.noreply.github.com> Date: Sat, 28 Dec 2024 23:27:42 -0500 Subject: [PATCH] redo a bunch of gx handling for marginal accuracy benefits --- src/DMA.cpp | 10 +-- src/FIFO.h | 5 ++ src/GPU3D.cpp | 207 +++++++++++++++++++++++++++++++++++++++++--------- src/GPU3D.h | 6 ++ src/NDS.cpp | 5 +- src/NDS.h | 1 + 6 files changed, 191 insertions(+), 43 deletions(-) diff --git a/src/DMA.cpp b/src/DMA.cpp index 8d97b67e..b9a4f170 100644 --- a/src/DMA.cpp +++ b/src/DMA.cpp @@ -363,7 +363,7 @@ void DMA::Run9() if (!(Cnt & (1<<26))) { - while (IterCount > 0 && !Stall) + while (IterCount > 0) { u32 rgn = NDS.ARM9Regions[CurSrcAddr>>14] | NDS.ARM9Regions[CurDstAddr>>14]; if (rgn & Mem9_MainRAM) @@ -388,7 +388,7 @@ void DMA::Run9() } else { - while (IterCount > 0 && !Stall) + while (IterCount > 0) { u32 rgn = NDS.ARM9Regions[CurSrcAddr>>14] | NDS.ARM9Regions[CurDstAddr>>14]; if (rgn & Mem9_MainRAM) @@ -418,7 +418,6 @@ void DMA::Run9() else Running = 2; Executing = false; - Stall = false; if (RemCount) { @@ -458,7 +457,7 @@ void DMA::Run7() if (!(Cnt & (1<<26))) { - while (IterCount > 0 && !Stall) + while (IterCount > 0) { u32 rgn = NDS.ARM7Regions[CurSrcAddr>>15] | NDS.ARM7Regions[CurDstAddr>>15]; if (rgn & Mem7_MainRAM) @@ -483,7 +482,7 @@ void DMA::Run7() } else { - while (IterCount > 0 && !Stall) + while (IterCount > 0) { u32 rgn = NDS.ARM7Regions[CurSrcAddr>>15] | NDS.ARM7Regions[CurDstAddr>>15]; if (rgn & Mem7_MainRAM) @@ -511,7 +510,6 @@ void DMA::Run7() else Running = 2; Executing = false; - Stall = false; if (RemCount) { diff --git a/src/FIFO.h b/src/FIFO.h index 5fc04832..8272d61c 100644 --- a/src/FIFO.h +++ b/src/FIFO.h @@ -89,6 +89,11 @@ public: return Entries[pos]; } + void Edit(T val) + { + Entries[ReadPos] = val; + } + u32 Level() const { return NumOccupied; } bool IsEmpty() const { return NumOccupied == 0; } bool IsFull() const { return NumOccupied >= NumEntries; } diff --git a/src/GPU3D.cpp b/src/GPU3D.cpp index 3fc037df..f4886464 100644 --- a/src/GPU3D.cpp +++ b/src/GPU3D.cpp @@ -145,6 +145,11 @@ GPU3D::GPU3D(melonDS::NDS& nds, std::unique_ptr&& renderer) noexcept NDS(nds), CurrentRenderer(renderer ? std::move(renderer) : std::make_unique()) { + NDS.RegisterEventFuncs(Event_GX, this, + { + MakeEventThunk(GPU3D, EventFIFOEmpty), + MakeEventThunk(GPU3D, EventFIFOHalf) + }); } void Vertex::DoSavestate(Savestate* file) noexcept @@ -191,6 +196,7 @@ void GPU3D::Reset() noexcept CmdPIPE.Clear(); CmdStallQueue.Clear(); + CmdFIFORes.Clear(); ZeroDotWLimit = 0xFFFFFF; @@ -1682,13 +1688,24 @@ void GPU3D::VecTest(u32 param) noexcept void GPU3D::CmdFIFOWrite(const CmdFIFOEntry& entry) noexcept { - if (CmdFIFO.IsEmpty() && !CmdPIPE.IsFull()) + /*if (CmdFIFO.IsEmpty() && !CmdPIPE.IsFull()) { + if (CmdFIFORes.IsFull()) + { + // store it to the stall queue. stall the system. + // worst case is if a STMxx opcode causes this, which is why our stall queue + // has 64 entries. this is less complicated than trying to make STMxx stall-able. + + CmdStallQueue.Write(entry); + NDS.GXFIFOStall(); + return; + } + CmdPIPE.Write(entry); } else { - if (CmdFIFO.IsFull()) + if (CmdFIFORes.IsFull()) { // store it to the stall queue. stall the system. // worst case is if a STMxx opcode causes this, which is why our stall queue @@ -1700,7 +1717,20 @@ void GPU3D::CmdFIFOWrite(const CmdFIFOEntry& entry) noexcept } CmdFIFO.Write(entry); + }*/ + + if (CmdFIFORes.IsFull()) + { + // store it to the stall queue. stall the system. + // worst case is if a STMxx opcode causes this, which is why our stall queue + // has 64 entries. this is less complicated than trying to make STMxx stall-able. + + CmdStallQueue.Write(entry); + NDS.GXFIFOStall(); + return; } + + CurCmd = entry; GXStat |= (1<<27); @@ -1714,37 +1744,39 @@ void GPU3D::CmdFIFOWrite(const CmdFIFOEntry& entry) noexcept GXStat |= (1<<0); // box/pos/vec test NumTestCommands++; } + + CycleCount = 0; + ExecuteCommand(); + CmdFIFORes.Write(CycleCount); + + CheckFIFOIRQ(); } GPU3D::CmdFIFOEntry GPU3D::CmdFIFORead() noexcept { - CmdFIFOEntry ret = CmdPIPE.Read(); + CmdFIFOEntry ret;//CmdPIPE.Read(); - if (CmdPIPE.Level() <= 2) + /*if (CmdPIPE.Level() <= 2) { if (!CmdFIFO.IsEmpty()) CmdPIPE.Write(CmdFIFO.Read()); if (!CmdFIFO.IsEmpty()) - CmdPIPE.Write(CmdFIFO.Read()); + CmdPIPE.Write(CmdFIFO.Read());*/ // empty stall queue if needed // CmdFIFO should not be full at this point. - if (!CmdStallQueue.IsEmpty()) - { - while (!CmdStallQueue.IsEmpty()) - { - if (CmdFIFO.IsFull()) break; - CmdFIFOEntry entry = CmdStallQueue.Read(); - CmdFIFOWrite(entry); - } + if (!CmdStallQueue.IsEmpty()) + { + ret = CmdStallQueue.Read(); - if (CmdStallQueue.IsEmpty()) - NDS.GXFIFOUnstall(); - } - - CheckFIFODMA(); - CheckFIFOIRQ(); + if (CmdStallQueue.IsEmpty()) + NDS.GXFIFOUnstall(); } + else ret = CurCmd; + + //CheckFIFODMA(); + //CheckFIFOIRQ(); + //} return ret; } @@ -2353,10 +2385,26 @@ void GPU3D::ExecuteCommand() noexcept } } +void GPU3D::ResolveCommands() noexcept +{ + u32 cycles = CmdFIFORes.Peek(); + + if (cycles <= -CycleCount) + { + CycleCount += cycles; + CmdFIFORes.Read(); + } + else + { + CmdFIFORes.Edit(cycles + CycleCount); + CycleCount = 0; + } +} + s32 GPU3D::CyclesToRunFor() const noexcept { - if (CycleCount < 0) return 0; - return CycleCount; + //if (CycleCount < 0) return 0; + return CmdFIFORes.Peek();//CycleCount; } void GPU3D::FinishWork(s32 cycles) noexcept @@ -2376,28 +2424,38 @@ void GPU3D::FinishWork(s32 cycles) noexcept void GPU3D::Run() noexcept { if (!GeometryEnabled || FlushRequest || - (CmdPIPE.IsEmpty() && !(GXStat & (1<<27)))) + (CmdFIFORes.IsEmpty() && !(GXStat & (1<<27)))) { Timestamp = std::max(NDS.ARM9Timestamp, NDS.DMA9Timestamp) >> NDS.ARM9ClockShift; return; } s32 cycles = (std::max(NDS.ARM9Timestamp, NDS.DMA9Timestamp) >> NDS.ARM9ClockShift) - Timestamp; - CycleCount -= cycles; + CycleCount = -cycles; Timestamp = std::max(NDS.ARM9Timestamp, NDS.DMA9Timestamp) >> NDS.ARM9ClockShift; - if (CycleCount <= 0) + if (CycleCount < 0) { - while (CycleCount <= 0 && !CmdPIPE.IsEmpty()) + while (CycleCount < 0 && !CmdFIFORes.IsEmpty()) { if (NumPushPopCommands == 0) GXStat &= ~(1<<14); if (NumTestCommands == 0) GXStat &= ~(1<<0); - ExecuteCommand(); + //ExecuteCommand(); + ResolveCommands(); + if (!CmdStallQueue.IsEmpty() && !CmdFIFORes.IsFull()) + { + s32 oldcycle = CycleCount; + CycleCount = 0; + ExecuteCommand(); + CmdFIFORes.Write(CycleCount); + CycleCount = oldcycle; + CheckFIFOIRQ(); + } } } - if (CycleCount <= 0 && CmdPIPE.IsEmpty()) + if (CycleCount < 0 && CmdFIFORes.IsEmpty()) { if (GXStat & (1<<27)) FinishWork(-CycleCount); else CycleCount = 0; @@ -2408,23 +2466,102 @@ void GPU3D::Run() noexcept } +void GPU3D::EventFIFOEmpty(u32 param) +{ + if ((GXStat >> 30) == 2) NDS.SetIRQ(0, IRQ_GXFIFO); +} + +void GPU3D::EventFIFOHalf(u32 param) +{ + NDS.CheckDMAs(0, 0x07); + + if ((GXStat >> 30) == 1) + NDS.SetIRQ(0, IRQ_GXFIFO); + + if ((GXStat >> 30) == 2) + { + if (CmdFIFORes.Level() > 4) + { + u64 entries = CmdFIFORes.Level() - 4; + u64 time = 0; + for (int i = 0; i < entries; i++) + { + time += CmdFIFORes.Peek(i); + } + + NDS.CancelEvent(Event_GX); + NDS.ScheduleEvent(Event_GX, false, time, 0, 0); + NDS.ClearIRQ(0, IRQ_GXFIFO); + } + else + { + EventFIFOEmpty(0); + } + } +} + void GPU3D::CheckFIFOIRQ() noexcept { - bool irq = false; - switch (GXStat >> 30) + u8 irq = (GXStat >> 30); + if (NDS.DMAsInMode(0, 0x07) || (irq == 1)) { - case 1: irq = (CmdFIFO.Level() < 128); break; - case 2: irq = CmdFIFO.IsEmpty(); break; - } + if ((CmdFIFORes.Level() >= 128+4)) + { + u64 time = 0; + u64 entries = CmdFIFORes.Level() - (127+4); + for (int i = 0; i < entries; i++) + { + time += CmdFIFORes.Peek(i); + } - if (irq) NDS.SetIRQ(0, IRQ_GXFIFO, CycleCount); - else NDS.ClearIRQ(0, IRQ_GXFIFO); + NDS.CancelEvent(Event_GX); + NDS.ScheduleEvent(Event_GX, false, time, 1, 0); + NDS.ClearIRQ(0, IRQ_GXFIFO); + } + else + { + EventFIFOHalf(0); + } + } + else if (irq == 2) + { + if (CmdFIFORes.Level() > 4) + { + u64 entries = CmdFIFORes.Level() - 4; + u64 time = 0; + for (int i = 0; i < entries; i++) + { + time += CmdFIFORes.Peek(i); + } + + NDS.CancelEvent(Event_GX); + NDS.ScheduleEvent(Event_GX, false, time, 0, 0); + NDS.ClearIRQ(0, IRQ_GXFIFO); + } + else + { + EventFIFOEmpty(0); + } + } } void GPU3D::CheckFIFODMA() noexcept { - if (CmdFIFO.Level() < 128) + if (CmdFIFORes.Level() < 128+4) NDS.CheckDMAs(0, 0x07); + else + { + u64 time = 0; + u64 entries = CmdFIFORes.Level() - (127+4); + for (int i = 0; i < entries; i++) + { + time += CmdFIFORes.Peek(i); + } + + NDS.CancelEvent(Event_GX); + NDS.ScheduleEvent(Event_GX, false, time, 1, 0); + NDS.ClearIRQ(0, IRQ_GXFIFO); + } } void GPU3D::VCount144(GPU& gpu) noexcept diff --git a/src/GPU3D.h b/src/GPU3D.h index d10df55f..68b81712 100644 --- a/src/GPU3D.h +++ b/src/GPU3D.h @@ -154,6 +154,9 @@ private: void VecTest(u32 param) noexcept; void CmdFIFOWrite(const CmdFIFOEntry& entry) noexcept; CmdFIFOEntry CmdFIFORead() noexcept; + void ResolveCommands() noexcept; + void EventFIFOEmpty(u32 param); + void EventFIFOHalf(u32 param); void FinishWork(s32 cycles) noexcept; void VertexPipelineSubmitCmd() noexcept { @@ -197,6 +200,9 @@ public: FIFO CmdStallQueue {}; + CmdFIFOEntry CurCmd {}; + FIFO CmdFIFORes {}; + u32 ZeroDotWLimit = 0xFFFFFF; u32 GXStat = 0; diff --git a/src/NDS.cpp b/src/NDS.cpp index 5a1ea92b..b11af247 100644 --- a/src/NDS.cpp +++ b/src/NDS.cpp @@ -2313,7 +2313,7 @@ void NDS::GXFIFOStall() if (CurCPU == 1) ARM9.Halt(2); else { - DMAs[0].StallIfRunning(); + /*DMAs[0].StallIfRunning(); DMAs[1].StallIfRunning(); DMAs[2].StallIfRunning(); DMAs[3].StallIfRunning(); @@ -2321,8 +2321,9 @@ void NDS::GXFIFOStall() { auto& dsi = dynamic_cast(*this); dsi.StallNDMAs(); - } + }*/ } + Reschedule(std::max(ARM9Timestamp, DMA9Timestamp) >> ARM9ClockShift); } void NDS::GXFIFOUnstall() diff --git a/src/NDS.h b/src/NDS.h index d94cdd42..55b2de1e 100644 --- a/src/NDS.h +++ b/src/NDS.h @@ -58,6 +58,7 @@ enum Event_Wifi, Event_RTC, + Event_GX, Event_DisplayFIFO, Event_ROMTransfer, Event_ROMSPITransfer,