1
0
Fork 0
mirror of https://github.com/melonDS-emu/melonDS.git synced 2025-03-06 21:00:31 +01:00

redo a bunch of gx handling for marginal accuracy benefits

This commit is contained in:
Jaklyy 2024-12-28 23:27:42 -05:00
parent b9472d430e
commit cc9a6f8ec1
6 changed files with 191 additions and 43 deletions

View file

@ -363,7 +363,7 @@ void DMA::Run9()
if (!(Cnt & (1<<26))) if (!(Cnt & (1<<26)))
{ {
while (IterCount > 0 && !Stall) while (IterCount > 0)
{ {
u32 rgn = NDS.ARM9Regions[CurSrcAddr>>14] | NDS.ARM9Regions[CurDstAddr>>14]; u32 rgn = NDS.ARM9Regions[CurSrcAddr>>14] | NDS.ARM9Regions[CurDstAddr>>14];
if (rgn & Mem9_MainRAM) if (rgn & Mem9_MainRAM)
@ -388,7 +388,7 @@ void DMA::Run9()
} }
else else
{ {
while (IterCount > 0 && !Stall) while (IterCount > 0)
{ {
u32 rgn = NDS.ARM9Regions[CurSrcAddr>>14] | NDS.ARM9Regions[CurDstAddr>>14]; u32 rgn = NDS.ARM9Regions[CurSrcAddr>>14] | NDS.ARM9Regions[CurDstAddr>>14];
if (rgn & Mem9_MainRAM) if (rgn & Mem9_MainRAM)
@ -418,7 +418,6 @@ void DMA::Run9()
else Running = 2; else Running = 2;
Executing = false; Executing = false;
Stall = false;
if (RemCount) if (RemCount)
{ {
@ -458,7 +457,7 @@ void DMA::Run7()
if (!(Cnt & (1<<26))) if (!(Cnt & (1<<26)))
{ {
while (IterCount > 0 && !Stall) while (IterCount > 0)
{ {
u32 rgn = NDS.ARM7Regions[CurSrcAddr>>15] | NDS.ARM7Regions[CurDstAddr>>15]; u32 rgn = NDS.ARM7Regions[CurSrcAddr>>15] | NDS.ARM7Regions[CurDstAddr>>15];
if (rgn & Mem7_MainRAM) if (rgn & Mem7_MainRAM)
@ -483,7 +482,7 @@ void DMA::Run7()
} }
else else
{ {
while (IterCount > 0 && !Stall) while (IterCount > 0)
{ {
u32 rgn = NDS.ARM7Regions[CurSrcAddr>>15] | NDS.ARM7Regions[CurDstAddr>>15]; u32 rgn = NDS.ARM7Regions[CurSrcAddr>>15] | NDS.ARM7Regions[CurDstAddr>>15];
if (rgn & Mem7_MainRAM) if (rgn & Mem7_MainRAM)
@ -511,7 +510,6 @@ void DMA::Run7()
else Running = 2; else Running = 2;
Executing = false; Executing = false;
Stall = false;
if (RemCount) if (RemCount)
{ {

View file

@ -89,6 +89,11 @@ public:
return Entries[pos]; return Entries[pos];
} }
// Overwrites, in place, the entry stored at the current read position.
// Only that one slot is assigned; nothing else in the FIFO is modified here.
// NOTE(review): there is no emptiness check, so this presumably must only be
// called while the FIFO holds at least one entry -- confirm against callers.
void Edit(T val)
{
    T& slot = Entries[ReadPos];
    slot = val;
}
u32 Level() const { return NumOccupied; } u32 Level() const { return NumOccupied; }
bool IsEmpty() const { return NumOccupied == 0; } bool IsEmpty() const { return NumOccupied == 0; }
bool IsFull() const { return NumOccupied >= NumEntries; } bool IsFull() const { return NumOccupied >= NumEntries; }

View file

@ -145,6 +145,11 @@ GPU3D::GPU3D(melonDS::NDS& nds, std::unique_ptr<Renderer3D>&& renderer) noexcept
NDS(nds), NDS(nds),
CurrentRenderer(renderer ? std::move(renderer) : std::make_unique<SoftRenderer>()) CurrentRenderer(renderer ? std::move(renderer) : std::make_unique<SoftRenderer>())
{ {
NDS.RegisterEventFuncs(Event_GX, this,
{
MakeEventThunk(GPU3D, EventFIFOEmpty),
MakeEventThunk(GPU3D, EventFIFOHalf)
});
} }
void Vertex::DoSavestate(Savestate* file) noexcept void Vertex::DoSavestate(Savestate* file) noexcept
@ -191,6 +196,7 @@ void GPU3D::Reset() noexcept
CmdPIPE.Clear(); CmdPIPE.Clear();
CmdStallQueue.Clear(); CmdStallQueue.Clear();
CmdFIFORes.Clear();
ZeroDotWLimit = 0xFFFFFF; ZeroDotWLimit = 0xFFFFFF;
@ -1682,13 +1688,24 @@ void GPU3D::VecTest(u32 param) noexcept
void GPU3D::CmdFIFOWrite(const CmdFIFOEntry& entry) noexcept void GPU3D::CmdFIFOWrite(const CmdFIFOEntry& entry) noexcept
{ {
if (CmdFIFO.IsEmpty() && !CmdPIPE.IsFull()) /*if (CmdFIFO.IsEmpty() && !CmdPIPE.IsFull())
{ {
if (CmdFIFORes.IsFull())
{
// store it to the stall queue. stall the system.
// worst case is if a STMxx opcode causes this, which is why our stall queue
// has 64 entries. this is less complicated than trying to make STMxx stall-able.
CmdStallQueue.Write(entry);
NDS.GXFIFOStall();
return;
}
CmdPIPE.Write(entry); CmdPIPE.Write(entry);
} }
else else
{ {
if (CmdFIFO.IsFull()) if (CmdFIFORes.IsFull())
{ {
// store it to the stall queue. stall the system. // store it to the stall queue. stall the system.
// worst case is if a STMxx opcode causes this, which is why our stall queue // worst case is if a STMxx opcode causes this, which is why our stall queue
@ -1700,7 +1717,20 @@ void GPU3D::CmdFIFOWrite(const CmdFIFOEntry& entry) noexcept
} }
CmdFIFO.Write(entry); CmdFIFO.Write(entry);
}*/
if (CmdFIFORes.IsFull())
{
// store it to the stall queue. stall the system.
// worst case is if a STMxx opcode causes this, which is why our stall queue
// has 64 entries. this is less complicated than trying to make STMxx stall-able.
CmdStallQueue.Write(entry);
NDS.GXFIFOStall();
return;
} }
CurCmd = entry;
GXStat |= (1<<27); GXStat |= (1<<27);
@ -1714,37 +1744,39 @@ void GPU3D::CmdFIFOWrite(const CmdFIFOEntry& entry) noexcept
GXStat |= (1<<0); // box/pos/vec test GXStat |= (1<<0); // box/pos/vec test
NumTestCommands++; NumTestCommands++;
} }
CycleCount = 0;
ExecuteCommand();
CmdFIFORes.Write(CycleCount);
CheckFIFOIRQ();
} }
GPU3D::CmdFIFOEntry GPU3D::CmdFIFORead() noexcept GPU3D::CmdFIFOEntry GPU3D::CmdFIFORead() noexcept
{ {
CmdFIFOEntry ret = CmdPIPE.Read(); CmdFIFOEntry ret;//CmdPIPE.Read();
if (CmdPIPE.Level() <= 2) /*if (CmdPIPE.Level() <= 2)
{ {
if (!CmdFIFO.IsEmpty()) if (!CmdFIFO.IsEmpty())
CmdPIPE.Write(CmdFIFO.Read()); CmdPIPE.Write(CmdFIFO.Read());
if (!CmdFIFO.IsEmpty()) if (!CmdFIFO.IsEmpty())
CmdPIPE.Write(CmdFIFO.Read()); CmdPIPE.Write(CmdFIFO.Read());*/
// empty stall queue if needed // empty stall queue if needed
// CmdFIFO should not be full at this point. // CmdFIFO should not be full at this point.
if (!CmdStallQueue.IsEmpty()) if (!CmdStallQueue.IsEmpty())
{ {
while (!CmdStallQueue.IsEmpty()) ret = CmdStallQueue.Read();
{
if (CmdFIFO.IsFull()) break;
CmdFIFOEntry entry = CmdStallQueue.Read();
CmdFIFOWrite(entry);
}
if (CmdStallQueue.IsEmpty()) if (CmdStallQueue.IsEmpty())
NDS.GXFIFOUnstall(); NDS.GXFIFOUnstall();
}
CheckFIFODMA();
CheckFIFOIRQ();
} }
else ret = CurCmd;
//CheckFIFODMA();
//CheckFIFOIRQ();
//}
return ret; return ret;
} }
@ -2353,10 +2385,26 @@ void GPU3D::ExecuteCommand() noexcept
} }
} }
void GPU3D::ResolveCommands() noexcept
{
u32 cycles = CmdFIFORes.Peek();
if (cycles <= -CycleCount)
{
CycleCount += cycles;
CmdFIFORes.Read();
}
else
{
CmdFIFORes.Edit(cycles + CycleCount);
CycleCount = 0;
}
}
s32 GPU3D::CyclesToRunFor() const noexcept s32 GPU3D::CyclesToRunFor() const noexcept
{ {
if (CycleCount < 0) return 0; //if (CycleCount < 0) return 0;
return CycleCount; return CmdFIFORes.Peek();//CycleCount;
} }
void GPU3D::FinishWork(s32 cycles) noexcept void GPU3D::FinishWork(s32 cycles) noexcept
@ -2376,28 +2424,38 @@ void GPU3D::FinishWork(s32 cycles) noexcept
void GPU3D::Run() noexcept void GPU3D::Run() noexcept
{ {
if (!GeometryEnabled || FlushRequest || if (!GeometryEnabled || FlushRequest ||
(CmdPIPE.IsEmpty() && !(GXStat & (1<<27)))) (CmdFIFORes.IsEmpty() && !(GXStat & (1<<27))))
{ {
Timestamp = std::max(NDS.ARM9Timestamp, NDS.DMA9Timestamp) >> NDS.ARM9ClockShift; Timestamp = std::max(NDS.ARM9Timestamp, NDS.DMA9Timestamp) >> NDS.ARM9ClockShift;
return; return;
} }
s32 cycles = (std::max(NDS.ARM9Timestamp, NDS.DMA9Timestamp) >> NDS.ARM9ClockShift) - Timestamp; s32 cycles = (std::max(NDS.ARM9Timestamp, NDS.DMA9Timestamp) >> NDS.ARM9ClockShift) - Timestamp;
CycleCount -= cycles; CycleCount = -cycles;
Timestamp = std::max(NDS.ARM9Timestamp, NDS.DMA9Timestamp) >> NDS.ARM9ClockShift; Timestamp = std::max(NDS.ARM9Timestamp, NDS.DMA9Timestamp) >> NDS.ARM9ClockShift;
if (CycleCount <= 0) if (CycleCount < 0)
{ {
while (CycleCount <= 0 && !CmdPIPE.IsEmpty()) while (CycleCount < 0 && !CmdFIFORes.IsEmpty())
{ {
if (NumPushPopCommands == 0) GXStat &= ~(1<<14); if (NumPushPopCommands == 0) GXStat &= ~(1<<14);
if (NumTestCommands == 0) GXStat &= ~(1<<0); if (NumTestCommands == 0) GXStat &= ~(1<<0);
ExecuteCommand(); //ExecuteCommand();
ResolveCommands();
if (!CmdStallQueue.IsEmpty() && !CmdFIFORes.IsFull())
{
s32 oldcycle = CycleCount;
CycleCount = 0;
ExecuteCommand();
CmdFIFORes.Write(CycleCount);
CycleCount = oldcycle;
CheckFIFOIRQ();
}
} }
} }
if (CycleCount <= 0 && CmdPIPE.IsEmpty()) if (CycleCount < 0 && CmdFIFORes.IsEmpty())
{ {
if (GXStat & (1<<27)) FinishWork(-CycleCount); if (GXStat & (1<<27)) FinishWork(-CycleCount);
else CycleCount = 0; else CycleCount = 0;
@ -2408,23 +2466,102 @@ void GPU3D::Run() noexcept
} }
// Event_GX callback (listed first in the thunk list registered by the
// constructor). Requests the GXFIFO interrupt when the IRQ mode held in
// GXStat bits 30-31 equals 2; otherwise does nothing.
// NOTE(review): mode 2 presumably means "IRQ when the command FIFO is empty",
// and the first SetIRQ argument presumably selects the ARM9 -- confirm.
// `param` is unused.
void GPU3D::EventFIFOEmpty(u32 param)
{
    const u32 irqMode = GXStat >> 30;
    if (irqMode != 2)
        return;

    NDS.SetIRQ(0, IRQ_GXFIFO);
}
// Event_GX callback (listed second in the thunk list registered by the
// constructor). Steps, in order:
//  1. Asks NDS to check the DMAs of mode 0x07.
//  2. If the IRQ mode in GXStat bits 30-31 is 1, requests the GXFIFO IRQ.
//  3. If the IRQ mode is 2:
//       - with more than 4 pending entries in CmdFIFORes, sums the cycle costs
//         of all but the last 4 entries, reschedules Event_GX that far ahead
//         with function id 0, and clears the GXFIFO IRQ;
//       - otherwise calls EventFIFOEmpty() directly.
// NOTE(review): the constant 4 presumably stands for entries that would sit in
// the command pipe rather than the FIFO proper -- confirm.
// `param` is unused.
void GPU3D::EventFIFOHalf(u32 param)
{
    NDS.CheckDMAs(0, 0x07);

    if ((GXStat >> 30) == 1)
        NDS.SetIRQ(0, IRQ_GXFIFO);

    // Everything below only concerns IRQ mode 2.
    if ((GXStat >> 30) != 2)
        return;

    const u32 pending = CmdFIFORes.Level();
    if (pending <= 4)
    {
        EventFIFOEmpty(0);
        return;
    }

    // Total cost of the entries that must resolve before only 4 remain.
    const u32 toDrain = pending - 4;
    u64 delay = 0;
    for (u32 idx = 0; idx < toDrain; idx++)
        delay += CmdFIFORes.Peek(idx);

    NDS.CancelEvent(Event_GX);
    NDS.ScheduleEvent(Event_GX, false, delay, 0, 0);
    NDS.ClearIRQ(0, IRQ_GXFIFO);
}
void GPU3D::CheckFIFOIRQ() noexcept void GPU3D::CheckFIFOIRQ() noexcept
{ {
bool irq = false; u8 irq = (GXStat >> 30);
switch (GXStat >> 30) if (NDS.DMAsInMode(0, 0x07) || (irq == 1))
{ {
case 1: irq = (CmdFIFO.Level() < 128); break; if ((CmdFIFORes.Level() >= 128+4))
case 2: irq = CmdFIFO.IsEmpty(); break; {
} u64 time = 0;
u64 entries = CmdFIFORes.Level() - (127+4);
for (int i = 0; i < entries; i++)
{
time += CmdFIFORes.Peek(i);
}
if (irq) NDS.SetIRQ(0, IRQ_GXFIFO, CycleCount); NDS.CancelEvent(Event_GX);
else NDS.ClearIRQ(0, IRQ_GXFIFO); NDS.ScheduleEvent(Event_GX, false, time, 1, 0);
NDS.ClearIRQ(0, IRQ_GXFIFO);
}
else
{
EventFIFOHalf(0);
}
}
else if (irq == 2)
{
if (CmdFIFORes.Level() > 4)
{
u64 entries = CmdFIFORes.Level() - 4;
u64 time = 0;
for (int i = 0; i < entries; i++)
{
time += CmdFIFORes.Peek(i);
}
NDS.CancelEvent(Event_GX);
NDS.ScheduleEvent(Event_GX, false, time, 0, 0);
NDS.ClearIRQ(0, IRQ_GXFIFO);
}
else
{
EventFIFOEmpty(0);
}
}
} }
void GPU3D::CheckFIFODMA() noexcept void GPU3D::CheckFIFODMA() noexcept
{ {
if (CmdFIFO.Level() < 128) if (CmdFIFORes.Level() < 128+4)
NDS.CheckDMAs(0, 0x07); NDS.CheckDMAs(0, 0x07);
else
{
u64 time = 0;
u64 entries = CmdFIFORes.Level() - (127+4);
for (int i = 0; i < entries; i++)
{
time += CmdFIFORes.Peek(i);
}
NDS.CancelEvent(Event_GX);
NDS.ScheduleEvent(Event_GX, false, time, 1, 0);
NDS.ClearIRQ(0, IRQ_GXFIFO);
}
} }
void GPU3D::VCount144(GPU& gpu) noexcept void GPU3D::VCount144(GPU& gpu) noexcept

View file

@ -154,6 +154,9 @@ private:
void VecTest(u32 param) noexcept; void VecTest(u32 param) noexcept;
void CmdFIFOWrite(const CmdFIFOEntry& entry) noexcept; void CmdFIFOWrite(const CmdFIFOEntry& entry) noexcept;
CmdFIFOEntry CmdFIFORead() noexcept; CmdFIFOEntry CmdFIFORead() noexcept;
void ResolveCommands() noexcept;
void EventFIFOEmpty(u32 param);
void EventFIFOHalf(u32 param);
void FinishWork(s32 cycles) noexcept; void FinishWork(s32 cycles) noexcept;
void VertexPipelineSubmitCmd() noexcept void VertexPipelineSubmitCmd() noexcept
{ {
@ -197,6 +200,9 @@ public:
FIFO<CmdFIFOEntry, 64> CmdStallQueue {}; FIFO<CmdFIFOEntry, 64> CmdStallQueue {};
CmdFIFOEntry CurCmd {};
FIFO<u32, 260> CmdFIFORes {};
u32 ZeroDotWLimit = 0xFFFFFF; u32 ZeroDotWLimit = 0xFFFFFF;
u32 GXStat = 0; u32 GXStat = 0;

View file

@ -2313,7 +2313,7 @@ void NDS::GXFIFOStall()
if (CurCPU == 1) ARM9.Halt(2); if (CurCPU == 1) ARM9.Halt(2);
else else
{ {
DMAs[0].StallIfRunning(); /*DMAs[0].StallIfRunning();
DMAs[1].StallIfRunning(); DMAs[1].StallIfRunning();
DMAs[2].StallIfRunning(); DMAs[2].StallIfRunning();
DMAs[3].StallIfRunning(); DMAs[3].StallIfRunning();
@ -2321,8 +2321,9 @@ void NDS::GXFIFOStall()
{ {
auto& dsi = dynamic_cast<melonDS::DSi&>(*this); auto& dsi = dynamic_cast<melonDS::DSi&>(*this);
dsi.StallNDMAs(); dsi.StallNDMAs();
} }*/
} }
Reschedule(std::max(ARM9Timestamp, DMA9Timestamp) >> ARM9ClockShift);
} }
void NDS::GXFIFOUnstall() void NDS::GXFIFOUnstall()

View file

@ -58,6 +58,7 @@ enum
Event_Wifi, Event_Wifi,
Event_RTC, Event_RTC,
Event_GX,
Event_DisplayFIFO, Event_DisplayFIFO,
Event_ROMTransfer, Event_ROMTransfer,
Event_ROMSPITransfer, Event_ROMSPITransfer,