From b1cc005316b2d4d04e6fa0ad45a072f7fc2aec35 Mon Sep 17 00:00:00 2001
From: Ziemas <ziemas@ziemas.se>
Date: Wed, 5 Nov 2025 14:20:03 +0100
Subject: [PATCH 1/3] SPU: clang-format Mixer.cpp

---
 pcsx2/SPU2/Mixer.cpp | 11 ++++++-----
 1 file changed, 6 insertions(+), 5 deletions(-)

diff --git a/pcsx2/SPU2/Mixer.cpp b/pcsx2/SPU2/Mixer.cpp
index 97ee5a9a52..36d2fb912a 100644
--- a/pcsx2/SPU2/Mixer.cpp
+++ b/pcsx2/SPU2/Mixer.cpp
@@ -281,7 +281,7 @@ static __forceinline void CalculateADSR(V_Core& thiscore, uint voiceidx)
 __forceinline static s32 GaussianInterpolate(s32 pv4, s32 pv3, s32 pv2, s32 pv1, s32 i)
 {
 	s32 out = 0;
-	out =  (interpTable[i][0] * pv4) >> 15;
+	out += (interpTable[i][0] * pv4) >> 15;
 	out += (interpTable[i][1] * pv3) >> 15;
 	out += (interpTable[i][2] * pv2) >> 15;
 	out += (interpTable[i][3] * pv1) >> 15;
@@ -533,7 +533,8 @@ StereoOut32 V_Core::Mix(const VoiceMixSet& inVoices, const StereoOut32& Input, c
 	return TD + ApplyVolume(RV, FxVol);
 }
 
-static StereoOut32 DCFilter(StereoOut32 input) {
+static StereoOut32 DCFilter(StereoOut32 input)
+{
 	// A simple DC blocking high-pass filter
 	// Implementation from http://peabody.sapp.org/class/dmp2/lab/dcblock/
 	// The magic number 0x7f5c is ceil(INT16_MAX * 0.995)
@@ -634,9 +635,9 @@ __forceinline void spu2Mix()
 			if (SPU2::MsgCache())
 			{
 				SPU2::ConLog(" * SPU2 > CacheStats > Hits: %d  Misses: %d  Ignores: %d\n",
-					   g_counter_cache_hits,
-					   g_counter_cache_misses,
-					   g_counter_cache_ignores);
+					g_counter_cache_hits,
+					g_counter_cache_misses,
+					g_counter_cache_ignores);
 			}
 
 			g_counter_cache_hits =

From 77dd916a6bbaf13a45a03bfd59f55755e5b05740 Mon Sep 17 00:00:00 2001
From: Ziemas <ziemas@ziemas.se>
Date: Sun, 26 Oct 2025 20:25:50 +0100
Subject: [PATCH 2/3] SPU: Remove unused voice struct members

Might as well if the saveversion is already being bumped.

[SAVEVERSION+]
---
 pcsx2/SPU2/defs.h      | 7 -------
 pcsx2/SPU2/spu2sys.cpp | 1 -
 pcsx2/SaveState.h      | 2 +-
 3 files changed, 1 insertion(+), 9 deletions(-)

diff --git a/pcsx2/SPU2/defs.h b/pcsx2/SPU2/defs.h
index 80713657d8..5aeae73bd6 100644
--- a/pcsx2/SPU2/defs.h
+++ b/pcsx2/SPU2/defs.h
@@ -256,12 +256,6 @@ struct V_Voice
 	// Sample pointer (19:12 bit fixed point)
 	s32 SP;
 
-	// Sample pointer for Cubic Interpolation
-	// Cubic interpolation mixes a sample behind Linear, so that it
-	// can have sample data to either side of the end points from which
-	// to extrapolate.  This SP represents that late sample position.
-	s32 SPc;
-
 	// Previous sample values - used for interpolation
 	// Inverted order of these members to match the access order in the
 	//   code (might improve cache hits).
@@ -272,7 +266,6 @@ struct V_Voice
 
 	// Last outputted audio value, used for voice modulation.
 	s32 OutX;
-	s32 NextCrest; // temp value for Crest calculation
 
 	// SBuffer now points directly to an ADPCM cache entry.
 	s16* SBuffer;
diff --git a/pcsx2/SPU2/spu2sys.cpp b/pcsx2/SPU2/spu2sys.cpp
index 18a5690a39..f46ba745bd 100644
--- a/pcsx2/SPU2/spu2sys.cpp
+++ b/pcsx2/SPU2/spu2sys.cpp
@@ -228,7 +228,6 @@ void V_Voice::Start()
 
 	PV1 = PV2 = 0;
 	PV3 = PV4 = 0;
-	NextCrest = -0x8000;
 }
 
 void V_Voice::Stop()
diff --git a/pcsx2/SaveState.h b/pcsx2/SaveState.h
index f073919477..4d1a5ea679 100644
--- a/pcsx2/SaveState.h
+++ b/pcsx2/SaveState.h
@@ -25,7 +25,7 @@ enum class FreezeAction
 // [SAVEVERSION+]
 // This informs the auto updater that the users savestates will be invalidated.
 
-static const u32 g_SaveVersion = (0x9A55 << 16) | 0x0000;
+static const u32 g_SaveVersion = (0x9A56 << 16) | 0x0000;
 
 
 // the freezing data between submodules and core

From 8328b4e278f3cf934d71c661f21f334b3770531c Mon Sep 17 00:00:00 2001
From: Ziemas <ziemas@ziemas.se>
Date: Wed, 5 Nov 2025 22:06:52 +0100
Subject: [PATCH 3/3] SPU: Emulate voice decode buffers

This makes the timing of NAX advancement closer to the console's, since
it emulates the decode buffer behaviour: decoding rushes ahead of
playback until the buffer is full.

It also makes interpolation of the first four samples more correct by
using real data instead of the zero-filled previous values.

[SAVEVERSION+]
---
 pcsx2/SPU2/Debug.cpp   |   1 -
 pcsx2/SPU2/Mixer.cpp   | 204 +++++++++++++++++++----------------------
 pcsx2/SPU2/defs.h      |  14 +--
 pcsx2/SPU2/spu2sys.cpp |  20 ++--
 pcsx2/SaveState.h      |   2 +-
 5 files changed, 106 insertions(+), 135 deletions(-)

diff --git a/pcsx2/SPU2/Debug.cpp b/pcsx2/SPU2/Debug.cpp
index 0b6e0d7aba..9bc5c7053c 100644
--- a/pcsx2/SPU2/Debug.cpp
+++ b/pcsx2/SPU2/Debug.cpp
@@ -200,7 +200,6 @@ void SPU2::DoFullDump()
 				fprintf(dump, "  - Sound Start Address: %x\n", Cores[c].Voices[v].StartA);
 				fprintf(dump, "  - Next Data Address:   %x\n", Cores[c].Voices[v].NextA);
 				fprintf(dump, "  - Play Status:         %s\n", (Cores[c].Voices[v].ADSR.Phase > 0) ? "Playing" : "Not Playing");
-				fprintf(dump, "  - Block Sample:        %d\n", Cores[c].Voices[v].SCurrent);
 			}
 			fprintf(dump, "#### END OF DUMP.\n\n");
 		}
diff --git a/pcsx2/SPU2/Mixer.cpp b/pcsx2/SPU2/Mixer.cpp
index 36d2fb912a..f807c0eb79 100644
--- a/pcsx2/SPU2/Mixer.cpp
+++ b/pcsx2/SPU2/Mixer.cpp
@@ -89,55 +89,13 @@ int g_counter_cache_ignores = 0;
 #define XAFLAG_LOOP (1ul << 1)
 #define XAFLAG_LOOP_START (1ul << 2)
 
-static __forceinline s32 GetNextDataBuffered(V_Core& thiscore, uint voiceidx)
+static __forceinline void GetNextDataBuffered(V_Core& thiscore, uint voiceidx)
 {
 	V_Voice& vc(thiscore.Voices[voiceidx]);
 
-	if ((vc.SCurrent & 3) == 0)
+	if (vc.SBuffer == nullptr)
 	{
-		IncrementNextA(thiscore, voiceidx);
-
-		if ((vc.NextA & 7) == 0) // vc.SCurrent == 24 equivalent
-		{
-			if (vc.LoopFlags & XAFLAG_LOOP_END)
-			{
-				thiscore.Regs.ENDX |= (1 << voiceidx);
-				vc.NextA = vc.LoopStartA | 1;
-				if (!(vc.LoopFlags & XAFLAG_LOOP))
-				{
-					vc.Stop();
-
-					if (IsDevBuild)
-					{
-						if (SPU2::MsgVoiceOff())
-							SPU2::ConLog("* SPU2: Voice Off by EndPoint: %d \n", voiceidx);
-					}
-				}
-			}
-			else
-				vc.NextA++; // no, don't IncrementNextA here.  We haven't read the header yet.
-		}
-	}
-
-	if (vc.SCurrent == 28)
-	{
-		vc.SCurrent = 0;
-
-		// We'll need the loop flags and buffer pointers regardless of cache status:
-
-		for (int i = 0; i < 2; i++)
-			if (Cores[i].IRQEnable && Cores[i].IRQA == (vc.NextA & 0xFFFF8))
-				SetIrqCall(i);
-
-		s16* memptr = GetMemPtr(vc.NextA & 0xFFFF8);
-		vc.LoopFlags = *memptr >> 8; // grab loop flags from the upper byte.
-
-		if ((vc.LoopFlags & XAFLAG_LOOP_START) && !vc.LoopMode)
-		{
-			vc.LoopStartA = vc.NextA & 0xFFFF8;
-		}
-
-		const int cacheIdx = vc.NextA / pcm_WordsPerBlock;
+		const int cacheIdx = (vc.NextA & 0xFFFF8) / pcm_WordsPerBlock;
 		PcmCacheEntry& cacheLine = pcm_cache_data[cacheIdx];
 		vc.SBuffer = cacheLine.Sampledata;
 
@@ -172,46 +130,18 @@ static __forceinline s32 GetNextDataBuffered(V_Core& thiscore, uint voiceidx)
 					g_counter_cache_misses++;
 			}
 
+
+			s16* memptr = GetMemPtr(vc.NextA & 0xFFFF8);
 			XA_decode_block(vc.SBuffer, memptr, vc.Prev1, vc.Prev2);
 		}
 	}
 
-	return vc.SBuffer[vc.SCurrent++];
-}
-
-static __forceinline void GetNextDataDummy(V_Core& thiscore, uint voiceidx)
-{
-	V_Voice& vc(thiscore.Voices[voiceidx]);
-
-	IncrementNextA(thiscore, voiceidx);
-
-	if ((vc.NextA & 7) == 0) // vc.SCurrent == 24 equivalent
+	// Get the sample index for NextA, we have to subtract 1 to ignore the loop header
+	int sampleIdx = ((vc.NextA % pcm_WordsPerBlock) - 1) * 4;
+	for (int i = 0; i < 4; i++)
 	{
-		if (vc.LoopFlags & XAFLAG_LOOP_END)
-		{
-			thiscore.Regs.ENDX |= (1 << voiceidx);
-			vc.NextA = vc.LoopStartA | 1;
-		}
-		else
-			vc.NextA++; // no, don't IncrementNextA here.  We haven't read the header yet.
+		vc.DecodeFifo[(vc.DecPosWrite + i) % 32] = vc.SBuffer[sampleIdx + i];
 	}
-
-	if (vc.SCurrent == 28)
-	{
-		for (int i = 0; i < 2; i++)
-			if (Cores[i].IRQEnable && Cores[i].IRQA == (vc.NextA & 0xFFFF8))
-				SetIrqCall(i);
-
-		vc.LoopFlags = *GetMemPtr(vc.NextA & 0xFFFF8) >> 8; // grab loop flags from the upper byte.
-
-		if ((vc.LoopFlags & XAFLAG_LOOP_START) && !vc.LoopMode)
-			vc.LoopStartA = vc.NextA & 0xFFFF8;
-
-		vc.SCurrent = 0;
-	}
-
-	vc.SP -= 0x1000 * (4 - (vc.SCurrent & 3));
-	vc.SCurrent += 4 - (vc.SCurrent & 3);
 }
 
 /////////////////////////////////////////////////////////////////////////////////////////
@@ -237,6 +167,69 @@ static __forceinline StereoOut32 ApplyVolume(const StereoOut32& data, const V_Vo
 		ApplyVolume(data.Right, volume.Right.Value));
 }
 
+static __forceinline void UpdateBlockHeader(V_Core& thiscore, uint voiceidx)
+{
+	V_Voice& vc(thiscore.Voices[voiceidx]);
+
+	for (int i = 0; i < 2; i++)
+		if (Cores[i].IRQEnable && Cores[i].IRQA == (vc.NextA & 0xFFFF8))
+			SetIrqCall(i);
+
+	s16* memptr = GetMemPtr(vc.NextA & 0xFFFF8);
+	vc.LoopFlags = *memptr >> 8; // grab loop flags from the upper byte.
+
+	if ((vc.LoopFlags & XAFLAG_LOOP_START) && !vc.LoopMode)
+	{
+		vc.LoopStartA = vc.NextA & 0xFFFF8;
+	}
+}
+
+static __forceinline void DecodeSamples(uint coreidx, uint voiceidx)
+{
+	V_Core& thiscore(Cores[coreidx]);
+	V_Voice& vc(thiscore.Voices[voiceidx]);
+
+	// Update the block header on every audio frame
+	UpdateBlockHeader(thiscore, voiceidx);
+
+	// When a voice is started at 0 pitch, NAX quickly advances to SSA + 5
+	// So that would mean the decode buffer holds around 12 samples
+	if (((int)(vc.DecPosWrite - vc.DecPosRead)) > 12) {
+		// Sufficient data buffered
+		return;
+	}
+
+	if (vc.ADSR.Phase > V_ADSR::PHASE_STOPPED)
+	{
+		GetNextDataBuffered(thiscore, voiceidx);
+	}
+
+	vc.DecPosWrite += 4;
+
+	IncrementNextA(thiscore, voiceidx);
+	if ((vc.NextA & 7) == 0)
+	{
+		if (vc.LoopFlags & XAFLAG_LOOP_END)
+		{
+			thiscore.Regs.ENDX |= (1 << voiceidx);
+			vc.NextA = vc.LoopStartA;
+			if (!(vc.LoopFlags & XAFLAG_LOOP))
+			{
+				vc.Stop();
+
+				if (IsDevBuild)
+				{
+					if (SPU2::MsgVoiceOff())
+						SPU2::ConLog("* SPU2: Voice Off by EndPoint: %d \n", voiceidx);
+				}
+			}
+		}
+
+		IncrementNextA(thiscore, voiceidx);
+		vc.SBuffer = nullptr;
+	}
+}
+
 static void __forceinline UpdatePitch(uint coreidx, uint voiceidx)
 {
 	V_Voice& vc(Cores[coreidx].Voices[voiceidx]);
@@ -278,33 +271,27 @@ static __forceinline void CalculateADSR(V_Core& thiscore, uint voiceidx)
 	pxAssume(vc.ADSR.Value >= 0); // ADSR should never be negative...
 }
 
-__forceinline static s32 GaussianInterpolate(s32 pv4, s32 pv3, s32 pv2, s32 pv1, s32 i)
+static __forceinline void ConsumeSamples(V_Core& thiscore, uint voiceidx)
 {
-	s32 out = 0;
-	out += (interpTable[i][0] * pv4) >> 15;
-	out += (interpTable[i][1] * pv3) >> 15;
-	out += (interpTable[i][2] * pv2) >> 15;
-	out += (interpTable[i][3] * pv1) >> 15;
+	V_Voice& vc(thiscore.Voices[voiceidx]);
 
-	return out;
+	int consumed = vc.SP >> 12;
+	vc.SP &= 0xfff;
+	vc.DecPosRead += consumed;
 }
 
 static __forceinline s32 GetVoiceValues(V_Core& thiscore, uint voiceidx)
 {
 	V_Voice& vc(thiscore.Voices[voiceidx]);
 
-	while (vc.SP >= 0)
-	{
-		vc.PV4 = vc.PV3;
-		vc.PV3 = vc.PV2;
-		vc.PV2 = vc.PV1;
-		vc.PV1 = GetNextDataBuffered(thiscore, voiceidx);
-		vc.SP -= 0x1000;
-	}
+	int phase = (vc.SP & 0x0ff0) >> 4;
+	s32 out = 0;
+	out += (interpTable[phase][0] * vc.DecodeFifo[(vc.DecPosRead + 0) % 32]) >> 15;
+	out += (interpTable[phase][1] * vc.DecodeFifo[(vc.DecPosRead + 1) % 32]) >> 15;
+	out += (interpTable[phase][2] * vc.DecodeFifo[(vc.DecPosRead + 2) % 32]) >> 15;
+	out += (interpTable[phase][3] * vc.DecodeFifo[(vc.DecPosRead + 3) % 32]) >> 15;
 
-	const s32 mu = vc.SP + 0x1000;
-
-	return GaussianInterpolate(vc.PV4, vc.PV3, vc.PV2, vc.PV1, (mu & 0x0ff0) >> 4);
+	return out;
 }
 
 // This is Dr. Hell's noise algorithm as implemented in pcsxr
@@ -382,21 +369,13 @@ static __forceinline StereoOut32 MixVoice(uint coreidx, uint voiceidx)
 	V_Core& thiscore(Cores[coreidx]);
 	V_Voice& vc(thiscore.Voices[voiceidx]);
 
-	// If this assertion fails, it mans SCurrent is being corrupted somewhere, or is not initialized
-	// properly.  Invalid values in SCurrent will cause errant IRQs and corrupted audio.
-	pxAssertMsg((vc.SCurrent <= 28) && (vc.SCurrent != 0), "Current sample should always range from 1->28");
-
 	// Most games don't use much volume slide effects.  So only call the UpdateVolume
 	// methods when needed by checking the flag outside the method here...
 	// (Note: Ys 6 : Ark of Nephistm uses these effects)
 
 	vc.Volume.Update();
 
-	// SPU2 Note: The spu2 continues to process voices for eternity, always, so we
-	// have to run through all the motions of updating the voice regardless of it's
-	// audible status.  Otherwise IRQs might not trigger and emulation might fail.
-
-	UpdatePitch(coreidx, voiceidx);
+	DecodeSamples(coreidx, voiceidx);
 
 	StereoOut32 voiceOut(0, 0);
 	s32 Value = 0;
@@ -419,11 +398,14 @@ static __forceinline StereoOut32 MixVoice(uint coreidx, uint voiceidx)
 
 		voiceOut = ApplyVolume(StereoOut32(Value, Value), vc.Volume);
 	}
-	else
-	{
-		while (vc.SP >= 0)
-			GetNextDataDummy(thiscore, voiceidx); // Dummy is enough
-	}
+
+	// SPU2 Note: The spu2 continues to process voices for eternity, always, so we
+	// have to run through all the motions of updating the voice regardless of it's
+	// audible status.  Otherwise IRQs might not trigger and emulation might fail.
+
+	UpdatePitch(coreidx, voiceidx);
+
+	ConsumeSamples(thiscore, voiceidx);
 
 	// Write-back of raw voice data (post ADSR applied)
 	if (voiceidx == 1)
diff --git a/pcsx2/SPU2/defs.h b/pcsx2/SPU2/defs.h
index 5aeae73bd6..2fd12015c5 100644
--- a/pcsx2/SPU2/defs.h
+++ b/pcsx2/SPU2/defs.h
@@ -256,22 +256,16 @@ struct V_Voice
 	// Sample pointer (19:12 bit fixed point)
 	s32 SP;
 
-	// Previous sample values - used for interpolation
-	// Inverted order of these members to match the access order in the
-	//   code (might improve cache hits).
-	s32 PV4;
-	s32 PV3;
-	s32 PV2;
-	s32 PV1;
-
 	// Last outputted audio value, used for voice modulation.
 	s32 OutX;
 
 	// SBuffer now points directly to an ADPCM cache entry.
 	s16* SBuffer;
 
-	// sample position within the current decoded packet.
-	s32 SCurrent;
+	// Each voice has a buffer of decoded samples
+	s32 DecodeFifo[32];
+	u32 DecPosWrite;
+	u32 DecPosRead;
 
 	// it takes a few ticks for voices to start on the real SPU2?
 	void Start();
diff --git a/pcsx2/SPU2/spu2sys.cpp b/pcsx2/SPU2/spu2sys.cpp
index f46ba745bd..54086c78ad 100644
--- a/pcsx2/SPU2/spu2sys.cpp
+++ b/pcsx2/SPU2/spu2sys.cpp
@@ -181,7 +181,6 @@ void V_Core::Init(int index)
 		VoiceGates[v].WetR = -1;
 
 		Voices[v].Volume = V_VolumeSlideLR(0, 0); // V_VolumeSlideLR::Max;
-		Voices[v].SCurrent = 28;
 
 		Voices[v].ADSR.Counter = 0;
 		Voices[v].ADSR.Value = 0;
@@ -190,6 +189,10 @@ void V_Core::Init(int index)
 		Voices[v].NextA = 0x2801;
 		Voices[v].StartA = 0x2800;
 		Voices[v].LoopStartA = 0x2800;
+
+		memset(Voices[v].DecodeFifo, 0, sizeof(Voices[v].DecodeFifo));
+		Voices[v].DecPosRead = 0;
+		Voices[v].DecPosWrite = 0;
 	}
 
 	DMAICounter = 0;
@@ -212,22 +215,18 @@ void V_Voice::Start()
 	}
 
 	ADSR.Attack();
-	SCurrent = 28;
 	LoopMode = 0;
 
-	// When SP >= 0 the next sample will be grabbed, we don't want this to happen
-	// instantly because in the case of pitch being 0 we want to delay getting
-	// the next block header. This is a hack to work around the fact that unlike
-	// the HW we don't update the block header on every cycle.
-	SP = -1;
+	SP = 0;
 
 	LoopFlags = 0;
 	NextA = StartA | 1;
 	Prev1 = 0;
 	Prev2 = 0;
 
-	PV1 = PV2 = 0;
-	PV3 = PV4 = 0;
+	SBuffer = nullptr;
+	DecPosRead = 0;
+	DecPosWrite = 0;
 }
 
 void V_Voice::Stop()
@@ -1014,12 +1013,10 @@ static void RegWrite_VoiceAddr(u16 value)
 			// Wallace And Gromit: Curse Of The Were-Rabbit.
 
 			thisvoice.NextA = ((u32)(value & 0x0F) << 16) | (thisvoice.NextA & 0xFFF8) | 1;
-			thisvoice.SCurrent = 28;
 			break;
 
 		case 5:
 			thisvoice.NextA = (thisvoice.NextA & 0x0F0000) | (value & 0xFFF8) | 1;
-			thisvoice.SCurrent = 28;
 			break;
 	}
 }
@@ -1237,7 +1234,6 @@ static void RegWrite_Core(u16 value)
 				for (uint v = 0; v < 24; ++v)
 				{
 					Cores[1].Voices[v].Volume = V_VolumeSlideLR(0, 0); // V_VolumeSlideLR::Max;
-					Cores[1].Voices[v].SCurrent = 28;
 
 					Cores[1].Voices[v].ADSR.Value = 0;
 					Cores[1].Voices[v].ADSR.Phase = 0;
diff --git a/pcsx2/SaveState.h b/pcsx2/SaveState.h
index 4d1a5ea679..11bc3a5e90 100644
--- a/pcsx2/SaveState.h
+++ b/pcsx2/SaveState.h
@@ -25,7 +25,7 @@ enum class FreezeAction
 // [SAVEVERSION+]
 // This informs the auto updater that the users savestates will be invalidated.
 
-static const u32 g_SaveVersion = (0x9A56 << 16) | 0x0000;
+static const u32 g_SaveVersion = (0x9A57 << 16) | 0x0000;
 
 
 // the freezing data between submodules and core