SPU: Emulate voice decode buffers

This makes the timing of NAX advancing more similar to console since it
emulates the decode buffer behaviour of it rushing ahead of playback
until the buffer is full.

It also makes interpolation of the first four samples more correct by
using real data instead of the zero filled previous values.

[SAVEVERSION+]
This commit is contained in:
Ziemas 2025-11-05 22:06:52 +01:00
parent 77dd916a6b
commit 8328b4e278
5 changed files with 106 additions and 135 deletions

View File

@ -200,7 +200,6 @@ void SPU2::DoFullDump()
fprintf(dump, " - Sound Start Address: %x\n", Cores[c].Voices[v].StartA);
fprintf(dump, " - Next Data Address: %x\n", Cores[c].Voices[v].NextA);
fprintf(dump, " - Play Status: %s\n", (Cores[c].Voices[v].ADSR.Phase > 0) ? "Playing" : "Not Playing");
fprintf(dump, " - Block Sample: %d\n", Cores[c].Voices[v].SCurrent);
}
fprintf(dump, "#### END OF DUMP.\n\n");
}

View File

@ -89,55 +89,13 @@ int g_counter_cache_ignores = 0;
#define XAFLAG_LOOP (1ul << 1)
#define XAFLAG_LOOP_START (1ul << 2)
static __forceinline s32 GetNextDataBuffered(V_Core& thiscore, uint voiceidx)
static __forceinline void GetNextDataBuffered(V_Core& thiscore, uint voiceidx)
{
V_Voice& vc(thiscore.Voices[voiceidx]);
if ((vc.SCurrent & 3) == 0)
if (vc.SBuffer == nullptr)
{
IncrementNextA(thiscore, voiceidx);
if ((vc.NextA & 7) == 0) // vc.SCurrent == 24 equivalent
{
if (vc.LoopFlags & XAFLAG_LOOP_END)
{
thiscore.Regs.ENDX |= (1 << voiceidx);
vc.NextA = vc.LoopStartA | 1;
if (!(vc.LoopFlags & XAFLAG_LOOP))
{
vc.Stop();
if (IsDevBuild)
{
if (SPU2::MsgVoiceOff())
SPU2::ConLog("* SPU2: Voice Off by EndPoint: %d \n", voiceidx);
}
}
}
else
vc.NextA++; // no, don't IncrementNextA here. We haven't read the header yet.
}
}
if (vc.SCurrent == 28)
{
vc.SCurrent = 0;
// We'll need the loop flags and buffer pointers regardless of cache status:
for (int i = 0; i < 2; i++)
if (Cores[i].IRQEnable && Cores[i].IRQA == (vc.NextA & 0xFFFF8))
SetIrqCall(i);
s16* memptr = GetMemPtr(vc.NextA & 0xFFFF8);
vc.LoopFlags = *memptr >> 8; // grab loop flags from the upper byte.
if ((vc.LoopFlags & XAFLAG_LOOP_START) && !vc.LoopMode)
{
vc.LoopStartA = vc.NextA & 0xFFFF8;
}
const int cacheIdx = vc.NextA / pcm_WordsPerBlock;
const int cacheIdx = (vc.NextA & 0xFFFF8) / pcm_WordsPerBlock;
PcmCacheEntry& cacheLine = pcm_cache_data[cacheIdx];
vc.SBuffer = cacheLine.Sampledata;
@ -172,46 +130,18 @@ static __forceinline s32 GetNextDataBuffered(V_Core& thiscore, uint voiceidx)
g_counter_cache_misses++;
}
s16* memptr = GetMemPtr(vc.NextA & 0xFFFF8);
XA_decode_block(vc.SBuffer, memptr, vc.Prev1, vc.Prev2);
}
}
return vc.SBuffer[vc.SCurrent++];
}
static __forceinline void GetNextDataDummy(V_Core& thiscore, uint voiceidx)
{
V_Voice& vc(thiscore.Voices[voiceidx]);
IncrementNextA(thiscore, voiceidx);
if ((vc.NextA & 7) == 0) // vc.SCurrent == 24 equivalent
// Get the sample index for NextA, we have to subtract 1 to ignore the loop header
int sampleIdx = ((vc.NextA % pcm_WordsPerBlock) - 1) * 4;
for (int i = 0; i < 4; i++)
{
if (vc.LoopFlags & XAFLAG_LOOP_END)
{
thiscore.Regs.ENDX |= (1 << voiceidx);
vc.NextA = vc.LoopStartA | 1;
}
else
vc.NextA++; // no, don't IncrementNextA here. We haven't read the header yet.
vc.DecodeFifo[(vc.DecPosWrite + i) % 32] = vc.SBuffer[sampleIdx + i];
}
if (vc.SCurrent == 28)
{
for (int i = 0; i < 2; i++)
if (Cores[i].IRQEnable && Cores[i].IRQA == (vc.NextA & 0xFFFF8))
SetIrqCall(i);
vc.LoopFlags = *GetMemPtr(vc.NextA & 0xFFFF8) >> 8; // grab loop flags from the upper byte.
if ((vc.LoopFlags & XAFLAG_LOOP_START) && !vc.LoopMode)
vc.LoopStartA = vc.NextA & 0xFFFF8;
vc.SCurrent = 0;
}
vc.SP -= 0x1000 * (4 - (vc.SCurrent & 3));
vc.SCurrent += 4 - (vc.SCurrent & 3);
}
/////////////////////////////////////////////////////////////////////////////////////////
@ -237,6 +167,69 @@ static __forceinline StereoOut32 ApplyVolume(const StereoOut32& data, const V_Vo
ApplyVolume(data.Right, volume.Right.Value));
}
static __forceinline void UpdateBlockHeader(V_Core& thiscore, uint voiceidx)
{
V_Voice& vc(thiscore.Voices[voiceidx]);
for (int i = 0; i < 2; i++)
if (Cores[i].IRQEnable && Cores[i].IRQA == (vc.NextA & 0xFFFF8))
SetIrqCall(i);
s16* memptr = GetMemPtr(vc.NextA & 0xFFFF8);
vc.LoopFlags = *memptr >> 8; // grab loop flags from the upper byte.
if ((vc.LoopFlags & XAFLAG_LOOP_START) && !vc.LoopMode)
{
vc.LoopStartA = vc.NextA & 0xFFFF8;
}
}
static __forceinline void DecodeSamples(uint coreidx, uint voiceidx)
{
V_Core& thiscore(Cores[coreidx]);
V_Voice& vc(thiscore.Voices[voiceidx]);
// Update the block header on every audio frame
UpdateBlockHeader(thiscore, voiceidx);
// When a voice is started at 0 pitch, NAX quickly advances to SSA + 5
// So that would mean the decode buffer holds around 12 samples
if (((int)(vc.DecPosWrite - vc.DecPosRead)) > 12) {
// Sufficient data buffered
return;
}
if (vc.ADSR.Phase > V_ADSR::PHASE_STOPPED)
{
GetNextDataBuffered(thiscore, voiceidx);
}
vc.DecPosWrite += 4;
IncrementNextA(thiscore, voiceidx);
if ((vc.NextA & 7) == 0)
{
if (vc.LoopFlags & XAFLAG_LOOP_END)
{
thiscore.Regs.ENDX |= (1 << voiceidx);
vc.NextA = vc.LoopStartA;
if (!(vc.LoopFlags & XAFLAG_LOOP))
{
vc.Stop();
if (IsDevBuild)
{
if (SPU2::MsgVoiceOff())
SPU2::ConLog("* SPU2: Voice Off by EndPoint: %d \n", voiceidx);
}
}
}
IncrementNextA(thiscore, voiceidx);
vc.SBuffer = nullptr;
}
}
static void __forceinline UpdatePitch(uint coreidx, uint voiceidx)
{
V_Voice& vc(Cores[coreidx].Voices[voiceidx]);
@ -278,33 +271,27 @@ static __forceinline void CalculateADSR(V_Core& thiscore, uint voiceidx)
pxAssume(vc.ADSR.Value >= 0); // ADSR should never be negative...
}
__forceinline static s32 GaussianInterpolate(s32 pv4, s32 pv3, s32 pv2, s32 pv1, s32 i)
static __forceinline void ConsumeSamples(V_Core& thiscore, uint voiceidx)
{
s32 out = 0;
out += (interpTable[i][0] * pv4) >> 15;
out += (interpTable[i][1] * pv3) >> 15;
out += (interpTable[i][2] * pv2) >> 15;
out += (interpTable[i][3] * pv1) >> 15;
V_Voice& vc(thiscore.Voices[voiceidx]);
return out;
int consumed = vc.SP >> 12;
vc.SP &= 0xfff;
vc.DecPosRead += consumed;
}
static __forceinline s32 GetVoiceValues(V_Core& thiscore, uint voiceidx)
{
V_Voice& vc(thiscore.Voices[voiceidx]);
while (vc.SP >= 0)
{
vc.PV4 = vc.PV3;
vc.PV3 = vc.PV2;
vc.PV2 = vc.PV1;
vc.PV1 = GetNextDataBuffered(thiscore, voiceidx);
vc.SP -= 0x1000;
}
int phase = (vc.SP & 0x0ff0) >> 4;
s32 out = 0;
out += (interpTable[phase][0] * vc.DecodeFifo[(vc.DecPosRead + 0) % 32]) >> 15;
out += (interpTable[phase][1] * vc.DecodeFifo[(vc.DecPosRead + 1) % 32]) >> 15;
out += (interpTable[phase][2] * vc.DecodeFifo[(vc.DecPosRead + 2) % 32]) >> 15;
out += (interpTable[phase][3] * vc.DecodeFifo[(vc.DecPosRead + 3) % 32]) >> 15;
const s32 mu = vc.SP + 0x1000;
return GaussianInterpolate(vc.PV4, vc.PV3, vc.PV2, vc.PV1, (mu & 0x0ff0) >> 4);
return out;
}
// This is Dr. Hell's noise algorithm as implemented in pcsxr
@ -382,21 +369,13 @@ static __forceinline StereoOut32 MixVoice(uint coreidx, uint voiceidx)
V_Core& thiscore(Cores[coreidx]);
V_Voice& vc(thiscore.Voices[voiceidx]);
// If this assertion fails, it mans SCurrent is being corrupted somewhere, or is not initialized
// properly. Invalid values in SCurrent will cause errant IRQs and corrupted audio.
pxAssertMsg((vc.SCurrent <= 28) && (vc.SCurrent != 0), "Current sample should always range from 1->28");
// Most games don't use much volume slide effects. So only call the UpdateVolume
// methods when needed by checking the flag outside the method here...
// (Note: Ys 6 : Ark of Nephistm uses these effects)
vc.Volume.Update();
// SPU2 Note: The spu2 continues to process voices for eternity, always, so we
// have to run through all the motions of updating the voice regardless of it's
// audible status. Otherwise IRQs might not trigger and emulation might fail.
UpdatePitch(coreidx, voiceidx);
DecodeSamples(coreidx, voiceidx);
StereoOut32 voiceOut(0, 0);
s32 Value = 0;
@ -419,11 +398,14 @@ static __forceinline StereoOut32 MixVoice(uint coreidx, uint voiceidx)
voiceOut = ApplyVolume(StereoOut32(Value, Value), vc.Volume);
}
else
{
while (vc.SP >= 0)
GetNextDataDummy(thiscore, voiceidx); // Dummy is enough
}
// SPU2 Note: The spu2 continues to process voices for eternity, always, so we
// have to run through all the motions of updating the voice regardless of it's
// audible status. Otherwise IRQs might not trigger and emulation might fail.
UpdatePitch(coreidx, voiceidx);
ConsumeSamples(thiscore, voiceidx);
// Write-back of raw voice data (post ADSR applied)
if (voiceidx == 1)

View File

@ -256,22 +256,16 @@ struct V_Voice
// Sample pointer (19:12 bit fixed point)
s32 SP;
// Previous sample values - used for interpolation
// Inverted order of these members to match the access order in the
// code (might improve cache hits).
s32 PV4;
s32 PV3;
s32 PV2;
s32 PV1;
// Last outputted audio value, used for voice modulation.
s32 OutX;
// SBuffer now points directly to an ADPCM cache entry.
s16* SBuffer;
// sample position within the current decoded packet.
s32 SCurrent;
// Each voice has a buffer of decoded samples
s32 DecodeFifo[32];
u32 DecPosWrite;
u32 DecPosRead;
// it takes a few ticks for voices to start on the real SPU2?
void Start();

View File

@ -181,7 +181,6 @@ void V_Core::Init(int index)
VoiceGates[v].WetR = -1;
Voices[v].Volume = V_VolumeSlideLR(0, 0); // V_VolumeSlideLR::Max;
Voices[v].SCurrent = 28;
Voices[v].ADSR.Counter = 0;
Voices[v].ADSR.Value = 0;
@ -190,6 +189,10 @@ void V_Core::Init(int index)
Voices[v].NextA = 0x2801;
Voices[v].StartA = 0x2800;
Voices[v].LoopStartA = 0x2800;
memset(Voices[v].DecodeFifo, 0, sizeof(Voices[v].DecodeFifo));
Voices[v].DecPosRead = 0;
Voices[v].DecPosWrite = 0;
}
DMAICounter = 0;
@ -212,22 +215,18 @@ void V_Voice::Start()
}
ADSR.Attack();
SCurrent = 28;
LoopMode = 0;
// When SP >= 0 the next sample will be grabbed, we don't want this to happen
// instantly because in the case of pitch being 0 we want to delay getting
// the next block header. This is a hack to work around the fact that unlike
// the HW we don't update the block header on every cycle.
SP = -1;
SP = 0;
LoopFlags = 0;
NextA = StartA | 1;
Prev1 = 0;
Prev2 = 0;
PV1 = PV2 = 0;
PV3 = PV4 = 0;
SBuffer = nullptr;
DecPosRead = 0;
DecPosWrite = 0;
}
void V_Voice::Stop()
@ -1014,12 +1013,10 @@ static void RegWrite_VoiceAddr(u16 value)
// Wallace And Gromit: Curse Of The Were-Rabbit.
thisvoice.NextA = ((u32)(value & 0x0F) << 16) | (thisvoice.NextA & 0xFFF8) | 1;
thisvoice.SCurrent = 28;
break;
case 5:
thisvoice.NextA = (thisvoice.NextA & 0x0F0000) | (value & 0xFFF8) | 1;
thisvoice.SCurrent = 28;
break;
}
}
@ -1237,7 +1234,6 @@ static void RegWrite_Core(u16 value)
for (uint v = 0; v < 24; ++v)
{
Cores[1].Voices[v].Volume = V_VolumeSlideLR(0, 0); // V_VolumeSlideLR::Max;
Cores[1].Voices[v].SCurrent = 28;
Cores[1].Voices[v].ADSR.Value = 0;
Cores[1].Voices[v].ADSR.Phase = 0;

View File

@ -25,7 +25,7 @@ enum class FreezeAction
// [SAVEVERSION+]
// This informs the auto updater that the users savestates will be invalidated.
static const u32 g_SaveVersion = (0x9A56 << 16) | 0x0000;
static const u32 g_SaveVersion = (0x9A57 << 16) | 0x0000;
// the freezing data between submodules and core