mirror of
https://github.com/PCSX2/pcsx2.git
synced 2025-12-16 04:08:48 +00:00
GS/SW: Use non-saturating ARM instructions for color gradient setup.
Some checks are pending
🐧 Linux Builds / AppImage (push) Waiting to run
🐧 Linux Builds / Flatpak (push) Waiting to run
🍎 MacOS Builds / Defaults (push) Waiting to run
🖥️ Windows Builds / Lint VS Project Files (push) Waiting to run
🖥️ Windows Builds / SSE4 (push) Blocked by required conditions
🖥️ Windows Builds / AVX2 (push) Blocked by required conditions
🖥️ Windows Builds / CMake (push) Waiting to run
Some checks are pending
🐧 Linux Builds / AppImage (push) Waiting to run
🐧 Linux Builds / Flatpak (push) Waiting to run
🍎 MacOS Builds / Defaults (push) Waiting to run
🖥️ Windows Builds / Lint VS Project Files (push) Waiting to run
🖥️ Windows Builds / SSE4 (push) Blocked by required conditions
🖥️ Windows Builds / AVX2 (push) Blocked by required conditions
🖥️ Windows Builds / CMake (push) Waiting to run
This is more efficient on ARM, though the equivalent instructions are not currently used in the x64 JIT and C++ versions of GSVector.
Co-authored-by: TellowKrinkle
This commit is contained in:
parent
a7f5ddfe0d
commit
f322dfb1d4
@@ -225,17 +225,13 @@ void GSSetupPrimCodeGenerator::Color()
|
||||
// GSVector4 c = dscan.c;
|
||||
armAsm->Ldr(v16, MemOperand(_dscan, offsetof(GSVertexSW, c)));
|
||||
|
||||
// constexpr VectorI mask16 = VectorI::cxpr(0xFFFF);
|
||||
armAsm->Movi(v17.V4S(), 0xFFFF);
|
||||
|
||||
// local.d4.c = (GSVector4i(dscan.c * step_shift) & mask16).xzyw().pu32();
|
||||
// GSVector4i tmp = GSVector4i(dscan.c * step_shift).xzyw();
|
||||
// local.d4.c = tmp.uzp1_16(tmp); // Not currently in GSVector since that's mainly targeting x86 for now
|
||||
armAsm->Fmul(v2.V4S(), v16.V4S(), v3.V4S());
|
||||
armAsm->Fcvtzs(v2.V4S(), v2.V4S());
|
||||
armAsm->And(v2.V4S(), v17.V4S());
|
||||
armAsm->Rev64(_vscratch.V4S(), v2.V4S());
|
||||
armAsm->Uzp1(v2.V4S(), v2.V4S(), _vscratch.V4S());
|
||||
armAsm->Uqxtn(v2.V4H(), v2.V4S());
|
||||
armAsm->Dup(v2.V2D(), v2.V2D(), 0);
|
||||
armAsm->Uzp1(v2.V8H(), v2.V8H(), v2.V8H());
|
||||
armAsm->Str(v2, MemOperand(_locals, offsetof(GSScanlineLocalData, d4.c)));
|
||||
|
||||
// GSVector4 dr = c.xxxx();
|
||||
@@ -246,25 +242,18 @@ void GSSetupPrimCodeGenerator::Color()
|
||||
|
||||
for (int i = 0; i < (m_sel.notest ? 1 : 4); i++)
|
||||
{
|
||||
// VectorI r = (VectorI(dr * shift[1 + i]) & mask16).pu32();
|
||||
// VectorI r = VectorI(dr * shift[1 + i]);
|
||||
|
||||
armAsm->Fmul(v2.V4S(), v0.V4S(), VRegister(4 + i, kFormat4S));
|
||||
armAsm->Fcvtzs(v2.V4S(), v2.V4S());
|
||||
armAsm->And(v2.V4S(), v17.V4S());
|
||||
armAsm->Uqxtn(v2.V4H(), v2.V4S());
|
||||
armAsm->Dup(v2.V2D(), v2.V2D(), 0);
|
||||
|
||||
// VectorI b = (VectorI(db * shift[1 + i]) & mask16).pu32();
|
||||
// VectorI b = VectorI(db * shift[1 + i]);
|
||||
|
||||
armAsm->Fmul(v3.V4S(), v1.V4S(), VRegister(4 + i, kFormat4S));
|
||||
armAsm->Fcvtzs(v3.V4S(), v3.V4S());
|
||||
armAsm->And(v3.V4S(), v17.V4S());
|
||||
armAsm->Uqxtn(v3.V4H(), v3.V4S());
|
||||
armAsm->Dup(v3.V2D(), v3.V2D(), 0);
|
||||
|
||||
// m_local.d[i].rb = r.upl16(b);
|
||||
|
||||
armAsm->Zip1(v2.V8H(), v2.V8H(), v3.V8H());
|
||||
// m_local.d[i].rb = r.trn1_16(b); // Not currently in GSVector since that's mainly targeting x86 for now
|
||||
armAsm->Trn1(v2.V8H(), v2.V8H(), v3.V8H());
|
||||
armAsm->Str(v2, _local(d[i].rb));
|
||||
}
|
||||
|
||||
@@ -278,25 +267,19 @@ void GSSetupPrimCodeGenerator::Color()
|
||||
|
||||
for (int i = 0; i < (m_sel.notest ? 1 : 4); i++)
|
||||
{
|
||||
// VectorI g = (VectorI(dg * shift[1 + i]) & mask16).pu32();
|
||||
// VectorI g = VectorI(dg * shift[1 + i]);
|
||||
|
||||
armAsm->Fmul(v2.V4S(), v0.V4S(), VRegister(4 + i, kFormat4S));
|
||||
armAsm->Fcvtzs(v2.V4S(), v2.V4S());
|
||||
armAsm->And(v2.V4S(), v17.V4S());
|
||||
armAsm->Uqxtn(v2.V4H(), v2.V4S());
|
||||
armAsm->Dup(v2.V2D(), v2.V2D(), 0);
|
||||
|
||||
// VectorI a = (VectorI(da * shift[1 + i]) & mask16).pu32();
|
||||
// VectorI a = VectorI(da * shift[1 + i]);
|
||||
|
||||
armAsm->Fmul(v3.V4S(), v1.V4S(), VRegister(4 + i, kFormat4S));
|
||||
armAsm->Fcvtzs(v3.V4S(), v3.V4S());
|
||||
armAsm->And(v3.V4S(), v17.V4S());
|
||||
armAsm->Uqxtn(v3.V4H(), v3.V4S());
|
||||
armAsm->Dup(v3.V2D(), v3.V2D(), 0);
|
||||
|
||||
// m_local.d[i].ga = g.upl16(a);
|
||||
// m_local.d[i].ga = g.trn1_16(a); // Not currently in GSVector since that's mainly targeting x86 for now
|
||||
|
||||
armAsm->Zip1(v2.V8H(), v2.V8H(), v3.V8H());
|
||||
armAsm->Trn1(v2.V8H(), v2.V8H(), v3.V8H());
|
||||
armAsm->Str(v2, _local(d[i].ga));
|
||||
}
|
||||
}
|
||||
|
||||
Loading…
Reference in New Issue
Block a user