mirror of
https://github.com/PCSX2/pcsx2.git
synced 2025-12-16 04:08:48 +00:00
Common: Switch fp compare instructions to auto SSE/AVX
This commit is contained in:
parent
01a1b017e8
commit
0a76c5a64d
@ -8,10 +8,10 @@ namespace x86Emitter
|
||||
|
||||
struct xImplSimd_MinMax
|
||||
{
|
||||
const xImplSimd_DestRegSSE PS; // packed single precision
|
||||
const xImplSimd_DestRegSSE PD; // packed double precision
|
||||
const xImplSimd_DestRegSSE SS; // scalar single precision
|
||||
const xImplSimd_DestRegSSE SD; // scalar double precision
|
||||
const xImplSimd_3Arg PS; // packed single precision
|
||||
const xImplSimd_3Arg PD; // packed double precision
|
||||
const xImplSimd_3Arg SS; // scalar single precision
|
||||
const xImplSimd_3Arg SD; // scalar double precision
|
||||
};
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////////////////////
|
||||
@ -20,17 +20,25 @@ namespace x86Emitter
|
||||
{
|
||||
SSE2_ComparisonType CType;
|
||||
|
||||
void PS(const xRegisterSSE& to, const xRegisterSSE& from) const;
|
||||
void PS(const xRegisterSSE& to, const xIndirectVoid& from) const;
|
||||
void PS(const xRegisterSSE& dst, const xRegisterSSE& src) const { PS(dst, dst, src); }
|
||||
void PS(const xRegisterSSE& dst, const xIndirectVoid& src) const { PS(dst, dst, src); }
|
||||
void PS(const xRegisterSSE& dst, const xRegisterSSE& src1, const xRegisterSSE& src2) const;
|
||||
void PS(const xRegisterSSE& dst, const xRegisterSSE& src1, const xIndirectVoid& src2) const;
|
||||
|
||||
void PD(const xRegisterSSE& to, const xRegisterSSE& from) const;
|
||||
void PD(const xRegisterSSE& to, const xIndirectVoid& from) const;
|
||||
void PD(const xRegisterSSE& dst, const xRegisterSSE& src) const { PD(dst, dst, src); }
|
||||
void PD(const xRegisterSSE& dst, const xIndirectVoid& src) const { PD(dst, dst, src); }
|
||||
void PD(const xRegisterSSE& dst, const xRegisterSSE& src1, const xRegisterSSE& src2) const;
|
||||
void PD(const xRegisterSSE& dst, const xRegisterSSE& src1, const xIndirectVoid& src2) const;
|
||||
|
||||
void SS(const xRegisterSSE& to, const xRegisterSSE& from) const;
|
||||
void SS(const xRegisterSSE& to, const xIndirectVoid& from) const;
|
||||
void SS(const xRegisterSSE& dst, const xRegisterSSE& src) const { SS(dst, dst, src); }
|
||||
void SS(const xRegisterSSE& dst, const xIndirectVoid& src) const { SS(dst, dst, src); }
|
||||
void SS(const xRegisterSSE& dst, const xRegisterSSE& src1, const xRegisterSSE& src2) const;
|
||||
void SS(const xRegisterSSE& dst, const xRegisterSSE& src1, const xIndirectVoid& src2) const;
|
||||
|
||||
void SD(const xRegisterSSE& to, const xRegisterSSE& from) const;
|
||||
void SD(const xRegisterSSE& to, const xIndirectVoid& from) const;
|
||||
void SD(const xRegisterSSE& dst, const xRegisterSSE& src) const { SD(dst, dst, src); }
|
||||
void SD(const xRegisterSSE& dst, const xIndirectVoid& src) const { SD(dst, dst, src); }
|
||||
void SD(const xRegisterSSE& dst, const xRegisterSSE& src1, const xRegisterSSE& src2) const;
|
||||
void SD(const xRegisterSSE& dst, const xRegisterSSE& src1, const xIndirectVoid& src2) const;
|
||||
};
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////////////////////
|
||||
@ -38,8 +46,8 @@ namespace x86Emitter
|
||||
//
|
||||
struct xImplSimd_COMI
|
||||
{
|
||||
const xImplSimd_DestRegSSE SS;
|
||||
const xImplSimd_DestRegSSE SD;
|
||||
const xImplSimd_2Arg SS;
|
||||
const xImplSimd_2Arg SD;
|
||||
};
|
||||
|
||||
|
||||
|
||||
@ -446,32 +446,32 @@ namespace x86Emitter
|
||||
// SIMD Comparison Instructions
|
||||
// =====================================================================================================
|
||||
|
||||
void xImplSimd_Compare::PS(const xRegisterSSE& to, const xRegisterSSE& from) const { xOpWrite0F(0x00, 0xc2, to, from, (u8)CType); }
|
||||
void xImplSimd_Compare::PS(const xRegisterSSE& to, const xIndirectVoid& from) const { xOpWrite0F(0x00, 0xc2, to, from, (u8)CType); }
|
||||
void xImplSimd_Compare::PS(const xRegisterSSE& dst, const xRegisterSSE& src1, const xRegisterSSE& src2) const { EmitSIMD(SIMDInstructionInfo(0xc2).f(), dst, src1, src2, CType); }
|
||||
void xImplSimd_Compare::PS(const xRegisterSSE& dst, const xRegisterSSE& src1, const xIndirectVoid& src2) const { EmitSIMD(SIMDInstructionInfo(0xc2).f(), dst, src1, src2, CType); }
|
||||
|
||||
void xImplSimd_Compare::PD(const xRegisterSSE& to, const xRegisterSSE& from) const { xOpWrite0F(0x66, 0xc2, to, from, (u8)CType); }
|
||||
void xImplSimd_Compare::PD(const xRegisterSSE& to, const xIndirectVoid& from) const { xOpWrite0F(0x66, 0xc2, to, from, (u8)CType); }
|
||||
void xImplSimd_Compare::PD(const xRegisterSSE& dst, const xRegisterSSE& src1, const xRegisterSSE& src2) const { EmitSIMD(SIMDInstructionInfo(0xc2).d().p66(), dst, src1, src2, CType); }
|
||||
void xImplSimd_Compare::PD(const xRegisterSSE& dst, const xRegisterSSE& src1, const xIndirectVoid& src2) const { EmitSIMD(SIMDInstructionInfo(0xc2).d().p66(), dst, src1, src2, CType); }
|
||||
|
||||
void xImplSimd_Compare::SS(const xRegisterSSE& to, const xRegisterSSE& from) const { xOpWrite0F(0xf3, 0xc2, to, from, (u8)CType); }
|
||||
void xImplSimd_Compare::SS(const xRegisterSSE& to, const xIndirectVoid& from) const { xOpWrite0F(0xf3, 0xc2, to, from, (u8)CType); }
|
||||
void xImplSimd_Compare::SS(const xRegisterSSE& dst, const xRegisterSSE& src1, const xRegisterSSE& src2) const { EmitSIMD(SIMDInstructionInfo(0xc2).f().pf3(), dst, src1, src2, CType); }
|
||||
void xImplSimd_Compare::SS(const xRegisterSSE& dst, const xRegisterSSE& src1, const xIndirectVoid& src2) const { EmitSIMD(SIMDInstructionInfo(0xc2).f().pf3(), dst, src1, src2, CType); }
|
||||
|
||||
void xImplSimd_Compare::SD(const xRegisterSSE& to, const xRegisterSSE& from) const { xOpWrite0F(0xf2, 0xc2, to, from, (u8)CType); }
|
||||
void xImplSimd_Compare::SD(const xRegisterSSE& to, const xIndirectVoid& from) const { xOpWrite0F(0xf2, 0xc2, to, from, (u8)CType); }
|
||||
void xImplSimd_Compare::SD(const xRegisterSSE& dst, const xRegisterSSE& src1, const xRegisterSSE& src2) const { EmitSIMD(SIMDInstructionInfo(0xc2).d().pf2(), dst, src1, src2, CType); }
|
||||
void xImplSimd_Compare::SD(const xRegisterSSE& dst, const xRegisterSSE& src1, const xIndirectVoid& src2) const { EmitSIMD(SIMDInstructionInfo(0xc2).d().pf2(), dst, src1, src2, CType); }
|
||||
|
||||
const xImplSimd_MinMax xMIN =
|
||||
{
|
||||
{0x00, 0x5d}, // PS
|
||||
{0x66, 0x5d}, // PD
|
||||
{0xf3, 0x5d}, // SS
|
||||
{0xf2, 0x5d}, // SD
|
||||
{
|
||||
{SIMDInstructionInfo(0x5d).f()}, // PS
|
||||
{SIMDInstructionInfo(0x5d).d().p66()}, // PD
|
||||
{SIMDInstructionInfo(0x5d).f().pf3()}, // SS
|
||||
{SIMDInstructionInfo(0x5d).d().pf2()}, // SD
|
||||
};
|
||||
|
||||
const xImplSimd_MinMax xMAX =
|
||||
{
|
||||
{0x00, 0x5f}, // PS
|
||||
{0x66, 0x5f}, // PD
|
||||
{0xf3, 0x5f}, // SS
|
||||
{0xf2, 0x5f}, // SD
|
||||
{
|
||||
{SIMDInstructionInfo(0x5f).f()}, // PS
|
||||
{SIMDInstructionInfo(0x5f).d().p66()}, // PD
|
||||
{SIMDInstructionInfo(0x5f).f().pf3()}, // SS
|
||||
{SIMDInstructionInfo(0x5f).d().pf2()}, // SD
|
||||
};
|
||||
|
||||
// [TODO] : Merge these into the xCMP class, so that they use the notation: xCMP.EQ
|
||||
@ -486,15 +486,15 @@ namespace x86Emitter
|
||||
const xImplSimd_Compare xCMPORD = {SSE2_Ordered};
|
||||
|
||||
const xImplSimd_COMI xCOMI =
|
||||
{
|
||||
{0x00, 0x2f}, // SS
|
||||
{0x66, 0x2f}, // SD
|
||||
{
|
||||
{SIMDInstructionInfo(0x2f)}, // SS
|
||||
{SIMDInstructionInfo(0x2f).p66()}, // SD
|
||||
};
|
||||
|
||||
const xImplSimd_COMI xUCOMI =
|
||||
{
|
||||
{0x00, 0x2e}, // SS
|
||||
{0x66, 0x2e}, // SD
|
||||
{
|
||||
{SIMDInstructionInfo(0x2e)}, // SS
|
||||
{SIMDInstructionInfo(0x2e).p66()}, // SD
|
||||
};
|
||||
|
||||
const xImplSimd_PCompare xPCMP =
|
||||
|
||||
@ -240,6 +240,23 @@ TEST(CodegenTests, SSETest)
|
||||
CODEGEN_TEST(xROUND.SS(xmm5, xmm2, 2), "66 0f 3a 0a ea 02");
|
||||
CODEGEN_TEST(xROUND.SD(xmm8, xmm2, 3), "66 44 0f 3a 0b c2 03");
|
||||
|
||||
CODEGEN_TEST(xCMPEQ.PS(xmm4, xmm8), "41 0f c2 e0 00");
|
||||
CODEGEN_TEST(xCMPLT.PD(xmm6, xmm9), "66 41 0f c2 f1 01");
|
||||
CODEGEN_TEST(xCMPLE.SS(xmm2, xmm5), "f3 0f c2 d5 02");
|
||||
CODEGEN_TEST(xCMPNE.SD(xmm1, xmm9), "f2 41 0f c2 c9 04");
|
||||
CODEGEN_TEST(xMIN.PS(xmm2, xmm8), "41 0f 5d d0");
|
||||
CODEGEN_TEST(xMIN.PD(xmm3, ptr[rax]), "66 0f 5d 18");
|
||||
CODEGEN_TEST(xMIN.SS(xmm8, xmm2), "f3 44 0f 5d c2");
|
||||
CODEGEN_TEST(xMIN.SD(xmm1, ptr[r8]), "f2 41 0f 5d 08");
|
||||
CODEGEN_TEST(xMAX.PS(xmm2, xmm8), "41 0f 5f d0");
|
||||
CODEGEN_TEST(xMAX.PD(xmm3, ptr[rax]), "66 0f 5f 18");
|
||||
CODEGEN_TEST(xMAX.SS(xmm8, xmm2), "f3 44 0f 5f c2");
|
||||
CODEGEN_TEST(xMAX.SD(xmm1, ptr[r8]), "f2 41 0f 5f 08");
|
||||
CODEGEN_TEST(xCOMI.SS(xmm2, xmm8), "41 0f 2f d0");
|
||||
CODEGEN_TEST(xCOMI.SD(xmm3, ptr[r8]), "66 41 0f 2f 18");
|
||||
CODEGEN_TEST(xUCOMI.SS(xmm8, xmm2), "44 0f 2e c2");
|
||||
CODEGEN_TEST(xUCOMI.SD(xmm2, xmm3), "66 0f 2e d3");
|
||||
|
||||
CODEGEN_TEST(xMOVAPS(xmm0, xmm1), "0f 28 c1");
|
||||
CODEGEN_TEST(xMOVAPS(xmm8, xmm9), "45 0f 28 c1");
|
||||
CODEGEN_TEST(xMOVUPS(xmm8, ptr128[r8+r9]), "47 0f 10 04 08");
|
||||
@ -361,6 +378,23 @@ TEST(CodegenTests, AVXTest)
|
||||
CODEGEN_TEST(xROUND.SS(xmm5, xmm2, 2), "c4 e3 51 0a ea 02");
|
||||
CODEGEN_TEST(xROUND.SD(xmm8, xmm2, 3), "c4 63 39 0b c2 03");
|
||||
|
||||
CODEGEN_TEST(xCMPEQ.PS(xmm4, xmm8), "c4 c1 58 c2 e0 00");
|
||||
CODEGEN_TEST(xCMPLT.PD(xmm6, xmm9), "c4 c1 49 c2 f1 01");
|
||||
CODEGEN_TEST(xCMPLE.SS(xmm2, xmm5), "c5 ea c2 d5 02");
|
||||
CODEGEN_TEST(xCMPNE.SD(xmm1, xmm9), "c4 c1 73 c2 c9 04");
|
||||
CODEGEN_TEST(xMIN.PS(xmm2, xmm8), "c4 c1 68 5d d0");
|
||||
CODEGEN_TEST(xMIN.PD(xmm3, ptr[rax]), "c5 e1 5d 18");
|
||||
CODEGEN_TEST(xMIN.SS(xmm8, xmm2), "c5 3a 5d c2");
|
||||
CODEGEN_TEST(xMIN.SD(xmm1, ptr[r8]), "c4 c1 73 5d 08");
|
||||
CODEGEN_TEST(xMAX.PS(xmm2, xmm8), "c4 c1 68 5f d0");
|
||||
CODEGEN_TEST(xMAX.PD(xmm3, ptr[rax]), "c5 e1 5f 18");
|
||||
CODEGEN_TEST(xMAX.SS(xmm8, xmm2), "c5 3a 5f c2");
|
||||
CODEGEN_TEST(xMAX.SD(xmm1, ptr[r8]), "c4 c1 73 5f 08");
|
||||
CODEGEN_TEST(xCOMI.SS(xmm2, xmm8), "c4 c1 78 2f d0");
|
||||
CODEGEN_TEST(xCOMI.SD(xmm3, ptr[r8]), "c4 c1 79 2f 18");
|
||||
CODEGEN_TEST(xUCOMI.SS(xmm8, xmm2), "c5 78 2e c2");
|
||||
CODEGEN_TEST(xUCOMI.SD(xmm2, xmm3), "c5 f9 2e d3");
|
||||
|
||||
CODEGEN_TEST(xVMOVAPS(xmm0, xmm1), "c5 f8 28 c1");
|
||||
CODEGEN_TEST(xVMOVAPS(xmm0, ptr32[rdi]), "c5 f8 28 07");
|
||||
CODEGEN_TEST(xVMOVAPS(ptr32[rdi], xmm0), "c5 f8 29 07");
|
||||
|
||||
Loading…
Reference in New Issue
Block a user