mirror of
https://github.com/PCSX2/pcsx2.git
synced 2025-12-16 04:08:48 +00:00
Common: Switch simd integer compare instructions to auto SSE/AVX
This commit is contained in:
parent
e97f03ed8b
commit
86b5de8785
@ -55,36 +55,35 @@ namespace x86Emitter
|
||||
//
|
||||
struct xImplSimd_PCompare
|
||||
{
|
||||
public:
|
||||
// Compare packed bytes for equality.
|
||||
// If a data element in dest is equal to the corresponding data element in src, the
|
||||
// corresponding data element in dest is set to all 1s; otherwise, it is set to all 0s.
|
||||
const xImplSimd_DestRegEither EQB;
|
||||
const xImplSimd_3Arg EQB;
|
||||
|
||||
// Compare packed words for equality.
|
||||
// If a data element in dest is equal to the corresponding data element in src, the
|
||||
// corresponding data element in dest is set to all 1s; otherwise, it is set to all 0s.
|
||||
const xImplSimd_DestRegEither EQW;
|
||||
const xImplSimd_3Arg EQW;
|
||||
|
||||
// Compare packed doublewords [32-bits] for equality.
|
||||
// If a data element in dest is equal to the corresponding data element in src, the
|
||||
// corresponding data element in dest is set to all 1s; otherwise, it is set to all 0s.
|
||||
const xImplSimd_DestRegEither EQD;
|
||||
const xImplSimd_3Arg EQD;
|
||||
|
||||
// Compare packed signed bytes for greater than.
|
||||
// If a data element in dest is greater than the corresponding data element in src, the
|
||||
// corresponding data element in dest is set to all 1s; otherwise, it is set to all 0s.
|
||||
const xImplSimd_DestRegEither GTB;
|
||||
const xImplSimd_3Arg GTB;
|
||||
|
||||
// Compare packed signed words for greater than.
|
||||
// If a data element in dest is greater than the corresponding data element in src, the
|
||||
// corresponding data element in dest is set to all 1s; otherwise, it is set to all 0s.
|
||||
const xImplSimd_DestRegEither GTW;
|
||||
const xImplSimd_3Arg GTW;
|
||||
|
||||
// Compare packed signed doublewords [32-bits] for greater than.
|
||||
// If a data element in dest is greater than the corresponding data element in src, the
|
||||
// corresponding data element in dest is set to all 1s; otherwise, it is set to all 0s.
|
||||
const xImplSimd_DestRegEither GTD;
|
||||
const xImplSimd_3Arg GTD;
|
||||
};
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////////////////////
|
||||
@ -93,27 +92,27 @@ namespace x86Emitter
|
||||
{
|
||||
// Compare packed unsigned byte integers in dest to src and store packed min/max
|
||||
// values in dest.
|
||||
const xImplSimd_DestRegEither UB;
|
||||
const xImplSimd_3Arg UB;
|
||||
|
||||
// Compare packed signed word integers in dest to src and store packed min/max
|
||||
// values in dest.
|
||||
const xImplSimd_DestRegEither SW;
|
||||
const xImplSimd_3Arg SW;
|
||||
|
||||
// [SSE-4.1] Compare packed signed byte integers in dest to src and store
|
||||
// packed min/max values in dest. (SSE operands only)
|
||||
const xImplSimd_DestRegSSE SB;
|
||||
const xImplSimd_3Arg SB;
|
||||
|
||||
// [SSE-4.1] Compare packed signed doubleword integers in dest to src and store
|
||||
// packed min/max values in dest. (SSE operands only)
|
||||
const xImplSimd_DestRegSSE SD;
|
||||
const xImplSimd_3Arg SD;
|
||||
|
||||
// [SSE-4.1] Compare packed unsigned word integers in dest to src and store
|
||||
// packed min/max values in dest. (SSE operands only)
|
||||
const xImplSimd_DestRegSSE UW;
|
||||
const xImplSimd_3Arg UW;
|
||||
|
||||
// [SSE-4.1] Compare packed unsigned doubleword integers in dest to src and store
|
||||
// packed min/max values in dest. (SSE operands only)
|
||||
const xImplSimd_DestRegSSE UD;
|
||||
const xImplSimd_3Arg UD;
|
||||
};
|
||||
|
||||
} // end namespace x86Emitter
|
||||
|
||||
@ -500,36 +500,34 @@ namespace x86Emitter
|
||||
};
|
||||
|
||||
const xImplSimd_PCompare xPCMP =
|
||||
{
|
||||
{0x66, 0x74}, // EQB
|
||||
{0x66, 0x75}, // EQW
|
||||
{0x66, 0x76}, // EQD
|
||||
{
|
||||
{SIMDInstructionInfo(0x74).i().p66().commutative()}, // EQB
|
||||
{SIMDInstructionInfo(0x75).i().p66().commutative()}, // EQW
|
||||
{SIMDInstructionInfo(0x76).i().p66().commutative()}, // EQD
|
||||
|
||||
{0x66, 0x64}, // GTB
|
||||
{0x66, 0x65}, // GTW
|
||||
{0x66, 0x66}, // GTD
|
||||
{SIMDInstructionInfo(0x64).i().p66()}, // GTB
|
||||
{SIMDInstructionInfo(0x65).i().p66()}, // GTW
|
||||
{SIMDInstructionInfo(0x66).i().p66()}, // GTD
|
||||
};
|
||||
|
||||
const xImplSimd_PMinMax xPMIN =
|
||||
{
|
||||
{0x66, 0xda}, // UB
|
||||
{0x66, 0xea}, // SW
|
||||
{0x66, 0x3838}, // SB
|
||||
{0x66, 0x3938}, // SD
|
||||
|
||||
{0x66, 0x3a38}, // UW
|
||||
{0x66, 0x3b38}, // UD
|
||||
{
|
||||
{SIMDInstructionInfo(0xda).i().p66().commutative()}, // UB
|
||||
{SIMDInstructionInfo(0xea).i().p66().commutative()}, // SW
|
||||
{SIMDInstructionInfo(0x38).i().p66().m0f38().commutative()}, // SB
|
||||
{SIMDInstructionInfo(0x39).i().p66().m0f38().commutative()}, // SD
|
||||
{SIMDInstructionInfo(0x3a).i().p66().m0f38().commutative()}, // UW
|
||||
{SIMDInstructionInfo(0x3b).i().p66().m0f38().commutative()}, // UD
|
||||
};
|
||||
|
||||
const xImplSimd_PMinMax xPMAX =
|
||||
{
|
||||
{0x66, 0xde}, // UB
|
||||
{0x66, 0xee}, // SW
|
||||
{0x66, 0x3c38}, // SB
|
||||
{0x66, 0x3d38}, // SD
|
||||
|
||||
{0x66, 0x3e38}, // UW
|
||||
{0x66, 0x3f38}, // UD
|
||||
{
|
||||
{SIMDInstructionInfo(0xde).i().p66().commutative()}, // UB
|
||||
{SIMDInstructionInfo(0xee).i().p66().commutative()}, // SW
|
||||
{SIMDInstructionInfo(0x3c).i().p66().m0f38().commutative()}, // SB
|
||||
{SIMDInstructionInfo(0x3d).i().p66().m0f38().commutative()}, // SD
|
||||
{SIMDInstructionInfo(0x3e).i().p66().m0f38().commutative()}, // UW
|
||||
{SIMDInstructionInfo(0x3f).i().p66().m0f38().commutative()}, // UD
|
||||
};
|
||||
|
||||
// =====================================================================================================
|
||||
|
||||
@ -257,6 +257,25 @@ TEST(CodegenTests, SSETest)
|
||||
CODEGEN_TEST(xUCOMI.SS(xmm8, xmm2), "44 0f 2e c2");
|
||||
CODEGEN_TEST(xUCOMI.SD(xmm2, xmm3), "66 0f 2e d3");
|
||||
|
||||
CODEGEN_TEST(xPCMP.EQB(xmm0, xmm8), "66 41 0f 74 c0");
|
||||
CODEGEN_TEST(xPCMP.EQW(xmm4, ptr[r8]), "66 41 0f 75 20");
|
||||
CODEGEN_TEST(xPCMP.EQD(xmm3, xmm4), "66 0f 76 dc");
|
||||
CODEGEN_TEST(xPCMP.GTB(xmm0, xmm8), "66 41 0f 64 c0");
|
||||
CODEGEN_TEST(xPCMP.GTW(xmm4, ptr[r8]), "66 41 0f 65 20");
|
||||
CODEGEN_TEST(xPCMP.GTD(xmm3, xmm4), "66 0f 66 dc");
|
||||
CODEGEN_TEST(xPMIN.UB(xmm0, xmm8), "66 41 0f da c0");
|
||||
CODEGEN_TEST(xPMIN.SW(xmm4, ptr[rcx]), "66 0f ea 21");
|
||||
CODEGEN_TEST(xPMIN.SB(xmm3, xmm4), "66 0f 38 38 dc");
|
||||
CODEGEN_TEST(xPMIN.SD(xmm8, xmm3), "66 44 0f 38 39 c3");
|
||||
CODEGEN_TEST(xPMIN.UW(xmm4, xmm9), "66 41 0f 38 3a e1");
|
||||
CODEGEN_TEST(xPMIN.UD(xmm2, ptr[r10]), "66 41 0f 38 3b 12");
|
||||
CODEGEN_TEST(xPMAX.UB(xmm0, xmm8), "66 41 0f de c0");
|
||||
CODEGEN_TEST(xPMAX.SW(xmm4, ptr[rcx]), "66 0f ee 21");
|
||||
CODEGEN_TEST(xPMAX.SB(xmm3, xmm4), "66 0f 38 3c dc");
|
||||
CODEGEN_TEST(xPMAX.SD(xmm8, xmm3), "66 44 0f 38 3d c3");
|
||||
CODEGEN_TEST(xPMAX.UW(xmm4, xmm9), "66 41 0f 38 3e e1");
|
||||
CODEGEN_TEST(xPMAX.UD(xmm2, ptr[r10]), "66 41 0f 38 3f 12");
|
||||
|
||||
CODEGEN_TEST(xMOVAPS(xmm0, xmm1), "0f 28 c1");
|
||||
CODEGEN_TEST(xMOVAPS(xmm8, xmm9), "45 0f 28 c1");
|
||||
CODEGEN_TEST(xMOVUPS(xmm8, ptr128[r8+r9]), "47 0f 10 04 08");
|
||||
@ -395,6 +414,25 @@ TEST(CodegenTests, AVXTest)
|
||||
CODEGEN_TEST(xUCOMI.SS(xmm8, xmm2), "c5 78 2e c2");
|
||||
CODEGEN_TEST(xUCOMI.SD(xmm2, xmm3), "c5 f9 2e d3");
|
||||
|
||||
CODEGEN_TEST(xPCMP.EQB(xmm0, xmm8), "c5 b9 74 c0"); // => vpcmpeqb xmm0, xmm8, xmm0
|
||||
CODEGEN_TEST(xPCMP.EQW(xmm4, ptr[r8]), "c4 c1 59 75 20");
|
||||
CODEGEN_TEST(xPCMP.EQD(xmm3, xmm4), "c5 e1 76 dc");
|
||||
CODEGEN_TEST(xPCMP.GTB(xmm0, xmm8), "c4 c1 79 64 c0");
|
||||
CODEGEN_TEST(xPCMP.GTW(xmm4, ptr[r8]), "c4 c1 59 65 20");
|
||||
CODEGEN_TEST(xPCMP.GTD(xmm3, xmm4), "c5 e1 66 dc");
|
||||
CODEGEN_TEST(xPMIN.UB(xmm0, xmm8), "c5 b9 da c0"); // => vpminub xmm0, xmm8, xmm0
|
||||
CODEGEN_TEST(xPMIN.SW(xmm4, ptr[rcx]), "c5 d9 ea 21");
|
||||
CODEGEN_TEST(xPMIN.SB(xmm3, xmm4), "c4 e2 61 38 dc");
|
||||
CODEGEN_TEST(xPMIN.SD(xmm8, xmm3), "c4 62 39 39 c3");
|
||||
CODEGEN_TEST(xPMIN.UW(xmm4, xmm9), "c4 c2 59 3a e1");
|
||||
CODEGEN_TEST(xPMIN.UD(xmm2, ptr[r10]), "c4 c2 69 3b 12");
|
||||
CODEGEN_TEST(xPMAX.UB(xmm0, xmm8), "c5 b9 de c0"); // => vpmaxub xmm0, xmm8, xmm0
|
||||
CODEGEN_TEST(xPMAX.SW(xmm4, ptr[rcx]), "c5 d9 ee 21");
|
||||
CODEGEN_TEST(xPMAX.SB(xmm3, xmm4), "c4 e2 61 3c dc");
|
||||
CODEGEN_TEST(xPMAX.SD(xmm8, xmm3), "c4 62 39 3d c3");
|
||||
CODEGEN_TEST(xPMAX.UW(xmm4, xmm9), "c4 c2 59 3e e1");
|
||||
CODEGEN_TEST(xPMAX.UD(xmm2, ptr[r10]), "c4 c2 69 3f 12");
|
||||
|
||||
CODEGEN_TEST(xVMOVAPS(xmm0, xmm1), "c5 f8 28 c1");
|
||||
CODEGEN_TEST(xVMOVAPS(xmm0, ptr32[rdi]), "c5 f8 28 07");
|
||||
CODEGEN_TEST(xVMOVAPS(ptr32[rdi], xmm0), "c5 f8 29 07");
|
||||
|
||||
Loading…
Reference in New Issue
Block a user