Common: Switch simd integer compare instructions to auto SSE/AVX

This commit is contained in:
TellowKrinkle 2025-06-01 23:45:28 -05:00 committed by TellowKrinkle
parent e97f03ed8b
commit 86b5de8785
3 changed files with 71 additions and 36 deletions

View File

@ -55,36 +55,35 @@ namespace x86Emitter
//
// Groups the packed integer compare emitters: EQB/EQW/EQD compare for equality and
// GTB/GTW/GTD compare signed elements for greater-than, at byte, word and doubleword width.
// NOTE(review): each member is declared twice below, once as xImplSimd_DestRegEither and once
// as xImplSimd_3Arg; this looks like the before/after lines of a change shown together --
// confirm which declaration is the live one.
struct xImplSimd_PCompare
{
public:
// Compare packed bytes for equality.
// If a data element in dest is equal to the corresponding data element in src, the
// corresponding data element in dest is set to all 1s; otherwise, it is set to all 0s.
const xImplSimd_DestRegEither EQB;
const xImplSimd_3Arg EQB;
// Compare packed words for equality.
// If a data element in dest is equal to the corresponding data element in src, the
// corresponding data element in dest is set to all 1s; otherwise, it is set to all 0s.
const xImplSimd_DestRegEither EQW;
const xImplSimd_3Arg EQW;
// Compare packed doublewords [32-bits] for equality.
// If a data element in dest is equal to the corresponding data element in src, the
// corresponding data element in dest is set to all 1s; otherwise, it is set to all 0s.
const xImplSimd_DestRegEither EQD;
const xImplSimd_3Arg EQD;
// Compare packed signed bytes for greater than.
// If a data element in dest is greater than the corresponding data element in src, the
// corresponding data element in dest is set to all 1s; otherwise, it is set to all 0s.
const xImplSimd_DestRegEither GTB;
const xImplSimd_3Arg GTB;
// Compare packed signed words for greater than.
// If a data element in dest is greater than the corresponding data element in src, the
// corresponding data element in dest is set to all 1s; otherwise, it is set to all 0s.
const xImplSimd_DestRegEither GTW;
const xImplSimd_3Arg GTW;
// Compare packed signed doublewords [32-bits] for greater than.
// If a data element in dest is greater than the corresponding data element in src, the
// corresponding data element in dest is set to all 1s; otherwise, it is set to all 0s.
const xImplSimd_DestRegEither GTD;
const xImplSimd_3Arg GTD;
};
//////////////////////////////////////////////////////////////////////////////////////////
@ -93,27 +92,27 @@ namespace x86Emitter
{
// Compare packed unsigned byte integers in dest to src and store packed min/max
// values in dest.
const xImplSimd_DestRegEither UB;
const xImplSimd_3Arg UB;
// Compare packed signed word integers in dest to src and store packed min/max
// values in dest.
const xImplSimd_DestRegEither SW;
const xImplSimd_3Arg SW;
// [SSE-4.1] Compare packed signed byte integers in dest to src and store
// packed min/max values in dest. (SSE operands only)
const xImplSimd_DestRegSSE SB;
const xImplSimd_3Arg SB;
// [SSE-4.1] Compare packed signed doubleword integers in dest to src and store
// packed min/max values in dest. (SSE operands only)
const xImplSimd_DestRegSSE SD;
const xImplSimd_3Arg SD;
// [SSE-4.1] Compare packed unsigned word integers in dest to src and store
// packed min/max values in dest. (SSE operands only)
const xImplSimd_DestRegSSE UW;
const xImplSimd_3Arg UW;
// [SSE-4.1] Compare packed unsigned doubleword integers in dest to src and store
// packed min/max values in dest. (SSE operands only)
const xImplSimd_DestRegSSE UD;
const xImplSimd_3Arg UD;
};
} // end namespace x86Emitter

View File

@ -500,36 +500,34 @@ namespace x86Emitter
};
// Encoding table for the packed integer compare group. Entries are listed in the order
// EQB, EQW, EQD, GTB, GTW, GTD, as labelled by the trailing comments.
// NOTE(review): every entry appears in two forms -- a {0x66, opcode} pair and a
// SIMDInstructionInfo(opcode) chain using the same opcode byte -- and the opening brace is
// repeated; this looks like before/after lines of a change shown together. Confirm which
// form is live before editing.
const xImplSimd_PCompare xPCMP =
{
{0x66, 0x74}, // EQB
{0x66, 0x75}, // EQW
{0x66, 0x76}, // EQD
{
// Only the equality compares are flagged commutative(); the greater-than entries below are not.
// Presumably the flag allows the emitter to swap the two source operands -- TODO confirm
// against the SIMDInstructionInfo definition.
{SIMDInstructionInfo(0x74).i().p66().commutative()}, // EQB
{SIMDInstructionInfo(0x75).i().p66().commutative()}, // EQW
{SIMDInstructionInfo(0x76).i().p66().commutative()}, // EQD
{0x66, 0x64}, // GTB
{0x66, 0x65}, // GTW
{0x66, 0x66}, // GTD
{SIMDInstructionInfo(0x64).i().p66()}, // GTB
{SIMDInstructionInfo(0x65).i().p66()}, // GTW
{SIMDInstructionInfo(0x66).i().p66()}, // GTD
};
// Encoding table for the packed integer minimum group. Entries are listed in the order
// UB, SW, SB, SD, UW, UD, as labelled by the trailing comments.
// NOTE(review): every entry appears in two forms -- a {0x66, opcode} pair and a
// SIMDInstructionInfo(opcode) chain -- and the opening brace is repeated; this looks like
// before/after lines of a change shown together. Confirm which form is live before editing.
const xImplSimd_PMinMax xPMIN =
{
{0x66, 0xda}, // UB
{0x66, 0xea}, // SW
// In the pair form, SB/SD/UW/UD carry 0x38 in the low byte and the opcode in the high byte
// (0x3938 pairs with SIMDInstructionInfo(0x39) below, and so on).
{0x66, 0x3838}, // SB
{0x66, 0x3938}, // SD
{0x66, 0x3a38}, // UW
{0x66, 0x3b38}, // UD
{
{SIMDInstructionInfo(0xda).i().p66().commutative()}, // UB
{SIMDInstructionInfo(0xea).i().p66().commutative()}, // SW
// The SIMDInstructionInfo form passes the opcode byte alone and chains m0f38() instead;
// presumably that selects the 0F 38 opcode map -- TODO confirm.
{SIMDInstructionInfo(0x38).i().p66().m0f38().commutative()}, // SB
{SIMDInstructionInfo(0x39).i().p66().m0f38().commutative()}, // SD
{SIMDInstructionInfo(0x3a).i().p66().m0f38().commutative()}, // UW
{SIMDInstructionInfo(0x3b).i().p66().m0f38().commutative()}, // UD
};
// Encoding table for the packed integer maximum group. Entries are listed in the order
// UB, SW, SB, SD, UW, UD, as labelled by the trailing comments.
// NOTE(review): every entry appears in two forms -- a {0x66, opcode} pair and a
// SIMDInstructionInfo(opcode) chain -- and the opening brace is repeated; this looks like
// before/after lines of a change shown together. Confirm which form is live before editing.
const xImplSimd_PMinMax xPMAX =
{
{0x66, 0xde}, // UB
{0x66, 0xee}, // SW
// In the pair form, SB/SD/UW/UD carry 0x38 in the low byte and the opcode in the high byte
// (0x3d38 pairs with SIMDInstructionInfo(0x3d) below, and so on).
{0x66, 0x3c38}, // SB
{0x66, 0x3d38}, // SD
{0x66, 0x3e38}, // UW
{0x66, 0x3f38}, // UD
{
{SIMDInstructionInfo(0xde).i().p66().commutative()}, // UB
{SIMDInstructionInfo(0xee).i().p66().commutative()}, // SW
// The SIMDInstructionInfo form passes the opcode byte alone and chains m0f38() instead;
// presumably that selects the 0F 38 opcode map -- TODO confirm.
{SIMDInstructionInfo(0x3c).i().p66().m0f38().commutative()}, // SB
{SIMDInstructionInfo(0x3d).i().p66().m0f38().commutative()}, // SD
{SIMDInstructionInfo(0x3e).i().p66().m0f38().commutative()}, // UW
{SIMDInstructionInfo(0x3f).i().p66().m0f38().commutative()}, // UD
};
// =====================================================================================================

View File

@ -257,6 +257,25 @@ TEST(CodegenTests, SSETest)
CODEGEN_TEST(xUCOMI.SS(xmm8, xmm2), "44 0f 2e c2");
CODEGEN_TEST(xUCOMI.SD(xmm2, xmm3), "66 0f 2e d3");
CODEGEN_TEST(xPCMP.EQB(xmm0, xmm8), "66 41 0f 74 c0");
CODEGEN_TEST(xPCMP.EQW(xmm4, ptr[r8]), "66 41 0f 75 20");
CODEGEN_TEST(xPCMP.EQD(xmm3, xmm4), "66 0f 76 dc");
CODEGEN_TEST(xPCMP.GTB(xmm0, xmm8), "66 41 0f 64 c0");
CODEGEN_TEST(xPCMP.GTW(xmm4, ptr[r8]), "66 41 0f 65 20");
CODEGEN_TEST(xPCMP.GTD(xmm3, xmm4), "66 0f 66 dc");
CODEGEN_TEST(xPMIN.UB(xmm0, xmm8), "66 41 0f da c0");
CODEGEN_TEST(xPMIN.SW(xmm4, ptr[rcx]), "66 0f ea 21");
CODEGEN_TEST(xPMIN.SB(xmm3, xmm4), "66 0f 38 38 dc");
CODEGEN_TEST(xPMIN.SD(xmm8, xmm3), "66 44 0f 38 39 c3");
CODEGEN_TEST(xPMIN.UW(xmm4, xmm9), "66 41 0f 38 3a e1");
CODEGEN_TEST(xPMIN.UD(xmm2, ptr[r10]), "66 41 0f 38 3b 12");
CODEGEN_TEST(xPMAX.UB(xmm0, xmm8), "66 41 0f de c0");
CODEGEN_TEST(xPMAX.SW(xmm4, ptr[rcx]), "66 0f ee 21");
CODEGEN_TEST(xPMAX.SB(xmm3, xmm4), "66 0f 38 3c dc");
CODEGEN_TEST(xPMAX.SD(xmm8, xmm3), "66 44 0f 38 3d c3");
CODEGEN_TEST(xPMAX.UW(xmm4, xmm9), "66 41 0f 38 3e e1");
CODEGEN_TEST(xPMAX.UD(xmm2, ptr[r10]), "66 41 0f 38 3f 12");
CODEGEN_TEST(xMOVAPS(xmm0, xmm1), "0f 28 c1");
CODEGEN_TEST(xMOVAPS(xmm8, xmm9), "45 0f 28 c1");
CODEGEN_TEST(xMOVUPS(xmm8, ptr128[r8+r9]), "47 0f 10 04 08");
@ -395,6 +414,25 @@ TEST(CodegenTests, AVXTest)
CODEGEN_TEST(xUCOMI.SS(xmm8, xmm2), "c5 78 2e c2");
CODEGEN_TEST(xUCOMI.SD(xmm2, xmm3), "c5 f9 2e d3");
CODEGEN_TEST(xPCMP.EQB(xmm0, xmm8), "c5 b9 74 c0"); // => vpcmpeqb xmm0, xmm8, xmm0
CODEGEN_TEST(xPCMP.EQW(xmm4, ptr[r8]), "c4 c1 59 75 20");
CODEGEN_TEST(xPCMP.EQD(xmm3, xmm4), "c5 e1 76 dc");
CODEGEN_TEST(xPCMP.GTB(xmm0, xmm8), "c4 c1 79 64 c0");
CODEGEN_TEST(xPCMP.GTW(xmm4, ptr[r8]), "c4 c1 59 65 20");
CODEGEN_TEST(xPCMP.GTD(xmm3, xmm4), "c5 e1 66 dc");
CODEGEN_TEST(xPMIN.UB(xmm0, xmm8), "c5 b9 da c0"); // => vpminub xmm0, xmm8, xmm0
CODEGEN_TEST(xPMIN.SW(xmm4, ptr[rcx]), "c5 d9 ea 21");
CODEGEN_TEST(xPMIN.SB(xmm3, xmm4), "c4 e2 61 38 dc");
CODEGEN_TEST(xPMIN.SD(xmm8, xmm3), "c4 62 39 39 c3");
CODEGEN_TEST(xPMIN.UW(xmm4, xmm9), "c4 c2 59 3a e1");
CODEGEN_TEST(xPMIN.UD(xmm2, ptr[r10]), "c4 c2 69 3b 12");
CODEGEN_TEST(xPMAX.UB(xmm0, xmm8), "c5 b9 de c0"); // => vpmaxub xmm0, xmm8, xmm0
CODEGEN_TEST(xPMAX.SW(xmm4, ptr[rcx]), "c5 d9 ee 21");
CODEGEN_TEST(xPMAX.SB(xmm3, xmm4), "c4 e2 61 3c dc");
CODEGEN_TEST(xPMAX.SD(xmm8, xmm3), "c4 62 39 3d c3");
CODEGEN_TEST(xPMAX.UW(xmm4, xmm9), "c4 c2 59 3e e1");
CODEGEN_TEST(xPMAX.UD(xmm2, ptr[r10]), "c4 c2 69 3f 12");
CODEGEN_TEST(xVMOVAPS(xmm0, xmm1), "c5 f8 28 c1");
CODEGEN_TEST(xVMOVAPS(xmm0, ptr32[rdi]), "c5 f8 28 07");
CODEGEN_TEST(xVMOVAPS(ptr32[rdi], xmm0), "c5 f8 29 07");