From 0a76c5a64d35a7bdddf09b8d750eda9444fd693d Mon Sep 17 00:00:00 2001 From: TellowKrinkle Date: Sun, 1 Jun 2025 23:27:51 -0500 Subject: [PATCH] Common: Switch fp compare instructions to auto SSE/AVX --- common/emitter/implement/simd_comparisons.h | 36 ++++++++------ common/emitter/simd.cpp | 48 +++++++++---------- .../common/x86emitter/codegen_tests_main.cpp | 34 +++++++++++++ 3 files changed, 80 insertions(+), 38 deletions(-) diff --git a/common/emitter/implement/simd_comparisons.h b/common/emitter/implement/simd_comparisons.h index 596d1cf3e7..42da63435a 100644 --- a/common/emitter/implement/simd_comparisons.h +++ b/common/emitter/implement/simd_comparisons.h @@ -8,10 +8,10 @@ namespace x86Emitter struct xImplSimd_MinMax { - const xImplSimd_DestRegSSE PS; // packed single precision - const xImplSimd_DestRegSSE PD; // packed double precision - const xImplSimd_DestRegSSE SS; // scalar single precision - const xImplSimd_DestRegSSE SD; // scalar double precision + const xImplSimd_3Arg PS; // packed single precision + const xImplSimd_3Arg PD; // packed double precision + const xImplSimd_3Arg SS; // scalar single precision + const xImplSimd_3Arg SD; // scalar double precision }; ////////////////////////////////////////////////////////////////////////////////////////// @@ -20,17 +20,25 @@ namespace x86Emitter { SSE2_ComparisonType CType; - void PS(const xRegisterSSE& to, const xRegisterSSE& from) const; - void PS(const xRegisterSSE& to, const xIndirectVoid& from) const; + void PS(const xRegisterSSE& dst, const xRegisterSSE& src) const { PS(dst, dst, src); } + void PS(const xRegisterSSE& dst, const xIndirectVoid& src) const { PS(dst, dst, src); } + void PS(const xRegisterSSE& dst, const xRegisterSSE& src1, const xRegisterSSE& src2) const; + void PS(const xRegisterSSE& dst, const xRegisterSSE& src1, const xIndirectVoid& src2) const; - void PD(const xRegisterSSE& to, const xRegisterSSE& from) const; - void PD(const xRegisterSSE& to, const xIndirectVoid& from) const; + void PD(const xRegisterSSE& dst, const xRegisterSSE& src) const { PD(dst, dst, src); } + void PD(const xRegisterSSE& dst, const xIndirectVoid& src) const { PD(dst, dst, src); } + void PD(const xRegisterSSE& dst, const xRegisterSSE& src1, const xRegisterSSE& src2) const; + void PD(const xRegisterSSE& dst, const xRegisterSSE& src1, const xIndirectVoid& src2) const; - void SS(const xRegisterSSE& to, const xRegisterSSE& from) const; - void SS(const xRegisterSSE& to, const xIndirectVoid& from) const; + void SS(const xRegisterSSE& dst, const xRegisterSSE& src) const { SS(dst, dst, src); } + void SS(const xRegisterSSE& dst, const xIndirectVoid& src) const { SS(dst, dst, src); } + void SS(const xRegisterSSE& dst, const xRegisterSSE& src1, const xRegisterSSE& src2) const; + void SS(const xRegisterSSE& dst, const xRegisterSSE& src1, const xIndirectVoid& src2) const; - void SD(const xRegisterSSE& to, const xRegisterSSE& from) const; - void SD(const xRegisterSSE& to, const xIndirectVoid& from) const; + void SD(const xRegisterSSE& dst, const xRegisterSSE& src) const { SD(dst, dst, src); } + void SD(const xRegisterSSE& dst, const xIndirectVoid& src) const { SD(dst, dst, src); } + void SD(const xRegisterSSE& dst, const xRegisterSSE& src1, const xRegisterSSE& src2) const; + void SD(const xRegisterSSE& dst, const xRegisterSSE& src1, const xIndirectVoid& src2) const; }; ////////////////////////////////////////////////////////////////////////////////////////// @@ -38,8 +46,8 @@ namespace x86Emitter // struct xImplSimd_COMI { - const xImplSimd_DestRegSSE SS; - const xImplSimd_DestRegSSE SD; + const xImplSimd_2Arg SS; + const xImplSimd_2Arg SD; }; diff --git a/common/emitter/simd.cpp b/common/emitter/simd.cpp index 785597f24a..50222bfcf6 100644 --- a/common/emitter/simd.cpp +++ b/common/emitter/simd.cpp @@ -446,32 +446,32 @@ namespace x86Emitter // SIMD Comparison Instructions // ===================================================================================================== - void xImplSimd_Compare::PS(const xRegisterSSE& to, const xRegisterSSE& from) const { xOpWrite0F(0x00, 0xc2, to, from, (u8)CType); } - void xImplSimd_Compare::PS(const xRegisterSSE& to, const xIndirectVoid& from) const { xOpWrite0F(0x00, 0xc2, to, from, (u8)CType); } + void xImplSimd_Compare::PS(const xRegisterSSE& dst, const xRegisterSSE& src1, const xRegisterSSE& src2) const { EmitSIMD(SIMDInstructionInfo(0xc2).f(), dst, src1, src2, CType); } + void xImplSimd_Compare::PS(const xRegisterSSE& dst, const xRegisterSSE& src1, const xIndirectVoid& src2) const { EmitSIMD(SIMDInstructionInfo(0xc2).f(), dst, src1, src2, CType); } - void xImplSimd_Compare::PD(const xRegisterSSE& to, const xRegisterSSE& from) const { xOpWrite0F(0x66, 0xc2, to, from, (u8)CType); } - void xImplSimd_Compare::PD(const xRegisterSSE& to, const xIndirectVoid& from) const { xOpWrite0F(0x66, 0xc2, to, from, (u8)CType); } + void xImplSimd_Compare::PD(const xRegisterSSE& dst, const xRegisterSSE& src1, const xRegisterSSE& src2) const { EmitSIMD(SIMDInstructionInfo(0xc2).d().p66(), dst, src1, src2, CType); } + void xImplSimd_Compare::PD(const xRegisterSSE& dst, const xRegisterSSE& src1, const xIndirectVoid& src2) const { EmitSIMD(SIMDInstructionInfo(0xc2).d().p66(), dst, src1, src2, CType); } - void xImplSimd_Compare::SS(const xRegisterSSE& to, const xRegisterSSE& from) const { xOpWrite0F(0xf3, 0xc2, to, from, (u8)CType); } - void xImplSimd_Compare::SS(const xRegisterSSE& to, const xIndirectVoid& from) const { xOpWrite0F(0xf3, 0xc2, to, from, (u8)CType); } + void xImplSimd_Compare::SS(const xRegisterSSE& dst, const xRegisterSSE& src1, const xRegisterSSE& src2) const { EmitSIMD(SIMDInstructionInfo(0xc2).f().pf3(), dst, src1, src2, CType); } + void xImplSimd_Compare::SS(const xRegisterSSE& dst, const xRegisterSSE& src1, const xIndirectVoid& src2) const { EmitSIMD(SIMDInstructionInfo(0xc2).f().pf3(), dst, src1, src2, CType); } - void xImplSimd_Compare::SD(const xRegisterSSE& to, const xRegisterSSE& from) const { xOpWrite0F(0xf2, 0xc2, to, from, (u8)CType); } - void xImplSimd_Compare::SD(const xRegisterSSE& to, const xIndirectVoid& from) const { xOpWrite0F(0xf2, 0xc2, to, from, (u8)CType); } + void xImplSimd_Compare::SD(const xRegisterSSE& dst, const xRegisterSSE& src1, const xRegisterSSE& src2) const { EmitSIMD(SIMDInstructionInfo(0xc2).d().pf2(), dst, src1, src2, CType); } + void xImplSimd_Compare::SD(const xRegisterSSE& dst, const xRegisterSSE& src1, const xIndirectVoid& src2) const { EmitSIMD(SIMDInstructionInfo(0xc2).d().pf2(), dst, src1, src2, CType); } const xImplSimd_MinMax xMIN = - { - {0x00, 0x5d}, // PS - {0x66, 0x5d}, // PD - {0xf3, 0x5d}, // SS - {0xf2, 0x5d}, // SD + { + {SIMDInstructionInfo(0x5d).f()}, // PS + {SIMDInstructionInfo(0x5d).d().p66()}, // PD + {SIMDInstructionInfo(0x5d).f().pf3()}, // SS + {SIMDInstructionInfo(0x5d).d().pf2()}, // SD }; const xImplSimd_MinMax xMAX = - { - {0x00, 0x5f}, // PS - {0x66, 0x5f}, // PD - {0xf3, 0x5f}, // SS - {0xf2, 0x5f}, // SD + { + {SIMDInstructionInfo(0x5f).f()}, // PS + {SIMDInstructionInfo(0x5f).d().p66()}, // PD + {SIMDInstructionInfo(0x5f).f().pf3()}, // SS + {SIMDInstructionInfo(0x5f).d().pf2()}, // SD }; // [TODO] : Merge this into the xCMP class, so that they are notation as: xCMP.EQ @@ -486,15 +486,15 @@ namespace x86Emitter const xImplSimd_Compare xCMPORD = {SSE2_Ordered}; const xImplSimd_COMI xCOMI = - { - {0x00, 0x2f}, // SS - {0x66, 0x2f}, // SD + { + {SIMDInstructionInfo(0x2f)}, // SS + {SIMDInstructionInfo(0x2f).p66()}, // SD }; const xImplSimd_COMI xUCOMI = - { - {0x00, 0x2e}, // SS - {0x66, 0x2e}, // SD + { + {SIMDInstructionInfo(0x2e)}, // SS + {SIMDInstructionInfo(0x2e).p66()}, // SD }; const xImplSimd_PCompare xPCMP = diff --git a/tests/ctest/common/x86emitter/codegen_tests_main.cpp b/tests/ctest/common/x86emitter/codegen_tests_main.cpp index 864180622d..24d838fd22 100644 --- a/tests/ctest/common/x86emitter/codegen_tests_main.cpp +++ b/tests/ctest/common/x86emitter/codegen_tests_main.cpp @@ -240,6 +240,23 @@ TEST(CodegenTests, SSETest) CODEGEN_TEST(xROUND.SS(xmm5, xmm2, 2), "66 0f 3a 0a ea 02"); CODEGEN_TEST(xROUND.SD(xmm8, xmm2, 3), "66 44 0f 3a 0b c2 03"); + CODEGEN_TEST(xCMPEQ.PS(xmm4, xmm8), "41 0f c2 e0 00"); + CODEGEN_TEST(xCMPLT.PD(xmm6, xmm9), "66 41 0f c2 f1 01"); + CODEGEN_TEST(xCMPLE.SS(xmm2, xmm5), "f3 0f c2 d5 02"); + CODEGEN_TEST(xCMPNE.SD(xmm1, xmm9), "f2 41 0f c2 c9 04"); + CODEGEN_TEST(xMIN.PS(xmm2, xmm8), "41 0f 5d d0"); + CODEGEN_TEST(xMIN.PD(xmm3, ptr[rax]), "66 0f 5d 18"); + CODEGEN_TEST(xMIN.SS(xmm8, xmm2), "f3 44 0f 5d c2"); + CODEGEN_TEST(xMIN.SD(xmm1, ptr[r8]), "f2 41 0f 5d 08"); + CODEGEN_TEST(xMAX.PS(xmm2, xmm8), "41 0f 5f d0"); + CODEGEN_TEST(xMAX.PD(xmm3, ptr[rax]), "66 0f 5f 18"); + CODEGEN_TEST(xMAX.SS(xmm8, xmm2), "f3 44 0f 5f c2"); + CODEGEN_TEST(xMAX.SD(xmm1, ptr[r8]), "f2 41 0f 5f 08"); + CODEGEN_TEST(xCOMI.SS(xmm2, xmm8), "41 0f 2f d0"); + CODEGEN_TEST(xCOMI.SD(xmm3, ptr[r8]), "66 41 0f 2f 18"); + CODEGEN_TEST(xUCOMI.SS(xmm8, xmm2), "44 0f 2e c2"); + CODEGEN_TEST(xUCOMI.SD(xmm2, xmm3), "66 0f 2e d3"); + CODEGEN_TEST(xMOVAPS(xmm0, xmm1), "0f 28 c1"); CODEGEN_TEST(xMOVAPS(xmm8, xmm9), "45 0f 28 c1"); CODEGEN_TEST(xMOVUPS(xmm8, ptr128[r8+r9]), "47 0f 10 04 08"); @@ -361,6 +378,23 @@ TEST(CodegenTests, AVXTest) CODEGEN_TEST(xROUND.SS(xmm5, xmm2, 2), "c4 e3 51 0a ea 02"); CODEGEN_TEST(xROUND.SD(xmm8, xmm2, 3), "c4 63 39 0b c2 03"); + CODEGEN_TEST(xCMPEQ.PS(xmm4, xmm8), "c4 c1 58 c2 e0 00"); + CODEGEN_TEST(xCMPLT.PD(xmm6, xmm9), "c4 c1 49 c2 f1 01"); + CODEGEN_TEST(xCMPLE.SS(xmm2, xmm5), "c5 ea c2 d5 02"); + CODEGEN_TEST(xCMPNE.SD(xmm1, xmm9), "c4 c1 73 c2 c9 04"); + CODEGEN_TEST(xMIN.PS(xmm2, xmm8), "c4 c1 68 5d d0"); + CODEGEN_TEST(xMIN.PD(xmm3, ptr[rax]), "c5 e1 5d 18"); + CODEGEN_TEST(xMIN.SS(xmm8, xmm2), "c5 3a 5d c2"); + CODEGEN_TEST(xMIN.SD(xmm1, ptr[r8]), "c4 c1 73 5d 08"); + CODEGEN_TEST(xMAX.PS(xmm2, xmm8), "c4 c1 68 5f d0"); + CODEGEN_TEST(xMAX.PD(xmm3, ptr[rax]), "c5 e1 5f 18"); + CODEGEN_TEST(xMAX.SS(xmm8, xmm2), "c5 3a 5f c2"); + CODEGEN_TEST(xMAX.SD(xmm1, ptr[r8]), "c4 c1 73 5f 08"); + CODEGEN_TEST(xCOMI.SS(xmm2, xmm8), "c4 c1 78 2f d0"); + CODEGEN_TEST(xCOMI.SD(xmm3, ptr[r8]), "c4 c1 79 2f 18"); + CODEGEN_TEST(xUCOMI.SS(xmm8, xmm2), "c5 78 2e c2"); + CODEGEN_TEST(xUCOMI.SD(xmm2, xmm3), "c5 f9 2e d3"); + CODEGEN_TEST(xVMOVAPS(xmm0, xmm1), "c5 f8 28 c1"); CODEGEN_TEST(xVMOVAPS(xmm0, ptr32[rdi]), "c5 f8 28 07"); CODEGEN_TEST(xVMOVAPS(ptr32[rdi], xmm0), "c5 f8 29 07");