From 62a5cd98da0c38d56c3bf80a5c071beab74bde04 Mon Sep 17 00:00:00 2001 From: TellowKrinkle Date: Sun, 1 Jun 2025 22:03:38 -0500 Subject: [PATCH] Common: Switch sqrt/rsqrt/andn instructions to auto SSE/AVX --- common/emitter/implement/simd_arithmetic.h | 17 ++++++------ common/emitter/simd.cpp | 27 ++++++++++--------- .../common/x86emitter/codegen_tests_main.cpp | 22 +++++++++++++++ 3 files changed, 45 insertions(+), 21 deletions(-) diff --git a/common/emitter/implement/simd_arithmetic.h b/common/emitter/implement/simd_arithmetic.h index ef9993f213..0e9630e38b 100644 --- a/common/emitter/implement/simd_arithmetic.h +++ b/common/emitter/implement/simd_arithmetic.h @@ -103,26 +103,27 @@ namespace x86Emitter // struct xImplSimd_rSqrt { - const xImplSimd_DestRegSSE PS; - const xImplSimd_DestRegSSE SS; + const xImplSimd_2Arg PS; + const xImplSimd_3Arg SS; }; ////////////////////////////////////////////////////////////////////////////////////////// - // SQRT has PS/SS/SD forms, but not the PD form. 
+ // SQRT has PS/SS/PD/SD forms // struct xImplSimd_Sqrt { - const xImplSimd_DestRegSSE PS; - const xImplSimd_DestRegSSE SS; - const xImplSimd_DestRegSSE SD; + const xImplSimd_2Arg PS; + const xImplSimd_3Arg SS; + const xImplSimd_2Arg PD; + const xImplSimd_3Arg SD; }; ////////////////////////////////////////////////////////////////////////////////////////// // struct xImplSimd_AndNot { - const xImplSimd_DestRegSSE PS; - const xImplSimd_DestRegSSE PD; + const xImplSimd_3Arg PS; + const xImplSimd_3Arg PD; }; ////////////////////////////////////////////////////////////////////////////////////////// diff --git a/common/emitter/simd.cpp b/common/emitter/simd.cpp index 6e595f9520..9756a73909 100644 --- a/common/emitter/simd.cpp +++ b/common/emitter/simd.cpp @@ -377,28 +377,29 @@ namespace x86Emitter }; const xImplSimd_rSqrt xRSQRT = - { - {0x00, 0x52}, // PS - {0xf3, 0x52} // SS + { + {SIMDInstructionInfo(0x52)}, // PS + {SIMDInstructionInfo(0x52).pf3()}, // SS }; const xImplSimd_rSqrt xRCP = - { - {0x00, 0x53}, // PS - {0xf3, 0x53} // SS + { + {SIMDInstructionInfo(0x53)}, // PS + {SIMDInstructionInfo(0x53).pf3()}, // SS }; const xImplSimd_Sqrt xSQRT = - { - {0x00, 0x51}, // PS - {0xf3, 0x51}, // SS - {0xf2, 0x51} // SS + { + {SIMDInstructionInfo(0x51)}, // PS + {SIMDInstructionInfo(0x51).pf3()}, // SS + {SIMDInstructionInfo(0x51).p66()}, // PD + {SIMDInstructionInfo(0x51).pf2()}, // SD }; const xImplSimd_AndNot xANDN = - { - {0x00, 0x55}, // PS - {0x66, 0x55} // PD + { + {SIMDInstructionInfo(0x55)}, // PS + {SIMDInstructionInfo(0x55).p66()}, // PD }; const xImplSimd_PAbsolute xPABS = diff --git a/tests/ctest/common/x86emitter/codegen_tests_main.cpp b/tests/ctest/common/x86emitter/codegen_tests_main.cpp index ae66077ae9..976809e409 100644 --- a/tests/ctest/common/x86emitter/codegen_tests_main.cpp +++ b/tests/ctest/common/x86emitter/codegen_tests_main.cpp @@ -211,6 +211,17 @@ TEST(CodegenTests, SSETest) CODEGEN_TEST(xPMUL.LD(xmm1, xmm8), "66 41 0f 38 40 c8");
CODEGEN_TEST(xPMUL.DQ(xmm4, xmm9), "66 41 0f 38 28 e1"); + CODEGEN_TEST(xRSQRT.PS(xmm0, xmm8), "41 0f 52 c0"); + CODEGEN_TEST(xRSQRT.SS(xmm4, ptr[r9]), "f3 41 0f 52 21"); + CODEGEN_TEST(xRCP.PS(xmm4, ptr[rcx]), "0f 53 21"); + CODEGEN_TEST(xRCP.SS(xmm5, xmm8), "f3 41 0f 53 e8"); + CODEGEN_TEST(xSQRT.PS(xmm4, xmm2), "0f 51 e2"); + CODEGEN_TEST(xSQRT.SS(xmm5, xmm1), "f3 0f 51 e9"); + CODEGEN_TEST(xSQRT.PD(xmm7, ptr[rdi]), "66 0f 51 3f"); + CODEGEN_TEST(xSQRT.SD(xmm5, xmm2), "f2 0f 51 ea"); + CODEGEN_TEST(xANDN.PS(xmm6, ptr[rdi]), "0f 55 37"); + CODEGEN_TEST(xANDN.PD(xmm3, xmm8), "66 41 0f 55 d8"); + CODEGEN_TEST(xMOVAPS(xmm0, xmm1), "0f 28 c1"); CODEGEN_TEST(xMOVAPS(xmm8, xmm9), "45 0f 28 c1"); CODEGEN_TEST(xMOVUPS(xmm8, ptr128[r8+r9]), "47 0f 10 04 08"); @@ -303,6 +314,17 @@ TEST(CodegenTests, AVXTest) CODEGEN_TEST(xPMUL.LD(xmm1, xmm8), "c4 c2 71 40 c8"); CODEGEN_TEST(xPMUL.DQ(xmm4, xmm9), "c4 c2 59 28 e1"); + CODEGEN_TEST(xRSQRT.PS(xmm0, xmm8), "c4 c1 78 52 c0"); + CODEGEN_TEST(xRSQRT.SS(xmm4, ptr[r9]), "c4 c1 5a 52 21"); + CODEGEN_TEST(xRCP.PS(xmm4, ptr[rcx]), "c5 f8 53 21"); + CODEGEN_TEST(xRCP.SS(xmm5, xmm8), "c4 c1 52 53 e8"); + CODEGEN_TEST(xSQRT.PS(xmm4, xmm2), "c5 f8 51 e2"); + CODEGEN_TEST(xSQRT.SS(xmm5, xmm1), "c5 d2 51 e9"); + CODEGEN_TEST(xSQRT.PD(xmm7, ptr[rdi]), "c5 f9 51 3f"); + CODEGEN_TEST(xSQRT.SD(xmm5, xmm2), "c5 d3 51 ea"); + CODEGEN_TEST(xANDN.PS(xmm6, ptr[rdi]), "c5 c8 55 37"); + CODEGEN_TEST(xANDN.PD(xmm3, xmm8), "c4 c1 61 55 d8"); + CODEGEN_TEST(xVMOVAPS(xmm0, xmm1), "c5 f8 28 c1"); CODEGEN_TEST(xVMOVAPS(xmm0, ptr32[rdi]), "c5 f8 28 07"); CODEGEN_TEST(xVMOVAPS(ptr32[rdi], xmm0), "c5 f8 29 07");