Common: Switch sqrt/rsqrt/andn instructions to auto SSE/AVX

This commit is contained in:
TellowKrinkle 2025-06-01 22:03:38 -05:00 committed by TellowKrinkle
parent 8ad9d7d047
commit 62a5cd98da
3 changed files with 45 additions and 21 deletions

View File

@ -103,26 +103,27 @@ namespace x86Emitter
//
struct xImplSimd_rSqrt
{
const xImplSimd_DestRegSSE PS;
const xImplSimd_DestRegSSE SS;
const xImplSimd_2Arg PS;
const xImplSimd_3Arg SS;
};
//////////////////////////////////////////////////////////////////////////////////////////
// SQRT has PS/SS/SD forms, but not the PD form.
// SQRT has PS/SS/PD/SD forms
//
struct xImplSimd_Sqrt
{
const xImplSimd_DestRegSSE PS;
const xImplSimd_DestRegSSE SS;
const xImplSimd_DestRegSSE SD;
const xImplSimd_2Arg PS;
const xImplSimd_3Arg SS;
const xImplSimd_2Arg PD;
const xImplSimd_3Arg SD;
};
//////////////////////////////////////////////////////////////////////////////////////////
//
struct xImplSimd_AndNot
{
const xImplSimd_DestRegSSE PS;
const xImplSimd_DestRegSSE PD;
const xImplSimd_3Arg PS;
const xImplSimd_3Arg PD;
};
//////////////////////////////////////////////////////////////////////////////////////////

View File

@ -377,28 +377,29 @@ namespace x86Emitter
};
const xImplSimd_rSqrt xRSQRT =
{
{0x00, 0x52}, // PS
{0xf3, 0x52} // SS
{
{SIMDInstructionInfo(0x52)}, // PS
{SIMDInstructionInfo(0x52).pf3()}, // SS
};
const xImplSimd_rSqrt xRCP =
{
{0x00, 0x53}, // PS
{0xf3, 0x53} // SS
{
{SIMDInstructionInfo(0x53)}, // PS
{SIMDInstructionInfo(0x53).pf3()}, // SS
};
const xImplSimd_Sqrt xSQRT =
{
{0x00, 0x51}, // PS
{0xf3, 0x51}, // SS
{0xf2, 0x51} // SD
{
{SIMDInstructionInfo(0x51)}, // PS
{SIMDInstructionInfo(0x51).pf3()}, // SS
{SIMDInstructionInfo(0x51).p66()}, // PD
{SIMDInstructionInfo(0x51).pf2()}, // SD
};
const xImplSimd_AndNot xANDN =
{
{0x00, 0x55}, // PS
{0x66, 0x55} // PD
{
{SIMDInstructionInfo(0x55)}, // PS
{SIMDInstructionInfo(0x55).p66()}, // PD
};
const xImplSimd_PAbsolute xPABS =

View File

@ -211,6 +211,17 @@ TEST(CodegenTests, SSETest)
CODEGEN_TEST(xPMUL.LD(xmm1, xmm8), "66 41 0f 38 40 c8");
CODEGEN_TEST(xPMUL.DQ(xmm4, xmm9), "66 41 0f 38 28 e1");
CODEGEN_TEST(xRSQRT.PS(xmm0, xmm8), "41 0f 52 c0");
CODEGEN_TEST(xRSQRT.SS(xmm4, ptr[r9]), "f3 41 0f 52 21");
CODEGEN_TEST(xRCP.PS(xmm4, ptr[rcx]), "0f 53 21");
CODEGEN_TEST(xRCP.SS(xmm5, xmm8), "f3 41 0f 53 e8");
CODEGEN_TEST(xSQRT.PS(xmm4, xmm2), "0f 51 e2");
CODEGEN_TEST(xSQRT.SS(xmm5, xmm1), "f3 0f 51 e9");
CODEGEN_TEST(xSQRT.PD(xmm7, ptr[rdi]), "66 0f 51 3f");
CODEGEN_TEST(xSQRT.SD(xmm5, xmm2), "f2 0f 51 ea");
CODEGEN_TEST(xANDN.PS(xmm6, ptr[rdi]), "0f 55 37");
CODEGEN_TEST(xANDN.PD(xmm3, xmm8), "66 41 0f 55 d8");
CODEGEN_TEST(xMOVAPS(xmm0, xmm1), "0f 28 c1");
CODEGEN_TEST(xMOVAPS(xmm8, xmm9), "45 0f 28 c1");
CODEGEN_TEST(xMOVUPS(xmm8, ptr128[r8+r9]), "47 0f 10 04 08");
@ -303,6 +314,17 @@ TEST(CodegenTests, AVXTest)
CODEGEN_TEST(xPMUL.LD(xmm1, xmm8), "c4 c2 71 40 c8");
CODEGEN_TEST(xPMUL.DQ(xmm4, xmm9), "c4 c2 59 28 e1");
CODEGEN_TEST(xRSQRT.PS(xmm0, xmm8), "c4 c1 78 52 c0");
CODEGEN_TEST(xRSQRT.SS(xmm4, ptr[r9]), "c4 c1 5a 52 21");
CODEGEN_TEST(xRCP.PS(xmm4, ptr[rcx]), "c5 f8 53 21");
CODEGEN_TEST(xRCP.SS(xmm5, xmm8), "c4 c1 52 53 e8");
CODEGEN_TEST(xSQRT.PS(xmm4, xmm2), "c5 f8 51 e2");
CODEGEN_TEST(xSQRT.SS(xmm5, xmm1), "c5 d2 51 e9");
CODEGEN_TEST(xSQRT.PD(xmm7, ptr[rdi]), "c5 f9 51 3f");
CODEGEN_TEST(xSQRT.SD(xmm5, xmm2), "c5 d3 51 ea");
CODEGEN_TEST(xANDN.PS(xmm6, ptr[rdi]), "c5 c8 55 37");
CODEGEN_TEST(xANDN.PD(xmm3, xmm8), "c4 c1 61 55 d8");
CODEGEN_TEST(xVMOVAPS(xmm0, xmm1), "c5 f8 28 c1");
CODEGEN_TEST(xVMOVAPS(xmm0, ptr32[rdi]), "c5 f8 28 07");
CODEGEN_TEST(xVMOVAPS(ptr32[rdi], xmm0), "c5 f8 29 07");