Common: Switch pabs/psign/pmadd instructions to auto SSE/AVX

This commit is contained in:
TellowKrinkle 2025-06-01 22:38:05 -05:00 committed by TellowKrinkle
parent 62a5cd98da
commit ddefb8a393
3 changed files with 37 additions and 19 deletions

View File

@ -133,15 +133,15 @@ namespace x86Emitter
{
// [sSSE-3] Computes the absolute value of bytes in the src, and stores the result
// in dest, as UNSIGNED.
const xImplSimd_DestRegEither B;
const xImplSimd_2Arg B;
// [sSSE-3] Computes the absolute value of words in the src, and stores the result
// in dest, as UNSIGNED.
const xImplSimd_DestRegEither W;
const xImplSimd_2Arg W;
// [sSSE-3] Computes the absolute value of doublewords in the src, and stores the
// result in dest, as UNSIGNED.
const xImplSimd_DestRegEither D;
const xImplSimd_2Arg D;
};
//////////////////////////////////////////////////////////////////////////////////////////
@ -155,21 +155,21 @@ namespace x86Emitter
// of a data element in src is positive, the corresponding data element in dest is
// unchanged. If a data element in src is zero, the corresponding data element in
// dest is set to zero.
const xImplSimd_DestRegEither B;
const xImplSimd_3Arg B;
// [sSSE-3] negates each word element of dest if the signed integer value of the
// corresponding data element in src is less than zero. If the signed integer value
// of a data element in src is positive, the corresponding data element in dest is
// unchanged. If a data element in src is zero, the corresponding data element in
// dest is set to zero.
const xImplSimd_DestRegEither W;
const xImplSimd_3Arg W;
// [sSSE-3] negates each doubleword element of dest if the signed integer value
// of the corresponding data element in src is less than zero. If the signed integer
// value of a data element in src is positive, the corresponding data element in dest
// is unchanged. If a data element in src is zero, the corresponding data element in
// dest is set to zero.
const xImplSimd_DestRegEither D;
const xImplSimd_3Arg D;
};
//////////////////////////////////////////////////////////////////////////////////////////
@ -185,7 +185,7 @@ namespace x86Emitter
// DEST[63:32] = ( DEST[47:32] * SRC[47:32]) + (DEST[63:48] * SRC[63:48] );
// [.. repeat in the case of XMM src/dest operands ..]
//
const xImplSimd_DestRegEither WD;
const xImplSimd_3Arg WD;
// [sSSE-3] multiplies vertically each unsigned byte of dest with the corresponding
// signed byte of src, producing intermediate signed 16-bit integers. Each adjacent
@ -201,7 +201,7 @@ namespace x86Emitter
// DEST[31-16] = SaturateToSignedWord( SRC[31-24] * DEST[31-24] + SRC[23-16] * DEST[23-16] );
// [.. repeat for each 16 bits up to 64 (mmx) or 128 (xmm) ..]
//
const xImplSimd_DestRegEither UBSW;
const xImplSimd_3Arg UBSW;
};
//////////////////////////////////////////////////////////////////////////////////////////

View File

@ -404,22 +404,22 @@ namespace x86Emitter
// Encoding table for the packed absolute-value emitters, one row per element width
// (B = bytes, W = words, D = doublewords).
// NOTE(review): this listing is a diff rendered without +/- markers. The two-field
// `{0x66, ...}` rows look like the outgoing initializers and the SIMDInstructionInfo
// rows their replacements -- confirm against the real file.
// The replacement rows carry opcode bytes 0x1c / 0x1d / 0x1e; p66() and m0f38()
// presumably select the 0x66 prefix and the 0f 38 opcode map, and i() presumably
// tags the instruction as integer-domain -- verify against SIMDInstructionInfo.
const xImplSimd_PAbsolute xPABS =
{
{0x66, 0x1c38}, // B
{0x66, 0x1d38}, // W
{0x66, 0x1e38} // D
{SIMDInstructionInfo(0x1c).p66().m0f38().i()}, // B
{SIMDInstructionInfo(0x1d).p66().m0f38().i()}, // W
{SIMDInstructionInfo(0x1e).p66().m0f38().i()}, // D
};
// Encoding table for the packed sign emitters, one row per element width
// (B = bytes, W = words, D = doublewords).
// NOTE(review): this listing is a diff rendered without +/- markers. The two-field
// `{0x66, ...}` rows look like the outgoing initializers and the SIMDInstructionInfo
// rows their replacements -- confirm against the real file.
// The replacement rows carry opcode bytes 0x08 / 0x09 / 0x0a; p66() and m0f38()
// presumably select the 0x66 prefix and the 0f 38 opcode map -- verify against
// SIMDInstructionInfo. None of the rows is marked commutative().
const xImplSimd_PSign xPSIGN =
{
{0x66, 0x0838}, // B
{0x66, 0x0938}, // W
{0x66, 0x0a38}, // D
{SIMDInstructionInfo(0x08).p66().m0f38().i()}, // B
{SIMDInstructionInfo(0x09).p66().m0f38().i()}, // W
{SIMDInstructionInfo(0x0a).p66().m0f38().i()}, // D
};
// Encoding table for the packed multiply-add emitters (xPMADD.WD and xPMADD.UBSW).
// NOTE(review): this listing is a diff rendered without +/- markers. The two-field
// `{0x66, ...}` rows look like the outgoing initializers and the SIMDInstructionInfo
// rows their replacements -- confirm against the real file.
const xImplSimd_PMultAdd xPMADD =
{
{0x66, 0xf5}, // WD
{0x66, 0xf438}, // UBSW
// WD's formula is symmetric in dest and src, so the emitter may exchange the two
// sources when that yields a better encoding.
{SIMDInstructionInfo(0xf5).p66().i().commutative()}, // WD
// UBSW must NOT be marked commutative: it reads dest's bytes as unsigned and src's
// bytes as signed, so exchanging the operands would change the computed result.
{SIMDInstructionInfo(0x04).p66().m0f38().i()}, // UBSW
};
const xImplSimd_HorizAdd xHADD =

View File

@ -222,6 +222,15 @@ TEST(CodegenTests, SSETest)
CODEGEN_TEST(xANDN.PS(xmm6, ptr[rdi]), "0f 55 37");
CODEGEN_TEST(xANDN.PD(xmm3, xmm8), "66 41 0f 55 d8");
CODEGEN_TEST(xPABS.B(xmm0, xmm2), "66 0f 38 1c c2");
CODEGEN_TEST(xPABS.W(xmm4, xmm8), "66 41 0f 38 1d e0");
CODEGEN_TEST(xPABS.D(xmm6, ptr[rax]), "66 0f 38 1e 30");
CODEGEN_TEST(xPSIGN.B(xmm0, xmm2), "66 0f 38 08 c2");
CODEGEN_TEST(xPSIGN.W(xmm4, xmm8), "66 41 0f 38 09 e0");
CODEGEN_TEST(xPSIGN.D(xmm2, ptr[r8]), "66 41 0f 38 0a 10");
CODEGEN_TEST(xPMADD.WD(xmm0, xmm8), "66 41 0f f5 c0");
CODEGEN_TEST(xPMADD.UBSW(xmm0, xmm8), "66 41 0f 38 04 c0");
CODEGEN_TEST(xMOVAPS(xmm0, xmm1), "0f 28 c1");
CODEGEN_TEST(xMOVAPS(xmm8, xmm9), "45 0f 28 c1");
CODEGEN_TEST(xMOVUPS(xmm8, ptr128[r8+r9]), "47 0f 10 04 08");
@ -325,6 +334,15 @@ TEST(CodegenTests, AVXTest)
CODEGEN_TEST(xANDN.PS(xmm6, ptr[rdi]), "c5 c8 55 37");
CODEGEN_TEST(xANDN.PD(xmm3, xmm8), "c4 c1 61 55 d8");
CODEGEN_TEST(xPABS.B(xmm0, xmm2), "c4 e2 79 1c c2");
CODEGEN_TEST(xPABS.W(xmm4, xmm8), "c4 c2 79 1d e0");
CODEGEN_TEST(xPABS.D(xmm6, ptr[rax]), "c4 e2 79 1e 30");
CODEGEN_TEST(xPSIGN.B(xmm0, xmm2), "c4 e2 79 08 c2");
CODEGEN_TEST(xPSIGN.W(xmm4, xmm8), "c4 c2 59 09 e0");
CODEGEN_TEST(xPSIGN.D(xmm2, ptr[r8]), "c4 c2 69 0a 10");
CODEGEN_TEST(xPMADD.WD(xmm0, xmm8), "c5 b9 f5 c0"); // => vpmaddwd xmm0, xmm8, xmm0
CODEGEN_TEST(xPMADD.UBSW(xmm0, xmm8), "c4 c2 79 04 c0");
CODEGEN_TEST(xVMOVAPS(xmm0, xmm1), "c5 f8 28 c1");
CODEGEN_TEST(xVMOVAPS(xmm0, ptr32[rdi]), "c5 f8 28 07");
CODEGEN_TEST(xVMOVAPS(ptr32[rdi], xmm0), "c5 f8 29 07");