Common: Switch pabs/psign/pmadd instructions to auto SSE/AVX

This commit is contained in:
TellowKrinkle 2025-06-01 22:38:05 -05:00 committed by TellowKrinkle
parent 62a5cd98da
commit ddefb8a393
3 changed files with 37 additions and 19 deletions

View File

@ -133,15 +133,15 @@ namespace x86Emitter
{ {
// [sSSE-3] Computes the absolute value of bytes in the src, and stores the result // [sSSE-3] Computes the absolute value of bytes in the src, and stores the result
// in dest, as UNSIGNED. // in dest, as UNSIGNED.
const xImplSimd_DestRegEither B; const xImplSimd_2Arg B;
// [sSSE-3] Computes the absolute value of words in the src, and stores the result // [sSSE-3] Computes the absolute value of words in the src, and stores the result
// in dest, as UNSIGNED. // in dest, as UNSIGNED.
const xImplSimd_DestRegEither W; const xImplSimd_2Arg W;
// [sSSE-3] Computes the absolute value of doublewords in the src, and stores the // [sSSE-3] Computes the absolute value of doublewords in the src, and stores the
// result in dest, as UNSIGNED. // result in dest, as UNSIGNED.
const xImplSimd_DestRegEither D; const xImplSimd_2Arg D;
}; };
////////////////////////////////////////////////////////////////////////////////////////// //////////////////////////////////////////////////////////////////////////////////////////
@ -155,21 +155,21 @@ namespace x86Emitter
// of a data element in src is positive, the corresponding data element in dest is // of a data element in src is positive, the corresponding data element in dest is
// unchanged. If a data element in src is zero, the corresponding data element in // unchanged. If a data element in src is zero, the corresponding data element in
// dest is set to zero. // dest is set to zero.
const xImplSimd_DestRegEither B; const xImplSimd_3Arg B;
// [sSSE-3] negates each word element of dest if the signed integer value of the // [sSSE-3] negates each word element of dest if the signed integer value of the
// corresponding data element in src is less than zero. If the signed integer value // corresponding data element in src is less than zero. If the signed integer value
// of a data element in src is positive, the corresponding data element in dest is // of a data element in src is positive, the corresponding data element in dest is
// unchanged. If a data element in src is zero, the corresponding data element in // unchanged. If a data element in src is zero, the corresponding data element in
// dest is set to zero. // dest is set to zero.
const xImplSimd_DestRegEither W; const xImplSimd_3Arg W;
// [sSSE-3] negates each doubleword element of dest if the signed integer value // [sSSE-3] negates each doubleword element of dest if the signed integer value
// of the corresponding data element in src is less than zero. If the signed integer // of the corresponding data element in src is less than zero. If the signed integer
// value of a data element in src is positive, the corresponding data element in dest // value of a data element in src is positive, the corresponding data element in dest
// is unchanged. If a data element in src is zero, the corresponding data element in // is unchanged. If a data element in src is zero, the corresponding data element in
// dest is set to zero. // dest is set to zero.
const xImplSimd_DestRegEither D; const xImplSimd_3Arg D;
}; };
////////////////////////////////////////////////////////////////////////////////////////// //////////////////////////////////////////////////////////////////////////////////////////
@ -185,7 +185,7 @@ namespace x86Emitter
// DEST[63:32] = ( DEST[47:32] * SRC[47:32]) + (DEST[63:48] * SRC[63:48] ); // DEST[63:32] = ( DEST[47:32] * SRC[47:32]) + (DEST[63:48] * SRC[63:48] );
// [.. repeat in the case of XMM src/dest operands ..] // [.. repeat in the case of XMM src/dest operands ..]
// //
const xImplSimd_DestRegEither WD; const xImplSimd_3Arg WD;
// [sSSE-3] multiplies vertically each unsigned byte of dest with the corresponding // [sSSE-3] multiplies vertically each unsigned byte of dest with the corresponding
// signed byte of src, producing intermediate signed 16-bit integers. Each adjacent // signed byte of src, producing intermediate signed 16-bit integers. Each adjacent
@ -201,7 +201,7 @@ namespace x86Emitter
// DEST[31-16] = SaturateToSignedWord( SRC[31-24] * DEST[31-24] + SRC[23-16] * DEST[23-16] ); // DEST[31-16] = SaturateToSignedWord( SRC[31-24] * DEST[31-24] + SRC[23-16] * DEST[23-16] );
// [.. repeat for each 16 bits up to 64 (mmx) or 128 (xmm) ..] // [.. repeat for each 16 bits up to 64 (mmx) or 128 (xmm) ..]
// //
const xImplSimd_DestRegEither UBSW; const xImplSimd_3Arg UBSW;
}; };
////////////////////////////////////////////////////////////////////////////////////////// //////////////////////////////////////////////////////////////////////////////////////////

View File

@ -403,23 +403,23 @@ namespace x86Emitter
}; };
// Encoding table for the packed-absolute-value family, one entry per element width
// (B = bytes, W = words, D = doublewords). Side-by-side view: the left column is the
// previous {prefix, opcode} initializer, the right column is the SIMDInstructionInfo
// form that replaces it. Per the SSE codegen expectations for xPABS, the emitted bytes
// are 66 0F 38 followed by 1c / 1d / 1e.
const xImplSimd_PAbsolute xPABS = const xImplSimd_PAbsolute xPABS =
{ {
{0x66, 0x1c38}, // B {SIMDInstructionInfo(0x1c).p66().m0f38().i()}, // B
{0x66, 0x1d38}, // W {SIMDInstructionInfo(0x1d).p66().m0f38().i()}, // W
{0x66, 0x1e38} // D {SIMDInstructionInfo(0x1e).p66().m0f38().i()}, // D
}; };
// Encoding table for the packed-sign family, one entry per element width
// (B = bytes, W = words, D = doublewords). Side-by-side view: the left column is the
// previous {prefix, opcode} initializer, the right column is the SIMDInstructionInfo
// form that replaces it. Per the SSE codegen expectations for xPSIGN, the emitted bytes
// are 66 0F 38 followed by 08 / 09 / 0a. None of the entries is marked commutative.
const xImplSimd_PSign xPSIGN = const xImplSimd_PSign xPSIGN =
{ {
{0x66, 0x0838}, // B {SIMDInstructionInfo(0x08).p66().m0f38().i()}, // B
{0x66, 0x0938}, // W {SIMDInstructionInfo(0x09).p66().m0f38().i()}, // W
{0x66, 0x0a38}, // D {SIMDInstructionInfo(0x0a).p66().m0f38().i()}, // D
}; };
// Encoding table for the packed multiply-add family.
//
//   WD   - pmaddwd (66 0F F5). Both inputs are read the same way (signed words), so the
//          two sources may be exchanged without changing the result; the AVX codegen
//          test relies on this and expects the swapped form `vpmaddwd xmm0, xmm8, xmm0`.
//   UBSW - pmaddubsw (66 0F 38 04). The bytes of dest are read as UNSIGNED and the bytes
//          of src as SIGNED, so exchanging the operands changes the result. This entry
//          must therefore NOT be marked commutative.
const xImplSimd_PMultAdd xPMADD =
{
	{SIMDInstructionInfo(0xf5).p66().i().commutative()}, // WD
	{SIMDInstructionInfo(0x04).p66().m0f38().i()},       // UBSW (operand order is significant)
};
const xImplSimd_HorizAdd xHADD = const xImplSimd_HorizAdd xHADD =

View File

@ -222,6 +222,15 @@ TEST(CodegenTests, SSETest)
CODEGEN_TEST(xANDN.PS(xmm6, ptr[rdi]), "0f 55 37"); CODEGEN_TEST(xANDN.PS(xmm6, ptr[rdi]), "0f 55 37");
CODEGEN_TEST(xANDN.PD(xmm3, xmm8), "66 41 0f 55 d8"); CODEGEN_TEST(xANDN.PD(xmm3, xmm8), "66 41 0f 55 d8");
CODEGEN_TEST(xPABS.B(xmm0, xmm2), "66 0f 38 1c c2");
CODEGEN_TEST(xPABS.W(xmm4, xmm8), "66 41 0f 38 1d e0");
CODEGEN_TEST(xPABS.D(xmm6, ptr[rax]), "66 0f 38 1e 30");
CODEGEN_TEST(xPSIGN.B(xmm0, xmm2), "66 0f 38 08 c2");
CODEGEN_TEST(xPSIGN.W(xmm4, xmm8), "66 41 0f 38 09 e0");
CODEGEN_TEST(xPSIGN.D(xmm2, ptr[r8]), "66 41 0f 38 0a 10");
CODEGEN_TEST(xPMADD.WD(xmm0, xmm8), "66 41 0f f5 c0");
CODEGEN_TEST(xPMADD.UBSW(xmm0, xmm8), "66 41 0f 38 04 c0");
CODEGEN_TEST(xMOVAPS(xmm0, xmm1), "0f 28 c1"); CODEGEN_TEST(xMOVAPS(xmm0, xmm1), "0f 28 c1");
CODEGEN_TEST(xMOVAPS(xmm8, xmm9), "45 0f 28 c1"); CODEGEN_TEST(xMOVAPS(xmm8, xmm9), "45 0f 28 c1");
CODEGEN_TEST(xMOVUPS(xmm8, ptr128[r8+r9]), "47 0f 10 04 08"); CODEGEN_TEST(xMOVUPS(xmm8, ptr128[r8+r9]), "47 0f 10 04 08");
@ -325,6 +334,15 @@ TEST(CodegenTests, AVXTest)
CODEGEN_TEST(xANDN.PS(xmm6, ptr[rdi]), "c5 c8 55 37"); CODEGEN_TEST(xANDN.PS(xmm6, ptr[rdi]), "c5 c8 55 37");
CODEGEN_TEST(xANDN.PD(xmm3, xmm8), "c4 c1 61 55 d8"); CODEGEN_TEST(xANDN.PD(xmm3, xmm8), "c4 c1 61 55 d8");
CODEGEN_TEST(xPABS.B(xmm0, xmm2), "c4 e2 79 1c c2");
CODEGEN_TEST(xPABS.W(xmm4, xmm8), "c4 c2 79 1d e0");
CODEGEN_TEST(xPABS.D(xmm6, ptr[rax]), "c4 e2 79 1e 30");
CODEGEN_TEST(xPSIGN.B(xmm0, xmm2), "c4 e2 79 08 c2");
CODEGEN_TEST(xPSIGN.W(xmm4, xmm8), "c4 c2 59 09 e0");
CODEGEN_TEST(xPSIGN.D(xmm2, ptr[r8]), "c4 c2 69 0a 10");
CODEGEN_TEST(xPMADD.WD(xmm0, xmm8), "c5 b9 f5 c0"); // => vpmaddwd xmm0, xmm8, xmm0
CODEGEN_TEST(xPMADD.UBSW(xmm0, xmm8), "c4 c2 79 04 c0");
CODEGEN_TEST(xVMOVAPS(xmm0, xmm1), "c5 f8 28 c1"); CODEGEN_TEST(xVMOVAPS(xmm0, xmm1), "c5 f8 28 c1");
CODEGEN_TEST(xVMOVAPS(xmm0, ptr32[rdi]), "c5 f8 28 07"); CODEGEN_TEST(xVMOVAPS(xmm0, ptr32[rdi]), "c5 f8 28 07");
CODEGEN_TEST(xVMOVAPS(ptr32[rdi], xmm0), "c5 f8 29 07"); CODEGEN_TEST(xVMOVAPS(ptr32[rdi], xmm0), "c5 f8 29 07");