mirror of
https://github.com/PCSX2/pcsx2.git
synced 2025-12-16 04:08:48 +00:00
Common: Switch pabs/psign/pmadd instructions to auto SSE/AVX
This commit is contained in:
parent
62a5cd98da
commit
ddefb8a393
@ -133,15 +133,15 @@ namespace x86Emitter
|
||||
{
|
||||
// [sSSE-3] Computes the absolute value of bytes in the src, and stores the result
|
||||
// in dest, as UNSIGNED.
|
||||
const xImplSimd_DestRegEither B;
|
||||
const xImplSimd_2Arg B;
|
||||
|
||||
// [sSSE-3] Computes the absolute value of words in the src, and stores the result
|
||||
// in dest, as UNSIGNED.
|
||||
const xImplSimd_DestRegEither W;
|
||||
const xImplSimd_2Arg W;
|
||||
|
||||
// [sSSE-3] Computes the absolute value of doublewords in the src, and stores the
|
||||
// result in dest, as UNSIGNED.
|
||||
const xImplSimd_DestRegEither D;
|
||||
const xImplSimd_2Arg D;
|
||||
};
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////////////////////
|
||||
@ -155,21 +155,21 @@ namespace x86Emitter
|
||||
// of a data element in src is positive, the corresponding data element in dest is
|
||||
// unchanged. If a data element in src is zero, the corresponding data element in
|
||||
// dest is set to zero.
|
||||
const xImplSimd_DestRegEither B;
|
||||
const xImplSimd_3Arg B;
|
||||
|
||||
// [sSSE-3] negates each word element of dest if the signed integer value of the
|
||||
// corresponding data element in src is less than zero. If the signed integer value
|
||||
// of a data element in src is positive, the corresponding data element in dest is
|
||||
// unchanged. If a data element in src is zero, the corresponding data element in
|
||||
// dest is set to zero.
|
||||
const xImplSimd_DestRegEither W;
|
||||
const xImplSimd_3Arg W;
|
||||
|
||||
// [sSSE-3] negates each doubleword element of dest if the signed integer value
|
||||
// of the corresponding data element in src is less than zero. If the signed integer
|
||||
// value of a data element in src is positive, the corresponding data element in dest
|
||||
// is unchanged. If a data element in src is zero, the corresponding data element in
|
||||
// dest is set to zero.
|
||||
const xImplSimd_DestRegEither D;
|
||||
const xImplSimd_3Arg D;
|
||||
};
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////////////////////
|
||||
@ -185,7 +185,7 @@ namespace x86Emitter
|
||||
// DEST[63:32] = ( DEST[47:32] * SRC[47:32]) + (DEST[63:48] * SRC[63:48] );
|
||||
// [.. repeat in the case of XMM src/dest operands ..]
|
||||
//
|
||||
const xImplSimd_DestRegEither WD;
|
||||
const xImplSimd_3Arg WD;
|
||||
|
||||
// [sSSE-3] multiplies vertically each unsigned byte of dest with the corresponding
|
||||
// signed byte of src, producing intermediate signed 16-bit integers. Each adjacent
|
||||
@ -201,7 +201,7 @@ namespace x86Emitter
|
||||
// DEST[31-16] = SaturateToSignedWord( SRC[31-24] * DEST[31-24] + SRC[23-16] * DEST[23-16] );
|
||||
// [.. repeat for each 16 bits up to 64 (mmx) or 128 (xmm) ..]
|
||||
//
|
||||
const xImplSimd_DestRegEither UBSW;
|
||||
const xImplSimd_3Arg UBSW;
|
||||
};
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
@ -403,23 +403,23 @@ namespace x86Emitter
|
||||
};
|
||||
|
||||
const xImplSimd_PAbsolute xPABS =
|
||||
{
|
||||
{0x66, 0x1c38}, // B
|
||||
{0x66, 0x1d38}, // W
|
||||
{0x66, 0x1e38} // D
|
||||
{
|
||||
{SIMDInstructionInfo(0x1c).p66().m0f38().i()}, // B
|
||||
{SIMDInstructionInfo(0x1d).p66().m0f38().i()}, // W
|
||||
{SIMDInstructionInfo(0x1e).p66().m0f38().i()}, // D
|
||||
};
|
||||
|
||||
const xImplSimd_PSign xPSIGN =
|
||||
{
|
||||
{0x66, 0x0838}, // B
|
||||
{0x66, 0x0938}, // W
|
||||
{0x66, 0x0a38}, // D
|
||||
{
|
||||
{SIMDInstructionInfo(0x08).p66().m0f38().i()}, // B
|
||||
{SIMDInstructionInfo(0x09).p66().m0f38().i()}, // W
|
||||
{SIMDInstructionInfo(0x0a).p66().m0f38().i()}, // D
|
||||
};
|
||||
|
||||
const xImplSimd_PMultAdd xPMADD =
|
||||
{
|
||||
{0x66, 0xf5}, // WD
|
||||
{0x66, 0xf438}, // UBSW
|
||||
{
|
||||
{SIMDInstructionInfo(0xf5).p66().i().commutative()}, // WD
|
||||
{SIMDInstructionInfo(0x04).p66().m0f38().i()}, // UBSW (not commutative: unsigned dest bytes * signed src bytes)
|
||||
};
|
||||
|
||||
const xImplSimd_HorizAdd xHADD =
|
||||
|
||||
@ -222,6 +222,15 @@ TEST(CodegenTests, SSETest)
|
||||
CODEGEN_TEST(xANDN.PS(xmm6, ptr[rdi]), "0f 55 37");
|
||||
CODEGEN_TEST(xANDN.PD(xmm3, xmm8), "66 41 0f 55 d8");
|
||||
|
||||
CODEGEN_TEST(xPABS.B(xmm0, xmm2), "66 0f 38 1c c2");
|
||||
CODEGEN_TEST(xPABS.W(xmm4, xmm8), "66 41 0f 38 1d e0");
|
||||
CODEGEN_TEST(xPABS.D(xmm6, ptr[rax]), "66 0f 38 1e 30");
|
||||
CODEGEN_TEST(xPSIGN.B(xmm0, xmm2), "66 0f 38 08 c2");
|
||||
CODEGEN_TEST(xPSIGN.W(xmm4, xmm8), "66 41 0f 38 09 e0");
|
||||
CODEGEN_TEST(xPSIGN.D(xmm2, ptr[r8]), "66 41 0f 38 0a 10");
|
||||
CODEGEN_TEST(xPMADD.WD(xmm0, xmm8), "66 41 0f f5 c0");
|
||||
CODEGEN_TEST(xPMADD.UBSW(xmm0, xmm8), "66 41 0f 38 04 c0");
|
||||
|
||||
CODEGEN_TEST(xMOVAPS(xmm0, xmm1), "0f 28 c1");
|
||||
CODEGEN_TEST(xMOVAPS(xmm8, xmm9), "45 0f 28 c1");
|
||||
CODEGEN_TEST(xMOVUPS(xmm8, ptr128[r8+r9]), "47 0f 10 04 08");
|
||||
@ -325,6 +334,15 @@ TEST(CodegenTests, AVXTest)
|
||||
CODEGEN_TEST(xANDN.PS(xmm6, ptr[rdi]), "c5 c8 55 37");
|
||||
CODEGEN_TEST(xANDN.PD(xmm3, xmm8), "c4 c1 61 55 d8");
|
||||
|
||||
CODEGEN_TEST(xPABS.B(xmm0, xmm2), "c4 e2 79 1c c2");
|
||||
CODEGEN_TEST(xPABS.W(xmm4, xmm8), "c4 c2 79 1d e0");
|
||||
CODEGEN_TEST(xPABS.D(xmm6, ptr[rax]), "c4 e2 79 1e 30");
|
||||
CODEGEN_TEST(xPSIGN.B(xmm0, xmm2), "c4 e2 79 08 c2");
|
||||
CODEGEN_TEST(xPSIGN.W(xmm4, xmm8), "c4 c2 59 09 e0");
|
||||
CODEGEN_TEST(xPSIGN.D(xmm2, ptr[r8]), "c4 c2 69 0a 10");
|
||||
CODEGEN_TEST(xPMADD.WD(xmm0, xmm8), "c5 b9 f5 c0"); // => vpmaddwd xmm0, xmm8, xmm0
|
||||
CODEGEN_TEST(xPMADD.UBSW(xmm0, xmm8), "c4 c2 79 04 c0");
|
||||
|
||||
CODEGEN_TEST(xVMOVAPS(xmm0, xmm1), "c5 f8 28 c1");
|
||||
CODEGEN_TEST(xVMOVAPS(xmm0, ptr32[rdi]), "c5 f8 28 07");
|
||||
CODEGEN_TEST(xVMOVAPS(ptr32[rdi], xmm0), "c5 f8 29 07");
|
||||
|
||||
Loading…
Reference in New Issue
Block a user