Common: Switch SIMD shift instructions to auto SSE/AVX

This commit is contained in:
TellowKrinkle 2025-06-01 19:14:07 -05:00 committed by TellowKrinkle
parent 23918e25c2
commit 10ed797881
3 changed files with 67 additions and 29 deletions

View File

@ -11,15 +11,16 @@ namespace x86Emitter
// --------------------------------------------------------------------------------------
// Emitter helper for one variant of a packed SIMD shift instruction.
// The overloads defined inline in this struct forward to a three-operand overload,
// passing dst as the first source operand as well.
// NOTE(review): this listing carries two field sets (Prefix/Opcode/OpcodeImm/Modcode and
// info/infoImm) and two declarations with the same (xRegisterSSE, xRegisterSSE) signature;
// it looks like pre-change and post-change members shown side by side — confirm which set
// is current before relying on it.
struct _SimdShiftHelper
{
// Raw encoding fields: Prefix and Opcode are what the OpWriteSSE-based definitions pass on,
// OpcodeImm and Modcode are what the xOpWrite0F-based immediate definition passes on.
u8 Prefix;
u16 Opcode;
u16 OpcodeImm;
u8 Modcode;
// Instruction description handed to EmitSIMD by the register/memory-operand overloads.
SIMDInstructionInfo info;
// Instruction description handed to EmitSIMD by the immediate-operand overload.
SIMDInstructionInfo infoImm;
void operator()(const xRegisterSSE& to, const xRegisterSSE& from) const;
void operator()(const xRegisterSSE& to, const xIndirectVoid& from) const;
// Two-operand register form: same as the three-operand form with src1 == dst.
void operator()(const xRegisterSSE& dst, const xRegisterSSE& src) const { (*this)(dst, dst, src); }
// Two-operand memory form: same as the three-operand form with src1 == dst.
void operator()(const xRegisterSSE& dst, const xIndirectVoid& src) const { (*this)(dst, dst, src); }
// Three-operand forms; the second source is either a register or a memory operand.
void operator()(const xRegisterSSE& dst, const xRegisterSSE& src1, const xRegisterSSE& src2) const;
void operator()(const xRegisterSSE& dst, const xRegisterSSE& src1, const xIndirectVoid& src2) const;
void operator()(const xRegisterSSE& to, u8 imm8) const;
// Immediate form with a single register: forwards with src == dst.
void operator()(const xRegisterSSE& dst, u8 imm8) const { (*this)(dst, dst, imm8); }
// Immediate form with separate destination and source registers.
void operator()(const xRegisterSSE& dst, const xRegisterSSE& src, u8 imm8) const;
};
// --------------------------------------------------------------------------------------
@ -42,7 +43,8 @@ namespace x86Emitter
const _SimdShiftHelper D;
const _SimdShiftHelper Q;
// DQ overloads taking an 8-bit immediate.
// NOTE(review): the first and second declarations below have the same parameter types
// (xRegisterSSE, u8); presumably one is the pre-change line and the other its replacement — confirm.
void DQ(const xRegisterSSE& to, u8 imm8) const;
// Single-register form: forwards to the (dst, src, imm8) overload with src == dst.
void DQ(const xRegisterSSE& dst, u8 imm8) const { DQ(dst, dst, imm8); }
// Form with separate destination and source registers.
void DQ(const xRegisterSSE& dst, const xRegisterSSE& src, u8 imm8) const;
};
//////////////////////////////////////////////////////////////////////////////////////////

View File

@ -304,40 +304,38 @@ namespace x86Emitter
// SIMD Arithmetic Instructions
// =====================================================================================================
// NOTE(review): the definitions below mix two generations of the shift emitters (OpWriteSSE / xOpWrite0F
// based and EmitSIMD based), and some function headers are not directly followed by their own bodies.
// Confirm against the real file which lines are current.
// (to, from) forms: pass the helper's Prefix and Opcode to OpWriteSSE.
void _SimdShiftHelper::operator()(const xRegisterSSE& to, const xRegisterSSE& from) const { OpWriteSSE(Prefix, Opcode); }
void _SimdShiftHelper::operator()(const xRegisterSSE& to, const xIndirectVoid& from) const { OpWriteSSE(Prefix, Opcode); }
// (dst, src1, src2) forms: delegate to EmitSIMD with `info`.
void _SimdShiftHelper::operator()(const xRegisterSSE& dst, const xRegisterSSE& src1, const xRegisterSSE& src2) const { EmitSIMD(info, dst, src1, src2); }
void _SimdShiftHelper::operator()(const xRegisterSSE& dst, const xRegisterSSE& src1, const xIndirectVoid& src2) const { EmitSIMD(info, dst, src1, src2); }
void _SimdShiftHelper::operator()(const xRegisterSSE& to, u8 imm8) const
// (dst, src, imm8) form: delegates to EmitSIMD with `infoImm`.
void _SimdShiftHelper::operator()(const xRegisterSSE& dst, const xRegisterSSE& src, u8 imm8) const { EmitSIMD(infoImm, dst, src, imm8); }
void xImplSimd_Shift::DQ(const xRegisterSSE& dst, const xRegisterSSE& src, u8 imm8) const
{
// Calls xOpWrite0F with a fixed 0x66 prefix, OpcodeImm and Modcode, then passes imm8 to xWrite8.
xOpWrite0F(0x66, OpcodeImm, (int)Modcode, to);
xWrite8(imm8);
}
void xImplSimd_Shift::DQ(const xRegisterSSE& to, u8 imm8) const
{
// Uses opcode 0x73 with Q's Modcode plus one.
xOpWrite0F(0x66, 0x73, (int)Q.Modcode + 1, to, imm8);
// Copy Q's immediate-form info and bump its `ext` by one before emitting,
// mirroring the `Q.Modcode + 1` used in the xOpWrite0F call above.
SIMDInstructionInfo info = Q.infoImm;
info.ext += 1;
EmitSIMD(info, dst, src, imm8);
}
// xPSRA: shift table with W and D entries only (type xImplSimd_ShiftWithoutQ).
// Two row layouts appear below:
//   - raw rows {0x66, opcode, immediate opcode, 4}
//   - rows pairing SIMDInstructionInfo(opcode) with SIMDInstructionInfo(immediate opcode, 4)
// NOTE(review): both layouts describe the same opcodes (0xe1/0xe2, immediate 0x71/0x72 with 4);
// this looks like old and new initializer rows shown together — confirm which is current.
// NOTE(review): presumably .p66() corresponds to the 0x66 prefix of the raw rows — verify against SIMDInstructionInfo.
const xImplSimd_ShiftWithoutQ xPSRA =
{
{0x66, 0xe1, 0x71, 4}, // W
{0x66, 0xe2, 0x72, 4} // D
{
{SIMDInstructionInfo(0xe1).p66().i(), SIMDInstructionInfo(0x71, 4).p66().i()}, // W
{SIMDInstructionInfo(0xe2).p66().i(), SIMDInstructionInfo(0x72, 4).p66().i()}, // D
};
// xPSRL: shift table with W, D and Q entries (type xImplSimd_Shift).
// Two row layouts appear below:
//   - raw rows {0x66, opcode, immediate opcode, 2}
//   - rows pairing SIMDInstructionInfo(opcode) with SIMDInstructionInfo(immediate opcode, 2)
// NOTE(review): both layouts describe the same opcodes (0xd1/0xd2/0xd3, immediate 0x71/0x72/0x73 with 2);
// this looks like old and new initializer rows shown together — confirm which is current.
// NOTE(review): presumably .p66() corresponds to the 0x66 prefix of the raw rows — verify against SIMDInstructionInfo.
const xImplSimd_Shift xPSRL =
{
{0x66, 0xd1, 0x71, 2}, // W
{0x66, 0xd2, 0x72, 2}, // D
{0x66, 0xd3, 0x73, 2}, // Q
{
{SIMDInstructionInfo(0xd1).p66().i(), SIMDInstructionInfo(0x71, 2).p66().i()}, // W
{SIMDInstructionInfo(0xd2).p66().i(), SIMDInstructionInfo(0x72, 2).p66().i()}, // D
{SIMDInstructionInfo(0xd3).p66().i(), SIMDInstructionInfo(0x73, 2).p66().i()}, // Q
};
// xPSLL: shift table with W, D and Q entries (type xImplSimd_Shift).
// Two row layouts appear below:
//   - raw rows {0x66, opcode, immediate opcode, 6}
//   - rows pairing SIMDInstructionInfo(opcode) with SIMDInstructionInfo(immediate opcode, 6)
// NOTE(review): both layouts describe the same opcodes (0xf1/0xf2/0xf3, immediate 0x71/0x72/0x73 with 6);
// this looks like old and new initializer rows shown together — confirm which is current.
// NOTE(review): presumably .p66() corresponds to the 0x66 prefix of the raw rows — verify against SIMDInstructionInfo.
const xImplSimd_Shift xPSLL =
{
{0x66, 0xf1, 0x71, 6}, // W
{0x66, 0xf2, 0x72, 6}, // D
{0x66, 0xf3, 0x73, 6}, // Q
{
{SIMDInstructionInfo(0xf1).p66().i(), SIMDInstructionInfo(0x71, 6).p66().i()}, // W
{SIMDInstructionInfo(0xf2).p66().i(), SIMDInstructionInfo(0x72, 6).p66().i()}, // D
{SIMDInstructionInfo(0xf3).p66().i(), SIMDInstructionInfo(0x73, 6).p66().i()}, // Q
};
const xImplSimd_AddSub xPADD =

View File

@ -168,6 +168,25 @@ TEST(CodegenTests, SSETest)
CODEGEN_TEST(xCVTTSD2SI(rdx, xmm4), "f2 48 0f 2c d4");
CODEGEN_TEST(xCVTTSS2SI(ecx, xmm3), "f3 0f 2c cb");
// Packed shift coverage for xPSLL / xPSRA / xPSRL: register and memory operand forms,
// immediate forms for W, D and Q, and the DQ immediate form for xPSLL and xPSRL.
// Every expected encoding in this run starts with the 66 prefix byte; cases that use
// xmm8 or r8 additionally expect a REX byte (45 or 41) before the 0f opcode escape.
CODEGEN_TEST(xPSLL.W(xmm8, ptr[r8]), "66 45 0f f1 00");
CODEGEN_TEST(xPSLL.D(xmm0, xmm1), "66 0f f2 c1");
CODEGEN_TEST(xPSLL.Q(xmm4, ptr[rcx]), "66 0f f3 21");
CODEGEN_TEST(xPSLL.W(xmm5, 2), "66 0f 71 f5 02");
CODEGEN_TEST(xPSLL.D(xmm6, 3), "66 0f 72 f6 03");
CODEGEN_TEST(xPSLL.Q(xmm7, 4), "66 0f 73 f7 04");
CODEGEN_TEST(xPSLL.DQ(xmm8, 5), "66 41 0f 73 f8 05");
CODEGEN_TEST(xPSRA.W(xmm4, xmm2), "66 0f e1 e2");
CODEGEN_TEST(xPSRA.D(xmm5, ptr[rdi]), "66 0f e2 2f");
CODEGEN_TEST(xPSRA.W(xmm4, 3), "66 0f 71 e4 03");
CODEGEN_TEST(xPSRA.D(xmm5, 7), "66 0f 72 e5 07");
CODEGEN_TEST(xPSRL.W(xmm8, ptr[r8]), "66 45 0f d1 00");
CODEGEN_TEST(xPSRL.D(xmm0, xmm1), "66 0f d2 c1");
CODEGEN_TEST(xPSRL.Q(xmm4, ptr[rcx]), "66 0f d3 21");
CODEGEN_TEST(xPSRL.W(xmm5, 2), "66 0f 71 d5 02");
CODEGEN_TEST(xPSRL.D(xmm6, 3), "66 0f 72 d6 03");
CODEGEN_TEST(xPSRL.Q(xmm7, 4), "66 0f 73 d7 04");
CODEGEN_TEST(xPSRL.DQ(xmm8, 5), "66 41 0f 73 d8 05");
CODEGEN_TEST(xMOVAPS(xmm0, xmm1), "0f 28 c1");
CODEGEN_TEST(xMOVAPS(xmm8, xmm9), "45 0f 28 c1");
CODEGEN_TEST(xMOVUPS(xmm8, ptr128[r8+r9]), "47 0f 10 04 08");
@ -217,6 +236,25 @@ TEST(CodegenTests, AVXTest)
CODEGEN_TEST(xCVTTSD2SI(rdx, xmm4), "c4 e1 fb 2c d4");
CODEGEN_TEST(xCVTTSS2SI(ecx, xmm3), "c5 fa 2c cb");
// Packed shift coverage for xPSLL / xPSRA / xPSRL with the same calls as the SSE test,
// but every expected encoding here starts with a VEX prefix: the two-byte c5 form,
// or the three-byte c4 form for the cases that use xmm8 or r8.
CODEGEN_TEST(xPSLL.W(xmm8, ptr[r8]), "c4 41 39 f1 00");
CODEGEN_TEST(xPSLL.D(xmm0, xmm1), "c5 f9 f2 c1");
CODEGEN_TEST(xPSLL.Q(xmm4, ptr[rcx]), "c5 d9 f3 21");
CODEGEN_TEST(xPSLL.W(xmm5, 2), "c5 d1 71 f5 02");
CODEGEN_TEST(xPSLL.D(xmm6, 3), "c5 c9 72 f6 03");
CODEGEN_TEST(xPSLL.Q(xmm7, 4), "c5 c1 73 f7 04");
CODEGEN_TEST(xPSLL.DQ(xmm8, 5), "c4 c1 39 73 f8 05");
CODEGEN_TEST(xPSRA.W(xmm4, xmm2), "c5 d9 e1 e2");
CODEGEN_TEST(xPSRA.D(xmm5, ptr[rdi]), "c5 d1 e2 2f");
CODEGEN_TEST(xPSRA.W(xmm4, 3), "c5 d9 71 e4 03");
CODEGEN_TEST(xPSRA.D(xmm5, 7), "c5 d1 72 e5 07");
CODEGEN_TEST(xPSRL.W(xmm8, ptr[r8]), "c4 41 39 d1 00");
CODEGEN_TEST(xPSRL.D(xmm0, xmm1), "c5 f9 d2 c1");
CODEGEN_TEST(xPSRL.Q(xmm4, ptr[rcx]), "c5 d9 d3 21");
CODEGEN_TEST(xPSRL.W(xmm5, 2), "c5 d1 71 d5 02");
CODEGEN_TEST(xPSRL.D(xmm6, 3), "c5 c9 72 d6 03");
CODEGEN_TEST(xPSRL.Q(xmm7, 4), "c5 c1 73 d7 04");
CODEGEN_TEST(xPSRL.DQ(xmm8, 5), "c4 c1 39 73 d8 05");
CODEGEN_TEST(xVMOVAPS(xmm0, xmm1), "c5 f8 28 c1");
CODEGEN_TEST(xVMOVAPS(xmm0, ptr32[rdi]), "c5 f8 28 07");
CODEGEN_TEST(xVMOVAPS(ptr32[rdi], xmm0), "c5 f8 29 07");