mirror of
https://github.com/PCSX2/pcsx2.git
synced 2025-12-16 04:08:48 +00:00
Common: Switch simd mov to auto SSE/AVX
This commit is contained in:
parent
a052a43b84
commit
a045c917e7
@ -44,7 +44,7 @@ namespace x86Emitter
|
||||
// --------------------------------------------------------------------------------------
|
||||
// xImplSimd_MoveSSE
|
||||
// --------------------------------------------------------------------------------------
|
||||
// Legends in their own right: MOVAPS / MOVAPD / MOVUPS / MOVUPD
|
||||
// Legends in their own right: MOVAPS / MOVAPD / MOVUPS / MOVUPD / MOVDQA / MOVDQU
|
||||
//
|
||||
// All implementations of Unaligned Movs will, when possible, use aligned movs instead.
|
||||
// This happens when using Mem,Reg or Reg,Mem forms where the address is simple displacement
|
||||
@ -52,27 +52,10 @@ namespace x86Emitter
|
||||
//
|
||||
struct xImplSimd_MoveSSE
|
||||
{
|
||||
u8 Prefix;
|
||||
bool isAligned;
|
||||
|
||||
void operator()(const xRegisterSSE& to, const xRegisterSSE& from) const;
|
||||
void operator()(const xRegisterSSE& to, const xIndirectVoid& from) const;
|
||||
void operator()(const xIndirectVoid& to, const xRegisterSSE& from) const;
|
||||
};
|
||||
|
||||
// --------------------------------------------------------------------------------------
|
||||
// xImplSimd_MoveDQ
|
||||
// --------------------------------------------------------------------------------------
|
||||
// Implementations for MOVDQA / MOVDQU
|
||||
//
|
||||
// All implementations of Unaligned Movs will, when possible, use aligned movs instead.
|
||||
// This happens when using Mem,Reg or Reg,Mem forms where the address is simple displacement
|
||||
// which can be checked for alignment at runtime.
|
||||
|
||||
struct xImplSimd_MoveDQ
|
||||
{
|
||||
u8 Prefix;
|
||||
bool isAligned;
|
||||
SIMDInstructionInfo aligned_load;
|
||||
SIMDInstructionInfo aligned_store;
|
||||
SIMDInstructionInfo unaligned_load;
|
||||
SIMDInstructionInfo unaligned_store;
|
||||
|
||||
void operator()(const xRegisterSSE& to, const xRegisterSSE& from) const;
|
||||
void operator()(const xRegisterSSE& to, const xIndirectVoid& from) const;
|
||||
|
||||
@ -486,14 +486,8 @@ namespace x86Emitter
|
||||
extern const xImplSimd_MoveSSE xMOVUPS;
|
||||
extern const xImplSimd_MoveSSE xMOVAPD;
|
||||
extern const xImplSimd_MoveSSE xMOVUPD;
|
||||
|
||||
#ifdef ALWAYS_USE_MOVAPS
|
||||
extern const xImplSimd_MoveSSE xMOVDQA;
|
||||
extern const xImplSimd_MoveSSE xMOVDQU;
|
||||
#else
|
||||
extern const xImplSimd_MoveDQ xMOVDQA;
|
||||
extern const xImplSimd_MoveDQ xMOVDQU;
|
||||
#endif
|
||||
|
||||
extern const xImplSimd_MovHL xMOVH;
|
||||
extern const xImplSimd_MovHL xMOVL;
|
||||
|
||||
@ -647,53 +647,55 @@ namespace x86Emitter
|
||||
// PS form: forwards this op's `info` unchanged to EmitSIMD with the three register operands.
void xImplSimd_MovHL_RtoR::PS(const xRegisterSSE& dst, const xRegisterSSE& src1, const xRegisterSSE& src2) const
{
	EmitSIMD(info, dst, src1, src2);
}
|
||||
// PD form: same as PS, but emits with `info.p66()` instead of the plain `info`.
void xImplSimd_MovHL_RtoR::PD(const xRegisterSSE& dst, const xRegisterSSE& src1, const xRegisterSSE& src2) const
{
	EmitSIMD(info.p66(), dst, src1, src2);
}
|
||||
|
||||
static const u16 MovPS_OpAligned = 0x28; // Aligned [aps] form
|
||||
static const u16 MovPS_OpUnaligned = 0x10; // unaligned [ups] form
|
||||
|
||||
void xImplSimd_MoveSSE::operator()(const xRegisterSSE& to, const xRegisterSSE& from) const
|
||||
static bool IsAligned(const xRegisterSSE& reg, const xIndirectVoid& mem)
|
||||
{
|
||||
if (to != from)
|
||||
xOpWrite0F(Prefix, MovPS_OpAligned, to, from);
|
||||
u32 mask = reg.GetOperandSize() - 1;
|
||||
// Aligned if it's displacement-only and the displacement is aligned
|
||||
if (mem.Displacement & mask)
|
||||
return false;
|
||||
return mem.Index.IsEmpty() && mem.Base.IsEmpty();
|
||||
}
|
||||
|
||||
void xImplSimd_MoveSSE::operator()(const xRegisterSSE& to, const xIndirectVoid& from) const
|
||||
static const xImplSimd_MoveSSE& GetLoadStoreOp(const xImplSimd_MoveSSE* op)
|
||||
{
|
||||
// ModSib form is aligned if it's displacement-only and the displacement is aligned:
|
||||
bool isReallyAligned = isAligned || (((from.Displacement & 0x0f) == 0) && from.Index.IsEmpty() && from.Base.IsEmpty());
|
||||
|
||||
xOpWrite0F(Prefix, isReallyAligned ? MovPS_OpAligned : MovPS_OpUnaligned, to, from);
|
||||
if (!x86Emitter::use_avx)
|
||||
{
|
||||
// movaps is shorter, and no processor differentiates between the various movs for load/store
|
||||
const bool aligned = std::bit_cast<u32>(op->aligned_load) == std::bit_cast<u32>(op->unaligned_load);
|
||||
return aligned ? xMOVAPS : xMOVUPS;
|
||||
}
|
||||
return *op;
|
||||
}
|
||||
|
||||
void xImplSimd_MoveSSE::operator()(const xIndirectVoid& to, const xRegisterSSE& from) const
|
||||
void xImplSimd_MoveSSE::operator()(const xRegisterSSE& dst, const xRegisterSSE& src) const
|
||||
{
|
||||
// ModSib form is aligned if it's displacement-only and the displacement is aligned:
|
||||
bool isReallyAligned = isAligned || ((to.Displacement & 0x0f) == 0 && to.Index.IsEmpty() && to.Base.IsEmpty());
|
||||
xOpWrite0F(Prefix, isReallyAligned ? MovPS_OpAligned + 1 : MovPS_OpUnaligned + 1, from, to);
|
||||
if (dst.GetId() == src.GetId() && dst.GetOperandSize() == src.GetOperandSize())
|
||||
return;
|
||||
SIMDInstructionInfo info = aligned_load;
|
||||
const xRegisterSSE* arg0 = &dst;
|
||||
const xRegisterSSE* arg1 = &src;
|
||||
if (x86Emitter::use_avx)
|
||||
{
|
||||
if (arg1->IsExtended() && !arg0->IsExtended())
|
||||
{
|
||||
// Can save a byte by using the store opcode
|
||||
info = aligned_store;
|
||||
std::swap(arg0, arg1);
|
||||
}
|
||||
}
|
||||
EmitSIMD(info, *arg0, *arg0, *arg1);
|
||||
}
|
||||
|
||||
static const u8 MovDQ_PrefixAligned = 0x66; // Aligned [dqa] form
|
||||
static const u8 MovDQ_PrefixUnaligned = 0xf3; // unaligned [dqu] form
|
||||
|
||||
void xImplSimd_MoveDQ::operator()(const xRegisterSSE& to, const xRegisterSSE& from) const
|
||||
void xImplSimd_MoveSSE::operator()(const xRegisterSSE& dst, const xIndirectVoid& src) const
|
||||
{
|
||||
if (to != from)
|
||||
xOpWrite0F(MovDQ_PrefixAligned, 0x6f, to, from);
|
||||
const xImplSimd_MoveSSE& op = GetLoadStoreOp(this);
|
||||
EmitSIMD(IsAligned(dst, src) ? op.aligned_load : op.unaligned_load, dst, dst, src);
|
||||
}
|
||||
|
||||
void xImplSimd_MoveDQ::operator()(const xRegisterSSE& to, const xIndirectVoid& from) const
|
||||
void xImplSimd_MoveSSE::operator()(const xIndirectVoid& dst, const xRegisterSSE& src) const
|
||||
{
|
||||
// ModSib form is aligned if it's displacement-only and the displacement is aligned:
|
||||
bool isReallyAligned = isAligned || ((from.Displacement & 0x0f) == 0 && from.Index.IsEmpty() && from.Base.IsEmpty());
|
||||
xOpWrite0F(isReallyAligned ? MovDQ_PrefixAligned : MovDQ_PrefixUnaligned, 0x6f, to, from);
|
||||
}
|
||||
|
||||
void xImplSimd_MoveDQ::operator()(const xIndirectVoid& to, const xRegisterSSE& from) const
|
||||
{
|
||||
// ModSib form is aligned if it's displacement-only and the displacement is aligned:
|
||||
bool isReallyAligned = isAligned || ((to.Displacement & 0x0f) == 0 && to.Index.IsEmpty() && to.Base.IsEmpty());
|
||||
|
||||
// use opcode 0x7f : alternate ModRM encoding (reverse src/dst)
|
||||
xOpWrite0F(isReallyAligned ? MovDQ_PrefixAligned : MovDQ_PrefixUnaligned, 0x7f, from, to);
|
||||
const xImplSimd_MoveSSE& op = GetLoadStoreOp(this);
|
||||
EmitSIMD(IsAligned(src, dst) ? aligned_store : op.unaligned_store, src, src, dst);
|
||||
}
|
||||
|
||||
// BW form (register source): passes 0x66 and OpcodeBase to the OpWriteSSE macro,
// which is defined outside this view.
void xImplSimd_PMove::BW(const xRegisterSSE& to, const xRegisterSSE& from) const
{
	OpWriteSSE(0x66, OpcodeBase);
}
|
||||
@ -715,21 +717,39 @@ namespace x86Emitter
|
||||
// DQ form (64-bit memory source): passes 0x66 and OpcodeBase + 0x500 to the
// OpWriteSSE macro, which is defined outside this view.
void xImplSimd_PMove::DQ(const xRegisterSSE& to, const xIndirect64& from) const
{
	OpWriteSSE(0x66, OpcodeBase + 0x500);
}
|
||||
|
||||
|
||||
// Initializer order follows the struct's field order:
//   aligned_load, aligned_store, unaligned_load, unaligned_store.

// xMOVAPS: opcodes 0x28 (load) / 0x29 (store) in both the aligned and unaligned slots.
const xImplSimd_MoveSSE xMOVAPS = {
	SIMDInstructionInfo(0x28).mov(),
	SIMDInstructionInfo(0x29).mov(),
	SIMDInstructionInfo(0x28).mov(),
	SIMDInstructionInfo(0x29).mov(),
};

// xMOVUPS: aligned slots reuse 0x28 / 0x29; unaligned slots use 0x10 (load) / 0x11 (store).
const xImplSimd_MoveSSE xMOVUPS = {
	SIMDInstructionInfo(0x28).mov(),
	SIMDInstructionInfo(0x29).mov(),
	SIMDInstructionInfo(0x10).mov(),
	SIMDInstructionInfo(0x11).mov(),
};
|
||||
|
||||
#ifdef ALWAYS_USE_MOVAPS
|
||||
const xImplSimd_MoveSSE xMOVDQA = {0x00, true};
|
||||
const xImplSimd_MoveSSE xMOVAPD = {0x00, true};
|
||||
const xImplSimd_MoveSSE xMOVDQA = xMOVAPS;
|
||||
const xImplSimd_MoveSSE xMOVAPD = xMOVAPS;
|
||||
|
||||
const xImplSimd_MoveSSE xMOVDQU = {0x00, false};
|
||||
const xImplSimd_MoveSSE xMOVUPD = {0x00, false};
|
||||
const xImplSimd_MoveSSE xMOVDQU = xMOVUPS;
|
||||
const xImplSimd_MoveSSE xMOVUPD = xMOVUPS;
|
||||
#else
|
||||
const xImplSimd_MoveDQ xMOVDQA = {0x66, true};
|
||||
const xImplSimd_MoveSSE xMOVAPD = {0x66, true};
|
||||
const xImplSimd_MoveSSE xMOVDQA = {
|
||||
SIMDInstructionInfo(0x6f).p66().mov(), SIMDInstructionInfo(0x7f).p66().mov(),
|
||||
SIMDInstructionInfo(0x6f).p66().mov(), SIMDInstructionInfo(0x7f).p66().mov(),
|
||||
};
|
||||
const xImplSimd_MoveSSE xMOVDQU = {
|
||||
SIMDInstructionInfo(0x6f).p66().mov(), SIMDInstructionInfo(0x7f).p66().mov(),
|
||||
SIMDInstructionInfo(0x6f).pf3().mov(), SIMDInstructionInfo(0x7f).pf3().mov(),
|
||||
};
|
||||
|
||||
const xImplSimd_MoveDQ xMOVDQU = {0xf3, false};
|
||||
const xImplSimd_MoveSSE xMOVUPD = {0x66, false};
|
||||
const xImplSimd_MoveSSE xMOVAPD = {
|
||||
SIMDInstructionInfo(0x28).p66().mov(), SIMDInstructionInfo(0x29).p66().mov(),
|
||||
SIMDInstructionInfo(0x28).p66().mov(), SIMDInstructionInfo(0x29).p66().mov(),
|
||||
};
|
||||
const xImplSimd_MoveSSE xMOVUPD = {
|
||||
SIMDInstructionInfo(0x28).p66().mov(), SIMDInstructionInfo(0x29).p66().mov(),
|
||||
SIMDInstructionInfo(0x10).p66().mov(), SIMDInstructionInfo(0x11).p66().mov(),
|
||||
};
|
||||
#endif
|
||||
|
||||
|
||||
|
||||
@ -337,10 +337,32 @@ TEST(CodegenTests, SSETest)
|
||||
CODEGEN_TEST(xMOVHL.PS(xmm4, xmm9), "41 0f 12 e1");
|
||||
CODEGEN_TEST(xMOVLH.PS(xmm2, xmm1), "0f 16 d1");
|
||||
|
||||
CODEGEN_TEST(xMOVAPS(xmm0, xmm1), "0f 28 c1");
|
||||
CODEGEN_TEST(xMOVAPS(xmm8, xmm9), "45 0f 28 c1");
|
||||
CODEGEN_TEST(xMOVUPS(xmm8, ptr128[r8+r9]), "47 0f 10 04 08");
|
||||
CODEGEN_TEST(xMOVAPS(ptr128[rax+r9], xmm8), "46 0f 29 04 08");
|
||||
CODEGEN_TEST(xMOVAPS(xmm0, xmm8), "41 0f 28 c0");
|
||||
CODEGEN_TEST(xMOVUPS(xmm8, xmm3), "44 0f 28 c3");
|
||||
CODEGEN_TEST(xMOVAPS(ptr[r8], xmm4), "41 0f 29 20");
|
||||
CODEGEN_TEST(xMOVUPS(ptr[rax], xmm5), "0f 11 28");
|
||||
CODEGEN_TEST(xMOVAPS(xmm8, ptr[r8]), "45 0f 28 00");
|
||||
CODEGEN_TEST(xMOVUPS(xmm5, ptr[r9]), "41 0f 10 29");
|
||||
CODEGEN_TEST(xMOVAPD(ptr[rcx], xmm8), "44 0f 29 01");
|
||||
CODEGEN_TEST(xMOVUPD(ptr[r8], xmm11), "45 0f 11 18");
|
||||
CODEGEN_TEST(xMOVAPD(xmm15, ptr[r9]), "45 0f 28 39");
|
||||
CODEGEN_TEST(xMOVUPD(xmm1, ptr[rax]), "0f 10 08");
|
||||
CODEGEN_TEST(xMOVDQA(ptr[r9], xmm0), "41 0f 29 01");
|
||||
CODEGEN_TEST(xMOVDQU(ptr[r8], xmm3), "41 0f 11 18");
|
||||
CODEGEN_TEST(xMOVDQA(xmm8, ptr[rsi]), "44 0f 28 06");
|
||||
CODEGEN_TEST(xMOVDQU(xmm7, ptr[rcx]), "0f 10 39");
|
||||
#ifdef ALWAYS_USE_MOVAPS
|
||||
CODEGEN_TEST(xMOVAPD(xmm4, xmm8), "41 0f 28 e0");
|
||||
CODEGEN_TEST(xMOVUPD(xmm1, xmm4), "0f 28 cc");
|
||||
CODEGEN_TEST(xMOVDQA(xmm9, xmm11), "45 0f 28 cb");
|
||||
CODEGEN_TEST(xMOVDQU(xmm7, xmm10), "41 0f 28 fa");
|
||||
#else
|
||||
CODEGEN_TEST(xMOVAPD(xmm4, xmm8), "66 41 0f 28 e0");
|
||||
CODEGEN_TEST(xMOVUPD(xmm1, xmm4), "66 0f 28 cc");
|
||||
CODEGEN_TEST(xMOVDQA(xmm9, xmm11), "66 45 0f 6f cb");
|
||||
CODEGEN_TEST(xMOVDQU(xmm7, xmm10), "66 41 0f 6f fa");
|
||||
#endif
|
||||
|
||||
CODEGEN_TEST(xBLEND.PS(xmm0, xmm1, 0x55), "66 0f 3a 0c c1 55");
|
||||
CODEGEN_TEST(xBLEND.PD(xmm8, xmm9, 0xaa), "66 45 0f 3a 0d c1 aa");
|
||||
CODEGEN_TEST(xPBLEND.W(xmm0, xmm1, 0x55), "66 0f 3a 0e c1 55");
|
||||
@ -545,6 +567,40 @@ TEST(CodegenTests, AVXTest)
|
||||
CODEGEN_TEST(xMOVHL.PS(xmm4, xmm9), "c4 c1 58 12 e1");
|
||||
CODEGEN_TEST(xMOVLH.PS(xmm2, xmm1), "c5 e8 16 d1");
|
||||
|
||||
CODEGEN_TEST(xMOVAPS(xmm0, xmm8), "c5 78 29 c0");
|
||||
CODEGEN_TEST(xMOVUPS(xmm8, xmm3), "c5 78 28 c3");
|
||||
CODEGEN_TEST(xMOVAPS(ptr[r8], xmm4), "c4 c1 78 29 20");
|
||||
CODEGEN_TEST(xMOVUPS(ptr[rax], xmm5), "c5 f8 11 28");
|
||||
CODEGEN_TEST(xMOVAPS(xmm8, ptr[r8]), "c4 41 78 28 00");
|
||||
CODEGEN_TEST(xMOVUPS(xmm5, ptr[r9]), "c4 c1 78 10 29");
|
||||
#ifdef ALWAYS_USE_MOVAPS
|
||||
CODEGEN_TEST(xMOVAPD(xmm4, xmm8), "c5 78 29 c4");
|
||||
CODEGEN_TEST(xMOVUPD(xmm1, xmm4), "c5 f8 28 cc");
|
||||
CODEGEN_TEST(xMOVAPD(ptr[rcx], xmm8), "c5 78 29 01");
|
||||
CODEGEN_TEST(xMOVUPD(ptr[r8], xmm11), "c4 41 78 11 18");
|
||||
CODEGEN_TEST(xMOVAPD(xmm15, ptr[r9]), "c4 41 78 28 39");
|
||||
CODEGEN_TEST(xMOVUPD(xmm1, ptr[rax]), "c5 f8 10 08");
|
||||
CODEGEN_TEST(xMOVDQA(xmm9, xmm11), "c4 41 78 28 cb");
|
||||
CODEGEN_TEST(xMOVDQU(xmm7, xmm10), "c5 78 29 d7");
|
||||
CODEGEN_TEST(xMOVDQA(ptr[r9], xmm0), "c4 c1 78 29 01");
|
||||
CODEGEN_TEST(xMOVDQU(ptr[r8], xmm3), "c4 c1 78 11 18");
|
||||
CODEGEN_TEST(xMOVDQA(xmm8, ptr[rsi]), "c5 78 28 06");
|
||||
CODEGEN_TEST(xMOVDQU(xmm7, ptr[rcx]), "c5 f8 10 39");
|
||||
#else
|
||||
CODEGEN_TEST(xMOVAPD(xmm4, xmm8), "c5 79 29 c4");
|
||||
CODEGEN_TEST(xMOVUPD(xmm1, xmm4), "c5 f9 28 cc");
|
||||
CODEGEN_TEST(xMOVAPD(ptr[rcx], xmm8), "c5 79 29 01");
|
||||
CODEGEN_TEST(xMOVUPD(ptr[r8], xmm11), "c4 41 79 11 18");
|
||||
CODEGEN_TEST(xMOVAPD(xmm15, ptr[r9]), "c4 41 79 28 39");
|
||||
CODEGEN_TEST(xMOVUPD(xmm1, ptr[rax]), "c5 f9 10 08");
|
||||
CODEGEN_TEST(xMOVDQA(xmm9, xmm11), "c4 41 79 6f cb");
|
||||
CODEGEN_TEST(xMOVDQU(xmm7, xmm10), "c5 79 7f d7");
|
||||
CODEGEN_TEST(xMOVDQA(ptr[r9], xmm0), "c4 c1 79 7f 01");
|
||||
CODEGEN_TEST(xMOVDQU(ptr[r8], xmm3), "c4 c1 7a 7f 18");
|
||||
CODEGEN_TEST(xMOVDQA(xmm8, ptr[rsi]), "c5 79 6f 06");
|
||||
CODEGEN_TEST(xMOVDQU(xmm7, ptr[rcx]), "c5 fa 6f 39");
|
||||
#endif
|
||||
|
||||
CODEGEN_TEST(xVMOVAPS(xmm0, xmm1), "c5 f8 28 c1");
|
||||
CODEGEN_TEST(xVMOVAPS(xmm0, ptr32[rdi]), "c5 f8 28 07");
|
||||
CODEGEN_TEST(xVMOVAPS(ptr32[rdi], xmm0), "c5 f8 29 07");
|
||||
|
||||
Loading…
Reference in New Issue
Block a user