Common: Disable and remove ALWAYS_USE_MOVAPS

With AVX encoding, MOVAPS is the same size as the other move instructions (MOVAPD, MOVDQA), so always using it no longer saves any code size.

On Nehalem (one of the main targets of SSE4), there are bypass penalties for using a mov whose type does not match the data (e.g. MOVAPS on integer data instead of MOVDQA).
This commit is contained in:
TellowKrinkle 2025-08-10 01:14:36 -05:00 committed by TellowKrinkle
parent cc5f594384
commit 4662d0e5dc
3 changed files with 0 additions and 49 deletions

View File

@ -34,12 +34,10 @@ namespace x86Emitter
__emitinline static SIMDInstructionInfo getMov(SIMDInstructionInfo::Type type) __emitinline static SIMDInstructionInfo getMov(SIMDInstructionInfo::Type type)
{ {
switch (type) { switch (type) {
#ifndef ALWAYS_USE_MOVAPS
case SIMDInstructionInfo::Type::Integer: case SIMDInstructionInfo::Type::Integer:
return SIMDInstructionInfo(0x6f).p66().mov(); return SIMDInstructionInfo(0x6f).p66().mov();
case SIMDInstructionInfo::Type::Double: case SIMDInstructionInfo::Type::Double:
return SIMDInstructionInfo(0x28).p66().mov(); return SIMDInstructionInfo(0x28).p66().mov();
#endif
default: default:
case SIMDInstructionInfo::Type::Float: case SIMDInstructionInfo::Type::Float:
return SIMDInstructionInfo(0x28).mov(); return SIMDInstructionInfo(0x28).mov();
@ -712,13 +710,6 @@ namespace x86Emitter
SIMDInstructionInfo(0x10).mov(), SIMDInstructionInfo(0x11).mov(), SIMDInstructionInfo(0x10).mov(), SIMDInstructionInfo(0x11).mov(),
}; };
#ifdef ALWAYS_USE_MOVAPS
const xImplSimd_MoveSSE xMOVDQA = xMOVAPS;
const xImplSimd_MoveSSE xMOVAPD = xMOVAPS;
const xImplSimd_MoveSSE xMOVDQU = xMOVUPS;
const xImplSimd_MoveSSE xMOVUPD = xMOVUPS;
#else
const xImplSimd_MoveSSE xMOVDQA = { const xImplSimd_MoveSSE xMOVDQA = {
SIMDInstructionInfo(0x6f).p66().mov(), SIMDInstructionInfo(0x7f).p66().mov(), SIMDInstructionInfo(0x6f).p66().mov(), SIMDInstructionInfo(0x7f).p66().mov(),
SIMDInstructionInfo(0x6f).p66().mov(), SIMDInstructionInfo(0x7f).p66().mov(), SIMDInstructionInfo(0x6f).p66().mov(), SIMDInstructionInfo(0x7f).p66().mov(),
@ -736,7 +727,6 @@ namespace x86Emitter
SIMDInstructionInfo(0x28).p66().mov(), SIMDInstructionInfo(0x29).p66().mov(), SIMDInstructionInfo(0x28).p66().mov(), SIMDInstructionInfo(0x29).p66().mov(),
SIMDInstructionInfo(0x10).p66().mov(), SIMDInstructionInfo(0x11).p66().mov(), SIMDInstructionInfo(0x10).p66().mov(), SIMDInstructionInfo(0x11).p66().mov(),
}; };
#endif
const xImplSimd_MovHL xMOVH = {SIMDInstructionInfo(0x16)}; const xImplSimd_MovHL xMOVH = {SIMDInstructionInfo(0x16)};

View File

@ -50,23 +50,6 @@ namespace x86Emitter
template <typename T> template <typename T>
void xWrite(T val); void xWrite(T val);
// --------------------------------------------------------------------------------------
// ALWAYS_USE_MOVAPS [define] / AlwaysUseMovaps [const]
// --------------------------------------------------------------------------------------
// This tells the recompiler's emitter to always use movaps instead of movdqa. Both instructions
// do the exact same thing, but movaps is 1 byte shorter, and thus results in a cleaner L1 cache
// and some marginal speed gains as a result. (it's possible someday in the future the per-
// formance of the two instructions could change, so this constant is provided to restore MOVDQA
// use easily at a later time, if needed).
//
#define ALWAYS_USE_MOVAPS
#ifdef ALWAYS_USE_MOVAPS
static const bool AlwaysUseMovaps = true;
#else
static const bool AlwaysUseMovaps = false;
#endif
// -------------------------------------------------------------------------------------- // --------------------------------------------------------------------------------------
// __emitline - preprocessors definition // __emitline - preprocessors definition
// -------------------------------------------------------------------------------------- // --------------------------------------------------------------------------------------

View File

@ -372,17 +372,10 @@ TEST(CodegenTests, SSETest)
CODEGEN_TEST(xMOVDQU(ptr[r8], xmm3), "41 0f 11 18"); CODEGEN_TEST(xMOVDQU(ptr[r8], xmm3), "41 0f 11 18");
CODEGEN_TEST(xMOVDQA(xmm8, ptr[rsi]), "44 0f 28 06"); CODEGEN_TEST(xMOVDQA(xmm8, ptr[rsi]), "44 0f 28 06");
CODEGEN_TEST(xMOVDQU(xmm7, ptr[rcx]), "0f 10 39"); CODEGEN_TEST(xMOVDQU(xmm7, ptr[rcx]), "0f 10 39");
#ifdef ALWAYS_USE_MOVAPS
CODEGEN_TEST(xMOVAPD(xmm4, xmm8), "41 0f 28 e0");
CODEGEN_TEST(xMOVUPD(xmm1, xmm4), "0f 28 cc");
CODEGEN_TEST(xMOVDQA(xmm9, xmm11), "45 0f 28 cb");
CODEGEN_TEST(xMOVDQU(xmm7, xmm10), "41 0f 28 fa");
#else
CODEGEN_TEST(xMOVAPD(xmm4, xmm8), "66 41 0f 28 e0"); CODEGEN_TEST(xMOVAPD(xmm4, xmm8), "66 41 0f 28 e0");
CODEGEN_TEST(xMOVUPD(xmm1, xmm4), "66 0f 28 cc"); CODEGEN_TEST(xMOVUPD(xmm1, xmm4), "66 0f 28 cc");
CODEGEN_TEST(xMOVDQA(xmm9, xmm11), "66 45 0f 6f cb"); CODEGEN_TEST(xMOVDQA(xmm9, xmm11), "66 45 0f 6f cb");
CODEGEN_TEST(xMOVDQU(xmm7, xmm10), "66 41 0f 6f fa"); CODEGEN_TEST(xMOVDQU(xmm7, xmm10), "66 41 0f 6f fa");
#endif
CODEGEN_TEST(xBLEND.PS(xmm0, xmm1, 0x55), "66 0f 3a 0c c1 55"); CODEGEN_TEST(xBLEND.PS(xmm0, xmm1, 0x55), "66 0f 3a 0c c1 55");
CODEGEN_TEST(xBLEND.PD(xmm8, xmm9, 0xaa), "66 45 0f 3a 0d c1 aa"); CODEGEN_TEST(xBLEND.PD(xmm8, xmm9, 0xaa), "66 45 0f 3a 0d c1 aa");
@ -662,20 +655,6 @@ TEST(CodegenTests, AVXTest)
CODEGEN_TEST(xMOVUPS(ptr[rax], xmm5), "c5 f8 11 28"); CODEGEN_TEST(xMOVUPS(ptr[rax], xmm5), "c5 f8 11 28");
CODEGEN_TEST(xMOVAPS(xmm8, ptr[r8]), "c4 41 78 28 00"); CODEGEN_TEST(xMOVAPS(xmm8, ptr[r8]), "c4 41 78 28 00");
CODEGEN_TEST(xMOVUPS(xmm5, ptr[r9]), "c4 c1 78 10 29"); CODEGEN_TEST(xMOVUPS(xmm5, ptr[r9]), "c4 c1 78 10 29");
#ifdef ALWAYS_USE_MOVAPS
CODEGEN_TEST(xMOVAPD(xmm4, xmm8), "c5 78 29 c4");
CODEGEN_TEST(xMOVUPD(xmm1, xmm4), "c5 f8 28 cc");
CODEGEN_TEST(xMOVAPD(ptr[rcx], xmm8), "c5 78 29 01");
CODEGEN_TEST(xMOVUPD(ptr[r8], xmm11), "c4 41 78 11 18");
CODEGEN_TEST(xMOVAPD(xmm15, ptr[r9]), "c4 41 78 28 39");
CODEGEN_TEST(xMOVUPD(xmm1, ptr[rax]), "c5 f8 10 08");
CODEGEN_TEST(xMOVDQA(xmm9, xmm11), "c4 41 78 28 cb");
CODEGEN_TEST(xMOVDQU(xmm7, xmm10), "c5 78 29 d7");
CODEGEN_TEST(xMOVDQA(ptr[r9], xmm0), "c4 c1 78 29 01");
CODEGEN_TEST(xMOVDQU(ptr[r8], xmm3), "c4 c1 78 11 18");
CODEGEN_TEST(xMOVDQA(xmm8, ptr[rsi]), "c5 78 28 06");
CODEGEN_TEST(xMOVDQU(xmm7, ptr[rcx]), "c5 f8 10 39");
#else
CODEGEN_TEST(xMOVAPD(xmm4, xmm8), "c5 79 29 c4"); CODEGEN_TEST(xMOVAPD(xmm4, xmm8), "c5 79 29 c4");
CODEGEN_TEST(xMOVUPD(xmm1, xmm4), "c5 f9 28 cc"); CODEGEN_TEST(xMOVUPD(xmm1, xmm4), "c5 f9 28 cc");
CODEGEN_TEST(xMOVAPD(ptr[rcx], xmm8), "c5 79 29 01"); CODEGEN_TEST(xMOVAPD(ptr[rcx], xmm8), "c5 79 29 01");
@ -688,7 +667,6 @@ TEST(CodegenTests, AVXTest)
CODEGEN_TEST(xMOVDQU(ptr[r8], xmm3), "c4 c1 7a 7f 18"); CODEGEN_TEST(xMOVDQU(ptr[r8], xmm3), "c4 c1 7a 7f 18");
CODEGEN_TEST(xMOVDQA(xmm8, ptr[rsi]), "c5 79 6f 06"); CODEGEN_TEST(xMOVDQA(xmm8, ptr[rsi]), "c5 79 6f 06");
CODEGEN_TEST(xMOVDQU(xmm7, ptr[rcx]), "c5 fa 6f 39"); CODEGEN_TEST(xMOVDQU(xmm7, ptr[rcx]), "c5 fa 6f 39");
#endif
CODEGEN_TEST(xBLEND.PS(xmm0, xmm1, 0x55), "c4 e3 79 0c c1 55"); CODEGEN_TEST(xBLEND.PS(xmm0, xmm1, 0x55), "c4 e3 79 0c c1 55");
CODEGEN_TEST(xBLEND.PD(xmm8, xmm9, 0xaa), "c4 43 39 0d c1 aa"); CODEGEN_TEST(xBLEND.PD(xmm8, xmm9, 0xaa), "c4 43 39 0d c1 aa");