mirror of
https://github.com/PCSX2/pcsx2.git
synced 2025-12-16 04:08:48 +00:00
Common: Disable and remove ALWAYS_USE_MOVAPS
With AVX (VEX) encoding, MOVAPS is the same size as the other move instructions, so always using it no longer saves any bytes. On Nehalem (one of the main targets of SSE4), there are bypass penalties for using the wrong type of mov.
This commit is contained in:
parent
cc5f594384
commit
4662d0e5dc
@ -34,12 +34,10 @@ namespace x86Emitter
|
|||||||
__emitinline static SIMDInstructionInfo getMov(SIMDInstructionInfo::Type type)
|
__emitinline static SIMDInstructionInfo getMov(SIMDInstructionInfo::Type type)
|
||||||
{
|
{
|
||||||
switch (type) {
|
switch (type) {
|
||||||
#ifndef ALWAYS_USE_MOVAPS
|
|
||||||
case SIMDInstructionInfo::Type::Integer:
|
case SIMDInstructionInfo::Type::Integer:
|
||||||
return SIMDInstructionInfo(0x6f).p66().mov();
|
return SIMDInstructionInfo(0x6f).p66().mov();
|
||||||
case SIMDInstructionInfo::Type::Double:
|
case SIMDInstructionInfo::Type::Double:
|
||||||
return SIMDInstructionInfo(0x28).p66().mov();
|
return SIMDInstructionInfo(0x28).p66().mov();
|
||||||
#endif
|
|
||||||
default:
|
default:
|
||||||
case SIMDInstructionInfo::Type::Float:
|
case SIMDInstructionInfo::Type::Float:
|
||||||
return SIMDInstructionInfo(0x28).mov();
|
return SIMDInstructionInfo(0x28).mov();
|
||||||
@ -712,13 +710,6 @@ namespace x86Emitter
|
|||||||
SIMDInstructionInfo(0x10).mov(), SIMDInstructionInfo(0x11).mov(),
|
SIMDInstructionInfo(0x10).mov(), SIMDInstructionInfo(0x11).mov(),
|
||||||
};
|
};
|
||||||
|
|
||||||
#ifdef ALWAYS_USE_MOVAPS
|
|
||||||
const xImplSimd_MoveSSE xMOVDQA = xMOVAPS;
|
|
||||||
const xImplSimd_MoveSSE xMOVAPD = xMOVAPS;
|
|
||||||
|
|
||||||
const xImplSimd_MoveSSE xMOVDQU = xMOVUPS;
|
|
||||||
const xImplSimd_MoveSSE xMOVUPD = xMOVUPS;
|
|
||||||
#else
|
|
||||||
const xImplSimd_MoveSSE xMOVDQA = {
|
const xImplSimd_MoveSSE xMOVDQA = {
|
||||||
SIMDInstructionInfo(0x6f).p66().mov(), SIMDInstructionInfo(0x7f).p66().mov(),
|
SIMDInstructionInfo(0x6f).p66().mov(), SIMDInstructionInfo(0x7f).p66().mov(),
|
||||||
SIMDInstructionInfo(0x6f).p66().mov(), SIMDInstructionInfo(0x7f).p66().mov(),
|
SIMDInstructionInfo(0x6f).p66().mov(), SIMDInstructionInfo(0x7f).p66().mov(),
|
||||||
@ -736,7 +727,6 @@ namespace x86Emitter
|
|||||||
SIMDInstructionInfo(0x28).p66().mov(), SIMDInstructionInfo(0x29).p66().mov(),
|
SIMDInstructionInfo(0x28).p66().mov(), SIMDInstructionInfo(0x29).p66().mov(),
|
||||||
SIMDInstructionInfo(0x10).p66().mov(), SIMDInstructionInfo(0x11).p66().mov(),
|
SIMDInstructionInfo(0x10).p66().mov(), SIMDInstructionInfo(0x11).p66().mov(),
|
||||||
};
|
};
|
||||||
#endif
|
|
||||||
|
|
||||||
|
|
||||||
const xImplSimd_MovHL xMOVH = {SIMDInstructionInfo(0x16)};
|
const xImplSimd_MovHL xMOVH = {SIMDInstructionInfo(0x16)};
|
||||||
|
|||||||
@ -50,23 +50,6 @@ namespace x86Emitter
|
|||||||
template <typename T>
|
template <typename T>
|
||||||
void xWrite(T val);
|
void xWrite(T val);
|
||||||
|
|
||||||
// --------------------------------------------------------------------------------------
|
|
||||||
// ALWAYS_USE_MOVAPS [define] / AlwaysUseMovaps [const]
|
|
||||||
// --------------------------------------------------------------------------------------
|
|
||||||
// This tells the recompiler's emitter to always use movaps instead of movdqa. Both instructions
|
|
||||||
// do the exact same thing, but movaps is 1 byte shorter, and thus results in a cleaner L1 cache
|
|
||||||
// and some marginal speed gains as a result. (it's possible someday in the future the per-
|
|
||||||
// formance of the two instructions could change, so this constant is provided to restore MOVDQA
|
|
||||||
// use easily at a later time, if needed).
|
|
||||||
//
|
|
||||||
#define ALWAYS_USE_MOVAPS
|
|
||||||
|
|
||||||
#ifdef ALWAYS_USE_MOVAPS
|
|
||||||
static const bool AlwaysUseMovaps = true;
|
|
||||||
#else
|
|
||||||
static const bool AlwaysUseMovaps = false;
|
|
||||||
#endif
|
|
||||||
|
|
||||||
// --------------------------------------------------------------------------------------
|
// --------------------------------------------------------------------------------------
|
||||||
// __emitline - preprocessors definition
|
// __emitline - preprocessors definition
|
||||||
// --------------------------------------------------------------------------------------
|
// --------------------------------------------------------------------------------------
|
||||||
|
|||||||
@ -372,17 +372,10 @@ TEST(CodegenTests, SSETest)
|
|||||||
CODEGEN_TEST(xMOVDQU(ptr[r8], xmm3), "41 0f 11 18");
|
CODEGEN_TEST(xMOVDQU(ptr[r8], xmm3), "41 0f 11 18");
|
||||||
CODEGEN_TEST(xMOVDQA(xmm8, ptr[rsi]), "44 0f 28 06");
|
CODEGEN_TEST(xMOVDQA(xmm8, ptr[rsi]), "44 0f 28 06");
|
||||||
CODEGEN_TEST(xMOVDQU(xmm7, ptr[rcx]), "0f 10 39");
|
CODEGEN_TEST(xMOVDQU(xmm7, ptr[rcx]), "0f 10 39");
|
||||||
#ifdef ALWAYS_USE_MOVAPS
|
|
||||||
CODEGEN_TEST(xMOVAPD(xmm4, xmm8), "41 0f 28 e0");
|
|
||||||
CODEGEN_TEST(xMOVUPD(xmm1, xmm4), "0f 28 cc");
|
|
||||||
CODEGEN_TEST(xMOVDQA(xmm9, xmm11), "45 0f 28 cb");
|
|
||||||
CODEGEN_TEST(xMOVDQU(xmm7, xmm10), "41 0f 28 fa");
|
|
||||||
#else
|
|
||||||
CODEGEN_TEST(xMOVAPD(xmm4, xmm8), "66 41 0f 28 e0");
|
CODEGEN_TEST(xMOVAPD(xmm4, xmm8), "66 41 0f 28 e0");
|
||||||
CODEGEN_TEST(xMOVUPD(xmm1, xmm4), "66 0f 28 cc");
|
CODEGEN_TEST(xMOVUPD(xmm1, xmm4), "66 0f 28 cc");
|
||||||
CODEGEN_TEST(xMOVDQA(xmm9, xmm11), "66 45 0f 6f cb");
|
CODEGEN_TEST(xMOVDQA(xmm9, xmm11), "66 45 0f 6f cb");
|
||||||
CODEGEN_TEST(xMOVDQU(xmm7, xmm10), "66 41 0f 6f fa");
|
CODEGEN_TEST(xMOVDQU(xmm7, xmm10), "66 41 0f 6f fa");
|
||||||
#endif
|
|
||||||
|
|
||||||
CODEGEN_TEST(xBLEND.PS(xmm0, xmm1, 0x55), "66 0f 3a 0c c1 55");
|
CODEGEN_TEST(xBLEND.PS(xmm0, xmm1, 0x55), "66 0f 3a 0c c1 55");
|
||||||
CODEGEN_TEST(xBLEND.PD(xmm8, xmm9, 0xaa), "66 45 0f 3a 0d c1 aa");
|
CODEGEN_TEST(xBLEND.PD(xmm8, xmm9, 0xaa), "66 45 0f 3a 0d c1 aa");
|
||||||
@ -662,20 +655,6 @@ TEST(CodegenTests, AVXTest)
|
|||||||
CODEGEN_TEST(xMOVUPS(ptr[rax], xmm5), "c5 f8 11 28");
|
CODEGEN_TEST(xMOVUPS(ptr[rax], xmm5), "c5 f8 11 28");
|
||||||
CODEGEN_TEST(xMOVAPS(xmm8, ptr[r8]), "c4 41 78 28 00");
|
CODEGEN_TEST(xMOVAPS(xmm8, ptr[r8]), "c4 41 78 28 00");
|
||||||
CODEGEN_TEST(xMOVUPS(xmm5, ptr[r9]), "c4 c1 78 10 29");
|
CODEGEN_TEST(xMOVUPS(xmm5, ptr[r9]), "c4 c1 78 10 29");
|
||||||
#ifdef ALWAYS_USE_MOVAPS
|
|
||||||
CODEGEN_TEST(xMOVAPD(xmm4, xmm8), "c5 78 29 c4");
|
|
||||||
CODEGEN_TEST(xMOVUPD(xmm1, xmm4), "c5 f8 28 cc");
|
|
||||||
CODEGEN_TEST(xMOVAPD(ptr[rcx], xmm8), "c5 78 29 01");
|
|
||||||
CODEGEN_TEST(xMOVUPD(ptr[r8], xmm11), "c4 41 78 11 18");
|
|
||||||
CODEGEN_TEST(xMOVAPD(xmm15, ptr[r9]), "c4 41 78 28 39");
|
|
||||||
CODEGEN_TEST(xMOVUPD(xmm1, ptr[rax]), "c5 f8 10 08");
|
|
||||||
CODEGEN_TEST(xMOVDQA(xmm9, xmm11), "c4 41 78 28 cb");
|
|
||||||
CODEGEN_TEST(xMOVDQU(xmm7, xmm10), "c5 78 29 d7");
|
|
||||||
CODEGEN_TEST(xMOVDQA(ptr[r9], xmm0), "c4 c1 78 29 01");
|
|
||||||
CODEGEN_TEST(xMOVDQU(ptr[r8], xmm3), "c4 c1 78 11 18");
|
|
||||||
CODEGEN_TEST(xMOVDQA(xmm8, ptr[rsi]), "c5 78 28 06");
|
|
||||||
CODEGEN_TEST(xMOVDQU(xmm7, ptr[rcx]), "c5 f8 10 39");
|
|
||||||
#else
|
|
||||||
CODEGEN_TEST(xMOVAPD(xmm4, xmm8), "c5 79 29 c4");
|
CODEGEN_TEST(xMOVAPD(xmm4, xmm8), "c5 79 29 c4");
|
||||||
CODEGEN_TEST(xMOVUPD(xmm1, xmm4), "c5 f9 28 cc");
|
CODEGEN_TEST(xMOVUPD(xmm1, xmm4), "c5 f9 28 cc");
|
||||||
CODEGEN_TEST(xMOVAPD(ptr[rcx], xmm8), "c5 79 29 01");
|
CODEGEN_TEST(xMOVAPD(ptr[rcx], xmm8), "c5 79 29 01");
|
||||||
@ -688,7 +667,6 @@ TEST(CodegenTests, AVXTest)
|
|||||||
CODEGEN_TEST(xMOVDQU(ptr[r8], xmm3), "c4 c1 7a 7f 18");
|
CODEGEN_TEST(xMOVDQU(ptr[r8], xmm3), "c4 c1 7a 7f 18");
|
||||||
CODEGEN_TEST(xMOVDQA(xmm8, ptr[rsi]), "c5 79 6f 06");
|
CODEGEN_TEST(xMOVDQA(xmm8, ptr[rsi]), "c5 79 6f 06");
|
||||||
CODEGEN_TEST(xMOVDQU(xmm7, ptr[rcx]), "c5 fa 6f 39");
|
CODEGEN_TEST(xMOVDQU(xmm7, ptr[rcx]), "c5 fa 6f 39");
|
||||||
#endif
|
|
||||||
|
|
||||||
CODEGEN_TEST(xBLEND.PS(xmm0, xmm1, 0x55), "c4 e3 79 0c c1 55");
|
CODEGEN_TEST(xBLEND.PS(xmm0, xmm1, 0x55), "c4 e3 79 0c c1 55");
|
||||||
CODEGEN_TEST(xBLEND.PD(xmm8, xmm9, 0xaa), "c4 43 39 0d c1 aa");
|
CODEGEN_TEST(xBLEND.PD(xmm8, xmm9, 0xaa), "c4 43 39 0d c1 aa");
|
||||||
|
|||||||
Loading…
Reference in New Issue
Block a user