From 4662d0e5dcd4d65b6ae67d5700c16cf078ee20bf Mon Sep 17 00:00:00 2001 From: TellowKrinkle Date: Sun, 10 Aug 2025 01:14:36 -0500 Subject: [PATCH] Common: Disable and remove ALWAYS_USE_MOVAPS In AVX, MOVAPS is the same size as all the other instructions. In Nehalem (one of the main targets of SSE4), there are bypass penalties for using the wrong mov. --- common/emitter/simd.cpp | 10 --------- common/emitter/x86types.h | 17 -------------- .../common/x86emitter/codegen_tests_main.cpp | 22 ------------------- 3 files changed, 49 deletions(-) diff --git a/common/emitter/simd.cpp b/common/emitter/simd.cpp index 0725f086c6..a5c6ac9065 100644 --- a/common/emitter/simd.cpp +++ b/common/emitter/simd.cpp @@ -34,12 +34,10 @@ namespace x86Emitter __emitinline static SIMDInstructionInfo getMov(SIMDInstructionInfo::Type type) { switch (type) { -#ifndef ALWAYS_USE_MOVAPS case SIMDInstructionInfo::Type::Integer: return SIMDInstructionInfo(0x6f).p66().mov(); case SIMDInstructionInfo::Type::Double: return SIMDInstructionInfo(0x28).p66().mov(); -#endif default: case SIMDInstructionInfo::Type::Float: return SIMDInstructionInfo(0x28).mov(); @@ -712,13 +710,6 @@ namespace x86Emitter SIMDInstructionInfo(0x10).mov(), SIMDInstructionInfo(0x11).mov(), }; -#ifdef ALWAYS_USE_MOVAPS - const xImplSimd_MoveSSE xMOVDQA = xMOVAPS; - const xImplSimd_MoveSSE xMOVAPD = xMOVAPS; - - const xImplSimd_MoveSSE xMOVDQU = xMOVUPS; - const xImplSimd_MoveSSE xMOVUPD = xMOVUPS; -#else const xImplSimd_MoveSSE xMOVDQA = { SIMDInstructionInfo(0x6f).p66().mov(), SIMDInstructionInfo(0x7f).p66().mov(), SIMDInstructionInfo(0x6f).p66().mov(), SIMDInstructionInfo(0x7f).p66().mov(), @@ -736,7 +727,6 @@ namespace x86Emitter SIMDInstructionInfo(0x28).p66().mov(), SIMDInstructionInfo(0x29).p66().mov(), SIMDInstructionInfo(0x10).p66().mov(), SIMDInstructionInfo(0x11).p66().mov(), }; -#endif const xImplSimd_MovHL xMOVH = {SIMDInstructionInfo(0x16)}; diff --git a/common/emitter/x86types.h b/common/emitter/x86types.h index 63eee59625..d0a2f8c95e 100644 --- a/common/emitter/x86types.h +++ b/common/emitter/x86types.h @@ -50,23 +50,6 @@ namespace x86Emitter template void xWrite(T val); -// -------------------------------------------------------------------------------------- -// ALWAYS_USE_MOVAPS [define] / AlwaysUseMovaps [const] -// -------------------------------------------------------------------------------------- -// This tells the recompiler's emitter to always use movaps instead of movdqa. Both instructions -// do the exact same thing, but movaps is 1 byte shorter, and thus results in a cleaner L1 cache -// and some marginal speed gains as a result. (it's possible someday in the future the per- -// formance of the two instructions could change, so this constant is provided to restore MOVDQA -// use easily at a later time, if needed). -// -#define ALWAYS_USE_MOVAPS - -#ifdef ALWAYS_USE_MOVAPS - static const bool AlwaysUseMovaps = true; -#else - static const bool AlwaysUseMovaps = false; -#endif - // -------------------------------------------------------------------------------------- // __emitline - preprocessors definition // -------------------------------------------------------------------------------------- diff --git a/tests/ctest/common/x86emitter/codegen_tests_main.cpp b/tests/ctest/common/x86emitter/codegen_tests_main.cpp index bc19388195..aba7e15eb6 100644 --- a/tests/ctest/common/x86emitter/codegen_tests_main.cpp +++ b/tests/ctest/common/x86emitter/codegen_tests_main.cpp @@ -372,17 +372,10 @@ TEST(CodegenTests, SSETest) CODEGEN_TEST(xMOVDQU(ptr[r8], xmm3), "41 0f 11 18"); CODEGEN_TEST(xMOVDQA(xmm8, ptr[rsi]), "44 0f 28 06"); CODEGEN_TEST(xMOVDQU(xmm7, ptr[rcx]), "0f 10 39"); -#ifdef ALWAYS_USE_MOVAPS - CODEGEN_TEST(xMOVAPD(xmm4, xmm8), "41 0f 28 e0"); - CODEGEN_TEST(xMOVUPD(xmm1, xmm4), "0f 28 cc"); - CODEGEN_TEST(xMOVDQA(xmm9, xmm11), "45 0f 28 cb"); - CODEGEN_TEST(xMOVDQU(xmm7, xmm10), "41 0f 28 fa"); -#else CODEGEN_TEST(xMOVAPD(xmm4, xmm8), "66 41 0f 28 e0"); CODEGEN_TEST(xMOVUPD(xmm1, xmm4), "66 0f 28 cc"); CODEGEN_TEST(xMOVDQA(xmm9, xmm11), "66 45 0f 6f cb"); CODEGEN_TEST(xMOVDQU(xmm7, xmm10), "66 41 0f 6f fa"); -#endif CODEGEN_TEST(xBLEND.PS(xmm0, xmm1, 0x55), "66 0f 3a 0c c1 55"); CODEGEN_TEST(xBLEND.PD(xmm8, xmm9, 0xaa), "66 45 0f 3a 0d c1 aa"); @@ -662,20 +655,6 @@ TEST(CodegenTests, AVXTest) CODEGEN_TEST(xMOVUPS(ptr[rax], xmm5), "c5 f8 11 28"); CODEGEN_TEST(xMOVAPS(xmm8, ptr[r8]), "c4 41 78 28 00"); CODEGEN_TEST(xMOVUPS(xmm5, ptr[r9]), "c4 c1 78 10 29"); -#ifdef ALWAYS_USE_MOVAPS - CODEGEN_TEST(xMOVAPD(xmm4, xmm8), "c5 78 29 c4"); - CODEGEN_TEST(xMOVUPD(xmm1, xmm4), "c5 f8 28 cc"); - CODEGEN_TEST(xMOVAPD(ptr[rcx], xmm8), "c5 78 29 01"); - CODEGEN_TEST(xMOVUPD(ptr[r8], xmm11), "c4 41 78 11 18"); - CODEGEN_TEST(xMOVAPD(xmm15, ptr[r9]), "c4 41 78 28 39"); - CODEGEN_TEST(xMOVUPD(xmm1, ptr[rax]), "c5 f8 10 08"); - CODEGEN_TEST(xMOVDQA(xmm9, xmm11), "c4 41 78 28 cb"); - CODEGEN_TEST(xMOVDQU(xmm7, xmm10), "c5 78 29 d7"); - CODEGEN_TEST(xMOVDQA(ptr[r9], xmm0), "c4 c1 78 29 01"); - CODEGEN_TEST(xMOVDQU(ptr[r8], xmm3), "c4 c1 78 11 18"); - CODEGEN_TEST(xMOVDQA(xmm8, ptr[rsi]), "c5 78 28 06"); - CODEGEN_TEST(xMOVDQU(xmm7, ptr[rcx]), "c5 f8 10 39"); -#else CODEGEN_TEST(xMOVAPD(xmm4, xmm8), "c5 79 29 c4"); CODEGEN_TEST(xMOVUPD(xmm1, xmm4), "c5 f9 28 cc"); CODEGEN_TEST(xMOVAPD(ptr[rcx], xmm8), "c5 79 29 01"); @@ -688,7 +667,6 @@ TEST(CodegenTests, AVXTest) CODEGEN_TEST(xMOVDQU(ptr[r8], xmm3), "c4 c1 7a 7f 18"); CODEGEN_TEST(xMOVDQA(xmm8, ptr[rsi]), "c5 79 6f 06"); CODEGEN_TEST(xMOVDQU(xmm7, ptr[rcx]), "c5 fa 6f 39"); -#endif CODEGEN_TEST(xBLEND.PS(xmm0, xmm1, 0x55), "c4 e3 79 0c c1 55"); CODEGEN_TEST(xBLEND.PD(xmm8, xmm9, 0xaa), "c4 43 39 0d c1 aa");