pcsx2/common/emitter/implement/simd_shufflepack.h

205 lines
9.6 KiB
C++

// SPDX-FileCopyrightText: 2002-2025 PCSX2 Dev Team
// SPDX-License-Identifier: GPL-3.0+
#pragma once
namespace x86Emitter
{
// --------------------------------------------------------------------------------------
// xImplSimd_Shuffle
// --------------------------------------------------------------------------------------
struct xImplSimd_Shuffle
{
inline void _selector_assertion_check(u8 selector) const;
void PS(const xRegisterSSE& dst, const xRegisterSSE& src, u8 selector) const { PS(dst, dst, src, selector); }
void PS(const xRegisterSSE& dst, const xIndirectVoid& src, u8 selector) const { PS(dst, dst, src, selector); }
void PS(const xRegisterSSE& dst, const xRegisterSSE& src1, const xRegisterSSE& src2, u8 selector) const;
void PS(const xRegisterSSE& dst, const xRegisterSSE& src1, const xIndirectVoid& src2, u8 selector) const;
void PD(const xRegisterSSE& dst, const xRegisterSSE& src, u8 selector) const { PD(dst, dst, src, selector); }
void PD(const xRegisterSSE& dst, const xIndirectVoid& src, u8 selector) const { PD(dst, dst, src, selector); }
void PD(const xRegisterSSE& dst, const xRegisterSSE& src1, const xRegisterSSE& src2, u8 selector) const;
void PD(const xRegisterSSE& dst, const xRegisterSSE& src1, const xIndirectVoid& src2, u8 selector) const;
};
// --------------------------------------------------------------------------------------
// xImplSimd_PShuffle
// --------------------------------------------------------------------------------------
struct xImplSimd_PShuffle
{
// Copies doublewords from src and inserts them into dest at dword locations selected
// with the order operand (8 bit immediate).
const xImplSimd_2ArgImm D;
// Copies words from the low quadword of src and inserts them into the low quadword
// of dest at word locations selected with the order operand (8 bit immediate).
// The high quadword of src is copied to the high quadword of dest.
const xImplSimd_2ArgImm LW;
// Copies words from the high quadword of src and inserts them into the high quadword
// of dest at word locations selected with the order operand (8 bit immediate).
// The low quadword of src is copied to the low quadword of dest.
const xImplSimd_2ArgImm HW;
// [sSSE-3] Performs in-place shuffles of bytes in dest according to the shuffle
// control mask in src. If the most significant bit (bit[7]) of each byte of the
// shuffle control mask is set, then constant zero is written in the result byte.
// Each byte in the shuffle control mask forms an index to permute the corresponding
// byte in dest. The value of each index is the least significant 4 bits (128-bit
// operation) or 3 bits (64-bit operation) of the shuffle control byte.
//
const xImplSimd_3Arg B;
};
// --------------------------------------------------------------------------------------
// SimdImpl_PUnpack
// --------------------------------------------------------------------------------------
struct SimdImpl_PUnpack
{
// Unpack and interleave low-order bytes from src and dest into dest.
const xImplSimd_3Arg LBW;
// Unpack and interleave low-order words from src and dest into dest.
const xImplSimd_3Arg LWD;
// Unpack and interleave low-order doublewords from src and dest into dest.
const xImplSimd_3Arg LDQ;
// Unpack and interleave low-order quadwords from src and dest into dest.
const xImplSimd_3Arg LQDQ;
// Unpack and interleave high-order bytes from src and dest into dest.
const xImplSimd_3Arg HBW;
// Unpack and interleave high-order words from src and dest into dest.
const xImplSimd_3Arg HWD;
// Unpack and interleave high-order doublewords from src and dest into dest.
const xImplSimd_3Arg HDQ;
// Unpack and interleave high-order quadwords from src and dest into dest.
const xImplSimd_3Arg HQDQ;
};
// --------------------------------------------------------------------------------------
// SimdImpl_Pack
// --------------------------------------------------------------------------------------
// Pack with Signed or Unsigned Saturation
//
struct SimdImpl_Pack
{
// Converts packed signed word integers from src and dest into packed signed
// byte integers in dest, using signed saturation.
const xImplSimd_3Arg SSWB;
// Converts packed signed dword integers from src and dest into packed signed
// word integers in dest, using signed saturation.
const xImplSimd_3Arg SSDW;
// Converts packed unsigned word integers from src and dest into packed unsigned
// byte integers in dest, using unsigned saturation.
const xImplSimd_3Arg USWB;
// [SSE-4.1] Converts packed unsigned dword integers from src and dest into packed
// unsigned word integers in dest, using signed saturation.
const xImplSimd_3Arg USDW;
};
// --------------------------------------------------------------------------------------
// SimdImpl_Unpack
// --------------------------------------------------------------------------------------
struct xImplSimd_Unpack
{
// Unpacks the high doubleword [single-precision] values from src and dest into
// dest, such that the result of dest looks like this:
// dest[0] <- dest[2]
// dest[1] <- src[2]
// dest[2] <- dest[3]
// dest[3] <- src[3]
//
const xImplSimd_3Arg HPS;
// Unpacks the high quadword [double-precision] values from src and dest into
// dest, such that the result of dest looks like this:
// dest.lo <- dest.hi
// dest.hi <- src.hi
//
const xImplSimd_3Arg HPD;
// Unpacks the low doubleword [single-precision] values from src and dest into
// dest, such that the result of dest looks like this:
// dest[3] <- src[1]
// dest[2] <- dest[1]
// dest[1] <- src[0]
// dest[0] <- dest[0]
//
const xImplSimd_3Arg LPS;
// Unpacks the low quadword [double-precision] values from src and dest into
// dest, effectively moving the low portion of src into the upper portion of dest.
// The result of dest is loaded as such:
// dest.hi <- src.lo
// dest.lo <- dest.lo [remains unchanged!]
//
const xImplSimd_3Arg LPD;
};
// --------------------------------------------------------------------------------------
// SimdImpl_PInsert
// --------------------------------------------------------------------------------------
// PINSRW/B/D [all but Word form are SSE4.1 only!]
//
struct xImplSimd_PInsert
{
void B(const xRegisterSSE& dst, const xRegister32& src, u8 imm8) const { B(dst, dst, src, imm8); }
void B(const xRegisterSSE& dst, const xIndirect8& src, u8 imm8) const { B(dst, dst, src, imm8); }
void B(const xRegisterSSE& dst, const xRegisterSSE& src1, const xRegister32& src2, u8 imm8) const;
void B(const xRegisterSSE& dst, const xRegisterSSE& src1, const xIndirect8& src2, u8 imm8) const;
void W(const xRegisterSSE& dst, const xRegister32& src, u8 imm8) const { W(dst, dst, src, imm8); }
void W(const xRegisterSSE& dst, const xIndirect16& src, u8 imm8) const { W(dst, dst, src, imm8); }
void W(const xRegisterSSE& dst, const xRegisterSSE& src1, const xRegister32& src2, u8 imm8) const;
void W(const xRegisterSSE& dst, const xRegisterSSE& src1, const xIndirect16& src2, u8 imm8) const;
void D(const xRegisterSSE& dst, const xRegister32& src, u8 imm8) const { D(dst, dst, src, imm8); }
void D(const xRegisterSSE& dst, const xIndirect32& src, u8 imm8) const { D(dst, dst, src, imm8); }
void D(const xRegisterSSE& dst, const xRegisterSSE& src1, const xRegister32& src2, u8 imm8) const;
void D(const xRegisterSSE& dst, const xRegisterSSE& src1, const xIndirect32& src2, u8 imm8) const;
void Q(const xRegisterSSE& dst, const xRegister64& src, u8 imm8) const { Q(dst, dst, src, imm8); }
void Q(const xRegisterSSE& dst, const xIndirect64& src, u8 imm8) const { Q(dst, dst, src, imm8); }
void Q(const xRegisterSSE& dst, const xRegisterSSE& src1, const xRegister64& src2, u8 imm8) const;
void Q(const xRegisterSSE& dst, const xRegisterSSE& src1, const xIndirect64& src2, u8 imm8) const;
};
//////////////////////////////////////////////////////////////////////////////////////////
// PEXTRW/B/D [all but Word form are SSE4.1 only!]
//
// Note: Word form's indirect memory form is only available in SSE4.1.
//
struct SimdImpl_PExtract
{
// [SSE-4.1] Copies the byte element specified by imm8 from src to dest. The upper bits
// of dest are zero-extended (cleared). This can be used to extract any single packed
// byte value from src into an x86 32 bit register.
void B(const xRegister32& dst, const xRegisterSSE& src, u8 imm8) const;
void B(const xIndirect8& dst, const xRegisterSSE& src, u8 imm8) const;
// Copies the word element specified by imm8 from src to dest. The upper bits
// of dest are zero-extended (cleared). This can be used to extract any single packed
// word value from src into an x86 32 bit register.
//
// [SSE-4.1] Note: Indirect memory forms of this instruction are an SSE-4.1 extension!
//
void W(const xRegister32& dst, const xRegisterSSE& src, u8 imm8) const;
void W(const xIndirect16& dst, const xRegisterSSE& src, u8 imm8) const;
// [SSE-4.1] Copies the dword element specified by imm8 from src to dest. This can be
// used to extract any single packed dword value from src into an x86 32 bit register.
void D(const xRegister32& dst, const xRegisterSSE& src, u8 imm8) const;
void D(const xIndirect32& dst, const xRegisterSSE& src, u8 imm8) const;
// [SSE-4.1] Copies the dword element specified by imm8 from src to dest. This can be
// used to extract any single packed dword value from src into an x86 64 bit register.
void Q(const xRegister64& dst, const xRegisterSSE& src, u8 imm8) const;
void Q(const xIndirect64& dst, const xRegisterSSE& src, u8 imm8) const;
};
} // namespace x86Emitter