3rdparty: Update xbyak to v7.30

Signed-off-by: SternXD <stern@sidestore.io>
This commit is contained in:
SternXD 2025-11-26 14:23:39 -05:00 committed by Ty
parent cf0bf4db5a
commit ee6b080fa2
3 changed files with 262 additions and 185 deletions

View File

@ -123,8 +123,10 @@
#define XBYAK_TLS thread_local #define XBYAK_TLS thread_local
#define XBYAK_VARIADIC_TEMPLATE #define XBYAK_VARIADIC_TEMPLATE
#define XBYAK_NOEXCEPT noexcept #define XBYAK_NOEXCEPT noexcept
#define XBYAK_OVERRIDE override
#else #else
#define XBYAK_NOEXCEPT throw() #define XBYAK_NOEXCEPT throw()
#define XBYAK_OVERRIDE
#endif #endif
// require c++14 or later // require c++14 or later
@ -161,7 +163,7 @@ namespace Xbyak {
enum { enum {
DEFAULT_MAX_CODE_SIZE = 4096, DEFAULT_MAX_CODE_SIZE = 4096,
VERSION = 0x7270 /* 0xABCD = A.BC(.D) */ VERSION = 0x7300 /* 0xABCD = A.BC(.D) */
}; };
#ifndef MIE_INTEGER_TYPE_DEFINED #ifndef MIE_INTEGER_TYPE_DEFINED
@ -340,7 +342,7 @@ public:
} }
} }
operator int() const { return err_; } operator int() const { return err_; }
const char *what() const XBYAK_NOEXCEPT const char *what() const XBYAK_NOEXCEPT XBYAK_OVERRIDE
{ {
return ConvertErrorToString(err_); return ConvertErrorToString(err_);
} }
@ -384,11 +386,6 @@ inline void AlignedFree(void *p)
#endif #endif
} }
template<class To, class From>
inline const To CastTo(From p) XBYAK_NOEXCEPT
{
return (const To)(size_t)(p);
}
namespace inner { namespace inner {
#ifdef _WIN32 #ifdef _WIN32
@ -434,6 +431,14 @@ enum LabelMode {
LaddTop // (addr + top) for mov(reg, label) with AutoGrow LaddTop // (addr + top) for mov(reg, label) with AutoGrow
}; };
enum AddressMode {
M_none,
M_ModRM,
M_64bitDisp,
M_rip,
M_ripAddr
};
} // inner } // inner
/* /*
@ -487,7 +492,7 @@ class MmapAllocator : public Allocator {
AllocationList allocList_; AllocationList allocList_;
public: public:
explicit MmapAllocator(const std::string& name = "xbyak") : name_(name) {} explicit MmapAllocator(const std::string& name = "xbyak") : name_(name) {}
uint8_t *alloc(size_t size) uint8_t *alloc(size_t size) XBYAK_OVERRIDE
{ {
const size_t alignedSizeM1 = inner::getPageSize() - 1; const size_t alignedSizeM1 = inner::getPageSize() - 1;
size = (size + alignedSizeM1) & ~alignedSizeM1; size = (size + alignedSizeM1) & ~alignedSizeM1;
@ -526,7 +531,7 @@ public:
#endif #endif
return (uint8_t*)p; return (uint8_t*)p;
} }
void free(uint8_t *p) void free(uint8_t *p) XBYAK_OVERRIDE
{ {
if (p == 0) return; if (p == 0) return;
AllocationList::iterator i = allocList_.find((uintptr_t)p); AllocationList::iterator i = allocList_.find((uintptr_t)p);
@ -903,30 +908,6 @@ struct Reg64 : public Reg32e {
explicit XBYAK_CONSTEXPR Reg64(int idx = 0) : Reg32e(idx, 64) {} explicit XBYAK_CONSTEXPR Reg64(int idx = 0) : Reg32e(idx, 64) {}
}; };
struct RegRip { struct RegRip {
int64_t disp_;
const Label* label_;
bool isAddr_;
explicit XBYAK_CONSTEXPR RegRip(int64_t disp = 0, const Label* label = 0, bool isAddr = false) : disp_(disp), label_(label), isAddr_(isAddr) {}
friend const RegRip operator+(const RegRip& r, int disp) {
return RegRip(r.disp_ + disp, r.label_, r.isAddr_);
}
friend const RegRip operator-(const RegRip& r, int disp) {
return RegRip(r.disp_ - disp, r.label_, r.isAddr_);
}
friend const RegRip operator+(const RegRip& r, int64_t disp) {
return RegRip(r.disp_ + disp, r.label_, r.isAddr_);
}
friend const RegRip operator-(const RegRip& r, int64_t disp) {
return RegRip(r.disp_ - disp, r.label_, r.isAddr_);
}
friend const RegRip operator+(const RegRip& r, const Label& label) {
if (r.label_ || r.isAddr_) XBYAK_THROW_RET(ERR_BAD_ADDRESSING, RegRip());
return RegRip(r.disp_, &label);
}
friend const RegRip operator+(const RegRip& r, const void *addr) {
if (r.label_ || r.isAddr_) XBYAK_THROW_RET(ERR_BAD_ADDRESSING, RegRip());
return RegRip(r.disp_ + (int64_t)addr, 0, true);
}
}; };
#endif #endif
@ -987,17 +968,30 @@ public:
}; };
#endif #endif
/*
pattern
[base]? [+index[*scale]]? [+/-disp]* [+label]?
rip [+/-disp]* [+label]?
rip+disp if backward reference then use label.getAddress()
rip+label if forward reference
[&var]?[+/-disp]*
*/
class RegExp { class RegExp {
friend class Address;
public: public:
#ifdef XBYAK64 #ifdef XBYAK64
enum { i32e = 32 | 64 }; enum { i32e = 32 | 64 };
#else #else
enum { i32e = 32 }; enum { i32e = 32 };
#endif #endif
XBYAK_CONSTEXPR RegExp(size_t disp = 0) : scale_(0), disp_(disp) { } XBYAK_CONSTEXPR RegExp() : scale_(0), disp_(0), label_(0), rip_(false), setLabel_(false) { }
XBYAK_CONSTEXPR RegExp(size_t disp) : scale_(0), disp_(disp), label_(0), rip_(false), setLabel_(false) { }
XBYAK_CONSTEXPR RegExp(const Reg& r, int scale = 1) XBYAK_CONSTEXPR RegExp(const Reg& r, int scale = 1)
: scale_(scale) : scale_(scale)
, disp_(0) , disp_(0)
, label_(0)
, rip_(false)
, setLabel_(false)
{ {
if (!r.isREG(i32e) && !r.is(Reg::XMM|Reg::YMM|Reg::ZMM|Reg::TMM)) XBYAK_THROW(ERR_BAD_SIZE_OF_REGISTER) if (!r.isREG(i32e) && !r.is(Reg::XMM|Reg::YMM|Reg::ZMM|Reg::TMM)) XBYAK_THROW(ERR_BAD_SIZE_OF_REGISTER)
if (scale == 0) return; if (scale == 0) return;
@ -1008,6 +1002,26 @@ public:
base_ = r; base_ = r;
} }
} }
RegExp(Label& label);
RegExp(const void *addr)
: scale_(1)
, disp_(size_t(addr))
, label_(0)
, rip_(false)
, setLabel_(true)
{
}
#ifdef XBYAK64
RegExp(const RegRip& /*rip*/)
: scale_(0)
, disp_(0)
, label_(0)
, rip_(true)
, setLabel_(false)
{
}
#endif
bool isVsib(int bit = 128 | 256 | 512) const { return index_.isBit(bit); } bool isVsib(int bit = 128 | 256 | 512) const { return index_.isBit(bit); }
RegExp optimize() const RegExp optimize() const
{ {
@ -1025,6 +1039,8 @@ public:
} }
const Reg& getBase() const { return base_; } const Reg& getBase() const { return base_; }
const Reg& getIndex() const { return index_; } const Reg& getIndex() const { return index_; }
const Label *getLabel() const { return label_; }
bool isOnlyDisp() const { return !base_.getBit() && !index_.getBit(); } // for mov eax
int getScale() const { return scale_; } int getScale() const { return scale_; }
size_t getDisp() const { return disp_; } size_t getDisp() const { return disp_; }
XBYAK_CONSTEXPR void verify() const XBYAK_CONSTEXPR void verify() const
@ -1045,13 +1061,22 @@ private:
Reg base_; Reg base_;
Reg index_; Reg index_;
int scale_; int scale_;
size_t disp_; size_t disp_; // absolute address
Label *label_;
bool rip_;
bool setLabel_; // disp_ contains the address of label
}; };
inline RegExp operator+(const RegExp& a, const RegExp& b) inline RegExp operator+(const RegExp& a, const RegExp& b)
{ {
if (a.index_.getBit() && b.index_.getBit()) XBYAK_THROW_RET(ERR_BAD_ADDRESSING, RegExp()) if (a.index_.getBit() && b.index_.getBit()) XBYAK_THROW_RET(ERR_BAD_ADDRESSING, RegExp())
if (a.label_ && b.label_) XBYAK_THROW_RET(ERR_BAD_ADDRESSING, RegExp())
if (b.rip_) XBYAK_THROW_RET(ERR_BAD_ADDRESSING, RegExp())
if (a.rip_ && !b.isOnlyDisp()) XBYAK_THROW_RET(ERR_BAD_ADDRESSING, RegExp())
if (a.setLabel_ && b.setLabel_) XBYAK_THROW_RET(ERR_BAD_ADDRESSING, RegExp())
RegExp ret = a; RegExp ret = a;
if (ret.label_ == 0) ret.label_ = b.label_;
if (ret.setLabel_ == 0) ret.setLabel_ = b.setLabel_;
if (!ret.index_.getBit()) { ret.index_ = b.index_; ret.scale_ = b.scale_; } if (!ret.index_.getBit()) { ret.index_ = b.index_; ret.scale_ = b.scale_; }
if (b.base_.getBit()) { if (b.base_.getBit()) {
if (ret.base_.getBit()) { if (ret.base_.getBit()) {
@ -1076,6 +1101,9 @@ inline RegExp operator*(int scale, const Reg& r)
{ {
return r * scale; return r * scale;
} }
// backward compatibility for eax+0
inline RegExp operator+(const RegExp& a, size_t b) { return a + RegExp(b); }
inline RegExp operator-(const RegExp& e, size_t disp) inline RegExp operator-(const RegExp& e, size_t disp)
{ {
RegExp ret = e; RegExp ret = e;
@ -1323,33 +1351,34 @@ public:
class Address : public Operand { class Address : public Operand {
public: public:
enum Mode {
M_ModRM,
M_64bitDisp,
M_rip,
M_ripAddr
};
XBYAK_CONSTEXPR Address(uint32_t sizeBit, bool broadcast, const RegExp& e) XBYAK_CONSTEXPR Address(uint32_t sizeBit, bool broadcast, const RegExp& e)
: Operand(0, MEM, sizeBit), e_(e), label_(0), mode_(M_ModRM), immSize(0), disp8N(0), permitVsib(false), broadcast_(broadcast), optimize_(true) : Operand(0, MEM, sizeBit), e_(e), label_(e.label_), mode_(), immSize(0), disp8N(0), permitVsib(false), broadcast_(broadcast), optimize_(true)
{ {
if (e.rip_) {
mode_ = (e.label_ || e.setLabel_) ? inner::M_ripAddr : inner::M_rip;
} else {
#ifdef XBYAK64
uint64_t disp = e.getDisp();
if (e.isOnlyDisp() && ((0x80000000 <= disp && disp <= 0xffffffff80000000) || e.getLabel())) {
mode_ = inner::M_64bitDisp;
} else
#endif
{
mode_ = inner::M_ModRM;
}
}
e_.verify(); e_.verify();
} }
#ifdef XBYAK64
explicit XBYAK_CONSTEXPR Address(size_t disp)
: Operand(0, MEM, 64), e_(disp), label_(0), mode_(M_64bitDisp), immSize(0), disp8N(0), permitVsib(false), broadcast_(false), optimize_(true) { }
XBYAK_CONSTEXPR Address(uint32_t sizeBit, bool broadcast, const RegRip& addr)
: Operand(0, MEM, sizeBit), e_(addr.disp_), label_(addr.label_), mode_(addr.isAddr_ ? M_ripAddr : M_rip), immSize(0), disp8N(0), permitVsib(false), broadcast_(broadcast), optimize_(true) { }
#endif
RegExp getRegExp() const RegExp getRegExp() const
{ {
return optimize_ ? e_.optimize() : e_; return optimize_ ? e_.optimize() : e_;
} }
Address cloneNoOptimize() const { Address addr = *this; addr.optimize_ = false; return addr; } Address cloneNoOptimize() const { Address addr = *this; addr.optimize_ = false; return addr; }
Mode getMode() const { return mode_; } inner::AddressMode getMode() const { return mode_; }
bool is32bit() const { return e_.getBase().getBit() == 32 || e_.getIndex().getBit() == 32; } bool is32bit() const { return e_.getBase().getBit() == 32 || e_.getIndex().getBit() == 32; }
bool isOnlyDisp() const { return !e_.getBase().getBit() && !e_.getIndex().getBit(); } // for mov eax bool isOnlyDisp() const { return e_.isOnlyDisp(); }
size_t getDisp() const { return e_.getDisp(); } size_t getDisp() const { return e_.getDisp(); }
bool is64bitDisp() const { return mode_ == M_64bitDisp; } // for moffset bool is64bitDisp() const { return mode_ == inner::M_64bitDisp; } // for moffset
bool isBroadcast() const { return broadcast_; } bool isBroadcast() const { return broadcast_; }
bool hasRex2() const { return e_.getBase().hasRex2() || e_.getIndex().hasRex2(); } bool hasRex2() const { return e_.getBase().hasRex2() || e_.getIndex().hasRex2(); }
const Label* getLabel() const { return label_; } const Label* getLabel() const { return label_; }
@ -1362,7 +1391,7 @@ public:
private: private:
RegExp e_; RegExp e_;
const Label* label_; const Label* label_;
Mode mode_; inner::AddressMode mode_;
public: public:
int immSize; // the size of immediate value of nmemonics (0, 1, 2, 4) int immSize; // the size of immediate value of nmemonics (0, 1, 2, 4)
int disp8N; // 0(normal), 1(force disp32), disp8N = {2, 4, 8} int disp8N; // 0(normal), 1(force disp32), disp8N = {2, 4, 8}
@ -1406,21 +1435,13 @@ public:
{ {
return Address(bit_, broadcast_, e); return Address(bit_, broadcast_, e);
} }
Address operator[](const void *disp) const
{
return Address(bit_, broadcast_, RegExp(reinterpret_cast<size_t>(disp)));
}
#ifdef XBYAK64
Address operator[](uint64_t disp) const { return Address(disp); }
Address operator[](const RegRip& addr) const { return Address(bit_, broadcast_, addr); }
#endif
}; };
struct JmpLabel { struct JmpLabel {
size_t endOfJmp; /* offset from top to the end address of jmp */ size_t endOfJmp; /* offset from top to the end address of jmp */
int jmpSize; int jmpSize;
inner::LabelMode mode; inner::LabelMode mode;
size_t disp; // disp for [rip + disp] size_t disp; // disp for [rip + disp] or [forward ref label + disp]
explicit JmpLabel(size_t endOfJmp = 0, int jmpSize = 0, inner::LabelMode mode = inner::LasIs, size_t disp = 0) explicit JmpLabel(size_t endOfJmp = 0, int jmpSize = 0, inner::LabelMode mode = inner::LasIs, size_t disp = 0)
: endOfJmp(endOfJmp), jmpSize(jmpSize), mode(mode), disp(disp) : endOfJmp(endOfJmp), jmpSize(jmpSize), mode(mode), disp(disp)
{ {
@ -1440,6 +1461,7 @@ public:
~Label(); ~Label();
void clear() { mgr = 0; id = 0; } void clear() { mgr = 0; id = 0; }
int getId() const { return id; } int getId() const { return id; }
bool isDefined() const;
const uint8_t *getAddress() const; const uint8_t *getAddress() const;
// backward compatibility // backward compatibility
@ -1456,6 +1478,22 @@ public:
} }
}; };
inline RegExp::RegExp(Label& label)
: scale_(1)
, disp_(0)
, label_(0)
, rip_(false)
, setLabel_(true)
{
const uint8_t *addr = label.getAddress();
if (addr) {
disp_ = size_t(addr);
label_ = 0;
} else {
label_ = &label;
}
}
class LabelManager { class LabelManager {
// for string label // for string label
struct SlabelVal { struct SlabelVal {
@ -1517,6 +1555,9 @@ class LabelManager {
#endif #endif
if (jmp->jmpSize == 1 && !inner::IsInDisp8((uint32_t)disp)) XBYAK_THROW(ERR_LABEL_IS_TOO_FAR) if (jmp->jmpSize == 1 && !inner::IsInDisp8((uint32_t)disp)) XBYAK_THROW(ERR_LABEL_IS_TOO_FAR)
} }
if (jmp->mode != inner::LasIs) {
disp += jmp->disp;
}
if (base_->isAutoGrow()) { if (base_->isAutoGrow()) {
base_->save(offset, disp, jmp->jmpSize, jmp->mode); base_->save(offset, disp, jmp->jmpSize, jmp->mode);
} else { } else {
@ -1673,8 +1714,13 @@ public:
bool hasUndefClabel() const { return hasUndefinedLabel_inner(clabelUndefList_); } bool hasUndefClabel() const { return hasUndefinedLabel_inner(clabelUndefList_); }
const uint8_t *getCode() const { return base_->getCode(); } const uint8_t *getCode() const { return base_->getCode(); }
bool isReady() const { return !base_->isAutoGrow() || base_->isCalledCalcJmpAddress(); } bool isReady() const { return !base_->isAutoGrow() || base_->isCalledCalcJmpAddress(); }
bool isDefined(const Label& label) const { return clabelDefList_.find(label.id) != clabelDefList_.end(); }
}; };
inline bool Label::isDefined() const
{
return mgr && mgr->isDefined(*this);
}
inline Label::Label(const Label& rhs) inline Label::Label(const Label& rhs)
{ {
id = rhs.id; id = rhs.id;
@ -2010,8 +2056,11 @@ private:
{ {
db(static_cast<uint8_t>((mod << 6) | ((r1 & 7) << 3) | (r2 & 7))); db(static_cast<uint8_t>((mod << 6) | ((r1 & 7) << 3) | (r2 & 7)));
} }
void setSIB(const RegExp& e, int reg, int disp8N = 0) void setSIB(const Address& addr, int reg)
{ {
const RegExp& e = addr.getRegExp();
const Label *label = e.getLabel();
int disp8N = addr.disp8N;
uint64_t disp64 = e.getDisp(); uint64_t disp64 = e.getDisp();
#if defined(XBYAK64) && !defined(__ILP32__) #if defined(XBYAK64) && !defined(__ILP32__)
#ifdef XBYAK_OLD_DISP_CHECK #ifdef XBYAK_OLD_DISP_CHECK
@ -2034,8 +2083,10 @@ private:
mod00 = 0, mod01 = 1, mod10 = 2 mod00 = 0, mod01 = 1, mod10 = 2
}; };
int mod = mod10; // disp32 int mod = mod10; // disp32
if (!baseBit || ((baseIdx & 7) != Operand::EBP && disp == 0)) { if (!baseBit || ((baseIdx & 7) != Operand::EBP && (label == 0 && disp == 0))) {
mod = mod00; mod = mod00;
} else if (label) {
// always disp32
} else { } else {
if (disp8N == 0) { if (disp8N == 0) {
if (inner::IsInDisp8(disp)) { if (inner::IsInDisp8(disp)) {
@ -2069,9 +2120,13 @@ private:
if (mod == mod01) { if (mod == mod01) {
db(disp); db(disp);
} else if (mod == mod10 || (mod == mod00 && !baseBit)) { } else if (mod == mod10 || (mod == mod00 && !baseBit)) {
if (label) {
putL_inner(*label, false, e.getDisp() - addr.immSize, 4);
} else {
dd(disp); dd(disp);
} }
} }
}
LabelManager labelMgr_; LabelManager labelMgr_;
void writeCode(uint64_t type, const Reg& r, int code, bool rex2 = false) void writeCode(uint64_t type, const Reg& r, int code, bool rex2 = false)
{ {
@ -2119,7 +2174,7 @@ private:
// for only MPX(bnd*) // for only MPX(bnd*)
void opMIB(const Address& addr, const Reg& reg, uint64_t type, int code) void opMIB(const Address& addr, const Reg& reg, uint64_t type, int code)
{ {
if (addr.getMode() != Address::M_ModRM) XBYAK_THROW(ERR_INVALID_MIB_ADDRESS) if (addr.getMode() != inner::M_ModRM) XBYAK_THROW(ERR_INVALID_MIB_ADDRESS)
opMR(addr.cloneNoOptimize(), reg, type, code); opMR(addr.cloneNoOptimize(), reg, type, code);
} }
void makeJmp(uint32_t disp, LabelType type, uint8_t shortCode, uint8_t longCode, uint8_t longPref) void makeJmp(uint32_t disp, LabelType type, uint8_t shortCode, uint8_t longCode, uint8_t longPref)
@ -2188,15 +2243,15 @@ private:
void opAddr(const Address &addr, int reg) void opAddr(const Address &addr, int reg)
{ {
if (!addr.permitVsib && addr.isVsib()) XBYAK_THROW(ERR_BAD_VSIB_ADDRESSING) if (!addr.permitVsib && addr.isVsib()) XBYAK_THROW(ERR_BAD_VSIB_ADDRESSING)
if (addr.getMode() == Address::M_ModRM) { if (addr.getMode() == inner::M_ModRM) {
setSIB(addr.getRegExp(), reg, addr.disp8N); setSIB(addr, reg);
} else if (addr.getMode() == Address::M_rip || addr.getMode() == Address::M_ripAddr) { } else if (addr.getMode() == inner::M_rip || addr.getMode() == inner::M_ripAddr) {
setModRM(0, reg, 5); setModRM(0, reg, 5);
if (addr.getLabel()) { // [rip + Label] if (addr.getLabel()) { // [rip + Label]
putL_inner(*addr.getLabel(), true, addr.getDisp() - addr.immSize); putL_inner(*addr.getLabel(), true, addr.getDisp() - addr.immSize, 4);
} else { } else {
size_t disp = addr.getDisp(); size_t disp = addr.getDisp();
if (addr.getMode() == Address::M_ripAddr) { if (addr.getMode() == inner::M_ripAddr) {
if (isAutoGrow()) XBYAK_THROW(ERR_INVALID_RIP_IN_AUTO_GROW) if (isAutoGrow()) XBYAK_THROW(ERR_INVALID_RIP_IN_AUTO_GROW)
disp -= (size_t)getCurr() + 4 + addr.immSize; disp -= (size_t)getCurr() + 4 + addr.immSize;
} }
@ -2448,9 +2503,9 @@ private:
return bit / 8; return bit / 8;
} }
template<class T> template<class T>
void putL_inner(T& label, bool relative = false, size_t disp = 0) void putL_inner(T& label, bool relative = false, size_t disp = 0, int jmpSize = (int)sizeof(size_t))
{ {
const int jmpSize = relative ? 4 : (int)sizeof(size_t); if (relative) jmpSize = 4;
if (isAutoGrow() && size_ + 16 >= maxSize_) growMemory(); if (isAutoGrow() && size_ + 16 >= maxSize_) growMemory();
size_t offset = 0; size_t offset = 0;
if (labelMgr_.getOffset(&offset, label)) { if (labelMgr_.getOffset(&offset, label)) {
@ -3028,7 +3083,11 @@ public:
if (code) { if (code) {
rex(*reg); rex(*reg);
db(op1.isREG(8) ? 0xA0 : op1.isREG() ? 0xA1 : op2.isREG(8) ? 0xA2 : 0xA3); db(op1.isREG(8) ? 0xA0 : op1.isREG() ? 0xA1 : op2.isREG(8) ? 0xA2 : 0xA3);
if (addr->getLabel()) {
putL_inner(*addr->getLabel(), false, addr->getDisp() - addr->immSize, 8);
} else {
db(addr->getDisp(), 8); db(addr->getDisp(), 8);
}
} else { } else {
XBYAK_THROW(ERR_BAD_COMBINATION) XBYAK_THROW(ERR_BAD_COMBINATION)
} }
@ -3037,7 +3096,11 @@ public:
if (code && addr->isOnlyDisp()) { if (code && addr->isOnlyDisp()) {
rex(*reg, *addr); rex(*reg, *addr);
db(code | (reg->isBit(8) ? 0 : 1)); db(code | (reg->isBit(8) ? 0 : 1));
if (addr->getLabel()) {
putL_inner(*addr->getLabel(), false, addr->getDisp() - addr->immSize);
} else {
dd(static_cast<uint32_t>(addr->getDisp())); dd(static_cast<uint32_t>(addr->getDisp()));
}
} else } else
#endif #endif
{ {

View File

@ -1,4 +1,4 @@
const char *getVersionString() const { return "7.27"; } const char *getVersionString() const { return "7.30"; }
void aadd(const Address& addr, const Reg32e &reg) { opMR(addr, reg, T_0F38, 0x0FC, T_APX); } void aadd(const Address& addr, const Reg32e &reg) { opMR(addr, reg, T_0F38, 0x0FC, T_APX); }
void aand(const Address& addr, const Reg32e &reg) { opMR(addr, reg, T_0F38|T_66, 0x0FC, T_APX|T_66); } void aand(const Address& addr, const Reg32e &reg) { opMR(addr, reg, T_0F38|T_66, 0x0FC, T_APX|T_66); }
void adc(const Operand& op, uint32_t imm) { opOI(op, imm, 0x10, 2); } void adc(const Operand& op, uint32_t imm) { opOI(op, imm, 0x10, 2); }
@ -1878,6 +1878,7 @@ void cmpxchg16b(const Address& addr) { opMR(addr, Reg64(1), T_0F, 0xC7); }
void fxrstor64(const Address& addr) { opMR(addr, Reg64(1), T_0F, 0xAE); } void fxrstor64(const Address& addr) { opMR(addr, Reg64(1), T_0F, 0xAE); }
void movq(const Reg64& reg, const Mmx& mmx) { if (mmx.isXMM()) db(0x66); opSSE(mmx, reg, T_0F, 0x7E); } void movq(const Reg64& reg, const Mmx& mmx) { if (mmx.isXMM()) db(0x66); opSSE(mmx, reg, T_0F, 0x7E); }
void movq(const Mmx& mmx, const Reg64& reg) { if (mmx.isXMM()) db(0x66); opSSE(mmx, reg, T_0F, 0x6E); } void movq(const Mmx& mmx, const Reg64& reg) { if (mmx.isXMM()) db(0x66); opSSE(mmx, reg, T_0F, 0x6E); }
void movrs(const Reg& reg, const Address& addr) { opMR(addr, reg, T_0F38, reg.isBit(8) ? 0x8A : 0x8B); }
void movsxd(const Reg64& reg, const Operand& op) { if (!op.isBit(32)) XBYAK_THROW(ERR_BAD_COMBINATION) opRO(reg, op, T_ALLOW_DIFF_SIZE, 0x63); } void movsxd(const Reg64& reg, const Operand& op) { if (!op.isBit(32)) XBYAK_THROW(ERR_BAD_COMBINATION) opRO(reg, op, T_ALLOW_DIFF_SIZE, 0x63); }
void pextrq(const Operand& op, const Xmm& xmm, uint8_t imm) { if (!op.isREG(64) && !op.isMEM()) XBYAK_THROW(ERR_BAD_COMBINATION) opSSE(Reg64(xmm.getIdx()), op, T_66 | T_0F3A, 0x16, 0, imm); } void pextrq(const Operand& op, const Xmm& xmm, uint8_t imm) { if (!op.isREG(64) && !op.isMEM()) XBYAK_THROW(ERR_BAD_COMBINATION) opSSE(Reg64(xmm.getIdx()), op, T_66 | T_0F3A, 0x16, 0, imm); }
void pinsrq(const Xmm& xmm, const Operand& op, uint8_t imm) { if (!op.isREG(64) && !op.isMEM()) XBYAK_THROW(ERR_BAD_COMBINATION) opSSE(Reg64(xmm.getIdx()), op, T_66 | T_0F3A, 0x22, 0, imm); } void pinsrq(const Xmm& xmm, const Operand& op, uint8_t imm) { if (!op.isREG(64) && !op.isMEM()) XBYAK_THROW(ERR_BAD_COMBINATION) opSSE(Reg64(xmm.getIdx()), op, T_66 | T_0F3A, 0x22, 0, imm); }
@ -2684,6 +2685,8 @@ void vucomxsh(const Xmm& x, const Operand& op) { opAVX_X_XM_IMM(x, op, T_N2|T_F3
void vucomxss(const Xmm& x, const Operand& op) { opAVX_X_XM_IMM(x, op, T_N4|T_F3|T_0F|T_W0|T_SAE_X|T_MUST_EVEX, 0x2E); } void vucomxss(const Xmm& x, const Operand& op) { opAVX_X_XM_IMM(x, op, T_N4|T_F3|T_0F|T_W0|T_SAE_X|T_MUST_EVEX, 0x2E); }
#ifdef XBYAK64 #ifdef XBYAK64
void kmovq(const Reg64& r, const Opmask& k) { opKmov(k, r, true, 64); } void kmovq(const Reg64& r, const Opmask& k) { opKmov(k, r, true, 64); }
void tcvtrowd2ps(const Zmm& z, const Tmm& t, const Reg32& r) { opVex(z, &r, t, T_F3|T_0F38|T_W0|T_MUST_EVEX, 0x4A); }
void tcvtrowd2ps(const Zmm& z, const Tmm& t, uint8_t imm) { opVex(z, 0, t, T_F3|T_0F3A|T_W0|T_MUST_EVEX, 0x07, imm); }
void tcvtrowps2bf16h(const Zmm& z, const Tmm& t, const Reg32& r) { opVex(z, &r, t, T_F2|T_0F38|T_W0|T_MUST_EVEX, 0x6D); } void tcvtrowps2bf16h(const Zmm& z, const Tmm& t, const Reg32& r) { opVex(z, &r, t, T_F2|T_0F38|T_W0|T_MUST_EVEX, 0x6D); }
void tcvtrowps2bf16h(const Zmm& z, const Tmm& t, uint8_t imm) { opVex(z, 0, t, T_F2|T_0F3A|T_W0|T_MUST_EVEX, 0x07, imm); } void tcvtrowps2bf16h(const Zmm& z, const Tmm& t, uint8_t imm) { opVex(z, 0, t, T_F2|T_0F3A|T_W0|T_MUST_EVEX, 0x07, imm); }
void tcvtrowps2bf16l(const Zmm& z, const Tmm& t, const Reg32& r) { opVex(z, &r, t, T_F3|T_0F38|T_W0|T_MUST_EVEX, 0x6D); } void tcvtrowps2bf16l(const Zmm& z, const Tmm& t, const Reg32& r) { opVex(z, &r, t, T_F3|T_0F38|T_W0|T_MUST_EVEX, 0x6D); }
@ -2694,6 +2697,10 @@ void tcvtrowps2phl(const Zmm& z, const Tmm& t, const Reg32& r) { opVex(z, &r, t,
void tcvtrowps2phl(const Zmm& z, const Tmm& t, uint8_t imm) { opVex(z, 0, t, T_F2|T_0F3A|T_W0|T_MUST_EVEX, 0x77, imm); } void tcvtrowps2phl(const Zmm& z, const Tmm& t, uint8_t imm) { opVex(z, 0, t, T_F2|T_0F3A|T_W0|T_MUST_EVEX, 0x77, imm); }
void tilemovrow(const Zmm& z, const Tmm& t, const Reg32& r) { opVex(z, &r, t, T_66|T_0F38|T_W0|T_MUST_EVEX, 0x4A); } void tilemovrow(const Zmm& z, const Tmm& t, const Reg32& r) { opVex(z, &r, t, T_66|T_0F38|T_W0|T_MUST_EVEX, 0x4A); }
void tilemovrow(const Zmm& z, const Tmm& t, uint8_t imm) { opVex(z, 0, t, T_66|T_0F3A|T_W0|T_MUST_EVEX, 0x07, imm); } void tilemovrow(const Zmm& z, const Tmm& t, uint8_t imm) { opVex(z, 0, t, T_66|T_0F3A|T_W0|T_MUST_EVEX, 0x07, imm); }
void vmovrsb(const Xmm& x, const Address& addr) { opVex(x, 0, addr, T_F2|T_MAP5|T_W0|T_MUST_EVEX, 0x6F); }
void vmovrsd(const Xmm& x, const Address& addr) { opVex(x, 0, addr, T_F3|T_MAP5|T_W0|T_MUST_EVEX, 0x6F); }
void vmovrsq(const Xmm& x, const Address& addr) { opVex(x, 0, addr, T_F3|T_MAP5|T_EW1|T_MUST_EVEX, 0x6F); }
void vmovrsw(const Xmm& x, const Address& addr) { opVex(x, 0, addr, T_F2|T_MAP5|T_EW1|T_MUST_EVEX, 0x6F); }
void vpbroadcastq(const Xmm& x, const Reg64& r) { opVex(x, 0, r, T_66|T_0F38|T_EW1|T_YMM|T_MUST_EVEX, 0x7C); } void vpbroadcastq(const Xmm& x, const Reg64& r) { opVex(x, 0, r, T_66|T_0F38|T_EW1|T_YMM|T_MUST_EVEX, 0x7C); }
#endif #endif
#endif #endif

View File

@ -114,6 +114,10 @@ inline T min_(T x, T y) { return x < y ? x : y; }
CPU detection class CPU detection class
@note static inline const member is supported by c++17 or later, so use template hack @note static inline const member is supported by c++17 or later, so use template hack
*/ */
#ifdef _MSC_VER
#pragma warning(push)
#pragma warning(disable : 4459)
#endif
class Cpu { class Cpu {
public: public:
class Type { class Type {
@ -154,10 +158,10 @@ private:
{ {
return (1U << n) - 1; return (1U << n) - 1;
} }
// [EBX:ECX:EDX] == s? // [ebx:ecx:edx] == s?
bool isEqualStr(uint32_t EBX, uint32_t ECX, uint32_t EDX, const char s[12]) const bool isEqualStr(uint32_t ebx, uint32_t ecx, uint32_t edx, const char s[12]) const
{ {
return get32bitAsBE(&s[0]) == EBX && get32bitAsBE(&s[4]) == EDX && get32bitAsBE(&s[8]) == ECX; return get32bitAsBE(&s[0]) == ebx && get32bitAsBE(&s[4]) == edx && get32bitAsBE(&s[8]) == ecx;
} }
uint32_t extractBit(uint32_t val, uint32_t base, uint32_t end) const uint32_t extractBit(uint32_t val, uint32_t base, uint32_t end) const
{ {
@ -567,172 +571,172 @@ public:
, avx10version_(0) , avx10version_(0)
{ {
uint32_t data[4] = {}; uint32_t data[4] = {};
const uint32_t& EAX = data[0]; const uint32_t& eax = data[0];
const uint32_t& EBX = data[1]; const uint32_t& ebx = data[1];
const uint32_t& ECX = data[2]; const uint32_t& ecx = data[2];
const uint32_t& EDX = data[3]; const uint32_t& edx = data[3];
getCpuid(0, data); getCpuid(0, data);
const uint32_t maxNum = EAX; const uint32_t maxNum = eax;
if (isEqualStr(EBX, ECX, EDX, "AuthenticAMD")) { if (isEqualStr(ebx, ecx, edx, "AuthenticAMD")) {
type_ |= tAMD; type_ |= tAMD;
getCpuid(0x80000001, data); getCpuid(0x80000001, data);
if (EDX & (1U << 31)) { if (edx & (1U << 31)) {
type_ |= t3DN; type_ |= t3DN;
// 3DNow! implies support for PREFETCHW on AMD // 3DNow! implies support for PREFETCHW on AMD
type_ |= tPREFETCHW; type_ |= tPREFETCHW;
} }
if (EDX & (1U << 29)) { if (edx & (1U << 29)) {
// Long mode implies support for PREFETCHW on AMD // Long mode implies support for PREFETCHW on AMD
type_ |= tPREFETCHW; type_ |= tPREFETCHW;
} }
} else if (isEqualStr(EBX, ECX, EDX, "GenuineIntel")) { } else if (isEqualStr(ebx, ecx, edx, "GenuineIntel")) {
type_ |= tINTEL; type_ |= tINTEL;
} }
// Extended flags information // Extended flags information
getCpuid(0x80000000, data); getCpuid(0x80000000, data);
const uint32_t maxExtendedNum = EAX; const uint32_t maxExtendedNum = eax;
if (maxExtendedNum >= 0x80000001) { if (maxExtendedNum >= 0x80000001) {
getCpuid(0x80000001, data); getCpuid(0x80000001, data);
if (ECX & (1U << 5)) type_ |= tLZCNT; if (ecx & (1U << 5)) type_ |= tLZCNT;
if (ECX & (1U << 6)) type_ |= tSSE4a; if (ecx & (1U << 6)) type_ |= tSSE4a;
if (ECX & (1U << 8)) type_ |= tPREFETCHW; if (ecx & (1U << 8)) type_ |= tPREFETCHW;
if (EDX & (1U << 15)) type_ |= tCMOV; if (edx & (1U << 15)) type_ |= tCMOV;
if (EDX & (1U << 22)) type_ |= tMMX2; if (edx & (1U << 22)) type_ |= tMMX2;
if (EDX & (1U << 27)) type_ |= tRDTSCP; if (edx & (1U << 27)) type_ |= tRDTSCP;
if (EDX & (1U << 30)) type_ |= tE3DN; if (edx & (1U << 30)) type_ |= tE3DN;
if (EDX & (1U << 31)) type_ |= t3DN; if (edx & (1U << 31)) type_ |= t3DN;
} }
if (maxExtendedNum >= 0x80000008) { if (maxExtendedNum >= 0x80000008) {
getCpuid(0x80000008, data); getCpuid(0x80000008, data);
if (EBX & (1U << 0)) type_ |= tCLZERO; if (ebx & (1U << 0)) type_ |= tCLZERO;
} }
getCpuid(1, data); getCpuid(1, data);
if (ECX & (1U << 0)) type_ |= tSSE3; if (ecx & (1U << 0)) type_ |= tSSE3;
if (ECX & (1U << 1)) type_ |= tPCLMULQDQ; if (ecx & (1U << 1)) type_ |= tPCLMULQDQ;
if (ECX & (1U << 9)) type_ |= tSSSE3; if (ecx & (1U << 9)) type_ |= tSSSE3;
if (ECX & (1U << 19)) type_ |= tSSE41; if (ecx & (1U << 19)) type_ |= tSSE41;
if (ECX & (1U << 20)) type_ |= tSSE42; if (ecx & (1U << 20)) type_ |= tSSE42;
if (ECX & (1U << 22)) type_ |= tMOVBE; if (ecx & (1U << 22)) type_ |= tMOVBE;
if (ECX & (1U << 23)) type_ |= tPOPCNT; if (ecx & (1U << 23)) type_ |= tPOPCNT;
if (ECX & (1U << 25)) type_ |= tAESNI; if (ecx & (1U << 25)) type_ |= tAESNI;
if (ECX & (1U << 26)) type_ |= tXSAVE; if (ecx & (1U << 26)) type_ |= tXSAVE;
if (ECX & (1U << 27)) type_ |= tOSXSAVE; if (ecx & (1U << 27)) type_ |= tOSXSAVE;
if (ECX & (1U << 29)) type_ |= tF16C; if (ecx & (1U << 29)) type_ |= tF16C;
if (ECX & (1U << 30)) type_ |= tRDRAND; if (ecx & (1U << 30)) type_ |= tRDRAND;
if (EDX & (1U << 15)) type_ |= tCMOV; if (edx & (1U << 15)) type_ |= tCMOV;
if (EDX & (1U << 23)) type_ |= tMMX; if (edx & (1U << 23)) type_ |= tMMX;
if (EDX & (1U << 25)) type_ |= tMMX2 | tSSE; if (edx & (1U << 25)) type_ |= tMMX2 | tSSE;
if (EDX & (1U << 26)) type_ |= tSSE2; if (edx & (1U << 26)) type_ |= tSSE2;
if (type_ & tOSXSAVE) { if (type_ & tOSXSAVE) {
// check XFEATURE_ENABLED_MASK[2:1] = '11b' // check XFEATURE_ENABLED_MASK[2:1] = '11b'
uint64_t bv = getXfeature(); uint64_t bv = getXfeature();
if ((bv & 6) == 6) { if ((bv & 6) == 6) {
if (ECX & (1U << 12)) type_ |= tFMA; if (ecx & (1U << 12)) type_ |= tFMA;
if (ECX & (1U << 28)) type_ |= tAVX; if (ecx & (1U << 28)) type_ |= tAVX;
// do *not* check AVX-512 state on macOS because it has on-demand AVX-512 support // do *not* check AVX-512 state on macOS because it has on-demand AVX-512 support
#if !defined(__APPLE__) #if !defined(__APPLE__)
if (((bv >> 5) & 7) == 7) if (((bv >> 5) & 7) == 7)
#endif #endif
{ {
getCpuidEx(7, 0, data); getCpuidEx(7, 0, data);
if (EBX & (1U << 16)) type_ |= tAVX512F; if (ebx & (1U << 16)) type_ |= tAVX512F;
if (type_ & tAVX512F) { if (type_ & tAVX512F) {
if (EBX & (1U << 17)) type_ |= tAVX512DQ; if (ebx & (1U << 17)) type_ |= tAVX512DQ;
if (EBX & (1U << 21)) type_ |= tAVX512_IFMA; if (ebx & (1U << 21)) type_ |= tAVX512_IFMA;
if (EBX & (1U << 26)) type_ |= tAVX512PF; if (ebx & (1U << 26)) type_ |= tAVX512PF;
if (EBX & (1U << 27)) type_ |= tAVX512ER; if (ebx & (1U << 27)) type_ |= tAVX512ER;
if (EBX & (1U << 28)) type_ |= tAVX512CD; if (ebx & (1U << 28)) type_ |= tAVX512CD;
if (EBX & (1U << 30)) type_ |= tAVX512BW; if (ebx & (1U << 30)) type_ |= tAVX512BW;
if (EBX & (1U << 31)) type_ |= tAVX512VL; if (ebx & (1U << 31)) type_ |= tAVX512VL;
if (ECX & (1U << 1)) type_ |= tAVX512_VBMI; if (ecx & (1U << 1)) type_ |= tAVX512_VBMI;
if (ECX & (1U << 6)) type_ |= tAVX512_VBMI2; if (ecx & (1U << 6)) type_ |= tAVX512_VBMI2;
if (ECX & (1U << 11)) type_ |= tAVX512_VNNI; if (ecx & (1U << 11)) type_ |= tAVX512_VNNI;
if (ECX & (1U << 12)) type_ |= tAVX512_BITALG; if (ecx & (1U << 12)) type_ |= tAVX512_BITALG;
if (ECX & (1U << 14)) type_ |= tAVX512_VPOPCNTDQ; if (ecx & (1U << 14)) type_ |= tAVX512_VPOPCNTDQ;
if (EDX & (1U << 2)) type_ |= tAVX512_4VNNIW; if (edx & (1U << 2)) type_ |= tAVX512_4VNNIW;
if (EDX & (1U << 3)) type_ |= tAVX512_4FMAPS; if (edx & (1U << 3)) type_ |= tAVX512_4FMAPS;
if (EDX & (1U << 8)) type_ |= tAVX512_VP2INTERSECT; if (edx & (1U << 8)) type_ |= tAVX512_VP2INTERSECT;
if ((type_ & tAVX512BW) && (EDX & (1U << 23))) type_ |= tAVX512_FP16; if ((type_ & tAVX512BW) && (edx & (1U << 23))) type_ |= tAVX512_FP16;
} }
} }
} }
} }
if (maxNum >= 7) { if (maxNum >= 7) {
getCpuidEx(7, 0, data); getCpuidEx(7, 0, data);
const uint32_t maxNumSubLeaves = EAX; const uint32_t maxNumSubLeaves = eax;
if (type_ & tAVX && (EBX & (1U << 5))) type_ |= tAVX2; if (type_ & tAVX && (ebx & (1U << 5))) type_ |= tAVX2;
if (EBX & (1U << 3)) type_ |= tBMI1; if (ebx & (1U << 3)) type_ |= tBMI1;
if (EBX & (1U << 4)) type_ |= tHLE; if (ebx & (1U << 4)) type_ |= tHLE;
if (EBX & (1U << 8)) type_ |= tBMI2; if (ebx & (1U << 8)) type_ |= tBMI2;
if (EBX & (1U << 9)) type_ |= tENHANCED_REP; if (ebx & (1U << 9)) type_ |= tENHANCED_REP;
if (EBX & (1U << 11)) type_ |= tRTM; if (ebx & (1U << 11)) type_ |= tRTM;
if (EBX & (1U << 14)) type_ |= tMPX; if (ebx & (1U << 14)) type_ |= tMPX;
if (EBX & (1U << 18)) type_ |= tRDSEED; if (ebx & (1U << 18)) type_ |= tRDSEED;
if (EBX & (1U << 19)) type_ |= tADX; if (ebx & (1U << 19)) type_ |= tADX;
if (EBX & (1U << 20)) type_ |= tSMAP; if (ebx & (1U << 20)) type_ |= tSMAP;
if (EBX & (1U << 23)) type_ |= tCLFLUSHOPT; if (ebx & (1U << 23)) type_ |= tCLFLUSHOPT;
if (EBX & (1U << 24)) type_ |= tCLWB; if (ebx & (1U << 24)) type_ |= tCLWB;
if (EBX & (1U << 29)) type_ |= tSHA; if (ebx & (1U << 29)) type_ |= tSHA;
if (ECX & (1U << 0)) type_ |= tPREFETCHWT1; if (ecx & (1U << 0)) type_ |= tPREFETCHWT1;
if (ECX & (1U << 5)) type_ |= tWAITPKG; if (ecx & (1U << 5)) type_ |= tWAITPKG;
if (ECX & (1U << 8)) type_ |= tGFNI; if (ecx & (1U << 8)) type_ |= tGFNI;
if (ECX & (1U << 9)) type_ |= tVAES; if (ecx & (1U << 9)) type_ |= tVAES;
if (ECX & (1U << 10)) type_ |= tVPCLMULQDQ; if (ecx & (1U << 10)) type_ |= tVPCLMULQDQ;
if (ECX & (1U << 23)) type_ |= tKEYLOCKER; if (ecx & (1U << 23)) type_ |= tKEYLOCKER;
if (ECX & (1U << 25)) type_ |= tCLDEMOTE; if (ecx & (1U << 25)) type_ |= tCLDEMOTE;
if (ECX & (1U << 27)) type_ |= tMOVDIRI; if (ecx & (1U << 27)) type_ |= tMOVDIRI;
if (ECX & (1U << 28)) type_ |= tMOVDIR64B; if (ecx & (1U << 28)) type_ |= tMOVDIR64B;
if (EDX & (1U << 5)) type_ |= tUINTR; if (edx & (1U << 5)) type_ |= tUINTR;
if (EDX & (1U << 14)) type_ |= tSERIALIZE; if (edx & (1U << 14)) type_ |= tSERIALIZE;
if (EDX & (1U << 16)) type_ |= tTSXLDTRK; if (edx & (1U << 16)) type_ |= tTSXLDTRK;
if (EDX & (1U << 22)) type_ |= tAMX_BF16; if (edx & (1U << 22)) type_ |= tAMX_BF16;
if (EDX & (1U << 24)) type_ |= tAMX_TILE; if (edx & (1U << 24)) type_ |= tAMX_TILE;
if (EDX & (1U << 25)) type_ |= tAMX_INT8; if (edx & (1U << 25)) type_ |= tAMX_INT8;
if (maxNumSubLeaves >= 1) { if (maxNumSubLeaves >= 1) {
getCpuidEx(7, 1, data); getCpuidEx(7, 1, data);
if (EAX & (1U << 0)) type_ |= tSHA512; if (eax & (1U << 0)) type_ |= tSHA512;
if (EAX & (1U << 1)) type_ |= tSM3; if (eax & (1U << 1)) type_ |= tSM3;
if (EAX & (1U << 2)) type_ |= tSM4; if (eax & (1U << 2)) type_ |= tSM4;
if (EAX & (1U << 3)) type_ |= tRAO_INT; if (eax & (1U << 3)) type_ |= tRAO_INT;
if (EAX & (1U << 4)) type_ |= tAVX_VNNI; if (eax & (1U << 4)) type_ |= tAVX_VNNI;
if (type_ & tAVX512F) { if (type_ & tAVX512F) {
if (EAX & (1U << 5)) type_ |= tAVX512_BF16; if (eax & (1U << 5)) type_ |= tAVX512_BF16;
} }
if (EAX & (1U << 7)) type_ |= tCMPCCXADD; if (eax & (1U << 7)) type_ |= tCMPCCXADD;
if (EAX & (1U << 21)) type_ |= tAMX_FP16; if (eax & (1U << 21)) type_ |= tAMX_FP16;
if (EAX & (1U << 23)) type_ |= tAVX_IFMA; if (eax & (1U << 23)) type_ |= tAVX_IFMA;
if (EAX & (1U << 31)) type_ |= tMOVRS; if (eax & (1U << 31)) type_ |= tMOVRS;
if (EDX & (1U << 4)) type_ |= tAVX_VNNI_INT8; if (edx & (1U << 4)) type_ |= tAVX_VNNI_INT8;
if (EDX & (1U << 5)) type_ |= tAVX_NE_CONVERT; if (edx & (1U << 5)) type_ |= tAVX_NE_CONVERT;
if (EDX & (1U << 10)) type_ |= tAVX_VNNI_INT16; if (edx & (1U << 10)) type_ |= tAVX_VNNI_INT16;
if (EDX & (1U << 14)) type_ |= tPREFETCHITI; if (edx & (1U << 14)) type_ |= tPREFETCHITI;
if (EDX & (1U << 19)) type_ |= tAVX10; if (edx & (1U << 19)) type_ |= tAVX10;
if (EDX & (1U << 21)) type_ |= tAPX_F; if (edx & (1U << 21)) type_ |= tAPX_F;
getCpuidEx(0x1e, 1, data); getCpuidEx(0x1e, 1, data);
if (EAX & (1U << 4)) type_ |= tAMX_FP8; if (eax & (1U << 4)) type_ |= tAMX_FP8;
if (EAX & (1U << 5)) type_ |= tAMX_TRANSPOSE; if (eax & (1U << 5)) type_ |= tAMX_TRANSPOSE;
if (EAX & (1U << 6)) type_ |= tAMX_TF32; if (eax & (1U << 6)) type_ |= tAMX_TF32;
if (EAX & (1U << 7)) type_ |= tAMX_AVX512; if (eax & (1U << 7)) type_ |= tAMX_AVX512;
if (EAX & (1U << 8)) type_ |= tAMX_MOVRS; if (eax & (1U << 8)) type_ |= tAMX_MOVRS;
} }
} }
if (maxNum >= 0x19) { if (maxNum >= 0x19) {
getCpuidEx(0x19, 0, data); getCpuidEx(0x19, 0, data);
if (EBX & (1U << 0)) type_ |= tAESKLE; if (ebx & (1U << 0)) type_ |= tAESKLE;
if (EBX & (1U << 2)) type_ |= tWIDE_KL; if (ebx & (1U << 2)) type_ |= tWIDE_KL;
if (type_ & (tKEYLOCKER|tAESKLE|tWIDE_KL)) type_ |= tKEYLOCKER_WIDE; if (type_ & (tKEYLOCKER|tAESKLE|tWIDE_KL)) type_ |= tKEYLOCKER_WIDE;
} }
if (has(tAVX10) && maxNum >= 0x24) { if (has(tAVX10) && maxNum >= 0x24) {
getCpuidEx(0x24, 0, data); getCpuidEx(0x24, 0, data);
avx10version_ = EBX & mask(7); avx10version_ = ebx & mask(7);
} }
setFamily(); setFamily();
setNumCores(); setNumCores();
@ -752,6 +756,9 @@ public:
} }
// Returns the stored avx10version_ value. The detection code above assigns it from `ebx & mask(7)` of CPUID leaf 0x24 (sub-leaf 0), and only when tAVX10 is set and maxNum >= 0x24.
// NOTE(review): the value returned when AVX10 is not detected depends on the member's initializer, which is not visible here — confirm.
int getAVX10version() const { return avx10version_; } int getAVX10version() const { return avx10version_; }
}; };
// MSVC only: restore the previously saved compiler-warning state.
// NOTE(review): presumably pairs with a `#pragma warning(push)` earlier in this header (not visible in this section) — confirm.
#ifdef _MSC_VER
#pragma warning(pop)
#endif
#ifndef XBYAK_ONLY_CLASS_CPU #ifndef XBYAK_ONLY_CLASS_CPU
class Clock { class Clock {