First Commit
This commit is contained in:
172
common/emitter/avx.cpp
Normal file
172
common/emitter/avx.cpp
Normal file
@@ -0,0 +1,172 @@
|
||||
// SPDX-FileCopyrightText: 2002-2025 PCSX2 Dev Team
|
||||
// SPDX-License-Identifier: GPL-3.0+
|
||||
|
||||
#include "common/emitter/internal.h"
|
||||
|
||||
// warning: suggest braces around initialization of subobject [-Wmissing-braces]
|
||||
#ifdef __clang__
|
||||
#pragma clang diagnostic push
|
||||
#pragma clang diagnostic ignored "-Wmissing-braces"
|
||||
#endif
|
||||
|
||||
namespace x86Emitter
{
	// Aligned / unaligned 128-bit moves: {Prefix, LoadOpcode, StoreOpcode} (see xImplAVX_Move).
	const xImplAVX_Move xVMOVAPS = {0x00, 0x28, 0x29};
	const xImplAVX_Move xVMOVUPS = {0x00, 0x10, 0x11};

	// Float arithmetic, one {Prefix, Opcode} pair per PS/PD/SS/SD form.
	const xImplAVX_ArithFloat xVADD = {
		{0x00, 0x58}, // VADDPS
		{0x66, 0x58}, // VADDPD
		{0xF3, 0x58}, // VADDSS
		{0xF2, 0x58}, // VADDSD
	};
	const xImplAVX_ArithFloat xVSUB = {
		{0x00, 0x5C}, // VSUBPS
		{0x66, 0x5C}, // VSUBPD
		{0xF3, 0x5C}, // VSUBSS
		{0xF2, 0x5C}, // VSUBSD
	};
	const xImplAVX_ArithFloat xVMUL = {
		{0x00, 0x59}, // VMULPS
		{0x66, 0x59}, // VMULPD
		{0xF3, 0x59}, // VMULSS
		{0xF2, 0x59}, // VMULSD
	};
	const xImplAVX_ArithFloat xVDIV = {
		{0x00, 0x5E}, // VDIVPS
		{0x66, 0x5E}, // VDIVPD
		{0xF3, 0x5E}, // VDIVSS
		{0xF2, 0x5E}, // VDIVSD
	};

	// Comparison predicates, in xImplAVX_CmpFloat member order (EQ, LT, LE, UO, NE, GE, GT, OR).
	// Note that GE/GT are realized via the not-less / not-less-or-equal predicates.
	const xImplAVX_CmpFloat xVCMP = {
		{SSE2_Equal},
		{SSE2_Less},
		{SSE2_LessOrEqual},
		{SSE2_Unordered},
		{SSE2_NotEqual},
		{SSE2_NotLess},          // GE
		{SSE2_NotLessOrEqual},   // GT
		{SSE2_Ordered},
	};

	// Packed-integer bitwise ops: {Prefix, Opcode}.
	const xImplAVX_ThreeArgYMM xVPAND = {0x66, 0xDB};
	const xImplAVX_ThreeArgYMM xVPANDN = {0x66, 0xDF};
	const xImplAVX_ThreeArgYMM xVPOR = {0x66, 0xEB};
	const xImplAVX_ThreeArgYMM xVPXOR = {0x66, 0xEF};

	// Packed-integer compares, in xImplAVX_CmpInt member order.
	const xImplAVX_CmpInt xVPCMP = {
		{0x66, 0x74}, // VPCMPEQB
		{0x66, 0x75}, // VPCMPEQW
		{0x66, 0x76}, // VPCMPEQD
		{0x66, 0x64}, // VPCMPGTB
		{0x66, 0x65}, // VPCMPGTW
		{0x66, 0x66}, // VPCMPGTD
	};

	// VPMOVMSKB: collect the most significant bit of each byte of `from` into `to`.
	void xVPMOVMSKB(const xRegister32& to, const xRegisterSSE& from)
	{
		xOpWriteC5(0x66, 0xd7, to, xRegister32(), from);
	}

	// VMOVMSKPS: collect the sign bit of each packed single of `from` into `to`.
	void xVMOVMSKPS(const xRegister32& to, const xRegisterSSE& from)
	{
		xOpWriteC5(0x00, 0x50, to, xRegister32(), from);
	}

	// VMOVMSKPD: collect the sign bit of each packed double of `from` into `to`.
	void xVMOVMSKPD(const xRegister32& to, const xRegisterSSE& from)
	{
		xOpWriteC5(0x66, 0x50, to, xRegister32(), from);
	}

	// VZEROUPPER: zero the upper halves of the YMM registers.
	void xVZEROUPPER()
	{
		// rather than dealing with nonexistent operands, emit the fixed
		// 3-byte encoding directly (C5 F8 77).
		xWrite8(0xc5);
		xWrite8(0xf8);
		xWrite8(0x77);
	}

	// Register-to-register move; elided entirely when source and destination are the same.
	void xImplAVX_Move::operator()(const xRegisterSSE& to, const xRegisterSSE& from) const
	{
		if (to != from)
			xOpWriteC5(Prefix, LoadOpcode, to, xRegisterSSE(), from);
	}

	// Load from memory into a register.
	void xImplAVX_Move::operator()(const xRegisterSSE& to, const xIndirectVoid& from) const
	{
		xOpWriteC5(Prefix, LoadOpcode, to, xRegisterSSE(), from);
	}

	// Store a register to memory. Operand order is swapped to match the
	// store opcode's encoding (reg field = source register).
	void xImplAVX_Move::operator()(const xIndirectVoid& to, const xRegisterSSE& from) const
	{
		xOpWriteC5(Prefix, StoreOpcode, from, xRegisterSSE(), to);
	}

	// 128-bit-only three-operand form; wide (YMM) registers are rejected here.
	void xImplAVX_ThreeArg::operator()(const xRegisterSSE& to, const xRegisterSSE& from1, const xRegisterSSE& from2) const
	{
		pxAssert(!to.IsWideSIMD() && !from1.IsWideSIMD() && !from2.IsWideSIMD());
		xOpWriteC5(Prefix, Opcode, to, from1, from2);
	}

	// 128-bit-only form with a memory second source.
	void xImplAVX_ThreeArg::operator()(const xRegisterSSE& to, const xRegisterSSE& from1, const xIndirectVoid& from2) const
	{
		pxAssert(!to.IsWideSIMD() && !from1.IsWideSIMD());
		xOpWriteC5(Prefix, Opcode, to, from1, from2);
	}

	// YMM-capable three-operand form (no width assertion).
	void xImplAVX_ThreeArgYMM::operator()(const xRegisterSSE& to, const xRegisterSSE& from1, const xRegisterSSE& from2) const
	{
		xOpWriteC5(Prefix, Opcode, to, from1, from2);
	}

	// YMM-capable form with a memory second source.
	void xImplAVX_ThreeArgYMM::operator()(const xRegisterSSE& to, const xRegisterSSE& from1, const xIndirectVoid& from2) const
	{
		xOpWriteC5(Prefix, Opcode, to, from1, from2);
	}

	// VCMPccPS: packed-single compare; the predicate byte (CType) trails the instruction.
	void xImplAVX_CmpFloatHelper::PS(const xRegisterSSE& to, const xRegisterSSE& from1, const xRegisterSSE& from2) const
	{
		xOpWriteC5(0x00, 0xC2, to, from1, from2);
		xWrite8(static_cast<u8>(CType));
	}

	void xImplAVX_CmpFloatHelper::PS(const xRegisterSSE& to, const xRegisterSSE& from1, const xIndirectVoid& from2) const
	{
		xOpWriteC5(0x00, 0xC2, to, from1, from2);
		xWrite8(static_cast<u8>(CType));
	}

	// VCMPccPD: packed-double compare.
	void xImplAVX_CmpFloatHelper::PD(const xRegisterSSE& to, const xRegisterSSE& from1, const xIndirectVoid& from2) const
	{
		xOpWriteC5(0x66, 0xC2, to, from1, from2);
		xWrite8(static_cast<u8>(CType));
	}

	void xImplAVX_CmpFloatHelper::PD(const xRegisterSSE& to, const xRegisterSSE& from1, const xRegisterSSE& from2) const
	{
		xOpWriteC5(0x66, 0xC2, to, from1, from2);
		xWrite8(static_cast<u8>(CType));
	}

	// VCMPccSS: scalar-single compare.
	void xImplAVX_CmpFloatHelper::SS(const xRegisterSSE& to, const xRegisterSSE& from1, const xRegisterSSE& from2) const
	{
		xOpWriteC5(0xF3, 0xC2, to, from1, from2);
		xWrite8(static_cast<u8>(CType));
	}

	void xImplAVX_CmpFloatHelper::SS(const xRegisterSSE& to, const xRegisterSSE& from1, const xIndirectVoid& from2) const
	{
		xOpWriteC5(0xF3, 0xC2, to, from1, from2);
		xWrite8(static_cast<u8>(CType));
	}

	// VCMPccSD: scalar-double compare.
	void xImplAVX_CmpFloatHelper::SD(const xRegisterSSE& to, const xRegisterSSE& from1, const xIndirectVoid& from2) const
	{
		xOpWriteC5(0xF2, 0xC2, to, from1, from2);
		xWrite8(static_cast<u8>(CType));
	}

	void xImplAVX_CmpFloatHelper::SD(const xRegisterSSE& to, const xRegisterSSE& from1, const xRegisterSSE& from2) const
	{
		xOpWriteC5(0xF2, 0xC2, to, from1, from2);
		xWrite8(static_cast<u8>(CType));
	}
} // namespace x86Emitter
|
||||
22
common/emitter/bmi.cpp
Normal file
22
common/emitter/bmi.cpp
Normal file
@@ -0,0 +1,22 @@
|
||||
// SPDX-FileCopyrightText: 2002-2025 PCSX2 Dev Team
|
||||
// SPDX-License-Identifier: GPL-3.0+
|
||||
|
||||
#include "common/emitter/internal.h"
|
||||
|
||||
namespace x86Emitter
{
	// BMI RVM-form instructions: {Prefix, MbPrefix (opcode-map byte, 0x38), Opcode}.
	const xImplBMI_RVM xMULX = {0xF2, 0x38, 0xF6};
	const xImplBMI_RVM xPDEP = {0xF2, 0x38, 0xF5};
	const xImplBMI_RVM xPEXT = {0xF3, 0x38, 0xF5};
	const xImplBMI_RVM xANDN_S = {0x00, 0x38, 0xF2};

	// to = op(from1, from2): all-register form, emitted via the VEX writer.
	void xImplBMI_RVM::operator()(const xRegisterInt& to, const xRegisterInt& from1, const xRegisterInt& from2) const
	{
		xOpWriteC4(Prefix, MbPrefix, Opcode, to, from1, from2);
	}
	// to = op(from1, [mem]): memory second-source form.
	void xImplBMI_RVM::operator()(const xRegisterInt& to, const xRegisterInt& from1, const xIndirectVoid& from2) const
	{
		xOpWriteC4(Prefix, MbPrefix, Opcode, to, from1, from2);
	}
} // namespace x86Emitter
|
||||
60
common/emitter/fpu.cpp
Normal file
60
common/emitter/fpu.cpp
Normal file
@@ -0,0 +1,60 @@
|
||||
// SPDX-FileCopyrightText: 2002-2025 PCSX2 Dev Team
|
||||
// SPDX-License-Identifier: GPL-3.0+
|
||||
|
||||
#include "common/emitter/legacy_internal.h"
|
||||
|
||||
//------------------------------------------------------------------
|
||||
// FPU instructions
|
||||
//------------------------------------------------------------------
|
||||
/* fld m32 to fpu reg stack */
emitterT void FLD32(u32 from)
{
	xWrite8(0xD9); // x87 escape opcode
	ModRM(0, 0x0, DISP32); // /0 = FLD m32
	xWrite32(MEMADDR(from, 4));
}

// fld st(i)
// NOTE: xWrite16 emits little-endian, so 0xc0d9 lays down D9, then C0+i.
emitterT void FLD(int st) { xWrite16(0xc0d9 + (st << 8)); }
emitterT void FLD1() { xWrite16(0xe8d9); } // push +1.0
emitterT void FLDL2E() { xWrite16(0xead9); } // push log2(e)

/* fstp m32 from fpu reg stack */
emitterT void FSTP32(u32 to)
{
	xWrite8(0xD9);
	ModRM(0, 0x3, DISP32); // /3 = FSTP m32
	xWrite32(MEMADDR(to, 4));
}

// fstp st(i)  (DD D8+i, little-endian packed)
emitterT void FSTP(int st) { xWrite16(0xd8dd + (st << 8)); }

emitterT void FRNDINT() { xWrite16(0xfcd9); } // round ST(0) to integer
emitterT void FXCH(int st) { xWrite16(0xc8d9 + (st << 8)); } // exchange ST(0) <-> ST(i)
emitterT void F2XM1() { xWrite16(0xf0d9); } // ST(0) = 2^ST(0) - 1
emitterT void FSCALE() { xWrite16(0xfdd9); } // scale ST(0) by 2^trunc(ST(1))
emitterT void FPATAN(void) { xWrite16(0xf3d9); } // ST(1) = atan(ST(1)/ST(0)), pop
emitterT void FSIN(void) { xWrite16(0xfed9); } // ST(0) = sin(ST(0))

/* fadd ST(0) to fpu reg stack ST(src) */
emitterT void FADD320toR(x86IntRegType src)
{
	xWrite8(0xDC);
	xWrite8(0xC0 + src); // FADD ST(src), ST(0)
}

/* fsub ST(src) to fpu reg stack ST(0) */
emitterT void FSUB32Rto0(x86IntRegType src)
{
	xWrite8(0xD8);
	xWrite8(0xE0 + src); // FSUB ST(0), ST(src)
}

/* fmul m32 to fpu reg stack */
emitterT void FMUL32(u32 from)
{
	xWrite8(0xD8);
	ModRM(0, 0x1, DISP32); // /1 = FMUL m32
	xWrite32(MEMADDR(from, 4));
}
|
||||
259
common/emitter/groups.cpp
Normal file
259
common/emitter/groups.cpp
Normal file
@@ -0,0 +1,259 @@
|
||||
// SPDX-FileCopyrightText: 2002-2025 PCSX2 Dev Team
|
||||
// SPDX-License-Identifier: GPL-3.0+
|
||||
|
||||
/*
|
||||
* ix86 core v0.9.1
|
||||
*
|
||||
* Original Authors (v0.6.2 and prior):
|
||||
* linuzappz <linuzappz@pcsx.net>
|
||||
* alexey silinov
|
||||
* goldfinger
|
||||
* zerofrog(@gmail.com)
|
||||
*
|
||||
* Authors of v0.9.1:
|
||||
* Jake.Stine(@gmail.com)
|
||||
* cottonvibes(@gmail.com)
|
||||
* sudonim(1@gmail.com)
|
||||
*/
|
||||
|
||||
#include "common/emitter/internal.h"
|
||||
#include "common/emitter/implement/helpers.h"
|
||||
|
||||
namespace x86Emitter
{

	// =====================================================================================================
	//  Group 1 Instructions - ADD, SUB, ADC, etc.
	// =====================================================================================================

	// Note on "[Indirect],Imm" forms : use int as the source operand since it's "reasonably inert" from a
	// compiler perspective.  (using uint tends to make the compiler try and fail to match signed immediates
	// with one of the other overloads).

	// Emits a group-1 op with an indirect destination and an immediate source,
	// preferring the short sign-extended imm8 encoding (0x83) when the value fits.
	static void _g1_IndirectImm(G1Type InstType, const xIndirect64orLess& sibdest, int imm)
	{
		if (sibdest.Is8BitOp())
		{
			// NOTE(review): 8-bit destinations always take an imm8; assumes imm
			// fits in 8 bits -- no range check is performed here.
			xOpWrite(sibdest.GetPrefix16(), 0x80, InstType, sibdest, 1);

			xWrite<s8>(imm);
		}
		else
		{
			u8 opcode = is_s8(imm) ? 0x83 : 0x81; // 0x83 = sign-extended imm8, 0x81 = full-size imm
			xOpWrite(sibdest.GetPrefix16(), opcode, InstType, sibdest, is_s8(imm) ? 1 : sibdest.GetImmSize());

			if (is_s8(imm))
				xWrite<s8>(imm);
			else
				sibdest.xWriteImm(imm);
		}
	}

	// reg,reg form. Opcode bit 0 selects 8-bit vs full-size operands; the
	// operation index occupies bits 3-5 (InstType << 3).
	void _g1_EmitOp(G1Type InstType, const xRegisterInt& to, const xRegisterInt& from)
	{
		pxAssert(to.GetOperandSize() == from.GetOperandSize());

		u8 opcode = (to.Is8BitOp() ? 0 : 1) | (InstType << 3);
		xOpWrite(to.GetPrefix16(), opcode, from, to);
	}

	// [mem],reg form.
	static void _g1_EmitOp(G1Type InstType, const xIndirectVoid& sibdest, const xRegisterInt& from)
	{
		u8 opcode = (from.Is8BitOp() ? 0 : 1) | (InstType << 3);
		xOpWrite(from.GetPrefix16(), opcode, from, sibdest);
	}

	// reg,[mem] form (direction bit set: opcode base 2/3).
	static void _g1_EmitOp(G1Type InstType, const xRegisterInt& to, const xIndirectVoid& sibsrc)
	{
		u8 opcode = (to.Is8BitOp() ? 2 : 3) | (InstType << 3);
		xOpWrite(to.GetPrefix16(), opcode, to, sibsrc);
	}

	// reg,imm form; uses the sign-extended imm8 encoding or the accumulator
	// short form where applicable.
	static void _g1_EmitOp(G1Type InstType, const xRegisterInt& to, int imm)
	{
		if (!to.Is8BitOp() && is_s8(imm))
		{
			xOpWrite(to.GetPrefix16(), 0x83, InstType, to);
			xWrite<s8>(imm);
		}
		else
		{
			if (to.IsAccumulator())
			{
				// AL/AX/EAX/RAX have dedicated short encodings (opcode base 4/5).
				u8 opcode = (to.Is8BitOp() ? 4 : 5) | (InstType << 3);
				xOpAccWrite(to.GetPrefix16(), opcode, InstType, to);
			}
			else
			{
				u8 opcode = to.Is8BitOp() ? 0x80 : 0x81;
				xOpWrite(to.GetPrefix16(), opcode, InstType, to);
			}
			to.xWriteImm(imm);
		}
	}

	// Stamps out the five operator() overloads shared by every group-1 implementation struct.
#define ImplementGroup1(g1type, insttype) \
	void g1type::operator()(const xRegisterInt& to, const xRegisterInt& from) const { _g1_EmitOp(insttype, to, from); } \
	void g1type::operator()(const xIndirectVoid& to, const xRegisterInt& from) const { _g1_EmitOp(insttype, to, from); } \
	void g1type::operator()(const xRegisterInt& to, const xIndirectVoid& from) const { _g1_EmitOp(insttype, to, from); } \
	void g1type::operator()(const xRegisterInt& to, int imm) const { _g1_EmitOp(insttype, to, imm); } \
	void g1type::operator()(const xIndirect64orLess& sibdest, int imm) const { _g1_IndirectImm(insttype, sibdest, imm); }

	ImplementGroup1(xImpl_Group1, InstType)
	ImplementGroup1(xImpl_G1Logic, InstType)
	ImplementGroup1(xImpl_G1Arith, InstType)
	ImplementGroup1(xImpl_G1Compare, G1Type_CMP)

	// Combined x86 + SSE logic ops; trailing pairs are the {prefix, opcode} of the PS/PD forms.
	const xImpl_G1Logic xAND = {G1Type_AND, {0x00, 0x54}, {0x66, 0x54}};
	const xImpl_G1Logic xOR = {G1Type_OR, {0x00, 0x56}, {0x66, 0x56}};
	const xImpl_G1Logic xXOR = {G1Type_XOR, {0x00, 0x57}, {0x66, 0x57}};

	// Combined x86 + SSE arithmetic; trailing pairs are PS/PD/SS/SD forms.
	const xImpl_G1Arith xADD = {G1Type_ADD, {0x00, 0x58}, {0x66, 0x58}, {0xf3, 0x58}, {0xf2, 0x58}};
	const xImpl_G1Arith xSUB = {G1Type_SUB, {0x00, 0x5c}, {0x66, 0x5c}, {0xf3, 0x5c}, {0xf2, 0x5c}};
	const xImpl_G1Compare xCMP = {{0x00, 0xc2}, {0x66, 0xc2}, {0xf3, 0xc2}, {0xf2, 0xc2}};

	const xImpl_Group1 xADC = {G1Type_ADC};
	const xImpl_Group1 xSBB = {G1Type_SBB};

	// =====================================================================================================
	//  Group 2 Instructions - SHR, SHL, etc.
	// =====================================================================================================

	// Shift/rotate register by CL (D2/D3 /InstType).
	void xImpl_Group2::operator()(const xRegisterInt& to, const xRegisterCL& /* from */) const
	{
		xOpWrite(to.GetPrefix16(), to.Is8BitOp() ? 0xd2 : 0xd3, InstType, to);
	}

	// Shift/rotate register by immediate. A count of zero emits no code at all.
	void xImpl_Group2::operator()(const xRegisterInt& to, u8 imm) const
	{
		if (imm == 0)
			return;

		if (imm == 1)
		{
			// special encoding of 1's
			xOpWrite(to.GetPrefix16(), to.Is8BitOp() ? 0xd0 : 0xd1, InstType, to);
		}
		else
		{
			xOpWrite(to.GetPrefix16(), to.Is8BitOp() ? 0xc0 : 0xc1, InstType, to);
			xWrite8(imm);
		}
	}

	// Shift/rotate memory operand by CL.
	void xImpl_Group2::operator()(const xIndirect64orLess& sibdest, const xRegisterCL& /* from */) const
	{
		xOpWrite(sibdest.GetPrefix16(), sibdest.Is8BitOp() ? 0xd2 : 0xd3, InstType, sibdest);
	}

	// Shift/rotate memory operand by immediate; zero counts emit nothing.
	void xImpl_Group2::operator()(const xIndirect64orLess& sibdest, u8 imm) const
	{
		if (imm == 0)
			return;

		if (imm == 1)
		{
			// special encoding of 1's
			xOpWrite(sibdest.GetPrefix16(), sibdest.Is8BitOp() ? 0xd0 : 0xd1, InstType, sibdest);
		}
		else
		{
			xOpWrite(sibdest.GetPrefix16(), sibdest.Is8BitOp() ? 0xc0 : 0xc1, InstType, sibdest, 1);
			xWrite8(imm);
		}
	}

	const xImpl_Group2 xROL = {G2Type_ROL};
	const xImpl_Group2 xROR = {G2Type_ROR};
	const xImpl_Group2 xRCL = {G2Type_RCL};
	const xImpl_Group2 xRCR = {G2Type_RCR};
	const xImpl_Group2 xSHL = {G2Type_SHL};
	const xImpl_Group2 xSHR = {G2Type_SHR};
	const xImpl_Group2 xSAR = {G2Type_SAR};


	// =====================================================================================================
	//  Group 3 Instructions - NOT, NEG, MUL, DIV
	// =====================================================================================================

	// Shared encoder: F6/F7 /InstType, register form.
	static void _g3_EmitOp(G3Type InstType, const xRegisterInt& from)
	{
		xOpWrite(from.GetPrefix16(), from.Is8BitOp() ? 0xf6 : 0xf7, InstType, from);
	}

	// Shared encoder: F6/F7 /InstType, memory form.
	static void _g3_EmitOp(G3Type InstType, const xIndirect64orLess& from)
	{
		xOpWrite(from.GetPrefix16(), from.Is8BitOp() ? 0xf6 : 0xf7, InstType, from);
	}

	void xImpl_Group3::operator()(const xRegisterInt& from) const { _g3_EmitOp(InstType, from); }
	void xImpl_Group3::operator()(const xIndirect64orLess& from) const { _g3_EmitOp(InstType, from); }

	void xImpl_iDiv::operator()(const xRegisterInt& from) const { _g3_EmitOp(G3Type_iDIV, from); }
	void xImpl_iDiv::operator()(const xIndirect64orLess& from) const { _g3_EmitOp(G3Type_iDIV, from); }

	// IMUL reg, src, imm: 6B (sign-extended imm8) or 69 (full-size imm).
	template <typename SrcType>
	static void _imul_ImmStyle(const xRegisterInt& param1, const SrcType& param2, int imm)
	{
		pxAssert(param1.GetOperandSize() == param2.GetOperandSize());

		xOpWrite0F(param1.GetPrefix16(), is_s8(imm) ? 0x6b : 0x69, param1, param2, is_s8(imm) ? 1 : param1.GetImmSize());

		if (is_s8(imm))
			xWrite8((u8)imm);
		else
			param1.xWriteImm(imm);
	}

	// One-operand forms (result in the accumulator pair, per x86 IMUL semantics).
	void xImpl_iMul::operator()(const xRegisterInt& from) const { _g3_EmitOp(G3Type_iMUL, from); }
	void xImpl_iMul::operator()(const xIndirect64orLess& from) const { _g3_EmitOp(G3Type_iMUL, from); }

	// Two-operand forms: 0F AF (0x66 prefix selects the 16-bit variants).
	void xImpl_iMul::operator()(const xRegister32& to, const xRegister32& from) const { xOpWrite0F(0xaf, to, from); }
	void xImpl_iMul::operator()(const xRegister32& to, const xIndirectVoid& src) const { xOpWrite0F(0xaf, to, src); }
	void xImpl_iMul::operator()(const xRegister16& to, const xRegister16& from) const { xOpWrite0F(0x66, 0xaf, to, from); }
	void xImpl_iMul::operator()(const xRegister16& to, const xIndirectVoid& src) const { xOpWrite0F(0x66, 0xaf, to, src); }

	// Three-operand (reg, src, imm) forms.
	void xImpl_iMul::operator()(const xRegister32& to, const xRegister32& from, s32 imm) const { _imul_ImmStyle(to, from, imm); }
	void xImpl_iMul::operator()(const xRegister32& to, const xIndirectVoid& from, s32 imm) const { _imul_ImmStyle(to, from, imm); }
	void xImpl_iMul::operator()(const xRegister16& to, const xRegister16& from, s16 imm) const { _imul_ImmStyle(to, from, imm); }
	void xImpl_iMul::operator()(const xRegister16& to, const xIndirectVoid& from, s16 imm) const { _imul_ImmStyle(to, from, imm); }

	const xImpl_Group3 xNOT = {G3Type_NOT};
	const xImpl_Group3 xNEG = {G3Type_NEG};
	const xImpl_Group3 xUMUL = {G3Type_MUL};
	const xImpl_Group3 xUDIV = {G3Type_DIV};

	// Trailing pairs are the {prefix, opcode} of the SSE PS/PD/SS/SD forms (0x5e = div, 0x59 = mul).
	const xImpl_iDiv xDIV = {{0x00, 0x5e}, {0x66, 0x5e}, {0xf3, 0x5e}, {0xf2, 0x5e}};
	const xImpl_iMul xMUL = {{0x00, 0x59}, {0x66, 0x59}, {0xf3, 0x59}, {0xf2, 0x59}};

	// =====================================================================================================
	//  Group 8 Instructions  (BT / BTS / BTR / BTC bit-test family)
	// =====================================================================================================

	// reg, reg form: 0F A3|(type<<3).
	void xImpl_Group8::operator()(const xRegister16or32or64& bitbase, const xRegister16or32or64& bitoffset) const
	{
		pxAssert(bitbase->GetOperandSize() == bitoffset->GetOperandSize());
		xOpWrite0F(bitbase->GetPrefix16(), 0xa3 | (InstType << 3), bitbase, bitoffset);
	}
	// mem, imm8 forms: 0F BA /InstType (0x66 prefix for 16-bit).
	void xImpl_Group8::operator()(const xIndirect64& bitbase, u8 bitoffset) const { xOpWrite0F(0xba, InstType, bitbase, bitoffset); }
	void xImpl_Group8::operator()(const xIndirect32& bitbase, u8 bitoffset) const { xOpWrite0F(0xba, InstType, bitbase, bitoffset); }
	void xImpl_Group8::operator()(const xIndirect16& bitbase, u8 bitoffset) const { xOpWrite0F(0x66, 0xba, InstType, bitbase, bitoffset); }

	// reg, imm8 form.
	void xImpl_Group8::operator()(const xRegister16or32or64& bitbase, u8 bitoffset) const
	{
		xOpWrite0F(bitbase->GetPrefix16(), 0xba, InstType, bitbase, bitoffset);
	}

	// mem, reg form (operand prefix comes from the register offset operand).
	void xImpl_Group8::operator()(const xIndirectVoid& bitbase, const xRegister16or32or64& bitoffset) const
	{
		xOpWrite0F(bitoffset->GetPrefix16(), 0xa3 | (InstType << 3), bitoffset, bitbase);
	}

	const xImpl_Group8 xBT = {G8Type_BT};
	const xImpl_Group8 xBTR = {G8Type_BTR};
	const xImpl_Group8 xBTS = {G8Type_BTS};
	const xImpl_Group8 xBTC = {G8Type_BTC};


} // End namespace x86Emitter
|
||||
101
common/emitter/implement/avx.h
Normal file
101
common/emitter/implement/avx.h
Normal file
@@ -0,0 +1,101 @@
|
||||
// SPDX-FileCopyrightText: 2002-2025 PCSX2 Dev Team
|
||||
// SPDX-License-Identifier: GPL-3.0+
|
||||
|
||||
#pragma once
|
||||
|
||||
namespace x86Emitter
{
	// VEX-encoded move (e.g. VMOVAPS/VMOVUPS) with distinct opcodes for the
	// load (reg <- reg/mem) and store (mem <- reg) directions.
	struct xImplAVX_Move
	{
		u8 Prefix;      // SIMD prefix selector byte (0x00 / 0x66 / 0xF3 / 0xF2)
		u8 LoadOpcode;
		u8 StoreOpcode;

		void operator()(const xRegisterSSE& to, const xRegisterSSE& from) const;
		void operator()(const xRegisterSSE& to, const xIndirectVoid& from) const;
		void operator()(const xIndirectVoid& to, const xRegisterSSE& from) const;
	};

	// Three-operand (dest, src1, src2) op restricted to 128-bit (XMM) registers.
	struct xImplAVX_ThreeArg
	{
		u8 Prefix;
		u8 Opcode;

		void operator()(const xRegisterSSE& to, const xRegisterSSE& from1, const xRegisterSSE& from2) const;
		void operator()(const xRegisterSSE& to, const xRegisterSSE& from1, const xIndirectVoid& from2) const;
	};

	// Same as xImplAVX_ThreeArg, but wide (YMM) registers are also accepted.
	struct xImplAVX_ThreeArgYMM : xImplAVX_ThreeArg
	{
		void operator()(const xRegisterSSE& to, const xRegisterSSE& from1, const xRegisterSSE& from2) const;
		void operator()(const xRegisterSSE& to, const xRegisterSSE& from1, const xIndirectVoid& from2) const;
	};

	// The four float forms of one arithmetic op; packed forms permit YMM.
	struct xImplAVX_ArithFloat
	{
		xImplAVX_ThreeArgYMM PS; // packed single
		xImplAVX_ThreeArgYMM PD; // packed double
		xImplAVX_ThreeArg SS;    // scalar single
		xImplAVX_ThreeArg SD;    // scalar double
	};

	// One VCMPcc predicate: PS/PD/SS/SD emit the compare with this predicate byte.
	struct xImplAVX_CmpFloatHelper
	{
		SSE2_ComparisonType CType;

		void PS(const xRegisterSSE& to, const xRegisterSSE& from1, const xRegisterSSE& from2) const;
		void PS(const xRegisterSSE& to, const xRegisterSSE& from1, const xIndirectVoid& from2) const;
		void PD(const xRegisterSSE& to, const xRegisterSSE& from1, const xRegisterSSE& from2) const;
		void PD(const xRegisterSSE& to, const xRegisterSSE& from1, const xIndirectVoid& from2) const;

		void SS(const xRegisterSSE& to, const xRegisterSSE& from1, const xRegisterSSE& from2) const;
		void SS(const xRegisterSSE& to, const xRegisterSSE& from1, const xIndirectVoid& from2) const;
		void SD(const xRegisterSSE& to, const xRegisterSSE& from1, const xRegisterSSE& from2) const;
		void SD(const xRegisterSSE& to, const xRegisterSSE& from1, const xIndirectVoid& from2) const;
	};

	// Full set of float comparison predicates (see xVCMP for how these are populated).
	struct xImplAVX_CmpFloat
	{
		xImplAVX_CmpFloatHelper EQ;
		xImplAVX_CmpFloatHelper LT;
		xImplAVX_CmpFloatHelper LE;
		xImplAVX_CmpFloatHelper UO;
		xImplAVX_CmpFloatHelper NE;
		xImplAVX_CmpFloatHelper GE;
		xImplAVX_CmpFloatHelper GT;
		xImplAVX_CmpFloatHelper OR;
	};

	// Packed-integer comparison ops (see xVPCMP).
	struct xImplAVX_CmpInt
	{
		// Compare packed bytes for equality.
		// If a data element in dest is equal to the corresponding data element src, the
		// corresponding data element in dest is set to all 1s; otherwise, it is set to all 0s.
		const xImplAVX_ThreeArgYMM EQB;

		// Compare packed words for equality.
		// If a data element in dest is equal to the corresponding data element src, the
		// corresponding data element in dest is set to all 1s; otherwise, it is set to all 0s.
		const xImplAVX_ThreeArgYMM EQW;

		// Compare packed doublewords [32-bits] for equality.
		// If a data element in dest is equal to the corresponding data element src, the
		// corresponding data element in dest is set to all 1s; otherwise, it is set to all 0s.
		const xImplAVX_ThreeArgYMM EQD;

		// Compare packed signed bytes for greater than.
		// If a data element in dest is greater than the corresponding data element src, the
		// corresponding data element in dest is set to all 1s; otherwise, it is set to all 0s.
		const xImplAVX_ThreeArgYMM GTB;

		// Compare packed signed words for greater than.
		// If a data element in dest is greater than the corresponding data element src, the
		// corresponding data element in dest is set to all 1s; otherwise, it is set to all 0s.
		const xImplAVX_ThreeArgYMM GTW;

		// Compare packed signed doublewords [32-bits] for greater than.
		// If a data element in dest is greater than the corresponding data element src, the
		// corresponding data element in dest is set to all 1s; otherwise, it is set to all 0s.
		const xImplAVX_ThreeArgYMM GTD;
	};
} // namespace x86Emitter
|
||||
49
common/emitter/implement/bmi.h
Normal file
49
common/emitter/implement/bmi.h
Normal file
@@ -0,0 +1,49 @@
|
||||
// SPDX-FileCopyrightText: 2002-2025 PCSX2 Dev Team
|
||||
// SPDX-License-Identifier: GPL-3.0+
|
||||
|
||||
#pragma once
|
||||
|
||||
// Implement BMI1/BMI2 instruction set
|
||||
|
||||
namespace x86Emitter
{

	// VEX-encoded BMI instruction in RVM operand order (dest reg, VEX.vvvv reg, reg/mem).
	struct xImplBMI_RVM
	{
		u8 Prefix;   // SIMD prefix selector byte (0x00 / 0xF2 / 0xF3)
		u8 MbPrefix; // opcode-map byte (0x38 for the instructions defined here)
		u8 Opcode;

		// RVM
		// MULX   Unsigned multiply without affecting flags, and arbitrary destination registers
		// PDEP   Parallel bits deposit
		// PEXT   Parallel bits extract
		// ANDN   Logical and not            ~x & y
		void operator()(const xRegisterInt& to, const xRegisterInt& from1, const xRegisterInt& from2) const;
		void operator()(const xRegisterInt& to, const xRegisterInt& from1, const xIndirectVoid& from2) const;

#if 0
	// RMV
	// BEXTR  Bit field extract (with register)   (src >> start) & ((1 << len)-1)[9]
	// BZHI   Zero high bits starting with specified bit position
	// SARX   Shift arithmetic right without affecting flags
	// SHRX   Shift logical right without affecting flags
	// SHLX   Shift logical left without affecting flags
	// FIXME: WARNING same as above but V and M are inverted
	//void operator()( const xRegisterInt& to, const xRegisterInt& from1, const xRegisterInt& from2) const;
	//void operator()( const xRegisterInt& to, const xIndirectVoid& from1, const xRegisterInt& from2) const;

	// VM
	// BLSI   Extract lowest set isolated bit     x & -x
	// BLSMSK Get mask up to lowest set bit       x ^ (x - 1)
	// BLSR   Reset lowest set bit                x & (x - 1)
	void operator()( const xRegisterInt& to, const xRegisterInt& from) const;
	void operator()( const xRegisterInt& to, const xIndirectVoid& from) const;

	// RMI
	// RORX   Rotate right logical without affecting flags
	void operator()( const xRegisterInt& to, const xRegisterInt& from, u8 imm) const;
	void operator()( const xRegisterInt& to, const xIndirectVoid& from, u8 imm) const;
#endif
	};
} // namespace x86Emitter
|
||||
32
common/emitter/implement/dwshift.h
Normal file
32
common/emitter/implement/dwshift.h
Normal file
@@ -0,0 +1,32 @@
|
||||
// SPDX-FileCopyrightText: 2002-2025 PCSX2 Dev Team
|
||||
// SPDX-License-Identifier: GPL-3.0+
|
||||
|
||||
#pragma once
|
||||
|
||||
namespace x86Emitter
{

	// Implementations here cover SHLD and SHRD.

	// --------------------------------------------------------------------------------------
	//  xImpl_DwordShift
	// --------------------------------------------------------------------------------------
	// I use explicit method declarations here instead of templates, in order to provide
	// *only* 32 and 16 bit register operand forms (8 bit registers are not valid in SHLD/SHRD).
	//
	// Optimization Note: Imm shifts by 0 are ignored (no code generated).  This is a safe
	// optimization because shifts by 0 do *not* affect flags status (intel docs cited).
	//
	struct xImpl_DwordShift
	{
		u16 OpcodeBase; // base opcode; CL and imm8 forms are derived from it

		// Shift by the count in CL.
		void operator()(const xRegister16or32or64& to, const xRegister16or32or64& from, const xRegisterCL& clreg) const;

		// Shift by an immediate count (0 emits nothing).
		void operator()(const xRegister16or32or64& to, const xRegister16or32or64& from, u8 shiftcnt) const;

		// Memory-destination forms.
		void operator()(const xIndirectVoid& dest, const xRegister16or32or64& from, const xRegisterCL& clreg) const;
		void operator()(const xIndirectVoid& dest, const xRegister16or32or64& from, u8 shiftcnt) const;
	};

} // End namespace x86Emitter
|
||||
131
common/emitter/implement/group1.h
Normal file
131
common/emitter/implement/group1.h
Normal file
@@ -0,0 +1,131 @@
|
||||
// SPDX-FileCopyrightText: 2002-2025 PCSX2 Dev Team
|
||||
// SPDX-License-Identifier: GPL-3.0+
|
||||
|
||||
#pragma once
|
||||
|
||||
namespace x86Emitter
|
||||
{
|
||||
|
||||
// Group-1 operation selector; the enumerator value is the opcode's /r extension
// and also feeds bits 3-5 of the short-form opcodes (see groups.cpp).
enum G1Type
{
	G1Type_ADD = 0,
	G1Type_OR,
	G1Type_ADC,
	G1Type_SBB,
	G1Type_AND,
	G1Type_SUB,
	G1Type_XOR,
	G1Type_CMP
};

// reg,reg group-1 emitter (defined in groups.cpp).
extern void _g1_EmitOp(G1Type InstType, const xRegisterInt& to, const xRegisterInt& from);

// --------------------------------------------------------------------------------------
//  xImpl_Group1
// --------------------------------------------------------------------------------------
// Plain x86 group-1 instruction (no SSE counterpart), e.g. ADC/SBB.
struct xImpl_Group1
{
	G1Type InstType; // which group-1 operation this instance encodes

	void operator()(const xRegisterInt& to, const xRegisterInt& from) const;

	void operator()(const xIndirectVoid& to, const xRegisterInt& from) const;
	void operator()(const xRegisterInt& to, const xIndirectVoid& from) const;
	void operator()(const xRegisterInt& to, int imm) const;
	void operator()(const xIndirect64orLess& to, int imm) const;

#if 0
	// ------------------------------------------------------------------------
	template< typename T > __noinline void operator()( const ModSibBase& to, const xImmReg<T>& immOrReg ) const
	{
		_DoI_helpermess( *this, to, immOrReg );
	}

	template< typename T > __noinline void operator()( const xDirectOrIndirect<T>& to, const xImmReg<T>& immOrReg ) const
	{
		_DoI_helpermess( *this, to, immOrReg );
	}

	template< typename T > __noinline void operator()( const xDirectOrIndirect<T>& to, int imm ) const
	{
		_DoI_helpermess( *this, to, imm );
	}

	template< typename T > __noinline void operator()( const xDirectOrIndirect<T>& to, const xDirectOrIndirect<T>& from ) const
	{
		_DoI_helpermess( *this, to, from );
	}

	// FIXME : Make this struct to 8, 16, and 32 bit registers
	template< typename T > __noinline void operator()( const xRegisterBase& to, const xDirectOrIndirect<T>& from ) const
	{
		_DoI_helpermess( *this, xDirectOrIndirect<T>( to ), from );
	}

	// FIXME : Make this struct to 8, 16, and 32 bit registers
	template< typename T > __noinline void operator()( const xDirectOrIndirect<T>& to, const xRegisterBase& from ) const
	{
		_DoI_helpermess( *this, to, xDirectOrIndirect<T>( from ) );
	}
#endif
};

// ------------------------------------------------------------------------
// This class combines x86 with SSE/SSE2 logic operations (AND, OR, and XOR).
// Note: ANDN [AndNot] is handled below separately.
//
struct xImpl_G1Logic
{
	G1Type InstType;

	void operator()(const xRegisterInt& to, const xRegisterInt& from) const;

	void operator()(const xIndirectVoid& to, const xRegisterInt& from) const;
	void operator()(const xRegisterInt& to, const xIndirectVoid& from) const;
	void operator()(const xRegisterInt& to, int imm) const;

	void operator()(const xIndirect64orLess& to, int imm) const;

	xImplSimd_DestRegSSE PS; // packed single precision
	xImplSimd_DestRegSSE PD; // packed double precision
};

// ------------------------------------------------------------------------
// This class combines x86 with SSE/SSE2 arithmetic operations (ADD/SUB).
//
struct xImpl_G1Arith
{
	G1Type InstType;

	void operator()(const xRegisterInt& to, const xRegisterInt& from) const;

	void operator()(const xIndirectVoid& to, const xRegisterInt& from) const;
	void operator()(const xRegisterInt& to, const xIndirectVoid& from) const;
	void operator()(const xRegisterInt& to, int imm) const;

	void operator()(const xIndirect64orLess& to, int imm) const;

	xImplSimd_DestRegSSE PS; // packed single precision
	xImplSimd_DestRegSSE PD; // packed double precision
	xImplSimd_DestRegSSE SS; // scalar single precision
	xImplSimd_DestRegSSE SD; // scalar double precision
};
|
||||
|
||||
// ------------------------------------------------------------------------
|
||||
struct xImpl_G1Compare
|
||||
{
|
||||
void operator()(const xRegisterInt& to, const xRegisterInt& from) const;
|
||||
|
||||
void operator()(const xIndirectVoid& to, const xRegisterInt& from) const;
|
||||
void operator()(const xRegisterInt& to, const xIndirectVoid& from) const;
|
||||
void operator()(const xRegisterInt& to, int imm) const;
|
||||
|
||||
void operator()(const xIndirect64orLess& to, int imm) const;
|
||||
|
||||
xImplSimd_DestSSE_CmpImm PS;
|
||||
xImplSimd_DestSSE_CmpImm PD;
|
||||
xImplSimd_DestSSE_CmpImm SS;
|
||||
xImplSimd_DestSSE_CmpImm SD;
|
||||
};
|
||||
|
||||
} // End namespace x86Emitter
|
||||
51
common/emitter/implement/group2.h
Normal file
51
common/emitter/implement/group2.h
Normal file
@@ -0,0 +1,51 @@
|
||||
// SPDX-FileCopyrightText: 2002-2025 PCSX2 Dev Team
|
||||
// SPDX-License-Identifier: GPL-3.0+
|
||||
|
||||
#pragma once
|
||||
|
||||
namespace x86Emitter
|
||||
{
|
||||
|
||||
enum G2Type
|
||||
{
|
||||
G2Type_ROL = 0,
|
||||
G2Type_ROR,
|
||||
G2Type_RCL,
|
||||
G2Type_RCR,
|
||||
G2Type_SHL,
|
||||
G2Type_SHR,
|
||||
G2Type_Unused,
|
||||
G2Type_SAR
|
||||
};
|
||||
|
||||
// --------------------------------------------------------------------------------------
|
||||
// xImpl_Group2
|
||||
// --------------------------------------------------------------------------------------
|
||||
// Group 2 (shift) instructions have no Sib/ModRM forms.
|
||||
// Optimization Note: For Imm forms, we ignore the instruction if the shift count is zero.
|
||||
// This is a safe optimization since any zero-value shift does not affect any flags.
|
||||
//
|
||||
struct xImpl_Group2
|
||||
{
|
||||
G2Type InstType;
|
||||
|
||||
void operator()(const xRegisterInt& to, const xRegisterCL& from) const;
|
||||
void operator()(const xIndirect64orLess& to, const xRegisterCL& from) const;
|
||||
void operator()(const xRegisterInt& to, u8 imm) const;
|
||||
void operator()(const xIndirect64orLess& to, u8 imm) const;
|
||||
|
||||
#if 0
|
||||
// ------------------------------------------------------------------------
|
||||
template< typename T > __noinline void operator()( const xDirectOrIndirect<T>& to, u8 imm ) const
|
||||
{
|
||||
_DoI_helpermess( *this, to, imm );
|
||||
}
|
||||
|
||||
template< typename T > __noinline void operator()( const xDirectOrIndirect<T>& to, const xRegisterCL& from ) const
|
||||
{
|
||||
_DoI_helpermess( *this, to, from );
|
||||
}
|
||||
#endif
|
||||
};
|
||||
|
||||
} // End namespace x86Emitter
|
||||
97
common/emitter/implement/group3.h
Normal file
97
common/emitter/implement/group3.h
Normal file
@@ -0,0 +1,97 @@
|
||||
// SPDX-FileCopyrightText: 2002-2025 PCSX2 Dev Team
|
||||
// SPDX-License-Identifier: GPL-3.0+
|
||||
|
||||
#pragma once
|
||||
|
||||
namespace x86Emitter
|
||||
{
|
||||
|
||||
enum G3Type
|
||||
{
|
||||
G3Type_NOT = 2,
|
||||
G3Type_NEG = 3,
|
||||
G3Type_MUL = 4,
|
||||
G3Type_iMUL = 5, // partial implementation, iMul has additional forms in ix86.cpp
|
||||
G3Type_DIV = 6,
|
||||
G3Type_iDIV = 7
|
||||
};
|
||||
|
||||
// --------------------------------------------------------------------------------------
|
||||
// xImpl_Group3
|
||||
// --------------------------------------------------------------------------------------
|
||||
struct xImpl_Group3
|
||||
{
|
||||
G3Type InstType;
|
||||
|
||||
void operator()(const xRegisterInt& from) const;
|
||||
void operator()(const xIndirect64orLess& from) const;
|
||||
|
||||
#if 0
|
||||
template< typename T >
|
||||
void operator()( const xDirectOrIndirect<T>& from ) const
|
||||
{
|
||||
_DoI_helpermess( *this, from );
|
||||
}
|
||||
#endif
|
||||
};
|
||||
|
||||
// --------------------------------------------------------------------------------------
|
||||
// xImpl_MulDivBase
|
||||
// --------------------------------------------------------------------------------------
|
||||
// This class combines x86 and SSE/SSE2 instructions for iMUL and iDIV.
|
||||
//
|
||||
struct xImpl_MulDivBase
|
||||
{
|
||||
G3Type InstType;
|
||||
u16 OpcodeSSE;
|
||||
|
||||
void operator()(const xRegisterInt& from) const;
|
||||
void operator()(const xIndirect64orLess& from) const;
|
||||
|
||||
const xImplSimd_DestRegSSE PS;
|
||||
const xImplSimd_DestRegSSE PD;
|
||||
const xImplSimd_DestRegSSE SS;
|
||||
const xImplSimd_DestRegSSE SD;
|
||||
};
|
||||
|
||||
// --------------------------------------------------------------------------------------
|
||||
// xImpl_iDiv
|
||||
// --------------------------------------------------------------------------------------
|
||||
struct xImpl_iDiv
|
||||
{
|
||||
void operator()(const xRegisterInt& from) const;
|
||||
void operator()(const xIndirect64orLess& from) const;
|
||||
|
||||
const xImplSimd_DestRegSSE PS;
|
||||
const xImplSimd_DestRegSSE PD;
|
||||
const xImplSimd_DestRegSSE SS;
|
||||
const xImplSimd_DestRegSSE SD;
|
||||
};
|
||||
|
||||
// --------------------------------------------------------------------------------------
|
||||
// xImpl_iMul
|
||||
// --------------------------------------------------------------------------------------
|
||||
//
|
||||
struct xImpl_iMul
|
||||
{
|
||||
void operator()(const xRegisterInt& from) const;
|
||||
void operator()(const xIndirect64orLess& from) const;
|
||||
|
||||
// The following iMul-specific forms are valid for 16 and 32 bit register operands only!
|
||||
|
||||
void operator()(const xRegister32& to, const xRegister32& from) const;
|
||||
void operator()(const xRegister32& to, const xIndirectVoid& src) const;
|
||||
void operator()(const xRegister16& to, const xRegister16& from) const;
|
||||
void operator()(const xRegister16& to, const xIndirectVoid& src) const;
|
||||
|
||||
void operator()(const xRegister32& to, const xRegister32& from, s32 imm) const;
|
||||
void operator()(const xRegister32& to, const xIndirectVoid& from, s32 imm) const;
|
||||
void operator()(const xRegister16& to, const xRegister16& from, s16 imm) const;
|
||||
void operator()(const xRegister16& to, const xIndirectVoid& from, s16 imm) const;
|
||||
|
||||
const xImplSimd_DestRegSSE PS;
|
||||
const xImplSimd_DestRegSSE PD;
|
||||
const xImplSimd_DestRegSSE SS;
|
||||
const xImplSimd_DestRegSSE SD;
|
||||
};
|
||||
} // namespace x86Emitter
|
||||
80
common/emitter/implement/helpers.h
Normal file
80
common/emitter/implement/helpers.h
Normal file
@@ -0,0 +1,80 @@
|
||||
// SPDX-FileCopyrightText: 2002-2025 PCSX2 Dev Team
|
||||
// SPDX-License-Identifier: GPL-3.0+
|
||||
|
||||
#pragma once
|
||||
|
||||
namespace x86Emitter
|
||||
{
|
||||
|
||||
// helpermess is currently broken >_<
|
||||
|
||||
#if 0
|
||||
|
||||
template< typename xImpl, typename T >
|
||||
void _DoI_helpermess( const xImpl& helpme, const xDirectOrIndirect& to, const xImmReg<T>& immOrReg )
|
||||
{
|
||||
if( to.IsDirect() )
|
||||
{
|
||||
if( immOrReg.IsReg() )
|
||||
helpme( to.GetReg(), immOrReg.GetReg() );
|
||||
else
|
||||
helpme( to.GetReg(), immOrReg.GetImm() );
|
||||
}
|
||||
else
|
||||
{
|
||||
if( immOrReg.IsReg() )
|
||||
helpme( to.GetMem(), immOrReg.GetReg() );
|
||||
else
|
||||
helpme( to.GetMem(), immOrReg.GetImm() );
|
||||
}
|
||||
}
|
||||
|
||||
template< typename xImpl, typename T >
|
||||
void _DoI_helpermess( const xImpl& helpme, const ModSibBase& to, const xImmReg<T>& immOrReg )
|
||||
{
|
||||
if( immOrReg.IsReg() )
|
||||
helpme( to, immOrReg.GetReg() );
|
||||
else
|
||||
helpme( (ModSibStrict)to, immOrReg.GetImm() );
|
||||
}
|
||||
|
||||
template< typename xImpl, typename T >
|
||||
void _DoI_helpermess( const xImpl& helpme, const xDirectOrIndirect<T>& to, int imm )
|
||||
{
|
||||
if( to.IsDirect() )
|
||||
helpme( to.GetReg(), imm );
|
||||
else
|
||||
helpme( to.GetMem(), imm );
|
||||
}
|
||||
|
||||
template< typename xImpl, typename T >
|
||||
void _DoI_helpermess( const xImpl& helpme, const xDirectOrIndirect<T>& parm )
|
||||
{
|
||||
if( parm.IsDirect() )
|
||||
helpme( parm.GetReg() );
|
||||
else
|
||||
helpme( parm.GetMem() );
|
||||
}
|
||||
|
||||
template< typename xImpl, typename T >
|
||||
void _DoI_helpermess( const xImpl& helpme, const xDirectOrIndirect<T>& to, const xDirectOrIndirect<T>& from )
|
||||
{
|
||||
if( to.IsDirect() && from.IsDirect() )
|
||||
helpme( to.GetReg(), from.GetReg() );
|
||||
|
||||
else if( to.IsDirect() )
|
||||
helpme( to.GetReg(), from.GetMem() );
|
||||
|
||||
else if( from.IsDirect() )
|
||||
helpme( to.GetMem(), from.GetReg() );
|
||||
|
||||
else
|
||||
|
||||
// One of the fields needs to be direct, or else we cannot complete the operation.
|
||||
// (intel doesn't support indirects in both fields)
|
||||
|
||||
pxFailDev( "Invalid asm instruction: Both operands are indirect memory addresses." );
|
||||
}
|
||||
#endif
|
||||
|
||||
} // End namespace x86Emitter
|
||||
23
common/emitter/implement/incdec.h
Normal file
23
common/emitter/implement/incdec.h
Normal file
@@ -0,0 +1,23 @@
|
||||
// SPDX-FileCopyrightText: 2002-2025 PCSX2 Dev Team
|
||||
// SPDX-License-Identifier: GPL-3.0+
|
||||
|
||||
#pragma once
|
||||
|
||||
// Implementations found here: Increment and Decrement Instructions!
|
||||
// (They're soooo lonely... but I dunno where else to stick this class!)
|
||||
|
||||
namespace x86Emitter
|
||||
{
|
||||
|
||||
// --------------------------------------------------------------------------------------
|
||||
// xImpl_IncDec
|
||||
// --------------------------------------------------------------------------------------
|
||||
struct xImpl_IncDec
|
||||
{
|
||||
bool isDec;
|
||||
|
||||
void operator()(const xRegisterInt& to) const;
|
||||
void operator()(const xIndirect64orLess& to) const;
|
||||
};
|
||||
|
||||
} // End namespace x86Emitter
|
||||
80
common/emitter/implement/jmpcall.h
Normal file
80
common/emitter/implement/jmpcall.h
Normal file
@@ -0,0 +1,80 @@
|
||||
// SPDX-FileCopyrightText: 2002-2025 PCSX2 Dev Team
|
||||
// SPDX-License-Identifier: GPL-3.0+
|
||||
|
||||
#pragma once
|
||||
|
||||
// Implementations found here: CALL and JMP! (unconditional only)
|
||||
|
||||
namespace x86Emitter
|
||||
{
|
||||
|
||||
extern void xJccKnownTarget(JccComparisonType comparison, const void* target, bool slideForward);
|
||||
|
||||
// ------------------------------------------------------------------------
|
||||
struct xImpl_JmpCall
|
||||
{
|
||||
bool isJmp;
|
||||
|
||||
void operator()(const xAddressReg& absreg) const;
|
||||
void operator()(const xIndirectNative& src) const;
|
||||
|
||||
// Special form for calling functions. This form automatically resolves the
|
||||
// correct displacement based on the size of the instruction being generated.
|
||||
void operator()(const void* func) const
|
||||
{
|
||||
if (isJmp)
|
||||
xJccKnownTarget(Jcc_Unconditional, (const void*)(uptr)func, false); // double cast to/from (uptr) needed to appease GCC
|
||||
else
|
||||
{
|
||||
// calls are relative to the instruction after this one, and length is
|
||||
// always 5 bytes (16 bit calls are bad mojo, so no bother to do special logic).
|
||||
|
||||
sptr dest = (sptr)func - ((sptr)xGetPtr() + 5);
|
||||
pxAssertMsg(dest == (s32)dest, "Indirect jump is too far, must use a register!");
|
||||
xWrite8(0xe8);
|
||||
xWrite32(dest);
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
// yes it is awful. Due to template code is in a header with a nice circular dep.
|
||||
extern const xImpl_Mov xMOV;
|
||||
extern const xImpl_JmpCall xCALL;
|
||||
|
||||
struct xImpl_FastCall
|
||||
{
|
||||
// FIXME: current 64 bits is mostly a copy/past potentially it would require to push/pop
|
||||
// some registers. But I think it is enough to handle the first call.
|
||||
|
||||
void operator()(const void* f, const xRegister32& a1 = xEmptyReg, const xRegister32& a2 = xEmptyReg) const;
|
||||
|
||||
void operator()(const void* f, u32 a1, const xRegister32& a2) const;
|
||||
void operator()(const void* f, const xIndirect32& a1) const;
|
||||
void operator()(const void* f, u32 a1, u32 a2) const;
|
||||
void operator()(const void* f, void* a1) const;
|
||||
|
||||
void operator()(const void* f, const xRegisterLong& a1, const xRegisterLong& a2 = xEmptyReg) const;
|
||||
void operator()(const void* f, u32 a1, const xRegisterLong& a2) const;
|
||||
|
||||
template <typename T>
|
||||
__fi void operator()(T* func, u32 a1, const xRegisterLong& a2 = xEmptyReg) const
|
||||
{
|
||||
(*this)((const void*)func, a1, a2);
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
__fi void operator()(T* func, const xIndirect32& a1) const
|
||||
{
|
||||
(*this)((const void*)func, a1);
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
__fi void operator()(T* func, u32 a1, u32 a2) const
|
||||
{
|
||||
(*this)((const void*)func, a1, a2);
|
||||
}
|
||||
|
||||
void operator()(const xIndirectNative& f, const xRegisterLong& a1 = xEmptyReg, const xRegisterLong& a2 = xEmptyReg) const;
|
||||
};
|
||||
|
||||
} // End namespace x86Emitter
|
||||
128
common/emitter/implement/movs.h
Normal file
128
common/emitter/implement/movs.h
Normal file
@@ -0,0 +1,128 @@
|
||||
// SPDX-FileCopyrightText: 2002-2025 PCSX2 Dev Team
|
||||
// SPDX-License-Identifier: GPL-3.0+
|
||||
|
||||
#pragma once
|
||||
|
||||
// Header: ix86_impl_movs.h -- covers mov, cmov, movsx/movzx, and SETcc (which shares
|
||||
// with cmov many similarities).
|
||||
|
||||
namespace x86Emitter
|
||||
{
|
||||
|
||||
// --------------------------------------------------------------------------------------
|
||||
// MovImplAll
|
||||
// --------------------------------------------------------------------------------------
|
||||
// MOV instruction Implementation, plus many SIMD sub-mov variants.
|
||||
//
|
||||
struct xImpl_Mov
|
||||
{
|
||||
xImpl_Mov() {} // Satisfy GCC's whims.
|
||||
|
||||
void operator()(const xRegisterInt& to, const xRegisterInt& from) const;
|
||||
void operator()(const xIndirectVoid& dest, const xRegisterInt& from) const;
|
||||
void operator()(const xRegisterInt& to, const xIndirectVoid& src) const;
|
||||
void operator()(const xIndirect64orLess& dest, sptr imm) const;
|
||||
void operator()(const xRegisterInt& to, sptr imm, bool preserve_flags = false) const;
|
||||
|
||||
#if 0
|
||||
template< typename T > __noinline void operator()( const ModSibBase& to, const xImmReg<T>& immOrReg ) const
|
||||
{
|
||||
_DoI_helpermess( *this, to, immOrReg );
|
||||
}
|
||||
|
||||
template< typename T > __noinline void operator()( const xDirectOrIndirect<T>& to, const xImmReg<T>& immOrReg ) const
|
||||
{
|
||||
_DoI_helpermess( *this, to, immOrReg );
|
||||
}
|
||||
|
||||
template< typename T > __noinline void operator()( const xDirectOrIndirect<T>& to, int imm ) const
|
||||
{
|
||||
_DoI_helpermess( *this, to, imm );
|
||||
}
|
||||
|
||||
template< typename T > __noinline void operator()( const xDirectOrIndirect<T>& to, const xDirectOrIndirect<T>& from ) const
|
||||
{
|
||||
if( to == from ) return;
|
||||
_DoI_helpermess( *this, to, from );
|
||||
}
|
||||
|
||||
/*template< typename T > __noinline void operator()( const xRegister<T>& to, const xDirectOrIndirect<T>& from ) const
|
||||
{
|
||||
_DoI_helpermess( *this, xDirectOrIndirect<T>( to ), from );
|
||||
}
|
||||
|
||||
template< typename T > __noinline void operator()( const xDirectOrIndirect<T>& to, const xRegister<T>& from ) const
|
||||
{
|
||||
_DoI_helpermess( *this, to, xDirectOrIndirect<T>( from ) );
|
||||
}*/
|
||||
#endif
|
||||
};
|
||||
|
||||
// --------------------------------------------------------------------------------------
|
||||
// xImpl_MovImm64
|
||||
// --------------------------------------------------------------------------------------
|
||||
// Mov with 64-bit immediates (only available on 64-bit platforms)
|
||||
//
|
||||
struct xImpl_MovImm64
|
||||
{
|
||||
xImpl_MovImm64() {} // Satisfy GCC's whims.
|
||||
|
||||
void operator()(const xRegister64& to, s64 imm, bool preserve_flags = false) const;
|
||||
};
|
||||
|
||||
// --------------------------------------------------------------------------------------
|
||||
// xImpl_CMov
|
||||
// --------------------------------------------------------------------------------------
|
||||
// CMOVcc !! [in all of it's disappointing lack-of glory] .. and ..
|
||||
// SETcc !! [more glory, less lack!]
|
||||
//
|
||||
// CMOV Disclaimer: Caution! This instruction can look exciting and cool, until you
|
||||
// realize that it cannot load immediate values into registers. -_-
|
||||
//
|
||||
// I use explicit method declarations here instead of templates, in order to provide
|
||||
// *only* 32 and 16 bit register operand forms (8 bit registers are not valid in CMOV).
|
||||
//
|
||||
|
||||
struct xImpl_CMov
|
||||
{
|
||||
JccComparisonType ccType;
|
||||
void operator()(const xRegister16or32or64& to, const xRegister16or32or64& from) const;
|
||||
void operator()(const xRegister16or32or64& to, const xIndirectVoid& sibsrc) const;
|
||||
|
||||
//void operator()( const xDirectOrIndirect32& to, const xDirectOrIndirect32& from );
|
||||
//void operator()( const xDirectOrIndirect16& to, const xDirectOrIndirect16& from ) const;
|
||||
};
|
||||
|
||||
struct xImpl_Set
|
||||
{
|
||||
JccComparisonType ccType;
|
||||
|
||||
void operator()(const xRegister8& to) const;
|
||||
void operator()(const xIndirect8& dest) const;
|
||||
|
||||
//void operator()( const xDirectOrIndirect8& dest ) const;
|
||||
};
|
||||
|
||||
|
||||
// --------------------------------------------------------------------------------------
|
||||
// xImpl_MovExtend
|
||||
// --------------------------------------------------------------------------------------
|
||||
// Mov with sign/zero extension implementations (movsx / movzx)
|
||||
//
|
||||
struct xImpl_MovExtend
|
||||
{
|
||||
bool SignExtend;
|
||||
|
||||
void operator()(const xRegister16or32or64& to, const xRegister8& from) const;
|
||||
void operator()(const xRegister16or32or64& to, const xIndirect8& sibsrc) const;
|
||||
void operator()(const xRegister32or64& to, const xRegister16& from) const;
|
||||
void operator()(const xRegister32or64& to, const xIndirect16& sibsrc) const;
|
||||
void operator()(const xRegister64& to, const xRegister32& from) const;
|
||||
void operator()(const xRegister64& to, const xIndirect32& sibsrc) const;
|
||||
|
||||
//void operator()( const xRegister32& to, const xDirectOrIndirect16& src ) const;
|
||||
//void operator()( const xRegister16or32& to, const xDirectOrIndirect8& src ) const;
|
||||
//void operator()( const xRegister16& to, const xDirectOrIndirect8& src ) const;
|
||||
};
|
||||
|
||||
} // End namespace x86Emitter
|
||||
304
common/emitter/implement/simd_arithmetic.h
Normal file
304
common/emitter/implement/simd_arithmetic.h
Normal file
@@ -0,0 +1,304 @@
|
||||
// SPDX-FileCopyrightText: 2002-2025 PCSX2 Dev Team
|
||||
// SPDX-License-Identifier: GPL-3.0+
|
||||
|
||||
#pragma once
|
||||
|
||||
namespace x86Emitter
|
||||
{
|
||||
|
||||
// --------------------------------------------------------------------------------------
|
||||
// _SimdShiftHelper
|
||||
// --------------------------------------------------------------------------------------
|
||||
struct _SimdShiftHelper
|
||||
{
|
||||
u8 Prefix;
|
||||
u16 Opcode;
|
||||
u16 OpcodeImm;
|
||||
u8 Modcode;
|
||||
|
||||
void operator()(const xRegisterSSE& to, const xRegisterSSE& from) const;
|
||||
void operator()(const xRegisterSSE& to, const xIndirectVoid& from) const;
|
||||
|
||||
void operator()(const xRegisterSSE& to, u8 imm8) const;
|
||||
};
|
||||
|
||||
// --------------------------------------------------------------------------------------
|
||||
// xImplSimd_Shift / xImplSimd_ShiftWithoutQ
|
||||
// --------------------------------------------------------------------------------------
|
||||
|
||||
// Used for PSRA, which lacks the Q form.
|
||||
//
|
||||
struct xImplSimd_ShiftWithoutQ
|
||||
{
|
||||
const _SimdShiftHelper W;
|
||||
const _SimdShiftHelper D;
|
||||
};
|
||||
|
||||
// Implements PSRL and PSLL
|
||||
//
|
||||
struct xImplSimd_Shift
|
||||
{
|
||||
const _SimdShiftHelper W;
|
||||
const _SimdShiftHelper D;
|
||||
const _SimdShiftHelper Q;
|
||||
|
||||
void DQ(const xRegisterSSE& to, u8 imm8) const;
|
||||
};
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////////////////////
|
||||
//
|
||||
struct xImplSimd_AddSub
|
||||
{
|
||||
const xImplSimd_DestRegEither B;
|
||||
const xImplSimd_DestRegEither W;
|
||||
const xImplSimd_DestRegEither D;
|
||||
const xImplSimd_DestRegEither Q;
|
||||
|
||||
// Add/Sub packed signed byte [8bit] integers from src into dest, and saturate the results.
|
||||
const xImplSimd_DestRegEither SB;
|
||||
|
||||
// Add/Sub packed signed word [16bit] integers from src into dest, and saturate the results.
|
||||
const xImplSimd_DestRegEither SW;
|
||||
|
||||
// Add/Sub packed unsigned byte [8bit] integers from src into dest, and saturate the results.
|
||||
const xImplSimd_DestRegEither USB;
|
||||
|
||||
// Add/Sub packed unsigned word [16bit] integers from src into dest, and saturate the results.
|
||||
const xImplSimd_DestRegEither USW;
|
||||
};
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////////////////////
|
||||
//
|
||||
struct xImplSimd_PMul
|
||||
{
|
||||
const xImplSimd_DestRegEither LW;
|
||||
const xImplSimd_DestRegEither HW;
|
||||
const xImplSimd_DestRegEither HUW;
|
||||
const xImplSimd_DestRegEither UDQ;
|
||||
|
||||
// [SSE-3] PMULHRSW multiplies vertically each signed 16-bit integer from dest with the
|
||||
// corresponding signed 16-bit integer of source, producing intermediate signed 32-bit
|
||||
// integers. Each intermediate 32-bit integer is truncated to the 18 most significant
|
||||
// bits. Rounding is always performed by adding 1 to the least significant bit of the
|
||||
// 18-bit intermediate result. The final result is obtained by selecting the 16 bits
|
||||
// immediately to the right of the most significant bit of each 18-bit intermediate
|
||||
// result and packed to the destination operand.
|
||||
//
|
||||
// Both operands can be MMX or XMM registers. Source can be register or memory.
|
||||
//
|
||||
const xImplSimd_DestRegEither HRSW;
|
||||
|
||||
// [SSE-4.1] Multiply the packed dword signed integers in dest with src, and store
|
||||
// the low 32 bits of each product in xmm1.
|
||||
const xImplSimd_DestRegSSE LD;
|
||||
|
||||
// [SSE-4.1] Multiply the packed signed dword integers in dest with src.
|
||||
const xImplSimd_DestRegSSE DQ;
|
||||
};
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////////////////////
|
||||
// For instructions that have PS/SS form only (most commonly reciprocal Sqrt functions)
|
||||
//
|
||||
struct xImplSimd_rSqrt
|
||||
{
|
||||
const xImplSimd_DestRegSSE PS;
|
||||
const xImplSimd_DestRegSSE SS;
|
||||
};
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////////////////////
|
||||
// SQRT has PS/SS/SD forms, but not the PD form.
|
||||
//
|
||||
struct xImplSimd_Sqrt
|
||||
{
|
||||
const xImplSimd_DestRegSSE PS;
|
||||
const xImplSimd_DestRegSSE SS;
|
||||
const xImplSimd_DestRegSSE SD;
|
||||
};
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////////////////////
|
||||
//
|
||||
struct xImplSimd_AndNot
|
||||
{
|
||||
const xImplSimd_DestRegSSE PS;
|
||||
const xImplSimd_DestRegSSE PD;
|
||||
};
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////////////////////
|
||||
// Packed absolute value. [sSSE3 only]
|
||||
//
|
||||
struct xImplSimd_PAbsolute
|
||||
{
|
||||
// [sSSE-3] Computes the absolute value of bytes in the src, and stores the result
|
||||
// in dest, as UNSIGNED.
|
||||
const xImplSimd_DestRegEither B;
|
||||
|
||||
// [sSSE-3] Computes the absolute value of word in the src, and stores the result
|
||||
// in dest, as UNSIGNED.
|
||||
const xImplSimd_DestRegEither W;
|
||||
|
||||
// [sSSE-3] Computes the absolute value of doublewords in the src, and stores the
|
||||
// result in dest, as UNSIGNED.
|
||||
const xImplSimd_DestRegEither D;
|
||||
};
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////////////////////
|
||||
// Packed Sign [sSSE3 only] - Negate/zero/preserve packed integers in dest depending on the
|
||||
// corresponding sign in src.
|
||||
//
|
||||
struct xImplSimd_PSign
|
||||
{
|
||||
// [sSSE-3] negates each byte element of dest if the signed integer value of the
|
||||
// corresponding data element in src is less than zero. If the signed integer value
|
||||
// of a data element in src is positive, the corresponding data element in dest is
|
||||
// unchanged. If a data element in src is zero, the corresponding data element in
|
||||
// dest is set to zero.
|
||||
const xImplSimd_DestRegEither B;
|
||||
|
||||
// [sSSE-3] negates each word element of dest if the signed integer value of the
|
||||
// corresponding data element in src is less than zero. If the signed integer value
|
||||
// of a data element in src is positive, the corresponding data element in dest is
|
||||
// unchanged. If a data element in src is zero, the corresponding data element in
|
||||
// dest is set to zero.
|
||||
const xImplSimd_DestRegEither W;
|
||||
|
||||
// [sSSE-3] negates each doubleword element of dest if the signed integer value
|
||||
// of the corresponding data element in src is less than zero. If the signed integer
|
||||
// value of a data element in src is positive, the corresponding data element in dest
|
||||
// is unchanged. If a data element in src is zero, the corresponding data element in
|
||||
// dest is set to zero.
|
||||
const xImplSimd_DestRegEither D;
|
||||
};
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////////////////////
|
||||
// Packed Multiply and Add!!
|
||||
//
|
||||
struct xImplSimd_PMultAdd
|
||||
{
|
||||
// Multiplies the individual signed words of dest by the corresponding signed words
|
||||
// of src, producing temporary signed, doubleword results. The adjacent doubleword
|
||||
// results are then summed and stored in the destination operand.
|
||||
//
|
||||
// DEST[31:0] = ( DEST[15:0] * SRC[15:0]) + (DEST[31:16] * SRC[31:16] );
|
||||
// DEST[63:32] = ( DEST[47:32] * SRC[47:32]) + (DEST[63:48] * SRC[63:48] );
|
||||
// [.. repeat in the case of XMM src/dest operands ..]
|
||||
//
|
||||
const xImplSimd_DestRegEither WD;
|
||||
|
||||
// [sSSE-3] multiplies vertically each unsigned byte of dest with the corresponding
|
||||
// signed byte of src, producing intermediate signed 16-bit integers. Each adjacent
|
||||
// pair of signed words is added and the saturated result is packed to dest.
|
||||
// For example, the lowest-order bytes (bits 7-0) in src and dest are multiplied
|
||||
// and the intermediate signed word result is added with the corresponding
|
||||
// intermediate result from the 2nd lowest-order bytes (bits 15-8) of the operands;
|
||||
// the sign-saturated result is stored in the lowest word of dest (bits 15-0).
|
||||
// The same operation is performed on the other pairs of adjacent bytes.
|
||||
//
|
||||
// In Coder Speak:
|
||||
// DEST[15-0] = SaturateToSignedWord( SRC[15-8] * DEST[15-8] + SRC[7-0] * DEST[7-0] );
|
||||
// DEST[31-16] = SaturateToSignedWord( SRC[31-24] * DEST[31-24] + SRC[23-16] * DEST[23-16] );
|
||||
// [.. repeat for each 16 bits up to 64 (mmx) or 128 (xmm) ..]
|
||||
//
|
||||
const xImplSimd_DestRegEither UBSW;
|
||||
};
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////////////////////
|
||||
// Packed Horizontal Add [SSE3 only]
|
||||
//
|
||||
struct xImplSimd_HorizAdd
|
||||
{
|
||||
// [SSE-3] Horizontal Add of Packed Data. A three step process:
|
||||
// * Adds the single-precision floating-point values in the first and second dwords of
|
||||
// dest and stores the result in the first dword of dest.
|
||||
// * Adds single-precision floating-point values in the third and fourth dword of dest
|
||||
// stores the result in the second dword of dest.
|
||||
// * Adds single-precision floating-point values in the first and second dword of *src*
|
||||
// and stores the result in the third dword of dest.
|
||||
const xImplSimd_DestRegSSE PS;
|
||||
|
||||
// [SSE-3] Horizontal Add of Packed Data. A two step process:
|
||||
// * Adds the double-precision floating-point values in the high and low quadwords of
|
||||
// dest and stores the result in the low quadword of dest.
|
||||
// * Adds the double-precision floating-point values in the high and low quadwords of
|
||||
// *src* stores the result in the high quadword of dest.
|
||||
const xImplSimd_DestRegSSE PD;
|
||||
};
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////////////////////
|
||||
// DotProduct calculation (SSE4.1 only!)
|
||||
//
|
||||
struct xImplSimd_DotProduct
|
||||
{
|
||||
// [SSE-4.1] Conditionally multiplies the packed single precision floating-point
|
||||
// values in dest with the packed single-precision floats in src depending on a
|
||||
// mask extracted from the high 4 bits of the immediate byte. If a condition mask
|
||||
// bit in Imm8[7:4] is zero, the corresponding multiplication is replaced by a value
|
||||
// of 0.0. The four resulting single-precision values are summed into an inter-
|
||||
// mediate result.
|
||||
//
|
||||
// The intermediate result is conditionally broadcasted to the destination using a
|
||||
// broadcast mask specified by bits [3:0] of the immediate byte. If a broadcast
|
||||
// mask bit is 1, the intermediate result is copied to the corresponding dword
|
||||
// element in dest. If a broadcast mask bit is zero, the corresponding element in
|
||||
// the destination is set to zero.
|
||||
//
|
||||
xImplSimd_DestRegImmSSE PS;
|
||||
|
||||
// [SSE-4.1]
|
||||
xImplSimd_DestRegImmSSE PD;
|
||||
};
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////////////////////
|
||||
// Rounds floating point values (packed or single scalar) by an arbitrary rounding mode.
|
||||
// (SSE4.1 only!)
|
||||
struct xImplSimd_Round
{
	// [SSE-4.1] Rounds the 4 packed single-precision src values and stores them in dest.
	//
	// Imm8 specifies control fields for the rounding operation:
	//   Bit 3    - processor behavior for a precision exception (0: normal, 1: inexact)
	//   Bit 2    - If enabled, use MXCSR.RC, else use RC specified in bits 1:0 of this Imm8.
	//   Bits 1:0 - Specifies a rounding mode for this instruction only.
	//
	// Rounding Mode Reference:
	//   0 - Nearest, 1 - Negative Infinity, 2 - Positive infinity, 3 - Truncate.
	//
	const xImplSimd_DestRegImmSSE PS;

	// [SSE-4.1] Rounds the 2 packed double-precision src values and stores them in dest.
	//
	// Imm8 control fields and rounding modes are identical to PS (see above).
	const xImplSimd_DestRegImmSSE PD;

	// [SSE-4.1] Rounds the single-precision src value and stores in dest.
	//
	// Imm8 control fields and rounding modes are identical to PS (see above).
	const xImplSimd_DestRegImmSSE SS;

	// [SSE-4.1] Rounds the double-precision src value and stores in dest.
	//
	// Imm8 control fields and rounding modes are identical to PS (see above).
	const xImplSimd_DestRegImmSSE SD;
};
|
||||
|
||||
} // End namespace x86Emitter
|
||||
111
common/emitter/implement/simd_comparisons.h
Normal file
111
common/emitter/implement/simd_comparisons.h
Normal file
@@ -0,0 +1,111 @@
|
||||
// SPDX-FileCopyrightText: 2002-2025 PCSX2 Dev Team
|
||||
// SPDX-License-Identifier: GPL-3.0+
|
||||
|
||||
#pragma once
|
||||
|
||||
namespace x86Emitter
|
||||
{
|
||||
|
||||
// Packed/scalar floating-point MIN/MAX dispatcher; the concrete opcode (min vs. max)
// is supplied by the aggregate initializer at the instantiation site.
struct xImplSimd_MinMax
{
	const xImplSimd_DestRegSSE PS; // packed single precision
	const xImplSimd_DestRegSSE PD; // packed double precision
	const xImplSimd_DestRegSSE SS; // scalar single precision
	const xImplSimd_DestRegSSE SD; // scalar double precision
};
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////////////////////
|
||||
//
|
||||
// Floating-point comparisons (CMPPS/CMPPD/CMPSS/CMPSD family); the comparison
// predicate is fixed per-instance via CType.
struct xImplSimd_Compare
{
	// Comparison predicate (equal, less, unordered, etc.) baked into this instance.
	SSE2_ComparisonType CType;

	// Compare packed single-precision values in dest against from.
	void PS(const xRegisterSSE& to, const xRegisterSSE& from) const;
	void PS(const xRegisterSSE& to, const xIndirectVoid& from) const;

	// Compare packed double-precision values in dest against from.
	void PD(const xRegisterSSE& to, const xRegisterSSE& from) const;
	void PD(const xRegisterSSE& to, const xIndirectVoid& from) const;

	// Compare scalar single-precision value in dest against from.
	void SS(const xRegisterSSE& to, const xRegisterSSE& from) const;
	void SS(const xRegisterSSE& to, const xIndirectVoid& from) const;

	// Compare scalar double-precision value in dest against from.
	void SD(const xRegisterSSE& to, const xRegisterSSE& from) const;
	void SD(const xRegisterSSE& to, const xIndirectVoid& from) const;
};
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////////////////////
|
||||
// Compare scalar floating point values and set EFLAGS (Ordered or Unordered)
|
||||
//
|
||||
// Compare scalar floating point values and set EFLAGS (Ordered or Unordered);
// whether the ordered (COMISS/COMISD) or unordered (UCOMISS/UCOMISD) form is
// emitted depends on the opcodes supplied at instantiation.
struct xImplSimd_COMI
{
	const xImplSimd_DestRegSSE SS; // scalar single precision
	const xImplSimd_DestRegSSE SD; // scalar double precision
};
|
||||
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////////////////////
|
||||
//
|
||||
// Packed integer comparisons (PCMPEQx / PCMPGTx family).
struct xImplSimd_PCompare
{
public:
	// Compare packed bytes for equality.
	// If a data element in dest is equal to the corresponding data element in src, the
	// corresponding data element in dest is set to all 1s; otherwise, it is set to all 0s.
	const xImplSimd_DestRegEither EQB;

	// Compare packed words for equality.
	// If a data element in dest is equal to the corresponding data element in src, the
	// corresponding data element in dest is set to all 1s; otherwise, it is set to all 0s.
	const xImplSimd_DestRegEither EQW;

	// Compare packed doublewords [32-bits] for equality.
	// If a data element in dest is equal to the corresponding data element in src, the
	// corresponding data element in dest is set to all 1s; otherwise, it is set to all 0s.
	const xImplSimd_DestRegEither EQD;

	// Compare packed signed bytes for greater than.
	// If a data element in dest is greater than the corresponding data element in src, the
	// corresponding data element in dest is set to all 1s; otherwise, it is set to all 0s.
	const xImplSimd_DestRegEither GTB;

	// Compare packed signed words for greater than.
	// If a data element in dest is greater than the corresponding data element in src, the
	// corresponding data element in dest is set to all 1s; otherwise, it is set to all 0s.
	const xImplSimd_DestRegEither GTW;

	// Compare packed signed doublewords [32-bits] for greater than.
	// If a data element in dest is greater than the corresponding data element in src, the
	// corresponding data element in dest is set to all 1s; otherwise, it is set to all 0s.
	const xImplSimd_DestRegEither GTD;
};
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////////////////////
|
||||
//
|
||||
// Packed integer MIN/MAX (PMINxx/PMAXxx family); min-vs-max is fixed per-instance
// by the opcodes supplied at instantiation.
struct xImplSimd_PMinMax
{
	// Compare packed unsigned byte integers in dest to src and store packed min/max
	// values in dest.
	const xImplSimd_DestRegEither UB;

	// Compare packed signed word integers in dest to src and store packed min/max
	// values in dest.
	const xImplSimd_DestRegEither SW;

	// [SSE-4.1] Compare packed signed byte integers in dest to src and store
	// packed min/max values in dest. (SSE operands only)
	const xImplSimd_DestRegSSE SB;

	// [SSE-4.1] Compare packed signed doubleword integers in dest to src and store
	// packed min/max values in dest. (SSE operands only)
	const xImplSimd_DestRegSSE SD;

	// [SSE-4.1] Compare packed unsigned word integers in dest to src and store
	// packed min/max values in dest. (SSE operands only)
	const xImplSimd_DestRegSSE UW;

	// [SSE-4.1] Compare packed unsigned doubleword integers in dest to src and store
	// packed min/max values in dest. (SSE operands only)
	const xImplSimd_DestRegSSE UD;
};
|
||||
|
||||
} // end namespace x86Emitter
|
||||
61
common/emitter/implement/simd_helpers.h
Normal file
61
common/emitter/implement/simd_helpers.h
Normal file
@@ -0,0 +1,61 @@
|
||||
// SPDX-FileCopyrightText: 2002-2025 PCSX2 Dev Team
|
||||
// SPDX-License-Identifier: GPL-3.0+
|
||||
|
||||
#pragma once
|
||||
|
||||
namespace x86Emitter
|
||||
{
|
||||
|
||||
// =====================================================================================================
|
||||
// xImpl_SIMD Types (template free!)
|
||||
// =====================================================================================================
|
||||
|
||||
// ------------------------------------------------------------------------
|
||||
// For implementing SSE-only logic operations that have xmmreg,xmmreg/rm forms only,
|
||||
// like ANDPS/ANDPD
|
||||
//
|
||||
// ------------------------------------------------------------------------
// For implementing SSE-only logic operations that have xmmreg,xmmreg/rm forms only,
// like ANDPS/ANDPD
//
struct xImplSimd_DestRegSSE
{
	u8 Prefix;  // mandatory legacy prefix byte (0 for none, 0x66/0xF2/0xF3 otherwise)
	u16 Opcode; // opcode byte(s) following the 0F escape

	void operator()(const xRegisterSSE& to, const xRegisterSSE& from) const;
	void operator()(const xRegisterSSE& to, const xIndirectVoid& from) const;
};
|
||||
|
||||
// ------------------------------------------------------------------------
|
||||
// For implementing SSE-only logic operations that have xmmreg,reg/rm,imm forms only
|
||||
// (PSHUFD / PSHUFHW / etc).
|
||||
//
|
||||
// ------------------------------------------------------------------------
// For implementing SSE-only logic operations that have xmmreg,reg/rm,imm forms only
// (PSHUFD / PSHUFHW / etc).
//
struct xImplSimd_DestRegImmSSE
{
	u8 Prefix;  // mandatory legacy prefix byte (0 for none, 0x66/0xF2/0xF3 otherwise)
	u16 Opcode; // opcode byte(s) following the 0F escape

	void operator()(const xRegisterSSE& to, const xRegisterSSE& from, u8 imm) const;
	void operator()(const xRegisterSSE& to, const xIndirectVoid& from, u8 imm) const;
};
|
||||
|
||||
// ------------------------------------------------------------------------
// Like xImplSimd_DestRegImmSSE, but the trailing immediate is an SSE2 comparison
// predicate (used for the CMPPS/CMPPD/CMPSS/CMPSD immediate-form instructions).
//
struct xImplSimd_DestSSE_CmpImm
{
	u8 Prefix;  // mandatory legacy prefix byte (0 for none, 0x66/0xF2/0xF3 otherwise)
	u16 Opcode; // opcode byte(s) following the 0F escape

	void operator()(const xRegisterSSE& to, const xRegisterSSE& from, SSE2_ComparisonType imm) const;
	void operator()(const xRegisterSSE& to, const xIndirectVoid& from, SSE2_ComparisonType imm) const;
};
|
||||
|
||||
// ------------------------------------------------------------------------
|
||||
// For implementing SSE operations that have reg,reg/rm forms only,
|
||||
// but accept either MM or XMM destinations (most PADD/PSUB and other P arithmetic ops).
|
||||
//
|
||||
// ------------------------------------------------------------------------
// For implementing SSE operations that have reg,reg/rm forms only,
// but accept either MM or XMM destinations (most PADD/PSUB and other P arithmetic ops).
//
struct xImplSimd_DestRegEither
{
	u8 Prefix;  // mandatory legacy prefix byte (0 for none, 0x66/0xF2/0xF3 otherwise)
	u16 Opcode; // opcode byte(s) following the 0F escape

	void operator()(const xRegisterSSE& to, const xRegisterSSE& from) const;
	void operator()(const xRegisterSSE& to, const xIndirectVoid& from) const;
};
|
||||
|
||||
} // end namespace x86Emitter
|
||||
167
common/emitter/implement/simd_moremovs.h
Normal file
167
common/emitter/implement/simd_moremovs.h
Normal file
@@ -0,0 +1,167 @@
|
||||
// SPDX-FileCopyrightText: 2002-2025 PCSX2 Dev Team
|
||||
// SPDX-License-Identifier: GPL-3.0+
|
||||
|
||||
#pragma once
|
||||
|
||||
namespace x86Emitter
|
||||
{
|
||||
|
||||
// --------------------------------------------------------------------------------------
|
||||
// xImplSimd_MovHL
|
||||
// --------------------------------------------------------------------------------------
|
||||
// Moves to/from high/low portions of an xmm register.
|
||||
// These instructions cannot be used in reg/reg form.
|
||||
//
|
||||
// --------------------------------------------------------------------------------------
//  xImplSimd_MovHL
// --------------------------------------------------------------------------------------
// Moves to/from high/low portions of an xmm register.
// These instructions cannot be used in reg/reg form.
//
struct xImplSimd_MovHL
{
	u16 Opcode; // base opcode; the store forms use an adjacent opcode derived from it

	// single-precision (MOVHPS/MOVLPS) load and store forms
	void PS(const xRegisterSSE& to, const xIndirectVoid& from) const;
	void PS(const xIndirectVoid& to, const xRegisterSSE& from) const;

	// double-precision (MOVHPD/MOVLPD) load and store forms
	void PD(const xRegisterSSE& to, const xIndirectVoid& from) const;
	void PD(const xIndirectVoid& to, const xRegisterSSE& from) const;
};
|
||||
|
||||
// --------------------------------------------------------------------------------------
|
||||
// xImplSimd_MovHL_RtoR
|
||||
// --------------------------------------------------------------------------------------
|
||||
// RegtoReg forms of MOVHL/MOVLH -- these are the same opcodes as MOVH/MOVL but
|
||||
// do something kinda different! Fun!
|
||||
//
|
||||
// --------------------------------------------------------------------------------------
//  xImplSimd_MovHL_RtoR
// --------------------------------------------------------------------------------------
// RegtoReg forms of MOVHL/MOVLH -- these are the same opcodes as MOVH/MOVL but
// do something kinda different! Fun!
//
struct xImplSimd_MovHL_RtoR
{
	u16 Opcode;

	void PS(const xRegisterSSE& to, const xRegisterSSE& from) const;
	void PD(const xRegisterSSE& to, const xRegisterSSE& from) const;
};
|
||||
|
||||
// --------------------------------------------------------------------------------------
|
||||
// xImplSimd_MoveSSE
|
||||
// --------------------------------------------------------------------------------------
|
||||
// Legends in their own right: MOVAPS / MOVAPD / MOVUPS / MOVUPD
|
||||
//
|
||||
// All implementations of Unaligned Movs will, when possible, use aligned movs instead.
|
||||
// This happens when using Mem,Reg or Reg,Mem forms where the address is simple displacement
|
||||
// which can be checked for alignment at runtime.
|
||||
//
|
||||
// --------------------------------------------------------------------------------------
//  xImplSimd_MoveSSE
// --------------------------------------------------------------------------------------
// Legends in their own right: MOVAPS / MOVAPD / MOVUPS / MOVUPD
//
// All implementations of Unaligned Movs will, when possible, use aligned movs instead.
// This happens when using Mem,Reg or Reg,Mem forms where the address is simple displacement
// which can be checked for alignment at runtime.
//
struct xImplSimd_MoveSSE
{
	u8 Prefix;      // 0x00 for the PS forms, 0x66 for the PD forms
	bool isAligned; // true emits the aligned (MOVAPx) opcode, false the unaligned (MOVUPx)

	void operator()(const xRegisterSSE& to, const xRegisterSSE& from) const;
	void operator()(const xRegisterSSE& to, const xIndirectVoid& from) const;
	void operator()(const xIndirectVoid& to, const xRegisterSSE& from) const;
};
|
||||
|
||||
// --------------------------------------------------------------------------------------
|
||||
// xImplSimd_MoveDQ
|
||||
// --------------------------------------------------------------------------------------
|
||||
// Implementations for MOVDQA / MOVDQU
|
||||
//
|
||||
// All implementations of Unaligned Movs will, when possible, use aligned movs instead.
|
||||
// This happens when using Mem,Reg or Reg,Mem forms where the address is simple displacement
|
||||
// which can be checked for alignment at runtime.
|
||||
|
||||
// --------------------------------------------------------------------------------------
//  xImplSimd_MoveDQ
// --------------------------------------------------------------------------------------
// Implementations for MOVDQA / MOVDQU
//
// All implementations of Unaligned Movs will, when possible, use aligned movs instead.
// This happens when using Mem,Reg or Reg,Mem forms where the address is simple displacement
// which can be checked for alignment at runtime.
//
struct xImplSimd_MoveDQ
{
	u8 Prefix;      // 0x66 for MOVDQA, 0xF3 for MOVDQU -- TODO confirm at instantiation site
	bool isAligned; // true emits the aligned form, false the unaligned form

	void operator()(const xRegisterSSE& to, const xRegisterSSE& from) const;
	void operator()(const xRegisterSSE& to, const xIndirectVoid& from) const;
	void operator()(const xIndirectVoid& to, const xRegisterSSE& from) const;
};
|
||||
|
||||
// --------------------------------------------------------------------------------------
|
||||
// xImplSimd_Blend
|
||||
// --------------------------------------------------------------------------------------
|
||||
// Blend - Conditional copying of values in src into dest.
|
||||
//
|
||||
// Blend - Conditional copying of values in src into dest.
//
struct xImplSimd_Blend
{
	// [SSE-4.1] Conditionally copies dword values from src to dest, depending on the
	// mask bits in the immediate operand (bits [3:0]). Each mask bit corresponds to a
	// dword element in a 128-bit operand.
	//
	// If a mask bit is 1, then the corresponding dword in the source operand is copied
	// to dest, else the dword element in dest is left unchanged.
	//
	xImplSimd_DestRegImmSSE PS;

	// [SSE-4.1] Conditionally copies quadword values from src to dest, depending on the
	// mask bits in the immediate operand (bits [1:0]). Each mask bit corresponds to a
	// quadword element in a 128-bit operand.
	//
	// If a mask bit is 1, then the corresponding quadword in the source operand is copied
	// to dest, else the quadword element in dest is left unchanged.
	//
	xImplSimd_DestRegImmSSE PD;

	// [SSE-4.1] Conditionally copies dword values from src to dest, depending on the
	// mask (bits [3:0]) in XMM0 (yes, the fixed register). Each mask bit corresponds
	// to a dword element in the 128-bit operand.
	//
	// If a mask bit is 1, then the corresponding dword in the source operand is copied
	// to dest, else the dword element in dest is left unchanged.
	//
	xImplSimd_DestRegSSE VPS;

	// [SSE-4.1] Conditionally copies quadword values from src to dest, depending on the
	// mask (bits [1:0]) in XMM0 (yes, the fixed register). Each mask bit corresponds
	// to a quadword element in the 128-bit operand.
	//
	// If a mask bit is 1, then the corresponding quadword in the source operand is copied
	// to dest, else the quadword element in dest is left unchanged.
	//
	xImplSimd_DestRegSSE VPD;
};
|
||||
|
||||
// Packed integer blends (presumably PBLENDW for W and the XMM0-masked PBLENDVB
// for VB -- confirm against the instantiation site's opcodes).
struct xImplSimd_PBlend
{
	// Word blend selected by an immediate mask.
	xImplSimd_DestRegImmSSE W;
	// Byte blend selected by the mask in the fixed register XMM0.
	xImplSimd_DestRegSSE VB;
};
|
||||
|
||||
// --------------------------------------------------------------------------------------
|
||||
// xImplSimd_PMove
|
||||
// --------------------------------------------------------------------------------------
|
||||
// Packed Move with Sign or Zero extension.
|
||||
//
|
||||
// --------------------------------------------------------------------------------------
//  xImplSimd_PMove
// --------------------------------------------------------------------------------------
// Packed Move with Sign or Zero extension.
//
struct xImplSimd_PMove
{
	// Base opcode; the sign-extending (PMOVSX*) vs. zero-extending (PMOVZX*) family
	// is selected by the value supplied at instantiation.
	u16 OpcodeBase;

	// [SSE-4.1] Zero/Sign-extend the low byte values in src into word integers
	// and store them in dest.
	void BW(const xRegisterSSE& to, const xRegisterSSE& from) const;
	void BW(const xRegisterSSE& to, const xIndirect64& from) const;

	// [SSE-4.1] Zero/Sign-extend the low byte values in src into dword integers
	// and store them in dest.
	void BD(const xRegisterSSE& to, const xRegisterSSE& from) const;
	void BD(const xRegisterSSE& to, const xIndirect32& from) const;

	// [SSE-4.1] Zero/Sign-extend the low byte values in src into qword integers
	// and store them in dest. (memory form reads only 16 bits -- two source bytes)
	void BQ(const xRegisterSSE& to, const xRegisterSSE& from) const;
	void BQ(const xRegisterSSE& to, const xIndirect16& from) const;

	// [SSE-4.1] Zero/Sign-extend the low word values in src into dword integers
	// and store them in dest.
	void WD(const xRegisterSSE& to, const xRegisterSSE& from) const;
	void WD(const xRegisterSSE& to, const xIndirect64& from) const;

	// [SSE-4.1] Zero/Sign-extend the low word values in src into qword integers
	// and store them in dest.
	void WQ(const xRegisterSSE& to, const xRegisterSSE& from) const;
	void WQ(const xRegisterSSE& to, const xIndirect32& from) const;

	// [SSE-4.1] Zero/Sign-extend the low dword values in src into qword integers
	// and store them in dest.
	void DQ(const xRegisterSSE& to, const xRegisterSSE& from) const;
	void DQ(const xRegisterSSE& to, const xIndirect64& from) const;
};
|
||||
} // namespace x86Emitter
|
||||
226
common/emitter/implement/simd_shufflepack.h
Normal file
226
common/emitter/implement/simd_shufflepack.h
Normal file
@@ -0,0 +1,226 @@
|
||||
// SPDX-FileCopyrightText: 2002-2025 PCSX2 Dev Team
|
||||
// SPDX-License-Identifier: GPL-3.0+
|
||||
|
||||
#pragma once
|
||||
|
||||
namespace x86Emitter
|
||||
{
|
||||
|
||||
// --------------------------------------------------------------------------------------
|
||||
// xImplSimd_Shuffle
|
||||
// --------------------------------------------------------------------------------------
|
||||
// --------------------------------------------------------------------------------------
//  xImplSimd_Shuffle
// --------------------------------------------------------------------------------------
// Floating-point shuffles (SHUFPS/SHUFPD), selected by an 8-bit element selector.
struct xImplSimd_Shuffle
{
	// Debug-time validation of the selector byte (implementation elsewhere).
	inline void _selector_assertion_check(u8 selector) const;

	// Shuffle packed single-precision values per the selector.
	void PS(const xRegisterSSE& to, const xRegisterSSE& from, u8 selector) const;
	void PS(const xRegisterSSE& to, const xIndirectVoid& from, u8 selector) const;

	// Shuffle packed double-precision values per the selector.
	void PD(const xRegisterSSE& to, const xRegisterSSE& from, u8 selector) const;
	void PD(const xRegisterSSE& to, const xIndirectVoid& from, u8 selector) const;
};
|
||||
|
||||
// --------------------------------------------------------------------------------------
|
||||
// xImplSimd_PShuffle
|
||||
// --------------------------------------------------------------------------------------
|
||||
// --------------------------------------------------------------------------------------
//  xImplSimd_PShuffle
// --------------------------------------------------------------------------------------
// Packed integer shuffles (PSHUFD / PSHUFLW / PSHUFHW / PSHUFB).
struct xImplSimd_PShuffle
{
	// Copies doublewords from src and inserts them into dest at dword locations selected
	// with the order operand (8 bit immediate).
	const xImplSimd_DestRegImmSSE D;

	// Copies words from the low quadword of src and inserts them into the low quadword
	// of dest at word locations selected with the order operand (8 bit immediate).
	// The high quadword of src is copied to the high quadword of dest.
	const xImplSimd_DestRegImmSSE LW;

	// Copies words from the high quadword of src and inserts them into the high quadword
	// of dest at word locations selected with the order operand (8 bit immediate).
	// The low quadword of src is copied to the low quadword of dest.
	const xImplSimd_DestRegImmSSE HW;

	// [SSSE-3] Performs in-place shuffles of bytes in dest according to the shuffle
	// control mask in src. If the most significant bit (bit[7]) of each byte of the
	// shuffle control mask is set, then constant zero is written in the result byte.
	// Each byte in the shuffle control mask forms an index to permute the corresponding
	// byte in dest. The value of each index is the least significant 4 bits (128-bit
	// operation) or 3 bits (64-bit operation) of the shuffle control byte.
	//
	const xImplSimd_DestRegEither B;

	// below is my test bed for a new system, free of subclasses. Was supposed to improve intellisense
	// but it doesn't (makes it worse). Will try again in MSVC 2010. --air

#if 0
	// Copies words from src and inserts them into dest at word locations selected with
	// the order operand (8 bit immediate).

	// Copies doublewords from src and inserts them into dest at dword locations selected
	// with the order operand (8 bit immediate).
	void D( const xRegisterSSE& to, const xRegisterSSE& from, u8 imm ) const { xOpWrite0F( 0x66, 0x70, to, from, imm ); }
	void D( const xRegisterSSE& to, const xIndirectVoid& from, u8 imm ) const { xOpWrite0F( 0x66, 0x70, to, from, imm ); }

	// Copies words from the low quadword of src and inserts them into the low quadword
	// of dest at word locations selected with the order operand (8 bit immediate).
	// The high quadword of src is copied to the high quadword of dest.
	void LW( const xRegisterSSE& to, const xRegisterSSE& from, u8 imm ) const { xOpWrite0F( 0xf2, 0x70, to, from, imm ); }
	void LW( const xRegisterSSE& to, const xIndirectVoid& from, u8 imm ) const { xOpWrite0F( 0xf2, 0x70, to, from, imm ); }

	// Copies words from the high quadword of src and inserts them into the high quadword
	// of dest at word locations selected with the order operand (8 bit immediate).
	// The low quadword of src is copied to the low quadword of dest.
	void HW( const xRegisterSSE& to, const xRegisterSSE& from, u8 imm ) const { xOpWrite0F( 0xf3, 0x70, to, from, imm ); }
	void HW( const xRegisterSSE& to, const xIndirectVoid& from, u8 imm ) const { xOpWrite0F( 0xf3, 0x70, to, from, imm ); }

	// [SSSE-3] Performs in-place shuffles of bytes in dest according to the shuffle
	// control mask in src. If the most significant bit (bit[7]) of each byte of the
	// shuffle control mask is set, then constant zero is written in the result byte.
	// Each byte in the shuffle control mask forms an index to permute the corresponding
	// byte in dest. The value of each index is the least significant 4 bits (128-bit
	// operation) or 3 bits (64-bit operation) of the shuffle control byte.
	//
	void B( const xRegisterSSE& to, const xRegisterSSE& from ) const { OpWriteSSE( 0x66, 0x0038 ); }
	void B( const xRegisterSSE& to, const xIndirectVoid& from ) const { OpWriteSSE( 0x66, 0x0038 ); }
#endif
};
|
||||
|
||||
// --------------------------------------------------------------------------------------
|
||||
// SimdImpl_PUnpack
|
||||
// --------------------------------------------------------------------------------------
|
||||
// --------------------------------------------------------------------------------------
//  SimdImpl_PUnpack
// --------------------------------------------------------------------------------------
// Packed integer unpack/interleave (PUNPCKLxx / PUNPCKHxx).
struct SimdImpl_PUnpack
{
	// Unpack and interleave low-order bytes from src and dest into dest.
	const xImplSimd_DestRegEither LBW;
	// Unpack and interleave low-order words from src and dest into dest.
	const xImplSimd_DestRegEither LWD;
	// Unpack and interleave low-order doublewords from src and dest into dest.
	const xImplSimd_DestRegEither LDQ;
	// Unpack and interleave low-order quadwords from src and dest into dest.
	const xImplSimd_DestRegSSE LQDQ;

	// Unpack and interleave high-order bytes from src and dest into dest.
	const xImplSimd_DestRegEither HBW;
	// Unpack and interleave high-order words from src and dest into dest.
	const xImplSimd_DestRegEither HWD;
	// Unpack and interleave high-order doublewords from src and dest into dest.
	const xImplSimd_DestRegEither HDQ;
	// Unpack and interleave high-order quadwords from src and dest into dest.
	const xImplSimd_DestRegSSE HQDQ;
};
|
||||
|
||||
// --------------------------------------------------------------------------------------
|
||||
// SimdImpl_Pack
|
||||
// --------------------------------------------------------------------------------------
|
||||
// Pack with Signed or Unsigned Saturation
|
||||
//
|
||||
// --------------------------------------------------------------------------------------
//  SimdImpl_Pack
// --------------------------------------------------------------------------------------
// Pack with Signed or Unsigned Saturation
//
struct SimdImpl_Pack
{
	// Converts packed signed word integers from src and dest into packed signed
	// byte integers in dest, using signed saturation.
	const xImplSimd_DestRegEither SSWB;

	// Converts packed signed dword integers from src and dest into packed signed
	// word integers in dest, using signed saturation.
	const xImplSimd_DestRegEither SSDW;

	// Converts packed signed word integers from src and dest into packed unsigned
	// byte integers in dest, using unsigned saturation. (PACKUSWB)
	const xImplSimd_DestRegEither USWB;

	// [SSE-4.1] Converts packed signed dword integers from src and dest into packed
	// unsigned word integers in dest, using unsigned saturation. (PACKUSDW)
	const xImplSimd_DestRegSSE USDW;
};
|
||||
|
||||
// --------------------------------------------------------------------------------------
|
||||
// SimdImpl_Unpack
|
||||
// --------------------------------------------------------------------------------------
|
||||
// --------------------------------------------------------------------------------------
//  xImplSimd_Unpack
// --------------------------------------------------------------------------------------
// Floating-point unpack/interleave (UNPCKHPS/UNPCKHPD/UNPCKLPS/UNPCKLPD).
struct xImplSimd_Unpack
{
	// Unpacks the high doubleword [single-precision] values from src and dest into
	// dest, such that the result of dest looks like this:
	//    dest[0] <- dest[2]
	//    dest[1] <- src[2]
	//    dest[2] <- dest[3]
	//    dest[3] <- src[3]
	//
	const xImplSimd_DestRegSSE HPS;

	// Unpacks the high quadword [double-precision] values from src and dest into
	// dest, such that the result of dest looks like this:
	//    dest.lo <- dest.hi
	//    dest.hi <- src.hi
	//
	const xImplSimd_DestRegSSE HPD;

	// Unpacks the low doubleword [single-precision] values from src and dest into
	// dest, such that the result of dest looks like this:
	//    dest[3] <- src[1]
	//    dest[2] <- dest[1]
	//    dest[1] <- src[0]
	//    dest[0] <- dest[0]
	//
	const xImplSimd_DestRegSSE LPS;

	// Unpacks the low quadword [double-precision] values from src and dest into
	// dest, effectively moving the low portion of src into the upper portion of dest.
	// The result of dest is loaded as such:
	//    dest.hi <- src.lo
	//    dest.lo <- dest.lo  [remains unchanged!]
	//
	const xImplSimd_DestRegSSE LPD;
};
|
||||
|
||||
|
||||
// --------------------------------------------------------------------------------------
|
||||
// SimdImpl_PInsert
|
||||
// --------------------------------------------------------------------------------------
|
||||
// PINSRW/B/D [all but Word form are SSE4.1 only!]
|
||||
//
|
||||
// --------------------------------------------------------------------------------------
//  SimdImpl_PInsert
// --------------------------------------------------------------------------------------
// PINSRW/B/D/Q [all but Word form are SSE4.1 only!]
//
struct xImplSimd_PInsert
{
	// Insert a byte from src into dest at the element position selected by imm8.
	void B(const xRegisterSSE& to, const xRegister32& from, u8 imm8) const;
	void B(const xRegisterSSE& to, const xIndirect32& from, u8 imm8) const;

	// Insert a word from src into dest at the element position selected by imm8.
	void W(const xRegisterSSE& to, const xRegister32& from, u8 imm8) const;
	void W(const xRegisterSSE& to, const xIndirect32& from, u8 imm8) const;

	// Insert a dword from src into dest at the element position selected by imm8.
	void D(const xRegisterSSE& to, const xRegister32& from, u8 imm8) const;
	void D(const xRegisterSSE& to, const xIndirect32& from, u8 imm8) const;

	// Insert a qword from src into dest at the element position selected by imm8.
	void Q(const xRegisterSSE& to, const xRegister64& from, u8 imm8) const;
	void Q(const xRegisterSSE& to, const xIndirect64& from, u8 imm8) const;
};
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////////////////////
|
||||
// PEXTRW/B/D [all but Word form are SSE4.1 only!]
|
||||
//
|
||||
// Note: Word form's indirect memory form is only available in SSE4.1.
|
||||
//
|
||||
// PEXTRW/B/D/Q [all but Word form are SSE4.1 only!]
//
// Note: Word form's indirect memory form is only available in SSE4.1.
//
struct SimdImpl_PExtract
{
	// [SSE-4.1] Copies the byte element specified by imm8 from src to dest. The upper bits
	// of dest are zero-extended (cleared). This can be used to extract any single packed
	// byte value from src into an x86 32 bit register.
	void B(const xRegister32& to, const xRegisterSSE& from, u8 imm8) const;
	void B(const xIndirect32& dest, const xRegisterSSE& from, u8 imm8) const;

	// Copies the word element specified by imm8 from src to dest. The upper bits
	// of dest are zero-extended (cleared). This can be used to extract any single packed
	// word value from src into an x86 32 bit register.
	//
	// [SSE-4.1] Note: Indirect memory forms of this instruction are an SSE-4.1 extension!
	//
	void W(const xRegister32& to, const xRegisterSSE& from, u8 imm8) const;
	void W(const xIndirect32& dest, const xRegisterSSE& from, u8 imm8) const;

	// [SSE-4.1] Copies the dword element specified by imm8 from src to dest. This can be
	// used to extract any single packed dword value from src into an x86 32 bit register.
	void D(const xRegister32& to, const xRegisterSSE& from, u8 imm8) const;
	void D(const xIndirect32& dest, const xRegisterSSE& from, u8 imm8) const;

	// [SSE-4.1] Copies the qword element specified by imm8 from src into the r/m64
	// destination (PEXTRQ). Note: the original comment described PINSRQ (insertion);
	// the signatures here take an SSE source and an r64/m64 destination, i.e. extraction.
	void Q(const xRegister64& to, const xRegisterSSE& from, u8 imm8) const;
	void Q(const xIndirect64& dest, const xRegisterSSE& from, u8 imm8) const;
};
|
||||
} // namespace x86Emitter
|
||||
26
common/emitter/implement/simd_templated_helpers.h
Normal file
26
common/emitter/implement/simd_templated_helpers.h
Normal file
@@ -0,0 +1,26 @@
|
||||
// SPDX-FileCopyrightText: 2002-2025 PCSX2 Dev Team
|
||||
// SPDX-License-Identifier: GPL-3.0+
|
||||
|
||||
#pragma once
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////////////////////
|
||||
// MMX / SSE Helper Functions!
|
||||
|
||||
// ------------------------------------------------------------------------
|
||||
// For implementing SSE-only logic operations that have xmmreg,xmmreg/rm forms only,
|
||||
// like ANDPS/ANDPD
|
||||
//
|
||||
// Template form of the xmmreg,xmmreg/rm emitter: the prefix/opcode pair is fixed at
// compile time rather than stored in the object.
template <u8 Prefix, u16 Opcode>
class SimdImpl_DestRegSSE
{
public:
	// reg,reg form -- no alignment concerns.
	__forceinline void operator()(const xRegisterSSE& to, const xRegisterSSE& from) const { xOpWrite0F(Prefix, Opcode, to, from); }
	// reg,mem form -- asserts that a simple-displacement address is 16-byte aligned,
	// since the emitted SSE op faults on misaligned memory operands.
	__forceinline void operator()(const xRegisterSSE& to, const ModSibBase& from) const
	{
		bool isReallyAligned = ((from.Displacement & 0x0f) == 0) && from.Index.IsEmpty() && from.Base.IsEmpty();
		pxAssertMsg(isReallyAligned, "Alignment check failed on SSE indirect load.");
		xOpWrite0F(Prefix, Opcode, to, from);
	}

	// Explicit default ctor works around a GCC quirk per the original author.
	SimdImpl_DestRegSSE() {} //GCWho?
};
|
||||
63
common/emitter/implement/test.h
Normal file
63
common/emitter/implement/test.h
Normal file
@@ -0,0 +1,63 @@
|
||||
// SPDX-FileCopyrightText: 2002-2025 PCSX2 Dev Team
|
||||
// SPDX-License-Identifier: GPL-3.0+
|
||||
|
||||
#pragma once
|
||||
|
||||
// Implementations found here: TEST + BTS/BT/BTC/BTR + BSF/BSR! (for lack of better location)
|
||||
|
||||
namespace x86Emitter
|
||||
{
|
||||
|
||||
// --------------------------------------------------------------------------------------
|
||||
// xImpl_Test
|
||||
// --------------------------------------------------------------------------------------
|
||||
//
|
||||
// --------------------------------------------------------------------------------------
//  xImpl_Test
// --------------------------------------------------------------------------------------
// Emitter for the x86 TEST instruction (non-destructive AND that sets flags only).
struct xImpl_Test
{
	void operator()(const xRegisterInt& to, const xRegisterInt& from) const;
	void operator()(const xIndirect64orLess& dest, int imm) const;
	void operator()(const xRegisterInt& to, int imm) const;
};
|
||||
|
||||
// Group-8 bit-test instruction selectors; values are the /r extension digits used
// in the Group 8 opcode encodings (BT = /4 through BTC = /7).
enum G8Type
{
	G8Type_BT = 4,
	G8Type_BTS,
	G8Type_BTR,
	G8Type_BTC,
};
|
||||
|
||||
// --------------------------------------------------------------------------------------
|
||||
// BSF / BSR
|
||||
// --------------------------------------------------------------------------------------
|
||||
// 16/32 operands are available. No 8 bit ones, not that any of you cared, I bet.
|
||||
//
|
||||
struct xImpl_BitScan
|
||||
{
|
||||
// 0xbc [fwd] / 0xbd [rev]
|
||||
u16 Opcode;
|
||||
|
||||
void operator()(const xRegister16or32or64& to, const xRegister16or32or64& from) const;
|
||||
void operator()(const xRegister16or32or64& to, const xIndirectVoid& sibsrc) const;
|
||||
};
|
||||
|
||||
// --------------------------------------------------------------------------------------
|
||||
// xImpl_Group8
|
||||
// --------------------------------------------------------------------------------------
|
||||
// Bit Test Instructions - Valid on 16/32 bit instructions only.
|
||||
//
|
||||
struct xImpl_Group8
|
||||
{
|
||||
G8Type InstType;
|
||||
|
||||
void operator()(const xRegister16or32or64& bitbase, const xRegister16or32or64& bitoffset) const;
|
||||
void operator()(const xRegister16or32or64& bitbase, u8 bitoffset) const;
|
||||
|
||||
void operator()(const xIndirectVoid& bitbase, const xRegister16or32or64& bitoffset) const;
|
||||
|
||||
void operator()(const xIndirect64& bitbase, u8 bitoffset) const;
|
||||
void operator()(const xIndirect32& bitbase, u8 bitoffset) const;
|
||||
void operator()(const xIndirect16& bitbase, u8 bitoffset) const;
|
||||
};
|
||||
|
||||
} // End namespace x86Emitter
|
||||
13
common/emitter/implement/xchg.h
Normal file
13
common/emitter/implement/xchg.h
Normal file
@@ -0,0 +1,13 @@
|
||||
// SPDX-FileCopyrightText: 2002-2025 PCSX2 Dev Team
|
||||
// SPDX-License-Identifier: GPL-3.0+
|
||||
|
||||
#pragma once
|
||||
|
||||
// This header file is intended to be the future home of xchg, cmpxchg, xadd, and
|
||||
// other threading-related exchange instructions.
|
||||
|
||||
namespace x86Emitter
|
||||
{
|
||||
|
||||
|
||||
} // End namespace x86Emitter
|
||||
635
common/emitter/instructions.h
Normal file
635
common/emitter/instructions.h
Normal file
@@ -0,0 +1,635 @@
|
||||
// SPDX-FileCopyrightText: 2002-2025 PCSX2 Dev Team
|
||||
// SPDX-License-Identifier: GPL-3.0+
|
||||
|
||||
/*
|
||||
* ix86 definitions v0.9.1
|
||||
*
|
||||
* Original Authors (v0.6.2 and prior):
|
||||
* linuzappz <linuzappz@pcsx.net>
|
||||
* alexey silinov
|
||||
* goldfinger
|
||||
* zerofrog(@gmail.com)
|
||||
*
|
||||
* Authors of v0.9.1:
|
||||
* Jake.Stine(@gmail.com)
|
||||
* cottonvibes(@gmail.com)
|
||||
* sudonim(1@gmail.com)
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
#include <vector>
|
||||
|
||||
namespace x86Emitter
|
||||
{
|
||||
extern void xStoreReg(const xRegisterSSE& src);
|
||||
extern void xRestoreReg(const xRegisterSSE& dest);
|
||||
|
||||
// ------------------------------------------------------------------------
|
||||
// Group 1 Instruction Class
|
||||
|
||||
extern const xImpl_Group1 xADC;
|
||||
extern const xImpl_Group1 xSBB;
|
||||
|
||||
extern const xImpl_G1Logic xAND;
|
||||
extern const xImpl_G1Logic xOR;
|
||||
extern const xImpl_G1Logic xXOR;
|
||||
|
||||
extern const xImpl_G1Arith xADD;
|
||||
extern const xImpl_G1Arith xSUB;
|
||||
extern const xImpl_G1Compare xCMP;
|
||||
|
||||
// ------------------------------------------------------------------------
|
||||
// Group 2 Instruction Class
|
||||
//
|
||||
// Optimization Note: For Imm forms, we ignore the instruction if the shift count is
|
||||
// zero. This is a safe optimization since any zero-value shift does not affect any
|
||||
// flags.
|
||||
|
||||
extern const xImpl_Mov xMOV;
|
||||
extern const xImpl_MovImm64 xMOV64;
|
||||
extern const xImpl_Test xTEST;
|
||||
extern const xImpl_Group2 xROL, xROR,
|
||||
xRCL, xRCR,
|
||||
xSHL, xSHR,
|
||||
xSAR;
|
||||
|
||||
// ------------------------------------------------------------------------
|
||||
// Group 3 Instruction Class
|
||||
|
||||
extern const xImpl_Group3 xNOT, xNEG;
|
||||
extern const xImpl_Group3 xUMUL, xUDIV;
|
||||
extern const xImpl_iDiv xDIV;
|
||||
extern const xImpl_iMul xMUL;
|
||||
|
||||
extern const xImpl_IncDec xINC, xDEC;
|
||||
|
||||
extern const xImpl_MovExtend xMOVZX, xMOVSX;
|
||||
|
||||
extern const xImpl_DwordShift xSHLD, xSHRD;
|
||||
|
||||
extern const xImpl_Group8 xBT;
|
||||
extern const xImpl_Group8 xBTR;
|
||||
extern const xImpl_Group8 xBTS;
|
||||
extern const xImpl_Group8 xBTC;
|
||||
|
||||
extern const xImpl_BitScan xBSF, xBSR;
|
||||
|
||||
extern const xImpl_JmpCall xJMP;
|
||||
extern const xImpl_JmpCall xCALL;
|
||||
extern const xImpl_FastCall xFastCall;
|
||||
|
||||
// ------------------------------------------------------------------------
|
||||
extern const xImpl_CMov xCMOVA, xCMOVAE,
|
||||
xCMOVB, xCMOVBE,
|
||||
xCMOVG, xCMOVGE,
|
||||
xCMOVL, xCMOVLE,
|
||||
|
||||
xCMOVZ, xCMOVE,
|
||||
xCMOVNZ, xCMOVNE,
|
||||
xCMOVO, xCMOVNO,
|
||||
xCMOVC, xCMOVNC,
|
||||
|
||||
xCMOVS, xCMOVNS,
|
||||
xCMOVPE, xCMOVPO;
|
||||
|
||||
// ------------------------------------------------------------------------
|
||||
extern const xImpl_Set xSETA, xSETAE,
|
||||
xSETB, xSETBE,
|
||||
xSETG, xSETGE,
|
||||
xSETL, xSETLE,
|
||||
|
||||
xSETZ, xSETE,
|
||||
xSETNZ, xSETNE,
|
||||
xSETO, xSETNO,
|
||||
xSETC, xSETNC,
|
||||
|
||||
xSETS, xSETNS,
|
||||
xSETPE, xSETPO;
|
||||
|
||||
// ------------------------------------------------------------------------
|
||||
// BMI extra instruction requires BMI1/BMI2
|
||||
extern const xImplBMI_RVM xMULX, xPDEP, xPEXT, xANDN_S; // Warning xANDN is already used by SSE
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////////////////////
|
||||
// Miscellaneous Instructions
|
||||
// These are all defined inline or in ix86.cpp.
|
||||
//
|
||||
|
||||
extern void xBSWAP(const xRegister32or64& to);
|
||||
|
||||
// ----- Lea Instructions (Load Effective Address) -----
|
||||
// Note: alternate (void*) forms of these instructions are not provided since those
|
||||
// forms are functionally equivalent to Mov reg,imm, and thus better written as MOVs
|
||||
// instead.
|
||||
|
||||
extern void xLEA(xRegister64 to, const xIndirectVoid& src, bool preserve_flags = false);
|
||||
extern void xLEA(xRegister32 to, const xIndirectVoid& src, bool preserve_flags = false);
|
||||
extern void xLEA(xRegister16 to, const xIndirectVoid& src, bool preserve_flags = false);
|
||||
/// LEA with a target that will be decided later, guarantees that no optimizations are performed that could change what needs to be written in
|
||||
extern u32* xLEA_Writeback(xAddressReg to);
|
||||
|
||||
// ----- Push / Pop Instructions -----
|
||||
// Note: pushad/popad implementations are intentionally left out. The instructions are
|
||||
// invalid in x64, and are super slow on x32. Use multiple Push/Pop instructions instead.
|
||||
|
||||
extern void xPOP(const xIndirectVoid& from);
|
||||
extern void xPUSH(const xIndirectVoid& from);
|
||||
|
||||
extern void xPOP(xRegister32or64 from);
|
||||
|
||||
extern void xPUSH(u32 imm);
|
||||
extern void xPUSH(xRegister32or64 from);
|
||||
|
||||
// pushes the EFLAGS register onto the stack
|
||||
extern void xPUSHFD();
|
||||
// pops the EFLAGS register from the stack
|
||||
extern void xPOPFD();
|
||||
|
||||
// ----- Miscellaneous Instructions -----
|
||||
// Various Instructions with no parameter and no special encoding logic.
|
||||
|
||||
extern void xLEAVE();
|
||||
extern void xRET();
|
||||
extern void xCBW();
|
||||
extern void xCWD();
|
||||
extern void xCDQ();
|
||||
extern void xCWDE();
|
||||
extern void xCDQE();
|
||||
|
||||
extern void xLAHF();
|
||||
extern void xSAHF();
|
||||
|
||||
extern void xSTC();
|
||||
extern void xCLC();
|
||||
|
||||
// NOP 1-byte
|
||||
extern void xNOP();
|
||||
|
||||
extern void xINT(u8 imm);
|
||||
extern void xINTO();
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////////////////////
|
||||
// Helper object to handle the various functions ABI
|
||||
class xScopedStackFrame
|
||||
{
|
||||
bool m_base_frame;
|
||||
bool m_save_base_pointer;
|
||||
int m_offset;
|
||||
|
||||
public:
|
||||
xScopedStackFrame(bool base_frame, bool save_base_pointer = false, int offset = 0);
|
||||
~xScopedStackFrame();
|
||||
};
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////////////////////
|
||||
/// Helper object to save some temporary registers before the call
|
||||
class xScopedSavedRegisters
|
||||
{
|
||||
std::vector<std::reference_wrapper<const xAddressReg>> regs;
|
||||
|
||||
public:
|
||||
xScopedSavedRegisters(std::initializer_list<std::reference_wrapper<const xAddressReg>> regs);
|
||||
~xScopedSavedRegisters();
|
||||
};
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////////////////////
|
||||
/// Helper function to calculate base+offset taking into account the limitations of x86-64's RIP-relative addressing
|
||||
/// (Will either return `base+offset` or LEA `base` into `tmpRegister` and return `tmpRegister+offset`)
|
||||
xAddressVoid xComplexAddress(const xAddressReg& tmpRegister, void* base, const xAddressVoid& offset);
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////////////////////
|
||||
/// Helper function to load addresses that may be far from the current instruction pointer
|
||||
/// On i386, resolves to `mov dst, (sptr)addr`
|
||||
/// On x86-64, resolves to either `mov dst, (sptr)addr` or `lea dst, [addr]` depending on the distance from RIP
|
||||
void xLoadFarAddr(const xAddressReg& dst, void* addr);
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////////////////////
|
||||
/// Helper function to write a 64-bit constant to memory
|
||||
/// May use `tmp` on x86-64
|
||||
void xWriteImm64ToMem(u64* addr, const xAddressReg& tmp, u64 imm);
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////////////////////
|
||||
/// Helper function to run operations with large immediates
|
||||
/// If the immediate fits in 32 bits, runs op(target, imm)
|
||||
/// Otherwise, loads imm into tmpRegister and then runs op(dst, tmp)
|
||||
template <typename Op, typename Dst>
|
||||
void xImm64Op(const Op& op, const Dst& dst, const xRegister64& tmpRegister, s64 imm)
|
||||
{
|
||||
if (imm == (s32)imm)
|
||||
{
|
||||
op(dst, imm);
|
||||
}
|
||||
else
|
||||
{
|
||||
xMOV64(tmpRegister, imm);
|
||||
op(dst, tmpRegister);
|
||||
}
|
||||
}
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////////////////////
|
||||
// JMP / Jcc Instructions!
|
||||
|
||||
extern void xJcc(JccComparisonType comparison, const void* target);
|
||||
extern s8* xJcc8(JccComparisonType comparison = Jcc_Unconditional, s8 displacement = 0);
|
||||
extern s32* xJcc32(JccComparisonType comparison = Jcc_Unconditional, s32 displacement = 0);
|
||||
|
||||
// ------------------------------------------------------------------------
|
||||
// Conditional jumps to fixed targets.
|
||||
// Jumps accept any pointer as a valid target (function or data), and will generate either
|
||||
// 8 or 32 bit displacement versions of the jump, depending on relative displacement of
|
||||
// the target (efficient!)
|
||||
//
|
||||
|
||||
template <typename T>
|
||||
__fi void xJE(T* func)
|
||||
{
|
||||
xJcc(Jcc_Equal, (void*)(uptr)func);
|
||||
}
|
||||
template <typename T>
|
||||
__fi void xJZ(T* func)
|
||||
{
|
||||
xJcc(Jcc_Zero, (void*)(uptr)func);
|
||||
}
|
||||
template <typename T>
|
||||
__fi void xJNE(T* func)
|
||||
{
|
||||
xJcc(Jcc_NotEqual, (void*)(uptr)func);
|
||||
}
|
||||
template <typename T>
|
||||
__fi void xJNZ(T* func)
|
||||
{
|
||||
xJcc(Jcc_NotZero, (void*)(uptr)func);
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
__fi void xJO(T* func)
|
||||
{
|
||||
xJcc(Jcc_Overflow, (void*)(uptr)func);
|
||||
}
|
||||
template <typename T>
|
||||
__fi void xJNO(T* func)
|
||||
{
|
||||
xJcc(Jcc_NotOverflow, (void*)(uptr)func);
|
||||
}
|
||||
template <typename T>
|
||||
__fi void xJC(T* func)
|
||||
{
|
||||
xJcc(Jcc_Carry, (void*)(uptr)func);
|
||||
}
|
||||
template <typename T>
|
||||
__fi void xJNC(T* func)
|
||||
{
|
||||
xJcc(Jcc_NotCarry, (void*)(uptr)func);
|
||||
}
|
||||
template <typename T>
|
||||
__fi void xJS(T* func)
|
||||
{
|
||||
xJcc(Jcc_Signed, (void*)(uptr)func);
|
||||
}
|
||||
template <typename T>
|
||||
__fi void xJNS(T* func)
|
||||
{
|
||||
xJcc(Jcc_Unsigned, (void*)(uptr)func);
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
__fi void xJPE(T* func)
|
||||
{
|
||||
xJcc(Jcc_ParityEven, (void*)(uptr)func);
|
||||
}
|
||||
template <typename T>
|
||||
__fi void xJPO(T* func)
|
||||
{
|
||||
xJcc(Jcc_ParityOdd, (void*)(uptr)func);
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
__fi void xJL(T* func)
|
||||
{
|
||||
xJcc(Jcc_Less, (void*)(uptr)func);
|
||||
}
|
||||
template <typename T>
|
||||
__fi void xJLE(T* func)
|
||||
{
|
||||
xJcc(Jcc_LessOrEqual, (void*)(uptr)func);
|
||||
}
|
||||
template <typename T>
|
||||
__fi void xJG(T* func)
|
||||
{
|
||||
xJcc(Jcc_Greater, (void*)(uptr)func);
|
||||
}
|
||||
template <typename T>
|
||||
__fi void xJGE(T* func)
|
||||
{
|
||||
xJcc(Jcc_GreaterOrEqual, (void*)(uptr)func);
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
__fi void xJB(T* func)
|
||||
{
|
||||
xJcc(Jcc_Below, (void*)(uptr)func);
|
||||
}
|
||||
template <typename T>
|
||||
__fi void xJBE(T* func)
|
||||
{
|
||||
xJcc(Jcc_BelowOrEqual, (void*)(uptr)func);
|
||||
}
|
||||
template <typename T>
|
||||
__fi void xJA(T* func)
|
||||
{
|
||||
xJcc(Jcc_Above, (void*)(uptr)func);
|
||||
}
|
||||
template <typename T>
|
||||
__fi void xJAE(T* func)
|
||||
{
|
||||
xJcc(Jcc_AboveOrEqual, (void*)(uptr)func);
|
||||
}
|
||||
|
||||
// ------------------------------------------------------------------------
|
||||
// Forward Jump Helpers (act as labels!)
|
||||
|
||||
#define DEFINE_FORWARD_JUMP(label, cond) \
|
||||
template <typename OperandType> \
|
||||
class xForward##label : public xForwardJump<OperandType> \
|
||||
{ \
|
||||
public: \
|
||||
xForward##label() \
|
||||
: xForwardJump<OperandType>(cond) \
|
||||
{ \
|
||||
} \
|
||||
};
|
||||
|
||||
// ------------------------------------------------------------------------
|
||||
// Note: typedefs below are defined individually in order to appease Intellisense
|
||||
// resolution. Including them into the class definition macro above breaks it.
|
||||
|
||||
typedef xForwardJump<s8> xForwardJump8;
|
||||
typedef xForwardJump<s32> xForwardJump32;
|
||||
|
||||
DEFINE_FORWARD_JUMP(JA, Jcc_Above);
|
||||
DEFINE_FORWARD_JUMP(JB, Jcc_Below);
|
||||
DEFINE_FORWARD_JUMP(JAE, Jcc_AboveOrEqual);
|
||||
DEFINE_FORWARD_JUMP(JBE, Jcc_BelowOrEqual);
|
||||
|
||||
typedef xForwardJA<s8> xForwardJA8;
|
||||
typedef xForwardJA<s32> xForwardJA32;
|
||||
typedef xForwardJB<s8> xForwardJB8;
|
||||
typedef xForwardJB<s32> xForwardJB32;
|
||||
typedef xForwardJAE<s8> xForwardJAE8;
|
||||
typedef xForwardJAE<s32> xForwardJAE32;
|
||||
typedef xForwardJBE<s8> xForwardJBE8;
|
||||
typedef xForwardJBE<s32> xForwardJBE32;
|
||||
|
||||
DEFINE_FORWARD_JUMP(JG, Jcc_Greater);
|
||||
DEFINE_FORWARD_JUMP(JL, Jcc_Less);
|
||||
DEFINE_FORWARD_JUMP(JGE, Jcc_GreaterOrEqual);
|
||||
DEFINE_FORWARD_JUMP(JLE, Jcc_LessOrEqual);
|
||||
|
||||
typedef xForwardJG<s8> xForwardJG8;
|
||||
typedef xForwardJG<s32> xForwardJG32;
|
||||
typedef xForwardJL<s8> xForwardJL8;
|
||||
typedef xForwardJL<s32> xForwardJL32;
|
||||
typedef xForwardJGE<s8> xForwardJGE8;
|
||||
typedef xForwardJGE<s32> xForwardJGE32;
|
||||
typedef xForwardJLE<s8> xForwardJLE8;
|
||||
typedef xForwardJLE<s32> xForwardJLE32;
|
||||
|
||||
DEFINE_FORWARD_JUMP(JZ, Jcc_Zero);
|
||||
DEFINE_FORWARD_JUMP(JE, Jcc_Equal);
|
||||
DEFINE_FORWARD_JUMP(JNZ, Jcc_NotZero);
|
||||
DEFINE_FORWARD_JUMP(JNE, Jcc_NotEqual);
|
||||
|
||||
typedef xForwardJZ<s8> xForwardJZ8;
|
||||
typedef xForwardJZ<s32> xForwardJZ32;
|
||||
typedef xForwardJE<s8> xForwardJE8;
|
||||
typedef xForwardJE<s32> xForwardJE32;
|
||||
typedef xForwardJNZ<s8> xForwardJNZ8;
|
||||
typedef xForwardJNZ<s32> xForwardJNZ32;
|
||||
typedef xForwardJNE<s8> xForwardJNE8;
|
||||
typedef xForwardJNE<s32> xForwardJNE32;
|
||||
|
||||
DEFINE_FORWARD_JUMP(JS, Jcc_Signed);
|
||||
DEFINE_FORWARD_JUMP(JNS, Jcc_Unsigned);
|
||||
|
||||
typedef xForwardJS<s8> xForwardJS8;
|
||||
typedef xForwardJS<s32> xForwardJS32;
|
||||
typedef xForwardJNS<s8> xForwardJNS8;
|
||||
typedef xForwardJNS<s32> xForwardJNS32;
|
||||
|
||||
DEFINE_FORWARD_JUMP(JO, Jcc_Overflow);
|
||||
DEFINE_FORWARD_JUMP(JNO, Jcc_NotOverflow);
|
||||
|
||||
typedef xForwardJO<s8> xForwardJO8;
|
||||
typedef xForwardJO<s32> xForwardJO32;
|
||||
typedef xForwardJNO<s8> xForwardJNO8;
|
||||
typedef xForwardJNO<s32> xForwardJNO32;
|
||||
|
||||
DEFINE_FORWARD_JUMP(JC, Jcc_Carry);
|
||||
DEFINE_FORWARD_JUMP(JNC, Jcc_NotCarry);
|
||||
|
||||
typedef xForwardJC<s8> xForwardJC8;
|
||||
typedef xForwardJC<s32> xForwardJC32;
|
||||
typedef xForwardJNC<s8> xForwardJNC8;
|
||||
typedef xForwardJNC<s32> xForwardJNC32;
|
||||
|
||||
DEFINE_FORWARD_JUMP(JPE, Jcc_ParityEven);
|
||||
DEFINE_FORWARD_JUMP(JPO, Jcc_ParityOdd);
|
||||
|
||||
typedef xForwardJPE<s8> xForwardJPE8;
|
||||
typedef xForwardJPE<s32> xForwardJPE32;
|
||||
typedef xForwardJPO<s8> xForwardJPO8;
|
||||
typedef xForwardJPO<s32> xForwardJPO32;
|
||||
|
||||
// ------------------------------------------------------------------------
|
||||
|
||||
extern void xEMMS();
|
||||
extern void xSTMXCSR(const xIndirect32& dest);
|
||||
extern void xLDMXCSR(const xIndirect32& src);
|
||||
extern void xFXSAVE(const xIndirectVoid& dest);
|
||||
extern void xFXRSTOR(const xIndirectVoid& src);
|
||||
|
||||
extern void xMOVDZX(const xRegisterSSE& to, const xRegister32or64& from);
|
||||
extern void xMOVDZX(const xRegisterSSE& to, const xIndirectVoid& src);
|
||||
|
||||
extern void xMOVD(const xRegister32or64& to, const xRegisterSSE& from);
|
||||
extern void xMOVD(const xIndirectVoid& dest, const xRegisterSSE& from);
|
||||
|
||||
extern void xMOVQ(const xIndirectVoid& dest, const xRegisterSSE& from);
|
||||
|
||||
extern void xMOVQZX(const xRegisterSSE& to, const xIndirectVoid& src);
|
||||
extern void xMOVQZX(const xRegisterSSE& to, const xRegisterSSE& from);
|
||||
|
||||
extern void xMOVSS(const xRegisterSSE& to, const xRegisterSSE& from);
|
||||
extern void xMOVSS(const xIndirectVoid& to, const xRegisterSSE& from);
|
||||
extern void xMOVSD(const xRegisterSSE& to, const xRegisterSSE& from);
|
||||
extern void xMOVSD(const xIndirectVoid& to, const xRegisterSSE& from);
|
||||
|
||||
extern void xMOVSSZX(const xRegisterSSE& to, const xIndirectVoid& from);
|
||||
extern void xMOVSDZX(const xRegisterSSE& to, const xIndirectVoid& from);
|
||||
|
||||
extern void xMOVNTDQA(const xRegisterSSE& to, const xIndirectVoid& from);
|
||||
extern void xMOVNTDQA(const xIndirectVoid& to, const xRegisterSSE& from);
|
||||
|
||||
extern void xMOVNTPD(const xIndirectVoid& to, const xRegisterSSE& from);
|
||||
extern void xMOVNTPS(const xIndirectVoid& to, const xRegisterSSE& from);
|
||||
|
||||
extern void xMOVMSKPS(const xRegister32& to, const xRegisterSSE& from);
|
||||
extern void xMOVMSKPD(const xRegister32& to, const xRegisterSSE& from);
|
||||
|
||||
extern void xMASKMOV(const xRegisterSSE& to, const xRegisterSSE& from);
|
||||
extern void xPMOVMSKB(const xRegister32or64& to, const xRegisterSSE& from);
|
||||
extern void xPALIGNR(const xRegisterSSE& to, const xRegisterSSE& from, u8 imm8);
|
||||
|
||||
// ------------------------------------------------------------------------
|
||||
|
||||
extern const xImplSimd_MoveSSE xMOVAPS;
|
||||
extern const xImplSimd_MoveSSE xMOVUPS;
|
||||
extern const xImplSimd_MoveSSE xMOVAPD;
|
||||
extern const xImplSimd_MoveSSE xMOVUPD;
|
||||
|
||||
#ifdef ALWAYS_USE_MOVAPS
|
||||
extern const xImplSimd_MoveSSE xMOVDQA;
|
||||
extern const xImplSimd_MoveSSE xMOVDQU;
|
||||
#else
|
||||
extern const xImplSimd_MoveDQ xMOVDQA;
|
||||
extern const xImplSimd_MoveDQ xMOVDQU;
|
||||
#endif
|
||||
|
||||
extern const xImplSimd_MovHL xMOVH;
|
||||
extern const xImplSimd_MovHL xMOVL;
|
||||
extern const xImplSimd_MovHL_RtoR xMOVLH;
|
||||
extern const xImplSimd_MovHL_RtoR xMOVHL;
|
||||
|
||||
extern const xImplSimd_PBlend xPBLEND;
|
||||
extern const xImplSimd_Blend xBLEND;
|
||||
extern const xImplSimd_PMove xPMOVSX;
|
||||
extern const xImplSimd_PMove xPMOVZX;
|
||||
|
||||
extern const xImplSimd_DestRegSSE xMOVSLDUP;
|
||||
extern const xImplSimd_DestRegSSE xMOVSHDUP;
|
||||
|
||||
extern void xINSERTPS(const xRegisterSSE& to, const xRegisterSSE& from, u8 imm8);
|
||||
extern void xINSERTPS(const xRegisterSSE& to, const xIndirect32& from, u8 imm8);
|
||||
|
||||
extern void xEXTRACTPS(const xRegister32or64& to, const xRegisterSSE& from, u8 imm8);
|
||||
extern void xEXTRACTPS(const xIndirect32& dest, const xRegisterSSE& from, u8 imm8);
|
||||
|
||||
// ------------------------------------------------------------------------
|
||||
|
||||
extern const xImplSimd_DestRegEither xPAND;
|
||||
extern const xImplSimd_DestRegEither xPANDN;
|
||||
extern const xImplSimd_DestRegEither xPOR;
|
||||
extern const xImplSimd_DestRegEither xPXOR;
|
||||
|
||||
extern const xImplSimd_Shuffle xSHUF;
|
||||
|
||||
// ------------------------------------------------------------------------
|
||||
|
||||
extern const xImplSimd_DestRegSSE xPTEST;
|
||||
|
||||
extern const xImplSimd_MinMax xMIN;
|
||||
extern const xImplSimd_MinMax xMAX;
|
||||
|
||||
extern const xImplSimd_Compare xCMPEQ, xCMPLT,
|
||||
xCMPLE, xCMPUNORD,
|
||||
xCMPNE, xCMPNLT,
|
||||
xCMPNLE, xCMPORD;
|
||||
|
||||
extern const xImplSimd_COMI xCOMI;
|
||||
extern const xImplSimd_COMI xUCOMI;
|
||||
|
||||
extern const xImplSimd_PCompare xPCMP;
|
||||
extern const xImplSimd_PMinMax xPMIN;
|
||||
extern const xImplSimd_PMinMax xPMAX;
|
||||
|
||||
// ------------------------------------------------------------------------
|
||||
//
|
||||
//
|
||||
extern void xCVTDQ2PD(const xRegisterSSE& to, const xRegisterSSE& from);
|
||||
extern void xCVTDQ2PD(const xRegisterSSE& to, const xIndirect64& from);
|
||||
extern void xCVTDQ2PS(const xRegisterSSE& to, const xRegisterSSE& from);
|
||||
extern void xCVTDQ2PS(const xRegisterSSE& to, const xIndirect128& from);
|
||||
|
||||
extern void xCVTPD2DQ(const xRegisterSSE& to, const xRegisterSSE& from);
|
||||
extern void xCVTPD2DQ(const xRegisterSSE& to, const xIndirect128& from);
|
||||
extern void xCVTPD2PS(const xRegisterSSE& to, const xRegisterSSE& from);
|
||||
extern void xCVTPD2PS(const xRegisterSSE& to, const xIndirect128& from);
|
||||
|
||||
extern void xCVTPI2PD(const xRegisterSSE& to, const xIndirect64& from);
|
||||
extern void xCVTPI2PS(const xRegisterSSE& to, const xIndirect64& from);
|
||||
|
||||
extern void xCVTPS2DQ(const xRegisterSSE& to, const xRegisterSSE& from);
|
||||
extern void xCVTPS2DQ(const xRegisterSSE& to, const xIndirect128& from);
|
||||
extern void xCVTPS2PD(const xRegisterSSE& to, const xRegisterSSE& from);
|
||||
extern void xCVTPS2PD(const xRegisterSSE& to, const xIndirect64& from);
|
||||
|
||||
extern void xCVTSD2SI(const xRegister32or64& to, const xRegisterSSE& from);
|
||||
extern void xCVTSD2SI(const xRegister32or64& to, const xIndirect64& from);
|
||||
extern void xCVTSD2SS(const xRegisterSSE& to, const xRegisterSSE& from);
|
||||
extern void xCVTSD2SS(const xRegisterSSE& to, const xIndirect64& from);
|
||||
extern void xCVTSI2SS(const xRegisterSSE& to, const xRegister32or64& from);
|
||||
extern void xCVTSI2SS(const xRegisterSSE& to, const xIndirect32& from);
|
||||
|
||||
extern void xCVTSS2SD(const xRegisterSSE& to, const xRegisterSSE& from);
|
||||
extern void xCVTSS2SD(const xRegisterSSE& to, const xIndirect32& from);
|
||||
extern void xCVTSS2SI(const xRegister32or64& to, const xRegisterSSE& from);
|
||||
extern void xCVTSS2SI(const xRegister32or64& to, const xIndirect32& from);
|
||||
|
||||
extern void xCVTTPD2DQ(const xRegisterSSE& to, const xRegisterSSE& from);
|
||||
extern void xCVTTPD2DQ(const xRegisterSSE& to, const xIndirect128& from);
|
||||
extern void xCVTTPS2DQ(const xRegisterSSE& to, const xRegisterSSE& from);
|
||||
extern void xCVTTPS2DQ(const xRegisterSSE& to, const xIndirect128& from);
|
||||
|
||||
extern void xCVTTSD2SI(const xRegister32or64& to, const xRegisterSSE& from);
|
||||
extern void xCVTTSD2SI(const xRegister32or64& to, const xIndirect64& from);
|
||||
extern void xCVTTSS2SI(const xRegister32or64& to, const xRegisterSSE& from);
|
||||
extern void xCVTTSS2SI(const xRegister32or64& to, const xIndirect32& from);
|
||||
|
||||
// ------------------------------------------------------------------------
|
||||
|
||||
extern const xImplSimd_AndNot xANDN;
|
||||
extern const xImplSimd_rSqrt xRCP;
|
||||
extern const xImplSimd_rSqrt xRSQRT;
|
||||
extern const xImplSimd_Sqrt xSQRT;
|
||||
|
||||
extern const xImplSimd_Shift xPSLL;
|
||||
extern const xImplSimd_Shift xPSRL;
|
||||
extern const xImplSimd_ShiftWithoutQ xPSRA;
|
||||
extern const xImplSimd_AddSub xPADD;
|
||||
extern const xImplSimd_AddSub xPSUB;
|
||||
extern const xImplSimd_PMul xPMUL;
|
||||
extern const xImplSimd_PAbsolute xPABS;
|
||||
extern const xImplSimd_PSign xPSIGN;
|
||||
extern const xImplSimd_PMultAdd xPMADD;
|
||||
extern const xImplSimd_HorizAdd xHADD;
|
||||
extern const xImplSimd_DotProduct xDP;
|
||||
extern const xImplSimd_Round xROUND;
|
||||
|
||||
extern const xImplSimd_PShuffle xPSHUF;
|
||||
extern const SimdImpl_PUnpack xPUNPCK;
|
||||
extern const xImplSimd_Unpack xUNPCK;
|
||||
extern const SimdImpl_Pack xPACK;
|
||||
extern const xImplSimd_PInsert xPINSR;
|
||||
extern const SimdImpl_PExtract xPEXTR;
|
||||
|
||||
// ------------------------------------------------------------------------
|
||||
|
||||
extern const xImplAVX_Move xVMOVAPS;
|
||||
extern const xImplAVX_Move xVMOVUPS;
|
||||
extern const xImplAVX_ArithFloat xVADD;
|
||||
extern const xImplAVX_ArithFloat xVSUB;
|
||||
extern const xImplAVX_ArithFloat xVMUL;
|
||||
extern const xImplAVX_ArithFloat xVDIV;
|
||||
extern const xImplAVX_CmpFloat xVCMP;
|
||||
extern const xImplAVX_ThreeArgYMM xVPAND;
|
||||
extern const xImplAVX_ThreeArgYMM xVPANDN;
|
||||
extern const xImplAVX_ThreeArgYMM xVPOR;
|
||||
extern const xImplAVX_ThreeArgYMM xVPXOR;
|
||||
extern const xImplAVX_CmpInt xVPCMP;
|
||||
|
||||
extern void xVPMOVMSKB(const xRegister32& to, const xRegisterSSE& from);
|
||||
extern void xVMOVMSKPS(const xRegister32& to, const xRegisterSSE& from);
|
||||
extern void xVMOVMSKPD(const xRegister32& to, const xRegisterSSE& from);
|
||||
extern void xVZEROUPPER();
|
||||
|
||||
} // namespace x86Emitter
|
||||
174
common/emitter/internal.h
Normal file
174
common/emitter/internal.h
Normal file
@@ -0,0 +1,174 @@
|
||||
// SPDX-FileCopyrightText: 2002-2025 PCSX2 Dev Team
|
||||
// SPDX-License-Identifier: GPL-3.0+
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "common/emitter/x86types.h"
|
||||
#include "common/emitter/instructions.h"
|
||||
|
||||
namespace x86Emitter
|
||||
{
|
||||
|
||||
#define OpWriteSSE(pre, op) xOpWrite0F(pre, op, to, from)
|
||||
|
||||
extern void SimdPrefix(u8 prefix, u16 opcode);
|
||||
extern void EmitSibMagic(uint regfield, const void* address, int extraRIPOffset = 0);
|
||||
extern void EmitSibMagic(uint regfield, const xIndirectVoid& info, int extraRIPOffset = 0);
|
||||
extern void EmitSibMagic(uint reg1, const xRegisterBase& reg2, int = 0);
|
||||
extern void EmitSibMagic(const xRegisterBase& reg1, const xRegisterBase& reg2, int = 0);
|
||||
extern void EmitSibMagic(const xRegisterBase& reg1, const void* src, int extraRIPOffset = 0);
|
||||
extern void EmitSibMagic(const xRegisterBase& reg1, const xIndirectVoid& sib, int extraRIPOffset = 0);
|
||||
|
||||
extern void EmitRex(uint regfield, const void* address);
|
||||
extern void EmitRex(uint regfield, const xIndirectVoid& info);
|
||||
extern void EmitRex(uint reg1, const xRegisterBase& reg2);
|
||||
extern void EmitRex(const xRegisterBase& reg1, const xRegisterBase& reg2);
|
||||
extern void EmitRex(const xRegisterBase& reg1, const void* src);
|
||||
extern void EmitRex(const xRegisterBase& reg1, const xIndirectVoid& sib);
|
||||
|
||||
extern void _xMovRtoR(const xRegisterInt& to, const xRegisterInt& from);
|
||||
|
||||
template <typename T>
|
||||
inline void xWrite(T val)
|
||||
{
|
||||
*(T*)x86Ptr = val;
|
||||
x86Ptr += sizeof(T);
|
||||
}
|
||||
|
||||
template <typename T1, typename T2>
|
||||
__emitinline void xOpWrite(u8 prefix, u8 opcode, const T1& param1, const T2& param2, int extraRIPOffset = 0)
|
||||
{
|
||||
if (prefix != 0)
|
||||
xWrite8(prefix);
|
||||
EmitRex(param1, param2);
|
||||
|
||||
xWrite8(opcode);
|
||||
|
||||
EmitSibMagic(param1, param2, extraRIPOffset);
|
||||
}
|
||||
|
||||
template <typename T1, typename T2>
|
||||
__emitinline void xOpAccWrite(u8 prefix, u8 opcode, const T1& param1, const T2& param2)
|
||||
{
|
||||
if (prefix != 0)
|
||||
xWrite8(prefix);
|
||||
EmitRex(param1, param2);
|
||||
|
||||
xWrite8(opcode);
|
||||
}
|
||||
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////////////////////
|
||||
// emitter helpers for xmm instruction with prefixes, most of which are using
|
||||
// the basic opcode format (items inside braces denote optional or conditional
|
||||
// emission):
|
||||
//
|
||||
// [Prefix] / 0x0f / [OpcodePrefix] / Opcode / ModRM+[SibSB]
|
||||
//
|
||||
// Prefixes are typically 0x66, 0xf2, or 0xf3. OpcodePrefixes are either 0x38 or
|
||||
// 0x3a [and other value will result in assertion failue].
|
||||
//
|
||||
template <typename T1, typename T2>
|
||||
__emitinline void xOpWrite0F(u8 prefix, u16 opcode, const T1& param1, const T2& param2)
|
||||
{
|
||||
if (prefix != 0)
|
||||
xWrite8(prefix);
|
||||
EmitRex(param1, param2);
|
||||
|
||||
SimdPrefix(0, opcode);
|
||||
|
||||
EmitSibMagic(param1, param2);
|
||||
}
|
||||
|
||||
template <typename T1, typename T2>
|
||||
__emitinline void xOpWrite0F(u8 prefix, u16 opcode, const T1& param1, const T2& param2, u8 imm8)
|
||||
{
|
||||
if (prefix != 0)
|
||||
xWrite8(prefix);
|
||||
EmitRex(param1, param2);
|
||||
|
||||
SimdPrefix(0, opcode);
|
||||
|
||||
EmitSibMagic(param1, param2, 1);
|
||||
xWrite8(imm8);
|
||||
}
|
||||
|
||||
template <typename T1, typename T2>
|
||||
__emitinline void xOpWrite0F(u16 opcode, const T1& param1, const T2& param2)
|
||||
{
|
||||
xOpWrite0F(0, opcode, param1, param2);
|
||||
}
|
||||
|
||||
template <typename T1, typename T2>
|
||||
__emitinline void xOpWrite0F(u16 opcode, const T1& param1, const T2& param2, u8 imm8)
|
||||
{
|
||||
xOpWrite0F(0, opcode, param1, param2, imm8);
|
||||
}
|
||||
|
||||
// VEX 2 Bytes Prefix
|
||||
template <typename T1, typename T2, typename T3>
|
||||
__emitinline void xOpWriteC5(u8 prefix, u8 opcode, const T1& param1, const T2& param2, const T3& param3)
|
||||
{
|
||||
pxAssert(prefix == 0 || prefix == 0x66 || prefix == 0xF3 || prefix == 0xF2);
|
||||
|
||||
const xRegisterBase& reg = param1.IsReg() ? param1 : param2;
|
||||
|
||||
u8 nR = reg.IsExtended() ? 0x00 : 0x80;
|
||||
u8 L;
|
||||
|
||||
// Needed for 256-bit movemask.
|
||||
if constexpr (std::is_same_v<T3, xRegisterSSE>)
|
||||
L = param3.IsWideSIMD() ? 4 : 0;
|
||||
else
|
||||
L = reg.IsWideSIMD() ? 4 : 0;
|
||||
|
||||
u8 nv = (param2.IsEmpty() ? 0xF : ((~param2.GetId() & 0xF))) << 3;
|
||||
|
||||
u8 p =
|
||||
prefix == 0xF2 ? 3 :
|
||||
prefix == 0xF3 ? 2 :
|
||||
prefix == 0x66 ? 1 :
|
||||
0;
|
||||
|
||||
xWrite8(0xC5);
|
||||
xWrite8(nR | nv | L | p);
|
||||
xWrite8(opcode);
|
||||
EmitSibMagic(param1, param3);
|
||||
}
|
||||
|
||||
// VEX 3 Bytes Prefix
// Emits a three-byte VEX-encoded instruction:
//   C4 [R.X.B.mmmmm] [W.vvvv.L.pp] opcode modrm...
// mb_prefix selects the opcode map (0F, 0F38 or 0F3A); w overrides VEX.W when
// not -1, otherwise W is derived from the register's operand size.
template <typename T1, typename T2, typename T3>
__emitinline void xOpWriteC4(u8 prefix, u8 mb_prefix, u8 opcode, const T1& param1, const T2& param2, const T3& param3, int w = -1)
{
	// Only the legacy SIMD prefixes can be folded into the VEX "pp" field.
	pxAssert(prefix == 0 || prefix == 0x66 || prefix == 0xF3 || prefix == 0xF2);
	pxAssert(mb_prefix == 0x0F || mb_prefix == 0x38 || mb_prefix == 0x3A);

	const xRegisterInt& reg = param1.IsReg() ? param1 : param2;

	// R/X/B are stored inverted; a set bit means "not extended".
	u8 nR = reg.IsExtended() ? 0x00 : 0x80;
	u8 nB = param3.IsExtended() ? 0x00 : 0x20;
	u8 nX = 0x40; // likely unused so hardwired to disabled
	u8 L = reg.IsWideSIMD() ? 4 : 0;
	u8 W = (w == -1) ? (reg.GetOperandSize() == 8 ? 0x80 : 0) : // autodetect the size
	                   0x80 * w; // take directly the W value

	// vvvv: inverted id of the extra source register.
	u8 nv = (~param2.GetId() & 0xF) << 3;

	// pp: implied legacy prefix (0=none, 1=66, 2=F3, 3=F2).
	u8 p =
		prefix == 0xF2 ? 3 :
		prefix == 0xF3 ? 2 :
		prefix == 0x66 ? 1 :
		                 0;

	// mmmmm: opcode map selector (1=0F, 2=0F38, 3=0F3A).
	u8 m =
		mb_prefix == 0x3A ? 3 :
		mb_prefix == 0x38 ? 2 :
		                    1;

	xWrite8(0xC4);
	xWrite8(nR | nX | nB | m);
	xWrite8(W | nv | L | p);
	xWrite8(opcode);
	EmitSibMagic(param1, param3);
}
|
||||
} // namespace x86Emitter
|
||||
269
common/emitter/jmp.cpp
Normal file
269
common/emitter/jmp.cpp
Normal file
@@ -0,0 +1,269 @@
|
||||
// SPDX-FileCopyrightText: 2002-2025 PCSX2 Dev Team
|
||||
// SPDX-License-Identifier: GPL-3.0+
|
||||
|
||||
/*
|
||||
* ix86 core v0.9.1
|
||||
*
|
||||
* Original Authors (v0.6.2 and prior):
|
||||
* linuzappz <linuzappz@pcsx.net>
|
||||
* alexey silinov
|
||||
* goldfinger
|
||||
* zerofrog(@gmail.com)
|
||||
*
|
||||
* Authors of v0.9.1:
|
||||
* Jake.Stine(@gmail.com)
|
||||
* cottonvibes(@gmail.com)
|
||||
* sudonim(1@gmail.com)
|
||||
*/
|
||||
|
||||
#include "common/emitter/internal.h"
|
||||
|
||||
namespace x86Emitter
|
||||
{
|
||||
|
||||
// Emits an indirect jmp/call through a register (FF /4 for jmp, FF /2 for call).
void xImpl_JmpCall::operator()(const xAddressReg& absreg) const
{
	// Jumps are always wide and don't need the rex.W
	xOpWrite(0, 0xff, isJmp ? 4 : 2, absreg.GetNonWide());
}
|
||||
// Emits an indirect jmp/call through a memory operand (FF /4 or FF /2).
void xImpl_JmpCall::operator()(const xIndirectNative& src) const
{
	// Jumps are always wide and don't need the rex.W
	EmitRex(0, xIndirect32(src.Base, src.Index, 1, 0));
	xWrite8(0xff);
	EmitSibMagic(isJmp ? 4 : 2, src);
}
|
||||
|
||||
const xImpl_JmpCall xJMP = {true};
|
||||
const xImpl_JmpCall xCALL = {false};
|
||||
|
||||
|
||||
// Moves the caller-supplied argument registers a1/a2 into the platform's
// first/second argument registers, taking care not to clobber either value
// when they already occupy the opposite argument register.
template <typename Reg1, typename Reg2>
void prepareRegsForFastcall(const Reg1& a1, const Reg2& a2)
{
	if (a1.IsEmpty())
		return;

	// Make sure we don't mess up if someone tries to fastcall with a1 in arg2reg and a2 in arg1reg
	if (a2.Id != arg1reg.Id)
	{
		// a2 is not in arg1reg, so moving a1 first is safe.
		xMOV(Reg1(arg1reg), a1);
		if (!a2.IsEmpty())
		{
			xMOV(Reg2(arg2reg), a2);
		}
	}
	else if (a1.Id != arg2reg.Id)
	{
		// a2 lives in arg1reg but a1 is not in arg2reg: move a2 out first.
		xMOV(Reg2(arg2reg), a2);
		xMOV(Reg1(arg1reg), a1);
	}
	else
	{
		// Full swap (a1 in arg2reg AND a2 in arg1reg): stash a1 on the stack.
		xPUSH(a1);
		xMOV(Reg2(arg2reg), a2);
		xPOP(Reg1(arg1reg));
	}
}
|
||||
|
||||
// Calls f with up to two 32-bit register arguments, using a direct rel32 call
// when the target is in range, otherwise an indirect call through rax.
void xImpl_FastCall::operator()(const void* f, const xRegister32& a1, const xRegister32& a2) const
{
	prepareRegsForFastcall(a1, a2);
	// Displacement from the end of a 5-byte call instruction to the target.
	uptr disp = ((uptr)xGetPtr() + 5) - (uptr)f;
	if ((sptr)disp == (s32)disp)
	{
		xCALL(f);
	}
	else
	{
		// Target out of rel32 range: load the absolute address and call it.
		xLEA(rax, ptr64[f]);
		xCALL(rax);
	}
}
|
||||
|
||||
// Calls f with up to two native-width register arguments; same rel32-range
// logic as the 32-bit overload above.
void xImpl_FastCall::operator()(const void* f, const xRegisterLong& a1, const xRegisterLong& a2) const
{
	prepareRegsForFastcall(a1, a2);
	// Displacement from the end of a 5-byte call instruction to the target.
	uptr disp = ((uptr)xGetPtr() + 5) - (uptr)f;
	if ((sptr)disp == (s32)disp)
	{
		xCALL(f);
	}
	else
	{
		// Target out of rel32 range: load the absolute address and call it.
		xLEA(rax, ptr64[f]);
		xCALL(rax);
	}
}
|
||||
|
||||
// Calls f with an immediate first argument and an optional register second
// argument. a2 is moved first so that loading a1 cannot clobber it.
void xImpl_FastCall::operator()(const void* f, u32 a1, const xRegisterLong& a2) const
{
	if (!a2.IsEmpty())
	{
		xMOV(arg2reg, a2);
	}
	xMOV(arg1reg, a1);
	(*this)(f, arg1reg, arg2reg);
}
|
||||
|
||||
// Calls f with a pointer first argument (loaded via LEA so RIP-relative
// addressing can be used where applicable).
void xImpl_FastCall::operator()(const void* f, void* a1) const
{
	xLEA(arg1reg, ptr[a1]);
	(*this)(f, arg1reg, arg2reg);
}
|
||||
|
||||
// Calls f with an immediate first argument and an optional 32-bit register
// second argument. a2 is moved first so that loading a1 cannot clobber it.
void xImpl_FastCall::operator()(const void* f, u32 a1, const xRegister32& a2) const
{
	if (!a2.IsEmpty())
	{
		xMOV(arg2regd, a2);
	}
	xMOV(arg1regd, a1);
	(*this)(f, arg1regd, arg2regd);
}
|
||||
|
||||
// Calls f with a 32-bit memory operand as the first argument.
void xImpl_FastCall::operator()(const void* f, const xIndirect32& a1) const
{
	xMOV(arg1regd, a1);
	(*this)(f, arg1regd);
}
|
||||
|
||||
// Calls f with two immediate 32-bit arguments.
void xImpl_FastCall::operator()(const void* f, u32 a1, u32 a2) const
{
	xMOV(arg1regd, a1);
	xMOV(arg2regd, a2);
	(*this)(f, arg1regd, arg2regd);
}
|
||||
|
||||
// Calls through a memory operand with up to two register arguments.
void xImpl_FastCall::operator()(const xIndirectNative& f, const xRegisterLong& a1, const xRegisterLong& a2) const
{
	prepareRegsForFastcall(a1, a2);
	xCALL(f);
}
|
||||
|
||||
const xImpl_FastCall xFastCall = {};
|
||||
|
||||
// ------------------------------------------------------------------------
// Emits a 32 bit jump, and returns a pointer to the 32 bit displacement.
// (displacements should be assigned relative to the end of the jump instruction,
// or in other words *(retval+1) )
__emitinline s32* xJcc32(JccComparisonType comparison, s32 displacement)
{
	if (comparison == Jcc_Unconditional)
		xWrite8(0xe9); // JMP rel32
	else
	{
		// Conditional near jump: 0F 80+cc rel32.
		xWrite8(0x0f);
		xWrite8(0x80 | comparison);
	}
	xWrite<s32>(displacement);

	// Return the address of the displacement field just written.
	return ((s32*)xGetPtr()) - 1;
}
|
||||
|
||||
// ------------------------------------------------------------------------
// Emits an 8 bit jump, and returns a pointer to the 8 bit displacement.
// (displacements should be assigned relative to the end of the jump instruction,
// or in other words *(retval+1) )
__emitinline s8* xJcc8(JccComparisonType comparison, s8 displacement)
{
	// EB = JMP rel8; 70+cc = Jcc rel8.
	xWrite8((comparison == Jcc_Unconditional) ? 0xeb : (0x70 | comparison));
	xWrite<s8>(displacement);
	return (s8*)xGetPtr() - 1;
}
|
||||
|
||||
// ------------------------------------------------------------------------
// Writes a jump at the current x86Ptr, which targets a pre-established target address.
// (usually a backwards jump)
//
// slideForward - used internally by xSmartJump to indicate that the jump target is going
// to slide forward in the event of an 8 bit displacement.
//
__emitinline void xJccKnownTarget(JccComparisonType comparison, const void* target, bool slideForward)
{
	// Calculate the potential j8 displacement first, assuming an instruction length of 2:
	sptr displacement8 = (sptr)target - (sptr)(xGetPtr() + 2);

	// When sliding, the target moves forward by the size of the (larger) j32 form.
	const int slideVal = slideForward ? ((comparison == Jcc_Unconditional) ? 3 : 4) : 0;
	displacement8 -= slideVal;

	if (slideForward)
	{
		pxAssertMsg(displacement8 >= 0, "Used slideForward on a backward jump; nothing to slide!");
	}

	if (is_s8(displacement8))
		xJcc8(comparison, displacement8);
	else
	{
		// Perform a 32 bit jump instead. :(
		s32* bah = xJcc32(comparison);
		sptr distance = (sptr)target - (sptr)xGetPtr();

		// This assert won't physically happen on x86 targets
		pxAssertMsg(distance >= -0x80000000LL && distance < 0x80000000LL, "Jump target is too far away, needs an indirect register");

		// Back-patch the displacement field emitted by xJcc32.
		*bah = (s32)distance;
	}
}
|
||||
|
||||
// Low-level jump instruction! Specify a comparison type and a target in void* form, and
// a jump (either 8 or 32 bit) is generated.
__emitinline void xJcc(JccComparisonType comparison, const void* target)
{
	xJccKnownTarget(comparison, target, false);
}
|
||||
|
||||
// Emits a forward jump of the requested displacement size, recording BasePtr
// (the address just past the instruction) so the displacement can be patched
// later via _setTarget.
xForwardJumpBase::xForwardJumpBase(uint opsize, JccComparisonType cctype)
{
	pxAssert(opsize == 1 || opsize == 4);
	pxAssertMsg(cctype != Jcc_Unknown, "Invalid ForwardJump conditional type.");

	BasePtr = (s8*)xGetPtr() +
			  ((opsize == 1) ? 2 : // j8's are always 2 bytes.
                   ((cctype == Jcc_Unconditional) ? 5 : 6)); // j32's are either 5 or 6 bytes

	if (opsize == 1)
		xWrite8((cctype == Jcc_Unconditional) ? 0xeb : (0x70 | cctype));
	else
	{
		if (cctype == Jcc_Unconditional)
			xWrite8(0xe9);
		else
		{
			// Conditional near jump: two-byte 0F 80+cc opcode.
			xWrite8(0x0f);
			xWrite8(0x80 | cctype);
		}
	}

	// Reserve space for the (not yet known) displacement.
	xAdvancePtr(opsize);
}
|
||||
|
||||
// Patches the displacement of a previously-emitted forward jump so that it
// lands on the current emitter position.
void xForwardJumpBase::_setTarget(uint opsize) const
{
	pxAssertMsg(BasePtr != NULL, "");

	sptr displacement = (sptr)xGetPtr() - (sptr)BasePtr;
	if (opsize == 1)
	{
		pxAssertMsg(is_s8(displacement), "Emitter Error: Invalid short jump displacement.");
		BasePtr[-1] = (s8)displacement;
	}
	else
	{
		// full displacement, no sanity checks needed :D
		((s32*)BasePtr)[-1] = displacement;
	}
}
|
||||
|
||||
// returns the inverted conditional type for this Jcc condition. Ie, JNS will become JS.
|
||||
__fi JccComparisonType xInvertCond(JccComparisonType src)
|
||||
{
|
||||
pxAssert(src != Jcc_Unknown);
|
||||
if (Jcc_Unconditional == src)
|
||||
return Jcc_Unconditional;
|
||||
|
||||
// x86 conditionals are clever! To invert conditional types, just invert the lower bit:
|
||||
return (JccComparisonType)((int)src ^ 1);
|
||||
}
|
||||
} // namespace x86Emitter
|
||||
403
common/emitter/legacy.cpp
Normal file
403
common/emitter/legacy.cpp
Normal file
@@ -0,0 +1,403 @@
|
||||
// SPDX-FileCopyrightText: 2002-2025 PCSX2 Dev Team
|
||||
// SPDX-License-Identifier: GPL-3.0+
|
||||
|
||||
/*
|
||||
* ix86 core v0.6.2
|
||||
* Authors: linuzappz <linuzappz@pcsx.net>
|
||||
* alexey silinov
|
||||
* goldfinger
|
||||
* zerofrog(@gmail.com)
|
||||
* cottonvibes(@gmail.com)
|
||||
*/
|
||||
|
||||
//------------------------------------------------------------------
|
||||
// ix86 legacy emitter functions
|
||||
//------------------------------------------------------------------
|
||||
|
||||
#include "common/emitter/legacy_internal.h"
|
||||
#include "common/Console.h"
|
||||
#include <cassert>
|
||||
|
||||
// Emits a ModRM byte: mod in bits 7-6, reg in bits 5-3, rm in bits 2-0.
emitterT void ModRM(uint mod, uint reg, uint rm)
{
	// Sanity checks retained for legacy callers only; the new emitter
	// validates operands when they are constructed, so these can go away
	// once all legacy emitter code has been removed.
	pxAssert(mod < 4);
	pxAssert(reg < 8);
	pxAssert(rm < 8);

	const uint modrm_byte = (mod << 6) | (reg << 3) | rm;
	xWrite8(modrm_byte);
}
|
||||
|
||||
// Emits a SIB byte: scale in bits 7-6, index in bits 5-3, base in bits 2-0.
emitterT void SibSB(uint ss, uint index, uint base)
{
	// Sanity checks retained for legacy callers only; the new emitter
	// validates operands when they are constructed, so these can go away
	// once all legacy emitter code has been removed.
	pxAssert(ss < 4);
	pxAssert(index < 8);
	pxAssert(base < 8);

	const uint sib_byte = (ss << 6) | (index << 3) | base;
	xWrite8(sib_byte);
}
|
||||
|
||||
using namespace x86Emitter;
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////////////////////
|
||||
//////////////////////////////////////////////////////////////////////////////////////////
|
||||
// From here on are instructions that have NOT been implemented in the new emitter.
|
||||
//////////////////////////////////////////////////////////////////////////////////////////
|
||||
//////////////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
// Emits a conditional rel8 jump (opcode cc, displacement to) and returns the
// address of the displacement byte for later patching.
emitterT u8* J8Rel(int cc, int to)
{
	xWrite8(cc);
	xWrite8(to);
	return (u8*)(x86Ptr - 1);
}
|
||||
|
||||
// Emits a conditional rel16 jump (66 0F cc disp16) and returns the address of
// the 16-bit displacement for later patching.
emitterT u16* J16Rel(int cc, u32 to)
{
	xWrite16(0x0F66); // 66 (opsize) followed by 0F, written little-endian
	xWrite8(cc);
	xWrite16(to);
	return (u16*)(x86Ptr - 2);
}
|
||||
|
||||
// Emits a conditional rel32 jump (0F cc disp32) and returns the address of
// the 32-bit displacement for later patching.
emitterT u32* J32Rel(int cc, u32 to)
{
	xWrite8(0x0F);
	xWrite8(cc);
	xWrite32(to);
	return (u32*)(x86Ptr - 4);
}
|
||||
|
||||
////////////////////////////////////////////////////
// Repositions the emitter's output cursor to the given address.
emitterT void x86SetPtr(u8* ptr)
{
	x86Ptr = ptr;
}
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////////////////////
|
||||
// Jump Label API (as rough as it might be)
|
||||
//
|
||||
// I don't auto-inline these because of the console logging in case of error, which tends
|
||||
// to cause quite a bit of code bloat.
|
||||
//
|
||||
// Patches the rel8 displacement at j8 so the jump lands on the current
// emitter position; errors out if the distance doesn't fit in 8 bits.
void x86SetJ8(u8* j8)
{
	u32 jump = (x86Ptr - j8) - 1;

	if (jump > 0x7f)
	{
		Console.Error("j8 greater than 0x7f!!");
		assert(0);
	}
	*j8 = (u8)jump;
}
|
||||
|
||||
// Like x86SetJ8, but additionally pads the jump target forward with NOPs to
// the next 16-byte boundary (when close enough) for alignment purposes.
void x86SetJ8A(u8* j8)
{
	u32 jump = (x86Ptr - j8) - 1;

	if (jump > 0x7f)
	{
		Console.Error("j8 greater than 0x7f!!");
		assert(0);
	}

	if (((uptr)x86Ptr & 0xf) > 4)
	{
		// Extend the displacement to the next 16-byte boundary, but only if
		// it still fits in a signed 8-bit field.
		uptr newjump = jump + 16 - ((uptr)x86Ptr & 0xf);

		if (newjump <= 0x7f)
		{
			jump = newjump;
			// Fill the gap with NOPs (0x90) so execution can fall through.
			while ((uptr)x86Ptr & 0xf)
				*x86Ptr++ = 0x90;
		}
	}
	*j8 = (u8)jump;
}
|
||||
|
||||
////////////////////////////////////////////////////
// Patches the rel32 displacement at j32 so the jump lands on the current
// emitter position.
emitterT void x86SetJ32(u32* j32)
{
	*j32 = (x86Ptr - (u8*)j32) - 4;
}
|
||||
|
||||
// NOP-pads the current position to a 16-byte boundary, then patches j32 to
// target the aligned position.
emitterT void x86SetJ32A(u32* j32)
{
	while ((uptr)x86Ptr & 0xf)
		*x86Ptr++ = 0x90;
	x86SetJ32(j32);
}
|
||||
|
||||
/********************/
|
||||
/* IX86 instructions */
|
||||
/********************/
|
||||
|
||||
////////////////////////////////////
|
||||
// jump instructions /
|
||||
////////////////////////////////////
|
||||
|
||||
/* jmp rel8 */
// Returns the address of the displacement byte for later patching.
emitterT u8* JMP8(u8 to)
{
	xWrite8(0xEB);
	xWrite8(to);
	return x86Ptr - 1;
}
|
||||
|
||||
/* jmp rel32 */
|
||||
emitterT u32* JMP32(uptr to)
|
||||
{
|
||||
assert((sptr)to <= 0x7fffffff && (sptr)to >= -0x7fffffff);
|
||||
xWrite8(0xE9);
|
||||
xWrite32(to);
|
||||
return (u32*)(x86Ptr - 4);
|
||||
}
|
||||
|
||||
// ------------------------------------------------------------------------
// Legacy rel8 conditional jump wrappers. Each emits its 70+cc opcode via
// J8Rel and returns a pointer to the displacement byte for later patching.
// Note the intentional aliases: JZ8==JE8, JNZ8==JNE8, JNG8==JLE8,
// JNGE8==JL8, JNL8==JGE8, JNLE8==JG8.
// ------------------------------------------------------------------------

/* jp rel8 */
emitterT u8* JP8(u8 to)
{
	return J8Rel(0x7A, to);
}

/* jnp rel8 */
emitterT u8* JNP8(u8 to)
{
	return J8Rel(0x7B, to);
}

/* je rel8 */
emitterT u8* JE8(u8 to)
{
	return J8Rel(0x74, to);
}

/* jz rel8 */
emitterT u8* JZ8(u8 to)
{
	return J8Rel(0x74, to);
}

/* js rel8 */
emitterT u8* JS8(u8 to)
{
	return J8Rel(0x78, to);
}

/* jns rel8 */
emitterT u8* JNS8(u8 to)
{
	return J8Rel(0x79, to);
}

/* jg rel8 */
emitterT u8* JG8(u8 to)
{
	return J8Rel(0x7F, to);
}

/* jge rel8 */
emitterT u8* JGE8(u8 to)
{
	return J8Rel(0x7D, to);
}

/* jl rel8 */
emitterT u8* JL8(u8 to)
{
	return J8Rel(0x7C, to);
}

/* ja rel8 */
emitterT u8* JA8(u8 to)
{
	return J8Rel(0x77, to);
}

/* jae rel8 */
emitterT u8* JAE8(u8 to)
{
	return J8Rel(0x73, to);
}

/* jb rel8 */
emitterT u8* JB8(u8 to)
{
	return J8Rel(0x72, to);
}

/* jbe rel8 */
emitterT u8* JBE8(u8 to)
{
	return J8Rel(0x76, to);
}

/* jle rel8 */
emitterT u8* JLE8(u8 to)
{
	return J8Rel(0x7E, to);
}

/* jne rel8 */
emitterT u8* JNE8(u8 to)
{
	return J8Rel(0x75, to);
}

/* jnz rel8 */
emitterT u8* JNZ8(u8 to)
{
	return J8Rel(0x75, to);
}

/* jng rel8 */
emitterT u8* JNG8(u8 to)
{
	return J8Rel(0x7E, to);
}

/* jnge rel8 */
emitterT u8* JNGE8(u8 to)
{
	return J8Rel(0x7C, to);
}

/* jnl rel8 */
emitterT u8* JNL8(u8 to)
{
	return J8Rel(0x7D, to);
}

/* jnle rel8 */
emitterT u8* JNLE8(u8 to)
{
	return J8Rel(0x7F, to);
}

/* jo rel8 */
emitterT u8* JO8(u8 to)
{
	return J8Rel(0x70, to);
}

/* jno rel8 */
emitterT u8* JNO8(u8 to)
{
	return J8Rel(0x71, to);
}
|
||||
// ------------------------------------------------------------------------
// Legacy rel32 conditional jump wrappers. Each emits its 0F 80+cc opcode via
// J32Rel and returns a pointer to the 32-bit displacement for later patching.
// Note the intentional aliases: JZ32==JE32, JNZ32==JNE32, JNG32==JLE32,
// JNGE32==JL32, JNL32==JGE32, JNLE32==JG32.
// ------------------------------------------------------------------------

// jb rel32
emitterT u32* JB32(u32 to)
{
	return J32Rel(0x82, to);
}

/* je rel32 */
emitterT u32* JE32(u32 to)
{
	return J32Rel(0x84, to);
}

/* jz rel32 */
emitterT u32* JZ32(u32 to)
{
	return J32Rel(0x84, to);
}

/* js rel32 */
emitterT u32* JS32(u32 to)
{
	return J32Rel(0x88, to);
}

/* jns rel32 */
emitterT u32* JNS32(u32 to)
{
	return J32Rel(0x89, to);
}

/* jg rel32 */
emitterT u32* JG32(u32 to)
{
	return J32Rel(0x8F, to);
}

/* jge rel32 */
emitterT u32* JGE32(u32 to)
{
	return J32Rel(0x8D, to);
}

/* jl rel32 */
emitterT u32* JL32(u32 to)
{
	return J32Rel(0x8C, to);
}

/* jle rel32 */
emitterT u32* JLE32(u32 to)
{
	return J32Rel(0x8E, to);
}

/* ja rel32 */
emitterT u32* JA32(u32 to)
{
	return J32Rel(0x87, to);
}

/* jae rel32 */
emitterT u32* JAE32(u32 to)
{
	return J32Rel(0x83, to);
}

/* jne rel32 */
emitterT u32* JNE32(u32 to)
{
	return J32Rel(0x85, to);
}

/* jnz rel32 */
emitterT u32* JNZ32(u32 to)
{
	return J32Rel(0x85, to);
}

/* jng rel32 */
emitterT u32* JNG32(u32 to)
{
	return J32Rel(0x8E, to);
}

/* jnge rel32 */
emitterT u32* JNGE32(u32 to)
{
	return J32Rel(0x8C, to);
}

/* jnl rel32 */
emitterT u32* JNL32(u32 to)
{
	return J32Rel(0x8D, to);
}

/* jnle rel32 */
emitterT u32* JNLE32(u32 to)
{
	return J32Rel(0x8F, to);
}

/* jo rel32 */
emitterT u32* JO32(u32 to)
{
	return J32Rel(0x80, to);
}

/* jno rel32 */
emitterT u32* JNO32(u32 to)
{
	return J32Rel(0x81, to);
}
|
||||
186
common/emitter/legacy_instructions.h
Normal file
186
common/emitter/legacy_instructions.h
Normal file
@@ -0,0 +1,186 @@
|
||||
// SPDX-FileCopyrightText: 2002-2025 PCSX2 Dev Team
|
||||
// SPDX-License-Identifier: GPL-3.0+
|
||||
|
||||
#pragma once
|
||||
|
||||
//#define SPAM_DEPRECATION_WARNINGS
|
||||
#if defined(__linux__) && defined(__clang__) && defined(SPAM_DEPRECATION_WARNINGS)
|
||||
#define ATTR_DEP [[deprecated]]
|
||||
#else
|
||||
#define ATTR_DEP
|
||||
#endif
|
||||
|
||||
#ifdef FSCALE
|
||||
# undef FSCALE // Defined in a macOS header
|
||||
#endif
|
||||
|
||||
//------------------------------------------------------------------
|
||||
// legacy jump/align functions
|
||||
//------------------------------------------------------------------
|
||||
ATTR_DEP extern void x86SetJ8(u8* j8);
|
||||
ATTR_DEP extern void x86SetJ8A(u8* j8);
|
||||
ATTR_DEP extern void x86SetJ16(u16* j16);
|
||||
ATTR_DEP extern void x86SetJ16A(u16* j16);
|
||||
ATTR_DEP extern void x86SetJ32(u32* j32);
|
||||
ATTR_DEP extern void x86SetJ32A(u32* j32);
|
||||
//------------------------------------------------------------------
|
||||
|
||||
////////////////////////////////////
|
||||
// jump instructions //
|
||||
////////////////////////////////////
|
||||
|
||||
// jmp rel8
|
||||
ATTR_DEP extern u8* JMP8(u8 to);
|
||||
|
||||
// jmp rel32
|
||||
ATTR_DEP extern u32* JMP32(uptr to);
|
||||
|
||||
// jp rel8
|
||||
ATTR_DEP extern u8* JP8(u8 to);
|
||||
// jnp rel8
|
||||
ATTR_DEP extern u8* JNP8(u8 to);
|
||||
// je rel8
|
||||
ATTR_DEP extern u8* JE8(u8 to);
|
||||
// jz rel8
|
||||
ATTR_DEP extern u8* JZ8(u8 to);
|
||||
// jg rel8
|
||||
ATTR_DEP extern u8* JG8(u8 to);
|
||||
// jge rel8
|
||||
ATTR_DEP extern u8* JGE8(u8 to);
|
||||
// js rel8
|
||||
ATTR_DEP extern u8* JS8(u8 to);
|
||||
// jns rel8
|
||||
ATTR_DEP extern u8* JNS8(u8 to);
|
||||
// jl rel8
|
||||
ATTR_DEP extern u8* JL8(u8 to);
|
||||
// ja rel8
|
||||
ATTR_DEP extern u8* JA8(u8 to);
|
||||
// jae rel8
|
||||
ATTR_DEP extern u8* JAE8(u8 to);
|
||||
// jb rel8
|
||||
ATTR_DEP extern u8* JB8(u8 to);
|
||||
// jbe rel8
|
||||
ATTR_DEP extern u8* JBE8(u8 to);
|
||||
// jle rel8
|
||||
ATTR_DEP extern u8* JLE8(u8 to);
|
||||
// jne rel8
|
||||
ATTR_DEP extern u8* JNE8(u8 to);
|
||||
// jnz rel8
|
||||
ATTR_DEP extern u8* JNZ8(u8 to);
|
||||
// jng rel8
|
||||
ATTR_DEP extern u8* JNG8(u8 to);
|
||||
// jnge rel8
|
||||
ATTR_DEP extern u8* JNGE8(u8 to);
|
||||
// jnl rel8
|
||||
ATTR_DEP extern u8* JNL8(u8 to);
|
||||
// jnle rel8
|
||||
ATTR_DEP extern u8* JNLE8(u8 to);
|
||||
// jo rel8
|
||||
ATTR_DEP extern u8* JO8(u8 to);
|
||||
// jno rel8
|
||||
ATTR_DEP extern u8* JNO8(u8 to);
|
||||
|
||||
/*
|
||||
// jb rel16
|
||||
ATTR_DEP extern u16* JA16( u16 to );
|
||||
// jb rel16
|
||||
ATTR_DEP extern u16* JB16( u16 to );
|
||||
// je rel16
|
||||
ATTR_DEP extern u16* JE16( u16 to );
|
||||
// jz rel16
|
||||
ATTR_DEP extern u16* JZ16( u16 to );
|
||||
*/
|
||||
|
||||
// jns rel32
|
||||
ATTR_DEP extern u32* JNS32(u32 to);
|
||||
// js rel32
|
||||
ATTR_DEP extern u32* JS32(u32 to);
|
||||
|
||||
// jb rel32
|
||||
ATTR_DEP extern u32* JB32(u32 to);
|
||||
// je rel32
|
||||
ATTR_DEP extern u32* JE32(u32 to);
|
||||
// jz rel32
|
||||
ATTR_DEP extern u32* JZ32(u32 to);
|
||||
// jg rel32
|
||||
ATTR_DEP extern u32* JG32(u32 to);
|
||||
// jge rel32
|
||||
ATTR_DEP extern u32* JGE32(u32 to);
|
||||
// jl rel32
|
||||
ATTR_DEP extern u32* JL32(u32 to);
|
||||
// jle rel32
|
||||
ATTR_DEP extern u32* JLE32(u32 to);
|
||||
// jae rel32
|
||||
ATTR_DEP extern u32* JAE32(u32 to);
|
||||
// jne rel32
|
||||
ATTR_DEP extern u32* JNE32(u32 to);
|
||||
// jnz rel32
|
||||
ATTR_DEP extern u32* JNZ32(u32 to);
|
||||
// jng rel32
|
||||
ATTR_DEP extern u32* JNG32(u32 to);
|
||||
// jnge rel32
|
||||
ATTR_DEP extern u32* JNGE32(u32 to);
|
||||
// jnl rel32
|
||||
ATTR_DEP extern u32* JNL32(u32 to);
|
||||
// jnle rel32
|
||||
ATTR_DEP extern u32* JNLE32(u32 to);
|
||||
// jo rel32
|
||||
ATTR_DEP extern u32* JO32(u32 to);
|
||||
// jno rel32
|
||||
ATTR_DEP extern u32* JNO32(u32 to);
|
||||
// js rel32
|
||||
ATTR_DEP extern u32* JS32(u32 to);
|
||||
|
||||
//******************
|
||||
// FPU instructions
|
||||
//******************
|
||||
|
||||
// fld m32 to fpu reg stack
|
||||
ATTR_DEP extern void FLD32(u32 from);
|
||||
// fld st(i)
|
||||
ATTR_DEP extern void FLD(int st);
|
||||
// fld1 (push +1.0f on the stack)
|
||||
ATTR_DEP extern void FLD1();
|
||||
// fld1 (push log_2 e on the stack)
|
||||
ATTR_DEP extern void FLDL2E();
|
||||
// fstp m32 from fpu reg stack
|
||||
ATTR_DEP extern void FSTP32(u32 to);
|
||||
// fstp st(i)
|
||||
ATTR_DEP extern void FSTP(int st);
|
||||
|
||||
// frndint
|
||||
ATTR_DEP extern void FRNDINT();
|
||||
ATTR_DEP extern void FXCH(int st);
|
||||
ATTR_DEP extern void F2XM1();
|
||||
ATTR_DEP extern void FSCALE();
|
||||
|
||||
// fadd ST(0) to fpu reg stack ST(src)
|
||||
ATTR_DEP extern void FADD320toR(x86IntRegType src);
|
||||
// fsub ST(src) to fpu reg stack ST(0)
|
||||
ATTR_DEP extern void FSUB32Rto0(x86IntRegType src);
|
||||
|
||||
// fmul m32 to fpu reg stack
|
||||
ATTR_DEP extern void FMUL32(u32 from);
|
||||
// fdiv m32 to fpu reg stack
|
||||
ATTR_DEP extern void FDIV32(u32 from);
|
||||
// ftan fpu reg stack
|
||||
ATTR_DEP extern void FPATAN(void);
|
||||
// fsin fpu reg stack
|
||||
ATTR_DEP extern void FSIN(void);
|
||||
|
||||
//*********************
|
||||
// SSE instructions *
|
||||
//*********************
|
||||
ATTR_DEP extern void SSE_MAXSS_XMM_to_XMM(x86SSERegType to, x86SSERegType from);
|
||||
ATTR_DEP extern void SSE_MINSS_XMM_to_XMM(x86SSERegType to, x86SSERegType from);
|
||||
ATTR_DEP extern void SSE_ADDSS_XMM_to_XMM(x86SSERegType to, x86SSERegType from);
|
||||
ATTR_DEP extern void SSE_SUBSS_XMM_to_XMM(x86SSERegType to, x86SSERegType from);
|
||||
|
||||
//*********************
|
||||
// SSE 2 Instructions*
|
||||
//*********************
|
||||
|
||||
ATTR_DEP extern void SSE2_MAXSD_XMM_to_XMM(x86SSERegType to, x86SSERegType from);
|
||||
ATTR_DEP extern void SSE2_MINSD_XMM_to_XMM(x86SSERegType to, x86SSERegType from);
|
||||
ATTR_DEP extern void SSE2_ADDSD_XMM_to_XMM(x86SSERegType to, x86SSERegType from);
|
||||
ATTR_DEP extern void SSE2_SUBSD_XMM_to_XMM(x86SSERegType to, x86SSERegType from);
|
||||
28
common/emitter/legacy_internal.h
Normal file
28
common/emitter/legacy_internal.h
Normal file
@@ -0,0 +1,28 @@
|
||||
// SPDX-FileCopyrightText: 2002-2025 PCSX2 Dev Team
|
||||
// SPDX-License-Identifier: GPL-3.0+
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "common/emitter/internal.h"
|
||||
|
||||
//------------------------------------------------------------------
|
||||
// Legacy Helper Macros and Functions (depreciated)
|
||||
//------------------------------------------------------------------
|
||||
|
||||
#define emitterT __fi
|
||||
|
||||
using x86Emitter::xWrite8;
|
||||
using x86Emitter::xWrite16;
|
||||
using x86Emitter::xWrite32;
|
||||
using x86Emitter::xWrite64;
|
||||
|
||||
#include "common/emitter/legacy_types.h"
|
||||
#include "common/emitter/legacy_instructions.h"
|
||||
|
||||
#define MEMADDR(addr, oplen) (addr)
|
||||
|
||||
extern void ModRM(uint mod, uint reg, uint rm);
|
||||
extern void SibSB(uint ss, uint index, uint base);
|
||||
extern void SET8R(int cc, int to);
|
||||
extern u8* J8Rel(int cc, int to);
|
||||
extern u32* J32Rel(int cc, u32 to);
|
||||
20
common/emitter/legacy_sse.cpp
Normal file
20
common/emitter/legacy_sse.cpp
Normal file
@@ -0,0 +1,20 @@
|
||||
// SPDX-FileCopyrightText: 2002-2025 PCSX2 Dev Team
|
||||
// SPDX-License-Identifier: GPL-3.0+
|
||||
|
||||
#include "common/emitter/legacy_internal.h"
|
||||
|
||||
using namespace x86Emitter;
|
||||
|
||||
// ------------------------------------------------------------------------
|
||||
// Begin SSE-Only Part!
|
||||
// ------------------------------------------------------------------------
|
||||
|
||||
// Generates legacy-style scalar SSE wrappers (SS = scalar single, SD = scalar
// double) that forward to the corresponding new-emitter xADD/xSUB/xMIN/xMAX
// implementations.
#define DEFINE_LEGACY_SSSD_OPCODE(mod) \
	emitterT void SSE_##mod##SS_XMM_to_XMM(x86SSERegType to, x86SSERegType from) { x##mod.SS(xRegisterSSE(to), xRegisterSSE(from)); } \
	emitterT void SSE2_##mod##SD_XMM_to_XMM(x86SSERegType to, x86SSERegType from) { x##mod.SD(xRegisterSSE(to), xRegisterSSE(from)); }

DEFINE_LEGACY_SSSD_OPCODE(SUB)
DEFINE_LEGACY_SSSD_OPCODE(ADD)

DEFINE_LEGACY_SSSD_OPCODE(MIN)
DEFINE_LEGACY_SSSD_OPCODE(MAX)
|
||||
12
common/emitter/legacy_types.h
Normal file
12
common/emitter/legacy_types.h
Normal file
@@ -0,0 +1,12 @@
|
||||
// SPDX-FileCopyrightText: 2002-2025 PCSX2 Dev Team
|
||||
// SPDX-License-Identifier: GPL-3.0+
|
||||
|
||||
#pragma once
|
||||
|
||||
//#define SIB 4 // maps to ESP
|
||||
//#define SIBDISP 5 // maps to EBP
|
||||
#define DISP32 5 // maps to EBP
|
||||
|
||||
// general types
|
||||
typedef int x86IntRegType;
|
||||
typedef int x86SSERegType;
|
||||
276
common/emitter/movs.cpp
Normal file
276
common/emitter/movs.cpp
Normal file
@@ -0,0 +1,276 @@
|
||||
// SPDX-FileCopyrightText: 2002-2025 PCSX2 Dev Team
|
||||
// SPDX-License-Identifier: GPL-3.0+
|
||||
|
||||
/*
|
||||
* ix86 core v0.9.1
|
||||
*
|
||||
* Original Authors (v0.6.2 and prior):
|
||||
* linuzappz <linuzappz@pcsx.net>
|
||||
* alexey silinov
|
||||
* goldfinger
|
||||
* zerofrog(@gmail.com)
|
||||
*
|
||||
* Authors of v0.9.1:
|
||||
* Jake.Stine(@gmail.com)
|
||||
* cottonvibes(@gmail.com)
|
||||
* sudonim(1@gmail.com)
|
||||
*/
|
||||
|
||||
#include "common/emitter/internal.h"
|
||||
#include "common/emitter/implement/helpers.h"
|
||||
|
||||
namespace x86Emitter
|
||||
{
|
||||
|
||||
// Emits a register-to-register MOV (88/89 depending on operand width).
// Same-register moves are silently elided.
void _xMovRtoR(const xRegisterInt& to, const xRegisterInt& from)
{
	pxAssert(to.GetOperandSize() == from.GetOperandSize());

	if (to == from)
		return; // ignore redundant MOVs.

	xOpWrite(from.GetPrefix16(), from.Is8BitOp() ? 0x88 : 0x89, from, to);
}
|
||||
|
||||
// Register-to-register MOV; thin forwarder kept so all MOV forms route
// through the xMOV functor interface.
void xImpl_Mov::operator()(const xRegisterInt& to, const xRegisterInt& from) const
{
	_xMovRtoR(to, from);
}
|
||||
|
||||
// Register-to-memory MOV (88/89 depending on operand width).
void xImpl_Mov::operator()(const xIndirectVoid& dest, const xRegisterInt& from) const
{
	// mov eax has a special form when writing directly to a DISP32 address
	// (sans any register index/base registers).

	xOpWrite(from.GetPrefix16(), from.Is8BitOp() ? 0x88 : 0x89, from, dest);
}
|
||||
|
||||
// Memory-to-register MOV (8A/8B depending on operand width).
void xImpl_Mov::operator()(const xRegisterInt& to, const xIndirectVoid& src) const
{
	// mov eax has a special form when reading directly from a DISP32 address
	// (sans any register index/base registers).

	xOpWrite(to.GetPrefix16(), to.Is8BitOp() ? 0x8a : 0x8b, to, src);
}
|
||||
|
||||
// Immediate-to-memory MOV (C6/C7). Asserts that the immediate fits in the
// destination's operand size; 64-bit stores only accept sign-extended s32
// immediates (the x86 encoding has no mov m64, imm64 form).
void xImpl_Mov::operator()(const xIndirect64orLess& dest, sptr imm) const
{
	switch (dest.GetOperandSize())
	{
		case 1:
			pxAssertMsg(imm == (s8)imm || imm == (u8)imm, "Immediate won't fit!");
			break;
		case 2:
			pxAssertMsg(imm == (s16)imm || imm == (u16)imm, "Immediate won't fit!");
			break;
		case 4:
			pxAssertMsg(imm == (s32)imm || imm == (u32)imm, "Immediate won't fit!");
			break;
		case 8:
			pxAssertMsg(imm == (s32)imm, "Immediate won't fit in immediate slot, go through a register!");
			break;
		default:
			pxAssertMsg(0, "Bad indirect size!");
	}
	xOpWrite(dest.GetPrefix16(), dest.Is8BitOp() ? 0xc6 : 0xc7, 0, dest, dest.GetImmSize());
	dest.xWriteImm(imm);
}
|
||||
|
||||
// preserve_flags - set to true to disable optimizations which could alter the state of
// the flags (namely replacing mov reg,0 with xor).
void xImpl_Mov::operator()(const xRegisterInt& to, sptr imm, bool preserve_flags) const
{
	// Validate that the immediate fits the destination's operand size.
	switch (to.GetOperandSize())
	{
		case 1:
			pxAssertMsg(imm == (s8)imm || imm == (u8)imm, "Immediate won't fit!");
			break;
		case 2:
			pxAssertMsg(imm == (s16)imm || imm == (u16)imm, "Immediate won't fit!");
			break;
		case 4:
			pxAssertMsg(imm == (s32)imm || imm == (u32)imm, "Immediate won't fit!");
			break;
		case 8:
			pxAssertMsg(imm == (s32)imm || imm == (u32)imm, "Immediate won't fit in immediate slot, use mov64 or lea!");
			break;
		default:
			pxAssertMsg(0, "Bad indirect size!");
	}
	const xRegisterInt& to_ = to.GetNonWide();
	if (!preserve_flags && (imm == 0))
	{
		// xor reg,reg is a smaller encoding for zeroing (but clobbers flags).
		_g1_EmitOp(G1Type_XOR, to_, to_);
	}
	else if (imm == (sptr)(u32)imm || !to.IsWide())
	{
		// Note: MOV does not have (reg16/32,imm8) forms.
		// B0+r / B8+r: mov reg, imm — writing to the 32-bit register
		// zero-extends, so the non-wide form suffices for u32-range values.
		u8 opcode = (to_.Is8BitOp() ? 0xb0 : 0xb8) | to_.Id;
		xOpAccWrite(to_.GetPrefix16(), opcode, 0, to_);
		to_.xWriteImm(imm);
	}
	else
	{
		// C7 /0: mov reg64, sign-extended imm32.
		xOpWrite(to.GetPrefix16(), 0xc7, 0, to);
		to.xWriteImm(imm);
	}
}
|
||||
|
||||
const xImpl_Mov xMOV;
|
||||
|
||||
// Loads a full 64-bit immediate into a register. Uses the shorter 32-bit MOV
// forms when the value fits; otherwise emits the 10-byte movabs (B8+r imm64).
void xImpl_MovImm64::operator()(const xRegister64& to, s64 imm, bool preserve_flags) const
{
	if (imm == (u32)imm || imm == (s32)imm)
	{
		xMOV(to, imm, preserve_flags);
	}
	else
	{
		u8 opcode = 0xb8 | to.Id;
		xOpAccWrite(to.GetPrefix16(), opcode, 0, to);
		xWrite64(imm);
	}
}
|
||||
|
||||
const xImpl_MovImm64 xMOV64;
|
||||
|
||||
	// --------------------------------------------------------------------------------------
	//  CMOVcc
	// --------------------------------------------------------------------------------------

// Sanity check: condition codes are a 4-bit field OR'd into the opcode (0x0-0xf).
#define ccSane() pxAssertMsg(ccType >= 0 && ccType <= 0x0f, "Invalid comparison type specifier.")

// Macro useful for trapping unwanted use of EBP.
//#define EbpAssert() pxAssert( to != ebp )
#define EbpAssert()
|
||||
|
||||
|
||||
|
||||
	// CMOVcc reg,reg -- conditional move between same-sized integer registers (0F 40+cc /r).
	void xImpl_CMov::operator()(const xRegister16or32or64& to, const xRegister16or32or64& from) const
	{
		pxAssert(to->GetOperandSize() == from->GetOperandSize());
		ccSane();
		xOpWrite0F(to->GetPrefix16(), 0x40 | ccType, to, from);
	}

	// CMOVcc reg,[mem] -- conditional load form.
	void xImpl_CMov::operator()(const xRegister16or32or64& to, const xIndirectVoid& sibsrc) const
	{
		ccSane();
		xOpWrite0F(to->GetPrefix16(), 0x40 | ccType, to, sibsrc);
	}

	//void xImpl_CMov::operator()( const xDirectOrIndirect32& to, const xDirectOrIndirect32& from ) const { ccSane(); _DoI_helpermess( *this, to, from ); }
	//void xImpl_CMov::operator()( const xDirectOrIndirect16& to, const xDirectOrIndirect16& from ) const { ccSane(); _DoI_helpermess( *this, to, from ); }

	// SETcc r8 -- writes 0/1 to an 8-bit register based on the condition (0F 90+cc).
	void xImpl_Set::operator()(const xRegister8& to) const
	{
		ccSane();
		xOpWrite0F(0x90 | ccType, 0, to);
	}

	// SETcc m8 -- memory-destination form.
	void xImpl_Set::operator()(const xIndirect8& dest) const
	{
		ccSane();
		xOpWrite0F(0x90 | ccType, 0, dest);
	}
	//void xImpl_Set::operator()( const xDirectOrIndirect8& dest ) const { ccSane(); _DoI_helpermess( *this, dest ); }

	// MOVSX/MOVZX reg,reg8 -- 0F BE (sign) / 0F B6 (zero); 0x66 prefix selects 16-bit dest.
	void xImpl_MovExtend::operator()(const xRegister16or32or64& to, const xRegister8& from) const
	{
		EbpAssert();
		xOpWrite0F(
			(to->GetOperandSize() == 2) ? 0x66 : 0,
			SignExtend ? 0xbe : 0xb6,
			to, from);
	}

	// MOVSX/MOVZX reg,m8.
	void xImpl_MovExtend::operator()(const xRegister16or32or64& to, const xIndirect8& sibsrc) const
	{
		EbpAssert();
		xOpWrite0F(
			(to->GetOperandSize() == 2) ? 0x66 : 0,
			SignExtend ? 0xbe : 0xb6,
			to, sibsrc);
	}

	// MOVSX/MOVZX reg,reg16 -- 0F BF / 0F B7.
	void xImpl_MovExtend::operator()(const xRegister32or64& to, const xRegister16& from) const
	{
		EbpAssert();
		xOpWrite0F(SignExtend ? 0xbf : 0xb7, to, from);
	}

	// MOVSX/MOVZX reg,m16.
	void xImpl_MovExtend::operator()(const xRegister32or64& to, const xIndirect16& sibsrc) const
	{
		EbpAssert();
		xOpWrite0F(SignExtend ? 0xbf : 0xb7, to, sibsrc);
	}

	// MOVSXD r64,r32 (opcode 0x63). There is no 64-bit movzx: a plain 32-bit mov
	// already zero-extends, hence the assertion below.
	void xImpl_MovExtend::operator()(const xRegister64& to, const xRegister32& from) const
	{
		EbpAssert();
		pxAssertMsg(SignExtend, "Use mov for 64-bit movzx");
		xOpWrite(0, 0x63, to, from);
	}

	// MOVSXD r64,m32.
	void xImpl_MovExtend::operator()(const xRegister64& to, const xIndirect32& sibsrc) const
	{
		EbpAssert();
		pxAssertMsg(SignExtend, "Use mov for 64-bit movzx");
		xOpWrite(0, 0x63, to, sibsrc);
	}
|
||||
|
||||
	// Sign- vs zero-extending move instances.
	const xImpl_MovExtend xMOVSX = {true};
	const xImpl_MovExtend xMOVZX = {false};

	// CMOVcc instances, one per condition code.
	const xImpl_CMov xCMOVA = {Jcc_Above};
	const xImpl_CMov xCMOVAE = {Jcc_AboveOrEqual};
	const xImpl_CMov xCMOVB = {Jcc_Below};
	const xImpl_CMov xCMOVBE = {Jcc_BelowOrEqual};

	const xImpl_CMov xCMOVG = {Jcc_Greater};
	const xImpl_CMov xCMOVGE = {Jcc_GreaterOrEqual};
	const xImpl_CMov xCMOVL = {Jcc_Less};
	const xImpl_CMov xCMOVLE = {Jcc_LessOrEqual};

	const xImpl_CMov xCMOVZ = {Jcc_Zero};
	const xImpl_CMov xCMOVE = {Jcc_Equal};
	const xImpl_CMov xCMOVNZ = {Jcc_NotZero};
	const xImpl_CMov xCMOVNE = {Jcc_NotEqual};

	const xImpl_CMov xCMOVO = {Jcc_Overflow};
	const xImpl_CMov xCMOVNO = {Jcc_NotOverflow};
	const xImpl_CMov xCMOVC = {Jcc_Carry};
	const xImpl_CMov xCMOVNC = {Jcc_NotCarry};

	const xImpl_CMov xCMOVS = {Jcc_Signed};
	const xImpl_CMov xCMOVNS = {Jcc_Unsigned};
	const xImpl_CMov xCMOVPE = {Jcc_ParityEven};
	const xImpl_CMov xCMOVPO = {Jcc_ParityOdd};


	// SETcc instances, one per condition code.
	const xImpl_Set xSETA = {Jcc_Above};
	const xImpl_Set xSETAE = {Jcc_AboveOrEqual};
	const xImpl_Set xSETB = {Jcc_Below};
	const xImpl_Set xSETBE = {Jcc_BelowOrEqual};

	const xImpl_Set xSETG = {Jcc_Greater};
	const xImpl_Set xSETGE = {Jcc_GreaterOrEqual};
	const xImpl_Set xSETL = {Jcc_Less};
	const xImpl_Set xSETLE = {Jcc_LessOrEqual};

	const xImpl_Set xSETZ = {Jcc_Zero};
	const xImpl_Set xSETE = {Jcc_Equal};
	const xImpl_Set xSETNZ = {Jcc_NotZero};
	const xImpl_Set xSETNE = {Jcc_NotEqual};

	const xImpl_Set xSETO = {Jcc_Overflow};
	const xImpl_Set xSETNO = {Jcc_NotOverflow};
	const xImpl_Set xSETC = {Jcc_Carry};
	const xImpl_Set xSETNC = {Jcc_NotCarry};

	const xImpl_Set xSETS = {Jcc_Signed};
	const xImpl_Set xSETNS = {Jcc_Unsigned};
	const xImpl_Set xSETPE = {Jcc_ParityEven};
	const xImpl_Set xSETPO = {Jcc_ParityOdd};

} // end namespace x86Emitter
|
||||
732
common/emitter/simd.cpp
Normal file
732
common/emitter/simd.cpp
Normal file
@@ -0,0 +1,732 @@
|
||||
// SPDX-FileCopyrightText: 2002-2025 PCSX2 Dev Team
|
||||
// SPDX-License-Identifier: GPL-3.0+
|
||||
|
||||
#include "common/emitter/internal.h"
|
||||
#include "common/VectorIntrin.h"
|
||||
|
||||
namespace x86Emitter
|
||||
{
|
||||
|
||||
// ------------------------------------------------------------------------
|
||||
// SimdPrefix - If the lower byte of the opcode is 0x38 or 0x3a, then the opcode is
|
||||
// treated as a 16 bit value (in SSE 0x38 and 0x3a denote prefixes for extended SSE3/4
|
||||
// instructions). Any other lower value assumes the upper value is 0 and ignored.
|
||||
// Non-zero upper bytes, when the lower byte is not the 0x38 or 0x3a prefix, will
|
||||
// generate an assertion.
|
||||
//
|
||||
__emitinline void SimdPrefix(u8 prefix, u16 opcode)
|
||||
{
|
||||
pxAssertMsg(prefix == 0, "REX prefix must be just before the opcode");
|
||||
|
||||
const bool is16BitOpcode = ((opcode & 0xff) == 0x38) || ((opcode & 0xff) == 0x3a);
|
||||
|
||||
// If the lower byte is not a valid prefix and the upper byte is non-zero it
|
||||
// means we made a mistake!
|
||||
if (!is16BitOpcode)
|
||||
pxAssert((opcode >> 8) == 0);
|
||||
|
||||
if (prefix != 0)
|
||||
{
|
||||
if (is16BitOpcode)
|
||||
xWrite32((opcode << 16) | 0x0f00 | prefix);
|
||||
else
|
||||
{
|
||||
xWrite16(0x0f00 | prefix);
|
||||
xWrite8(opcode);
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
if (is16BitOpcode)
|
||||
{
|
||||
xWrite8(0x0f);
|
||||
xWrite16(opcode);
|
||||
}
|
||||
else
|
||||
xWrite16((opcode << 8) | 0x0f);
|
||||
}
|
||||
}
|
||||
|
||||
	// Packed 128-bit logical ops.
	const xImplSimd_DestRegEither xPAND = {0x66, 0xdb};
	const xImplSimd_DestRegEither xPANDN = {0x66, 0xdf};
	const xImplSimd_DestRegEither xPOR = {0x66, 0xeb};
	const xImplSimd_DestRegEither xPXOR = {0x66, 0xef};

	// [SSE-4.1] Performs a bitwise AND of dest against src, and sets the ZF flag
	// only if all bits in the result are 0. PTEST also sets the CF flag according
	// to the following condition: (xmm2/m128 AND NOT xmm1) == 0;
	const xImplSimd_DestRegSSE xPTEST = {0x66, 0x1738};

	// =====================================================================================================
	//  SSE Conversion Operations, as looney as they are.
	// =====================================================================================================
	// These enforce pointer strictness for Indirect forms, due to the otherwise completely confusing
	// nature of the functions. (so if a function expects an m32, you must use (u32*) or ptr32[]).
	//
	// Note: the OpWriteSSE macro implicitly references the enclosing 'to'/'from' parameters.

	__fi void xCVTDQ2PD(const xRegisterSSE& to, const xRegisterSSE& from) { OpWriteSSE(0xf3, 0xe6); }
	__fi void xCVTDQ2PD(const xRegisterSSE& to, const xIndirect64& from) { OpWriteSSE(0xf3, 0xe6); }
	__fi void xCVTDQ2PS(const xRegisterSSE& to, const xRegisterSSE& from) { OpWriteSSE(0x00, 0x5b); }
	__fi void xCVTDQ2PS(const xRegisterSSE& to, const xIndirect128& from) { OpWriteSSE(0x00, 0x5b); }

	__fi void xCVTPD2DQ(const xRegisterSSE& to, const xRegisterSSE& from) { OpWriteSSE(0xf2, 0xe6); }
	__fi void xCVTPD2DQ(const xRegisterSSE& to, const xIndirect128& from) { OpWriteSSE(0xf2, 0xe6); }
	__fi void xCVTPD2PS(const xRegisterSSE& to, const xRegisterSSE& from) { OpWriteSSE(0x66, 0x5a); }
	__fi void xCVTPD2PS(const xRegisterSSE& to, const xIndirect128& from) { OpWriteSSE(0x66, 0x5a); }

	__fi void xCVTPI2PD(const xRegisterSSE& to, const xIndirect64& from) { OpWriteSSE(0x66, 0x2a); }
	__fi void xCVTPI2PS(const xRegisterSSE& to, const xIndirect64& from) { OpWriteSSE(0x00, 0x2a); }

	__fi void xCVTPS2DQ(const xRegisterSSE& to, const xRegisterSSE& from) { OpWriteSSE(0x66, 0x5b); }
	__fi void xCVTPS2DQ(const xRegisterSSE& to, const xIndirect128& from) { OpWriteSSE(0x66, 0x5b); }
	__fi void xCVTPS2PD(const xRegisterSSE& to, const xRegisterSSE& from) { OpWriteSSE(0x00, 0x5a); }
	__fi void xCVTPS2PD(const xRegisterSSE& to, const xIndirect64& from) { OpWriteSSE(0x00, 0x5a); }

	__fi void xCVTSD2SI(const xRegister32or64& to, const xRegisterSSE& from) { OpWriteSSE(0xf2, 0x2d); }
	__fi void xCVTSD2SI(const xRegister32or64& to, const xIndirect64& from) { OpWriteSSE(0xf2, 0x2d); }
	__fi void xCVTSD2SS(const xRegisterSSE& to, const xRegisterSSE& from) { OpWriteSSE(0xf2, 0x5a); }
	__fi void xCVTSD2SS(const xRegisterSSE& to, const xIndirect64& from) { OpWriteSSE(0xf2, 0x5a); }
	__fi void xCVTSI2SS(const xRegisterSSE& to, const xRegister32or64& from) { OpWriteSSE(0xf3, 0x2a); }
	__fi void xCVTSI2SS(const xRegisterSSE& to, const xIndirect32& from) { OpWriteSSE(0xf3, 0x2a); }

	__fi void xCVTSS2SD(const xRegisterSSE& to, const xRegisterSSE& from) { OpWriteSSE(0xf3, 0x5a); }
	__fi void xCVTSS2SD(const xRegisterSSE& to, const xIndirect32& from) { OpWriteSSE(0xf3, 0x5a); }
	__fi void xCVTSS2SI(const xRegister32or64& to, const xRegisterSSE& from) { OpWriteSSE(0xf3, 0x2d); }
	__fi void xCVTSS2SI(const xRegister32or64& to, const xIndirect32& from) { OpWriteSSE(0xf3, 0x2d); }

	// Truncating (round-toward-zero) conversions.
	__fi void xCVTTPD2DQ(const xRegisterSSE& to, const xRegisterSSE& from) { OpWriteSSE(0x66, 0xe6); }
	__fi void xCVTTPD2DQ(const xRegisterSSE& to, const xIndirect128& from) { OpWriteSSE(0x66, 0xe6); }
	__fi void xCVTTPS2DQ(const xRegisterSSE& to, const xRegisterSSE& from) { OpWriteSSE(0xf3, 0x5b); }
	__fi void xCVTTPS2DQ(const xRegisterSSE& to, const xIndirect128& from) { OpWriteSSE(0xf3, 0x5b); }

	__fi void xCVTTSD2SI(const xRegister32or64& to, const xRegisterSSE& from) { OpWriteSSE(0xf2, 0x2c); }
	__fi void xCVTTSD2SI(const xRegister32or64& to, const xIndirect64& from) { OpWriteSSE(0xf2, 0x2c); }
	__fi void xCVTTSS2SI(const xRegister32or64& to, const xRegisterSSE& from) { OpWriteSSE(0xf3, 0x2c); }
	__fi void xCVTTSS2SI(const xRegister32or64& to, const xIndirect32& from) { OpWriteSSE(0xf3, 0x2c); }
|
||||
|
||||
|
||||
	// ------------------------------------------------------------------------
	// Generic two-operand SSE forwarders: each instance carries its Prefix/Opcode pair.

	void xImplSimd_DestRegSSE::operator()(const xRegisterSSE& to, const xRegisterSSE& from) const { OpWriteSSE(Prefix, Opcode); }
	void xImplSimd_DestRegSSE::operator()(const xRegisterSSE& to, const xIndirectVoid& from) const { OpWriteSSE(Prefix, Opcode); }

	// Same, but with a trailing imm8 operand.
	void xImplSimd_DestRegImmSSE::operator()(const xRegisterSSE& to, const xRegisterSSE& from, u8 imm) const { xOpWrite0F(Prefix, Opcode, to, from, imm); }
	void xImplSimd_DestRegImmSSE::operator()(const xRegisterSSE& to, const xIndirectVoid& from, u8 imm) const { xOpWrite0F(Prefix, Opcode, to, from, imm); }


	void xImplSimd_DestRegEither::operator()(const xRegisterSSE& to, const xRegisterSSE& from) const { OpWriteSSE(Prefix, Opcode); }
	void xImplSimd_DestRegEither::operator()(const xRegisterSSE& to, const xIndirectVoid& from) const { OpWriteSSE(Prefix, Opcode); }

	// Comparison forms where the trailing imm8 is an SSE2 comparison predicate.
	void xImplSimd_DestSSE_CmpImm::operator()(const xRegisterSSE& to, const xRegisterSSE& from, SSE2_ComparisonType imm) const { xOpWrite0F(Prefix, Opcode, to, from, imm); }
	void xImplSimd_DestSSE_CmpImm::operator()(const xRegisterSSE& to, const xIndirectVoid& from, SSE2_ComparisonType imm) const { xOpWrite0F(Prefix, Opcode, to, from, imm); }

	// =====================================================================================================
	//  SIMD Arithmetic Instructions
	// =====================================================================================================

	// Shift-by-register/memory forms.
	void _SimdShiftHelper::operator()(const xRegisterSSE& to, const xRegisterSSE& from) const { OpWriteSSE(Prefix, Opcode); }
	void _SimdShiftHelper::operator()(const xRegisterSSE& to, const xIndirectVoid& from) const { OpWriteSSE(Prefix, Opcode); }


	// Shift-by-immediate form: OpcodeImm uses Modcode as the ModRM /r opcode extension.
	void _SimdShiftHelper::operator()(const xRegisterSSE& to, u8 imm8) const
	{
		xOpWrite0F(0x66, OpcodeImm, (int)Modcode, to);
		xWrite8(imm8);
	}

	// PSLLDQ/PSRLDQ (whole-register byte shift): opcode 0x73 with the Q-form's
	// opcode extension bumped by one.
	void xImplSimd_Shift::DQ(const xRegisterSSE& to, u8 imm8) const
	{
		xOpWrite0F(0x66, 0x73, (int)Q.Modcode + 1, to, imm8);
	}
|
||||
|
||||
|
||||
	// Packed shift tables: {prefix, reg-form opcode, imm-form opcode, ModRM /r extension}.
	const xImplSimd_ShiftWithoutQ xPSRA =
		{
			{0x66, 0xe1, 0x71, 4}, // W
			{0x66, 0xe2, 0x72, 4} // D
	};

	const xImplSimd_Shift xPSRL =
		{
			{0x66, 0xd1, 0x71, 2}, // W
			{0x66, 0xd2, 0x72, 2}, // D
			{0x66, 0xd3, 0x73, 2}, // Q
	};

	const xImplSimd_Shift xPSLL =
		{
			{0x66, 0xf1, 0x71, 6}, // W
			{0x66, 0xf2, 0x72, 6}, // D
			{0x66, 0xf3, 0x73, 6}, // Q
	};

	// Packed add/sub: plain, signed-saturating (S*), and unsigned-saturating (US*) forms.
	const xImplSimd_AddSub xPADD =
		{
			{0x66, 0xdc + 0x20}, // B
			{0x66, 0xdc + 0x21}, // W
			{0x66, 0xdc + 0x22}, // D
			{0x66, 0xd4}, // Q

			{0x66, 0xdc + 0x10}, // SB
			{0x66, 0xdc + 0x11}, // SW
			{0x66, 0xdc}, // USB
			{0x66, 0xdc + 1}, // USW
	};

	const xImplSimd_AddSub xPSUB =
		{
			{0x66, 0xd8 + 0x20}, // B
			{0x66, 0xd8 + 0x21}, // W
			{0x66, 0xd8 + 0x22}, // D
			{0x66, 0xfb}, // Q

			{0x66, 0xd8 + 0x10}, // SB
			{0x66, 0xd8 + 0x11}, // SW
			{0x66, 0xd8}, // USB
			{0x66, 0xd8 + 1}, // USW
	};

	// Packed multiplies (16-bit opcodes are the 0x38-map SSSE3/SSE4.1 forms).
	const xImplSimd_PMul xPMUL =
		{
			{0x66, 0xd5}, // LW
			{0x66, 0xe5}, // HW
			{0x66, 0xe4}, // HUW
			{0x66, 0xf4}, // UDQ

			{0x66, 0x0b38}, // HRSW
			{0x66, 0x4038}, // LD
			{0x66, 0x2838}, // DQ
	};

	const xImplSimd_rSqrt xRSQRT =
		{
			{0x00, 0x52}, // PS
			{0xf3, 0x52} // SS
	};

	const xImplSimd_rSqrt xRCP =
		{
			{0x00, 0x53}, // PS
			{0xf3, 0x53} // SS
	};

	const xImplSimd_Sqrt xSQRT =
		{
			{0x00, 0x51}, // PS
			{0xf3, 0x51}, // SS
			{0xf2, 0x51} // SD (0xf2 prefix selects the scalar-double form)
	};

	const xImplSimd_AndNot xANDN =
		{
			{0x00, 0x55}, // PS
			{0x66, 0x55} // PD
	};

	// [SSSE3] packed absolute value.
	const xImplSimd_PAbsolute xPABS =
		{
			{0x66, 0x1c38}, // B
			{0x66, 0x1d38}, // W
			{0x66, 0x1e38} // D
	};

	// [SSSE3] packed sign-copy.
	const xImplSimd_PSign xPSIGN =
		{
			{0x66, 0x0838}, // B
			{0x66, 0x0938}, // W
			{0x66, 0x0a38}, // D
	};

	const xImplSimd_PMultAdd xPMADD =
		{
			{0x66, 0xf5}, // WD
			{0x66, 0xf438}, // UBSW
	};

	const xImplSimd_HorizAdd xHADD =
		{
			{0xf2, 0x7c}, // PS
			{0x66, 0x7c}, // PD
	};

	// [SSE-4.1] dot product (0x3a-map, takes an imm8 mask).
	const xImplSimd_DotProduct xDP =
		{
			{0x66, 0x403a}, // PS
			{0x66, 0x413a}, // PD
	};

	// [SSE-4.1] rounding (0x3a-map, takes an imm8 rounding mode).
	const xImplSimd_Round xROUND =
		{
			{0x66, 0x083a}, // PS
			{0x66, 0x093a}, // PD
			{0x66, 0x0a3a}, // SS
			{0x66, 0x0b3a}, // SD
	};
|
||||
|
||||
	// =====================================================================================================
	//  SIMD Comparison Instructions
	// =====================================================================================================
	// All four variants emit CMPxx (opcode 0xc2) with the instance's comparison
	// predicate appended as the imm8; only the mandatory prefix differs.

	void xImplSimd_Compare::PS(const xRegisterSSE& to, const xRegisterSSE& from) const { xOpWrite0F(0x00, 0xc2, to, from, (u8)CType); }
	void xImplSimd_Compare::PS(const xRegisterSSE& to, const xIndirectVoid& from) const { xOpWrite0F(0x00, 0xc2, to, from, (u8)CType); }

	void xImplSimd_Compare::PD(const xRegisterSSE& to, const xRegisterSSE& from) const { xOpWrite0F(0x66, 0xc2, to, from, (u8)CType); }
	void xImplSimd_Compare::PD(const xRegisterSSE& to, const xIndirectVoid& from) const { xOpWrite0F(0x66, 0xc2, to, from, (u8)CType); }

	void xImplSimd_Compare::SS(const xRegisterSSE& to, const xRegisterSSE& from) const { xOpWrite0F(0xf3, 0xc2, to, from, (u8)CType); }
	void xImplSimd_Compare::SS(const xRegisterSSE& to, const xIndirectVoid& from) const { xOpWrite0F(0xf3, 0xc2, to, from, (u8)CType); }

	void xImplSimd_Compare::SD(const xRegisterSSE& to, const xRegisterSSE& from) const { xOpWrite0F(0xf2, 0xc2, to, from, (u8)CType); }
	void xImplSimd_Compare::SD(const xRegisterSSE& to, const xIndirectVoid& from) const { xOpWrite0F(0xf2, 0xc2, to, from, (u8)CType); }
|
||||
|
||||
const xImplSimd_MinMax xMIN =
|
||||
{
|
||||
{0x00, 0x5d}, // PS
|
||||
{0x66, 0x5d}, // PD
|
||||
{0xf3, 0x5d}, // SS
|
||||
{0xf2, 0x5d}, // SD
|
||||
};
|
||||
|
||||
const xImplSimd_MinMax xMAX =
|
||||
{
|
||||
{0x00, 0x5f}, // PS
|
||||
{0x66, 0x5f}, // PD
|
||||
{0xf3, 0x5f}, // SS
|
||||
{0xf2, 0x5f}, // SD
|
||||
};
|
||||
|
||||
// [TODO] : Merge this into the xCMP class, so that they are notation as: xCMP.EQ
|
||||
|
||||
const xImplSimd_Compare xCMPEQ = {SSE2_Equal};
|
||||
const xImplSimd_Compare xCMPLT = {SSE2_Less};
|
||||
const xImplSimd_Compare xCMPLE = {SSE2_LessOrEqual};
|
||||
const xImplSimd_Compare xCMPUNORD = {SSE2_LessOrEqual};
|
||||
const xImplSimd_Compare xCMPNE = {SSE2_NotEqual};
|
||||
const xImplSimd_Compare xCMPNLT = {SSE2_NotLess};
|
||||
const xImplSimd_Compare xCMPNLE = {SSE2_NotLessOrEqual};
|
||||
const xImplSimd_Compare xCMPORD = {SSE2_Ordered};
|
||||
|
||||
const xImplSimd_COMI xCOMI =
|
||||
{
|
||||
{0x00, 0x2f}, // SS
|
||||
{0x66, 0x2f}, // SD
|
||||
};
|
||||
|
||||
const xImplSimd_COMI xUCOMI =
|
||||
{
|
||||
{0x00, 0x2e}, // SS
|
||||
{0x66, 0x2e}, // SD
|
||||
};
|
||||
|
||||
const xImplSimd_PCompare xPCMP =
|
||||
{
|
||||
{0x66, 0x74}, // EQB
|
||||
{0x66, 0x75}, // EQW
|
||||
{0x66, 0x76}, // EQD
|
||||
|
||||
{0x66, 0x64}, // GTB
|
||||
{0x66, 0x65}, // GTW
|
||||
{0x66, 0x66}, // GTD
|
||||
};
|
||||
|
||||
const xImplSimd_PMinMax xPMIN =
|
||||
{
|
||||
{0x66, 0xda}, // UB
|
||||
{0x66, 0xea}, // SW
|
||||
{0x66, 0x3838}, // SB
|
||||
{0x66, 0x3938}, // SD
|
||||
|
||||
{0x66, 0x3a38}, // UW
|
||||
{0x66, 0x3b38}, // UD
|
||||
};
|
||||
|
||||
const xImplSimd_PMinMax xPMAX =
|
||||
{
|
||||
{0x66, 0xde}, // UB
|
||||
{0x66, 0xee}, // SW
|
||||
{0x66, 0x3c38}, // SB
|
||||
{0x66, 0x3d38}, // SD
|
||||
|
||||
{0x66, 0x3e38}, // UW
|
||||
{0x66, 0x3f38}, // UD
|
||||
};
|
||||
|
||||
	// =====================================================================================================
	//  SIMD Shuffle/Pack (Shuffle puck?)
	// =====================================================================================================

	// SHUFPD only uses the low two selector bits; the rest are reserved.
	__fi void xImplSimd_Shuffle::_selector_assertion_check(u8 selector) const
	{
		pxAssertMsg((selector & ~3) == 0,
			"Invalid immediate operand on SSE Shuffle: Upper 6 bits of the SSE Shuffle-PD Selector are reserved and must be zero.");
	}

	// SHUFPS (0F C6 /r ib) -- full 8-bit selector.
	void xImplSimd_Shuffle::PS(const xRegisterSSE& to, const xRegisterSSE& from, u8 selector) const
	{
		xOpWrite0F(0xc6, to, from, selector);
	}

	void xImplSimd_Shuffle::PS(const xRegisterSSE& to, const xIndirectVoid& from, u8 selector) const
	{
		xOpWrite0F(0xc6, to, from, selector);
	}

	// SHUFPD (66 0F C6 /r ib) -- only the low 2 selector bits are meaningful.
	void xImplSimd_Shuffle::PD(const xRegisterSSE& to, const xRegisterSSE& from, u8 selector) const
	{
		_selector_assertion_check(selector);
		xOpWrite0F(0x66, 0xc6, to, from, selector & 0x3);
	}

	void xImplSimd_Shuffle::PD(const xRegisterSSE& to, const xIndirectVoid& from, u8 selector) const
	{
		_selector_assertion_check(selector);
		xOpWrite0F(0x66, 0xc6, to, from, selector & 0x3);
	}
|
||||
|
||||
void xImplSimd_PInsert::B(const xRegisterSSE& to, const xRegister32& from, u8 imm8) const { xOpWrite0F(0x66, 0x203a, to, from, imm8); }
|
||||
void xImplSimd_PInsert::B(const xRegisterSSE& to, const xIndirect32& from, u8 imm8) const { xOpWrite0F(0x66, 0x203a, to, from, imm8); }
|
||||
|
||||
void xImplSimd_PInsert::W(const xRegisterSSE& to, const xRegister32& from, u8 imm8) const { xOpWrite0F(0x66, 0xc4, to, from, imm8); }
|
||||
void xImplSimd_PInsert::W(const xRegisterSSE& to, const xIndirect32& from, u8 imm8) const { xOpWrite0F(0x66, 0xc4, to, from, imm8); }
|
||||
|
||||
void xImplSimd_PInsert::D(const xRegisterSSE& to, const xRegister32& from, u8 imm8) const { xOpWrite0F(0x66, 0x223a, to, from, imm8); }
|
||||
void xImplSimd_PInsert::D(const xRegisterSSE& to, const xIndirect32& from, u8 imm8) const { xOpWrite0F(0x66, 0x223a, to, from, imm8); }
|
||||
|
||||
void xImplSimd_PInsert::Q(const xRegisterSSE& to, const xRegister64& from, u8 imm8) const { xOpWrite0F(0x66, 0x223a, to, from, imm8); }
|
||||
void xImplSimd_PInsert::Q(const xRegisterSSE& to, const xIndirect64& from, u8 imm8) const { xOpWrite0F(0x66, 0x223a, to, from, imm8); }
|
||||
|
||||
void SimdImpl_PExtract::B(const xRegister32& to, const xRegisterSSE& from, u8 imm8) const { xOpWrite0F(0x66, 0x143a, from, to, imm8); }
|
||||
void SimdImpl_PExtract::B(const xIndirect32& dest, const xRegisterSSE& from, u8 imm8) const { xOpWrite0F(0x66, 0x143a, from, dest, imm8); }
|
||||
|
||||
void SimdImpl_PExtract::W(const xRegister32& to, const xRegisterSSE& from, u8 imm8) const { xOpWrite0F(0x66, 0xc5, from, to, imm8); }
|
||||
void SimdImpl_PExtract::W(const xIndirect32& dest, const xRegisterSSE& from, u8 imm8) const { xOpWrite0F(0x66, 0x153a, from, dest, imm8); }
|
||||
|
||||
void SimdImpl_PExtract::D(const xRegister32& to, const xRegisterSSE& from, u8 imm8) const { xOpWrite0F(0x66, 0x163a, from, to, imm8); }
|
||||
void SimdImpl_PExtract::D(const xIndirect32& dest, const xRegisterSSE& from, u8 imm8) const { xOpWrite0F(0x66, 0x163a, from, dest, imm8); }
|
||||
|
||||
void SimdImpl_PExtract::Q(const xRegister64& to, const xRegisterSSE& from, u8 imm8) const { xOpWrite0F(0x66, 0x163a, from, to, imm8); }
|
||||
void SimdImpl_PExtract::Q(const xIndirect64& dest, const xRegisterSSE& from, u8 imm8) const { xOpWrite0F(0x66, 0x163a, from, dest, imm8); }
|
||||
|
||||
	const xImplSimd_Shuffle xSHUF = {};

	// Packed integer shuffles (PSHUFB is the 0x38-map SSSE3 form).
	const xImplSimd_PShuffle xPSHUF =
		{
			{0x66, 0x70}, // D
			{0xf2, 0x70}, // LW
			{0xf3, 0x70}, // HW

			{0x66, 0x0038}, // B
	};

	// Interleave low/high element pairs.
	const SimdImpl_PUnpack xPUNPCK =
		{
			{0x66, 0x60}, // LBW
			{0x66, 0x61}, // LWD
			{0x66, 0x62}, // LDQ
			{0x66, 0x6c}, // LQDQ

			{0x66, 0x68}, // HBW
			{0x66, 0x69}, // HWD
			{0x66, 0x6a}, // HDQ
			{0x66, 0x6d}, // HQDQ
	};

	// Saturating pack-to-narrower-elements (PACKUSDW is the 0x38-map SSE4.1 form).
	const SimdImpl_Pack xPACK =
		{
			{0x66, 0x63}, // SSWB
			{0x66, 0x6b}, // SSDW
			{0x66, 0x67}, // USWB
			{0x66, 0x2b38}, // USDW
	};

	const xImplSimd_Unpack xUNPCK =
		{
			{0x00, 0x15}, // HPS
			{0x66, 0x15}, // HPD
			{0x00, 0x14}, // LPS
			{0x66, 0x14}, // LPD
	};

	const xImplSimd_PInsert xPINSR;
	const SimdImpl_PExtract xPEXTR;
|
||||
|
||||
	// =====================================================================================================
	//  SIMD Move And Blend Instructions
	// =====================================================================================================

	// MOVH/MOVL memory forms: Opcode is the load opcode; Opcode+1 is the store form.
	void xImplSimd_MovHL::PS(const xRegisterSSE& to, const xIndirectVoid& from) const { xOpWrite0F(Opcode, to, from); }
	void xImplSimd_MovHL::PS(const xIndirectVoid& to, const xRegisterSSE& from) const { xOpWrite0F(Opcode + 1, from, to); }

	void xImplSimd_MovHL::PD(const xRegisterSSE& to, const xIndirectVoid& from) const { xOpWrite0F(0x66, Opcode, to, from); }
	void xImplSimd_MovHL::PD(const xIndirectVoid& to, const xRegisterSSE& from) const { xOpWrite0F(0x66, Opcode + 1, from, to); }

	// Register-to-register MOVLH/MOVHL forms.
	void xImplSimd_MovHL_RtoR::PS(const xRegisterSSE& to, const xRegisterSSE& from) const { xOpWrite0F(Opcode, to, from); }
	void xImplSimd_MovHL_RtoR::PD(const xRegisterSSE& to, const xRegisterSSE& from) const { xOpWrite0F(0x66, Opcode, to, from); }

	static const u16 MovPS_OpAligned = 0x28; // Aligned [aps] form
	static const u16 MovPS_OpUnaligned = 0x10; // unaligned [ups] form

	void xImplSimd_MoveSSE::operator()(const xRegisterSSE& to, const xRegisterSSE& from) const
	{
		// Self-moves are a no-op; skip emitting anything.
		if (to != from)
			xOpWrite0F(Prefix, MovPS_OpAligned, to, from);
	}

	void xImplSimd_MoveSSE::operator()(const xRegisterSSE& to, const xIndirectVoid& from) const
	{
		// ModSib form is aligned if it's displacement-only and the displacement is aligned:
		bool isReallyAligned = isAligned || (((from.Displacement & 0x0f) == 0) && from.Index.IsEmpty() && from.Base.IsEmpty());

		xOpWrite0F(Prefix, isReallyAligned ? MovPS_OpAligned : MovPS_OpUnaligned, to, from);
	}

	void xImplSimd_MoveSSE::operator()(const xIndirectVoid& to, const xRegisterSSE& from) const
	{
		// ModSib form is aligned if it's displacement-only and the displacement is aligned:
		bool isReallyAligned = isAligned || ((to.Displacement & 0x0f) == 0 && to.Index.IsEmpty() && to.Base.IsEmpty());
		// +1 selects the store-direction opcode (0x29/0x11).
		xOpWrite0F(Prefix, isReallyAligned ? MovPS_OpAligned + 1 : MovPS_OpUnaligned + 1, from, to);
	}
|
||||
|
||||
	static const u8 MovDQ_PrefixAligned = 0x66; // Aligned [dqa] form
	static const u8 MovDQ_PrefixUnaligned = 0xf3; // unaligned [dqu] form

	void xImplSimd_MoveDQ::operator()(const xRegisterSSE& to, const xRegisterSSE& from) const
	{
		// Self-moves are a no-op; skip emitting anything.
		if (to != from)
			xOpWrite0F(MovDQ_PrefixAligned, 0x6f, to, from);
	}

	void xImplSimd_MoveDQ::operator()(const xRegisterSSE& to, const xIndirectVoid& from) const
	{
		// ModSib form is aligned if it's displacement-only and the displacement is aligned:
		bool isReallyAligned = isAligned || ((from.Displacement & 0x0f) == 0 && from.Index.IsEmpty() && from.Base.IsEmpty());
		xOpWrite0F(isReallyAligned ? MovDQ_PrefixAligned : MovDQ_PrefixUnaligned, 0x6f, to, from);
	}

	void xImplSimd_MoveDQ::operator()(const xIndirectVoid& to, const xRegisterSSE& from) const
	{
		// ModSib form is aligned if it's displacement-only and the displacement is aligned:
		bool isReallyAligned = isAligned || ((to.Displacement & 0x0f) == 0 && to.Index.IsEmpty() && to.Base.IsEmpty());

		// use opcode 0x7f : alternate ModRM encoding (reverse src/dst)
		xOpWrite0F(isReallyAligned ? MovDQ_PrefixAligned : MovDQ_PrefixUnaligned, 0x7f, from, to);
	}
|
||||
|
||||
	// PMOVSX/PMOVZX family: OpcodeBase is the BW form; each wider variant adds
	// 0x100 to select the next opcode in the 0x38 map.
	void xImplSimd_PMove::BW(const xRegisterSSE& to, const xRegisterSSE& from) const { OpWriteSSE(0x66, OpcodeBase); }
	void xImplSimd_PMove::BW(const xRegisterSSE& to, const xIndirect64& from) const { OpWriteSSE(0x66, OpcodeBase); }

	void xImplSimd_PMove::BD(const xRegisterSSE& to, const xRegisterSSE& from) const { OpWriteSSE(0x66, OpcodeBase + 0x100); }
	void xImplSimd_PMove::BD(const xRegisterSSE& to, const xIndirect32& from) const { OpWriteSSE(0x66, OpcodeBase + 0x100); }

	void xImplSimd_PMove::BQ(const xRegisterSSE& to, const xRegisterSSE& from) const { OpWriteSSE(0x66, OpcodeBase + 0x200); }
	void xImplSimd_PMove::BQ(const xRegisterSSE& to, const xIndirect16& from) const { OpWriteSSE(0x66, OpcodeBase + 0x200); }

	void xImplSimd_PMove::WD(const xRegisterSSE& to, const xRegisterSSE& from) const { OpWriteSSE(0x66, OpcodeBase + 0x300); }
	void xImplSimd_PMove::WD(const xRegisterSSE& to, const xIndirect64& from) const { OpWriteSSE(0x66, OpcodeBase + 0x300); }

	void xImplSimd_PMove::WQ(const xRegisterSSE& to, const xRegisterSSE& from) const { OpWriteSSE(0x66, OpcodeBase + 0x400); }
	void xImplSimd_PMove::WQ(const xRegisterSSE& to, const xIndirect32& from) const { OpWriteSSE(0x66, OpcodeBase + 0x400); }

	void xImplSimd_PMove::DQ(const xRegisterSSE& to, const xRegisterSSE& from) const { OpWriteSSE(0x66, OpcodeBase + 0x500); }
	void xImplSimd_PMove::DQ(const xRegisterSSE& to, const xIndirect64& from) const { OpWriteSSE(0x66, OpcodeBase + 0x500); }
|
||||
|
||||
|
||||
	// {prefix, isAligned} -- isAligned selects the aligned opcode unconditionally.
	const xImplSimd_MoveSSE xMOVAPS = {0x00, true};
	const xImplSimd_MoveSSE xMOVUPS = {0x00, false};

#ifdef ALWAYS_USE_MOVAPS
	// Build option: route the DQ/PD moves through the (shorter) *PS encodings.
	const xImplSimd_MoveSSE xMOVDQA = {0x00, true};
	const xImplSimd_MoveSSE xMOVAPD = {0x00, true};

	const xImplSimd_MoveSSE xMOVDQU = {0x00, false};
	const xImplSimd_MoveSSE xMOVUPD = {0x00, false};
#else
	const xImplSimd_MoveDQ xMOVDQA = {0x66, true};
	const xImplSimd_MoveSSE xMOVAPD = {0x66, true};

	const xImplSimd_MoveDQ xMOVDQU = {0xf3, false};
	const xImplSimd_MoveSSE xMOVUPD = {0x66, false};
#endif


	// MOVH* / MOVL* load opcodes (store forms derive as Opcode+1).
	const xImplSimd_MovHL xMOVH = {0x16};
	const xImplSimd_MovHL xMOVL = {0x12};

	const xImplSimd_MovHL_RtoR xMOVLH = {0x16};
	const xImplSimd_MovHL_RtoR xMOVHL = {0x12};

	// [SSE-4.1] blends (PBLENDW takes an imm8 mask; the V* forms use xmm0 as mask).
	const xImplSimd_PBlend xPBLEND =
		{
			{0x66, 0x0e3a}, // W
			{0x66, 0x1038}, // VB
	};

	const xImplSimd_Blend xBLEND =
		{
			{0x66, 0x0c3a}, // PS
			{0x66, 0x0d3a}, // PD
			{0x66, 0x1438}, // VPS
			{0x66, 0x1538}, // VPD
	};

	// [SSE-4.1] packed sign/zero extension bases (0x38 map).
	const xImplSimd_PMove xPMOVSX = {0x2038};
	const xImplSimd_PMove xPMOVZX = {0x3038};

	// [SSE-3]
	const xImplSimd_DestRegSSE xMOVSLDUP = {0xf3, 0x12};

	// [SSE-3]
	const xImplSimd_DestRegSSE xMOVSHDUP = {0xf3, 0x16};
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////////////////////
|
||||
// MMX Mov Instructions (MOVD, MOVQ, MOVSS).
|
||||
//
|
||||
// Notes:
|
||||
// * Some of the functions have been renamed to more clearly reflect what they actually
|
||||
// do. Namely we've affixed "ZX" to several MOVs that take a register as a destination
|
||||
// since that's what they do (MOVD clears upper 32/96 bits, etc).
|
||||
//
|
||||
// * MOVD has valid forms for MMX and XMM registers.
|
||||
//
|
||||
|
||||
__fi void xMOVDZX(const xRegisterSSE& to, const xRegister32or64& from) { xOpWrite0F(0x66, 0x6e, to, from); }
|
||||
__fi void xMOVDZX(const xRegisterSSE& to, const xIndirectVoid& src) { xOpWrite0F(0x66, 0x6e, to, src); }
|
||||
|
||||
__fi void xMOVD(const xRegister32or64& to, const xRegisterSSE& from) { xOpWrite0F(0x66, 0x7e, from, to); }
|
||||
__fi void xMOVD(const xIndirectVoid& dest, const xRegisterSSE& from) { xOpWrite0F(0x66, 0x7e, from, dest); }
|
||||
|
||||
// Moves from XMM to XMM, with the *upper 64 bits* of the destination register
|
||||
// being cleared to zero.
|
||||
__fi void xMOVQZX(const xRegisterSSE& to, const xRegisterSSE& from) { xOpWrite0F(0xf3, 0x7e, to, from); }
|
||||
|
||||
// Moves from XMM to XMM, with the *upper 64 bits* of the destination register
|
||||
// being cleared to zero.
|
||||
__fi void xMOVQZX(const xRegisterSSE& to, const xIndirectVoid& src) { xOpWrite0F(0xf3, 0x7e, to, src); }
|
||||
|
||||
// Moves from XMM to XMM, with the *upper 64 bits* of the destination register
|
||||
// being cleared to zero.
|
||||
__fi void xMOVQZX(const xRegisterSSE& to, const void* src) { xOpWrite0F(0xf3, 0x7e, to, src); }
|
||||
|
||||
// Moves lower quad of XMM to ptr64 (no bits are cleared)
|
||||
__fi void xMOVQ(const xIndirectVoid& dest, const xRegisterSSE& from) { xOpWrite0F(0x66, 0xd6, from, dest); }
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////////////////////
|
||||
//
|
||||
|
||||
// Expands to the three forms of a scalar move (MOVSS / MOVSD):
//  * reg,reg  -- skipped entirely when source == dest, since it would be a no-op.
//  * reg,mem  -- named xMOV*ZX, per the ZX naming convention: the memory-load
//                form clears the upper bits of the destination.
//  * mem,reg  -- plain store of the scalar to memory.
#define IMPLEMENT_xMOVS(ssd, prefix) \
	__fi void xMOV##ssd(const xRegisterSSE& to, const xRegisterSSE& from) \
	{ \
		if (to != from) \
			xOpWrite0F(prefix, 0x10, to, from); \
	} \
	__fi void xMOV##ssd##ZX(const xRegisterSSE& to, const xIndirectVoid& from) { xOpWrite0F(prefix, 0x10, to, from); } \
	__fi void xMOV##ssd(const xIndirectVoid& to, const xRegisterSSE& from) { xOpWrite0F(prefix, 0x11, from, to); }

IMPLEMENT_xMOVS(SS, 0xf3)
IMPLEMENT_xMOVS(SD, 0xf2)
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////////////////////
|
||||
// Non-temporal movs only support a register as a target (ie, load form only, no stores)
|
||||
//
|
||||
|
||||
// Non-temporal 128-bit load from memory into an XMM register
// (SSE4.1 MOVNTDQA, 66 0F 38 2A).
// NOTE(review): to.Id is passed rather than the register object — presumably
// this selects the xOpWrite0F overload used for 0F 38-map opcodes; confirm
// against xOpWrite0F's overload set.
__fi void xMOVNTDQA(const xRegisterSSE& to, const xIndirectVoid& from)
{
	xOpWrite0F(0x66, 0x2a38, to.Id, from);
}

// Non-temporal 128-bit store of an XMM register to memory (MOVNTDQ, 66 0F E7).
__fi void xMOVNTDQA(const xIndirectVoid& to, const xRegisterSSE& from) { xOpWrite0F(0x66, 0xe7, from, to); }
|
||||
|
||||
// Non-temporal store of packed doubles to memory (MOVNTPD, 66 0F 2B).
__fi void xMOVNTPD(const xIndirectVoid& to, const xRegisterSSE& from)
{
	xOpWrite0F(0x66, 0x2b, from, to);
}

// Non-temporal store of packed singles to memory (MOVNTPS, 0F 2B).
__fi void xMOVNTPS(const xIndirectVoid& to, const xRegisterSSE& from)
{
	xOpWrite0F(0x2b, from, to);
}
|
||||
|
||||
// ------------------------------------------------------------------------
|
||||
|
||||
// MOVMSKPS: gathers the sign bits of the four packed singles into the low
// bits of a general-purpose register.
__fi void xMOVMSKPS(const xRegister32& to, const xRegisterSSE& from)
{
	xOpWrite0F(0x50, to, from);
}

// MOVMSKPD: gathers the sign bits of the two packed doubles into the low
// bits of a general-purpose register.
// NOTE(review): the trailing 'true' argument is presumably an xOpWrite0F
// encoding option — confirm against its overload set.
__fi void xMOVMSKPD(const xRegister32& to, const xRegisterSSE& from)
{
	xOpWrite0F(0x66, 0x50, to, from, true);
}
|
||||
|
||||
// xMASKMOV:
// Selectively write bytes from mm1/xmm1 to memory location using the byte mask in mm2/xmm2.
// The default memory location is specified by DS:EDI. The most significant bit in each byte
// of the mask operand determines whether the corresponding byte in the source operand is
// written to the corresponding byte location in memory.
//
// This overload takes XMM registers, so it emits the 128-bit form
// (MASKMOVDQU, 66 0F F7).
__fi void xMASKMOV(const xRegisterSSE& to, const xRegisterSSE& from) { xOpWrite0F(0x66, 0xf7, to, from); }
|
||||
|
||||
// xPMOVMSKB:
// Creates a mask made up of the most significant bit of each byte of the source
// operand and stores the result in the low byte or word of the destination operand.
// Upper bits of the destination are cleared to zero.
//
// When operating on a 64-bit (MMX) source, the byte mask is 8 bits; when operating on
// 128-bit (SSE) source, the byte mask is 16-bits.
//
// This overload takes an XMM source, so it emits the 128-bit form (66 0F D7).
//
__fi void xPMOVMSKB(const xRegister32or64& to, const xRegisterSSE& from) { xOpWrite0F(0x66, 0xd7, to, from); }
|
||||
|
||||
// [SSSE-3] Concatenates dest and source operands into an intermediate composite,
// shifts the composite at byte granularity to the right by a constant immediate,
// and extracts the right-aligned result into the destination
// (PALIGNR, 66 0F 3A 0F /r ib).
//
__fi void xPALIGNR(const xRegisterSSE& to, const xRegisterSSE& from, u8 imm8) { xOpWrite0F(0x66, 0x0f3a, to, from, imm8); }
|
||||
|
||||
|
||||
// --------------------------------------------------------------------------------------
|
||||
// INSERTPS / EXTRACTPS [SSE4.1 only!]
|
||||
// --------------------------------------------------------------------------------------
|
||||
// [TODO] these might be served better as classes, especially if other instructions use
|
||||
// the M32,sse,imm form (I forget offhand if any do).
|
||||
|
||||
|
||||
// [SSE-4.1] Insert a single-precision floating-point value from src into a specified
// location in dest, and selectively zero out the data elements in dest according to
// the mask field in the immediate byte. The source operand can be a memory location
// (32 bits) or an XMM register (lower 32 bits used).
//
// Imm8 provides three fields:
//  * COUNT_S: The value of Imm8[7:6] selects the dword element from src. It is 0 if
//    the source is a memory operand.
//  * COUNT_D: The value of Imm8[5:4] selects the target dword element in dest.
//  * ZMASK: Each bit of Imm8[3:0] selects a dword element in dest to be written
//    with 0.0 if set to 1.
//
// (INSERTPS, encoded as 66 0F 3A 21 /r ib.)
//
__emitinline void xINSERTPS(const xRegisterSSE& to, const xRegisterSSE& from, u8 imm8) { xOpWrite0F(0x66, 0x213a, to, from, imm8); }
__emitinline void xINSERTPS(const xRegisterSSE& to, const xIndirect32& from, u8 imm8) { xOpWrite0F(0x66, 0x213a, to, from, imm8); }
|
||||
|
||||
// [SSE-4.1] Extract a single-precision floating-point value from src at an offset
// determined by imm8[1-0]*32. The extracted single precision floating-point value
// is stored into the low 32-bits of dest (or at a 32-bit memory pointer).
//
// (EXTRACTPS, encoded as 66 0F 3A 17 /r ib.)
//
__emitinline void xEXTRACTPS(const xRegister32or64& to, const xRegisterSSE& from, u8 imm8) { xOpWrite0F(0x66, 0x173a, to, from, imm8); }
__emitinline void xEXTRACTPS(const xIndirect32& dest, const xRegisterSSE& from, u8 imm8) { xOpWrite0F(0x66, 0x173a, from, dest, imm8); }
|
||||
|
||||
|
||||
// =====================================================================================================
|
||||
// Ungrouped Instructions!
|
||||
// =====================================================================================================
|
||||
|
||||
|
||||
// Store Streaming SIMD Extension Control/Status to Mem32.
|
||||
__emitinline void xSTMXCSR(const xIndirect32& dest)
|
||||
{
|
||||
xOpWrite0F(0, 0xae, 3, dest);
|
||||
}
|
||||
|
||||
// Load Streaming SIMD Extension Control/Status from Mem32.
|
||||
__emitinline void xLDMXCSR(const xIndirect32& src)
|
||||
{
|
||||
xOpWrite0F(0, 0xae, 2, src);
|
||||
}
|
||||
|
||||
// Save x87 FPU, MMX Technology, and SSE State to buffer
|
||||
// Target buffer must be at least 512 bytes in length to hold the result.
|
||||
__emitinline void xFXSAVE(const xIndirectVoid& dest)
|
||||
{
|
||||
xOpWrite0F(0, 0xae, 0, dest);
|
||||
}
|
||||
|
||||
// Restore x87 FPU, MMX , XMM, and MXCSR State.
|
||||
// Source buffer should be 512 bytes in length.
|
||||
__emitinline void xFXRSTOR(const xIndirectVoid& src)
|
||||
{
|
||||
xOpWrite0F(0, 0xae, 1, src);
|
||||
}
|
||||
} // namespace x86Emitter
|
||||
1328
common/emitter/x86emitter.cpp
Normal file
1328
common/emitter/x86emitter.cpp
Normal file
File diff suppressed because it is too large
Load Diff
38
common/emitter/x86emitter.h
Normal file
38
common/emitter/x86emitter.h
Normal file
@@ -0,0 +1,38 @@
|
||||
// SPDX-FileCopyrightText: 2002-2025 PCSX2 Dev Team
|
||||
// SPDX-License-Identifier: GPL-3.0+
|
||||
|
||||
/*
|
||||
* ix86 public header v0.9.1
|
||||
*
|
||||
* Original Authors (v0.6.2 and prior):
|
||||
* linuzappz <linuzappz@pcsx.net>
|
||||
* alexey silinov
|
||||
* goldfinger
|
||||
* zerofrog(@gmail.com)
|
||||
*
|
||||
* Authors of v0.9.1:
|
||||
* Jake.Stine(@gmail.com)
|
||||
* cottonvibes(@gmail.com)
|
||||
* sudonim(1@gmail.com)
|
||||
*/
|
||||
|
||||
// PCSX2's New C++ Emitter
|
||||
// --------------------------------------------------------------------------------------
|
||||
// To use it just include the x86Emitter namespace into your file/class/function of choice.
|
||||
//
|
||||
// This header file is intended for use by public code. It includes the appropriate
|
||||
// inlines and class definitions for efficient codegen. (code internal to the emitter
|
||||
// should usually use ix86_internal.h instead, and manually include the
|
||||
// ix86_inlines.inl file when it is known that inlining of ModSib functions are
|
||||
// wanted).
|
||||
//
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "common/emitter/x86types.h"
|
||||
#include "common/emitter/instructions.h"
|
||||
|
||||
// Including legacy items for now, but these should be removed eventually,
|
||||
// once most code is no longer dependent on them.
|
||||
#include "common/emitter/legacy_types.h"
|
||||
#include "common/emitter/legacy_instructions.h"
|
||||
1078
common/emitter/x86types.h
Normal file
1078
common/emitter/x86types.h
Normal file
File diff suppressed because it is too large
Load Diff
Reference in New Issue
Block a user