Merge branch 'misc'

These commits introduce context save and restore, and a small number of
optimizations that depend on their use for performance.
This commit is contained in:
MerryMage
2017-12-12 22:07:39 +00:00
22 changed files with 587 additions and 258 deletions

View File

@@ -29,6 +29,8 @@
namespace Dynarmic {
namespace BackendX64 {
using namespace Xbyak::util;
constexpr u64 f32_negative_zero = 0x80000000u;
constexpr u64 f32_nan = 0x7fc00000u;
constexpr u64 f32_non_sign_mask = 0x7fffffffu;
@@ -43,12 +45,10 @@ constexpr u64 f64_max_s32 = 0x41dfffffffc00000u; // 2147483647 as a double
constexpr u64 f64_min_u32 = 0x0000000000000000u; // 0 as a double
static Xbyak::Address MJitStateReg(Arm::Reg reg) {
using namespace Xbyak::util;
return dword[r15 + offsetof(JitState, Reg) + sizeof(u32) * static_cast<size_t>(reg)];
}
static Xbyak::Address MJitStateExtReg(Arm::ExtReg reg) {
using namespace Xbyak::util;
if (Arm::IsSingleExtReg(reg)) {
size_t index = static_cast<size_t>(reg) - static_cast<size_t>(Arm::ExtReg::S0);
return dword[r15 + offsetof(JitState, ExtReg) + sizeof(u32) * index];
@@ -60,11 +60,6 @@ static Xbyak::Address MJitStateExtReg(Arm::ExtReg reg) {
ASSERT_MSG(false, "Should never happen.");
}
static Xbyak::Address MJitStateCpsr() {
using namespace Xbyak::util;
return dword[r15 + offsetof(JitState, Cpsr)];
}
static void EraseInstruction(IR::Block& block, IR::Inst* inst) {
block.Instructions().erase(inst);
inst->Invalidate();
@@ -209,21 +204,87 @@ void EmitX64::EmitSetExtendedRegister64(RegAlloc& reg_alloc, IR::Block&, IR::Ins
}
}
static u32 GetCpsrImpl(JitState* jit_state) {
return jit_state->Cpsr();
}
void EmitX64::EmitGetCpsr(RegAlloc& reg_alloc, IR::Block&, IR::Inst* inst) {
Xbyak::Reg32 result = reg_alloc.ScratchGpr().cvt32();
code->mov(result, MJitStateCpsr());
reg_alloc.DefineValue(inst, result);
if (code->DoesCpuSupport(Xbyak::util::Cpu::tBMI2)) {
Xbyak::Reg32 result = reg_alloc.ScratchGpr().cvt32();
Xbyak::Reg32 b = reg_alloc.ScratchGpr().cvt32();
Xbyak::Reg32 c = reg_alloc.ScratchGpr().cvt32();
code->mov(c, dword[r15 + offsetof(JitState, CPSR_ge)]);
// Here we observe that CPSR_q and CPSR_nzcv are right next to each other in memory,
// so we load them both at the same time with one 64-bit read. This allows us to
// extract all of their bits together at once with one pext.
code->mov(result.cvt64(), qword[r15 + offsetof(JitState, CPSR_q)]);
code->mov(b.cvt64(), 0xF000000000000001ull);
code->pext(result.cvt64(), result.cvt64(), b.cvt64());
code->mov(b, 0x80808080);
code->pext(c.cvt64(), c.cvt64(), b.cvt64());
code->shl(result, 27);
code->shl(c, 16);
code->or_(result, c);
code->mov(b, 0x00000220);
code->mov(c, dword[r15 + offsetof(JitState, CPSR_et)]);
code->pdep(c.cvt64(), c.cvt64(), b.cvt64());
code->or_(result, dword[r15 + offsetof(JitState, CPSR_jaifm)]);
code->or_(result, c);
reg_alloc.DefineValue(inst, result);
} else {
reg_alloc.HostCall(inst);
code->mov(code->ABI_PARAM1, code->r15);
code->CallFunction(&GetCpsrImpl);
}
}
static void SetCpsrImpl(u32 value, JitState* jit_state) {
jit_state->SetCpsr(value);
}
void EmitX64::EmitSetCpsr(RegAlloc& reg_alloc, IR::Block&, IR::Inst* inst) {
auto args = reg_alloc.GetArgumentInfo(inst);
Xbyak::Reg32 arg = reg_alloc.UseGpr(args[0]).cvt32();
code->mov(MJitStateCpsr(), arg);
reg_alloc.HostCall(nullptr, args[0]);
code->mov(code->ABI_PARAM2, code->r15);
code->CallFunction(&SetCpsrImpl);
}
void EmitX64::EmitSetCpsrNZCV(RegAlloc& reg_alloc, IR::Block&, IR::Inst* inst) {
auto args = reg_alloc.GetArgumentInfo(inst);
if (args[0].IsImmediate()) {
u32 imm = args[0].GetImmediateU32();
code->mov(dword[r15 + offsetof(JitState, CPSR_nzcv)], u32(imm & 0xF0000000));
} else {
Xbyak::Reg32 a = reg_alloc.UseScratchGpr(args[0]).cvt32();
code->and_(a, 0xF0000000);
code->mov(dword[r15 + offsetof(JitState, CPSR_nzcv)], a);
}
}
void EmitX64::EmitSetCpsrNZCVQ(RegAlloc& reg_alloc, IR::Block&, IR::Inst* inst) {
auto args = reg_alloc.GetArgumentInfo(inst);
if (args[0].IsImmediate()) {
u32 imm = args[0].GetImmediateU32();
code->mov(dword[r15 + offsetof(JitState, CPSR_nzcv)], u32(imm & 0xF0000000));
code->mov(code->byte[r15 + offsetof(JitState, CPSR_q)], u8((imm & 0x08000000) != 0 ? 1 : 0));
} else {
Xbyak::Reg32 a = reg_alloc.UseScratchGpr(args[0]).cvt32();
code->bt(a, 27);
code->setc(code->byte[r15 + offsetof(JitState, CPSR_q)]);
code->and_(a, 0xF0000000);
code->mov(dword[r15 + offsetof(JitState, CPSR_nzcv)], a);
}
}
void EmitX64::EmitGetNFlag(RegAlloc& reg_alloc, IR::Block&, IR::Inst* inst) {
Xbyak::Reg32 result = reg_alloc.ScratchGpr().cvt32();
code->mov(result, MJitStateCpsr());
code->mov(result, dword[r15 + offsetof(JitState, CPSR_nzcv)]);
code->shr(result, 31);
reg_alloc.DefineValue(inst, result);
}
@@ -234,22 +295,22 @@ void EmitX64::EmitSetNFlag(RegAlloc& reg_alloc, IR::Block&, IR::Inst* inst) {
auto args = reg_alloc.GetArgumentInfo(inst);
if (args[0].IsImmediate()) {
if (args[0].GetImmediateU1()) {
code->or_(MJitStateCpsr(), flag_mask);
code->or_(dword[r15 + offsetof(JitState, CPSR_nzcv)], flag_mask);
} else {
code->and_(MJitStateCpsr(), ~flag_mask);
code->and_(dword[r15 + offsetof(JitState, CPSR_nzcv)], ~flag_mask);
}
} else {
Xbyak::Reg32 to_store = reg_alloc.UseScratchGpr(args[0]).cvt32();
code->shl(to_store, flag_bit);
code->and_(MJitStateCpsr(), ~flag_mask);
code->or_(MJitStateCpsr(), to_store);
code->and_(dword[r15 + offsetof(JitState, CPSR_nzcv)], ~flag_mask);
code->or_(dword[r15 + offsetof(JitState, CPSR_nzcv)], to_store);
}
}
void EmitX64::EmitGetZFlag(RegAlloc& reg_alloc, IR::Block&, IR::Inst* inst) {
Xbyak::Reg32 result = reg_alloc.ScratchGpr().cvt32();
code->mov(result, MJitStateCpsr());
code->mov(result, dword[r15 + offsetof(JitState, CPSR_nzcv)]);
code->shr(result, 30);
code->and_(result, 1);
reg_alloc.DefineValue(inst, result);
@@ -261,22 +322,22 @@ void EmitX64::EmitSetZFlag(RegAlloc& reg_alloc, IR::Block&, IR::Inst* inst) {
auto args = reg_alloc.GetArgumentInfo(inst);
if (args[0].IsImmediate()) {
if (args[0].GetImmediateU1()) {
code->or_(MJitStateCpsr(), flag_mask);
code->or_(dword[r15 + offsetof(JitState, CPSR_nzcv)], flag_mask);
} else {
code->and_(MJitStateCpsr(), ~flag_mask);
code->and_(dword[r15 + offsetof(JitState, CPSR_nzcv)], ~flag_mask);
}
} else {
Xbyak::Reg32 to_store = reg_alloc.UseScratchGpr(args[0]).cvt32();
code->shl(to_store, flag_bit);
code->and_(MJitStateCpsr(), ~flag_mask);
code->or_(MJitStateCpsr(), to_store);
code->and_(dword[r15 + offsetof(JitState, CPSR_nzcv)], ~flag_mask);
code->or_(dword[r15 + offsetof(JitState, CPSR_nzcv)], to_store);
}
}
void EmitX64::EmitGetCFlag(RegAlloc& reg_alloc, IR::Block&, IR::Inst* inst) {
Xbyak::Reg32 result = reg_alloc.ScratchGpr().cvt32();
code->mov(result, MJitStateCpsr());
code->mov(result, dword[r15 + offsetof(JitState, CPSR_nzcv)]);
code->shr(result, 29);
code->and_(result, 1);
reg_alloc.DefineValue(inst, result);
@@ -288,22 +349,22 @@ void EmitX64::EmitSetCFlag(RegAlloc& reg_alloc, IR::Block&, IR::Inst* inst) {
auto args = reg_alloc.GetArgumentInfo(inst);
if (args[0].IsImmediate()) {
if (args[0].GetImmediateU1()) {
code->or_(MJitStateCpsr(), flag_mask);
code->or_(dword[r15 + offsetof(JitState, CPSR_nzcv)], flag_mask);
} else {
code->and_(MJitStateCpsr(), ~flag_mask);
code->and_(dword[r15 + offsetof(JitState, CPSR_nzcv)], ~flag_mask);
}
} else {
Xbyak::Reg32 to_store = reg_alloc.UseScratchGpr(args[0]).cvt32();
code->shl(to_store, flag_bit);
code->and_(MJitStateCpsr(), ~flag_mask);
code->or_(MJitStateCpsr(), to_store);
code->and_(dword[r15 + offsetof(JitState, CPSR_nzcv)], ~flag_mask);
code->or_(dword[r15 + offsetof(JitState, CPSR_nzcv)], to_store);
}
}
void EmitX64::EmitGetVFlag(RegAlloc& reg_alloc, IR::Block&, IR::Inst* inst) {
Xbyak::Reg32 result = reg_alloc.ScratchGpr().cvt32();
code->mov(result, MJitStateCpsr());
code->mov(result, dword[r15 + offsetof(JitState, CPSR_nzcv)]);
code->shr(result, 28);
code->and_(result, 1);
reg_alloc.DefineValue(inst, result);
@@ -315,85 +376,86 @@ void EmitX64::EmitSetVFlag(RegAlloc& reg_alloc, IR::Block&, IR::Inst* inst) {
auto args = reg_alloc.GetArgumentInfo(inst);
if (args[0].IsImmediate()) {
if (args[0].GetImmediateU1()) {
code->or_(MJitStateCpsr(), flag_mask);
code->or_(dword[r15 + offsetof(JitState, CPSR_nzcv)], flag_mask);
} else {
code->and_(MJitStateCpsr(), ~flag_mask);
code->and_(dword[r15 + offsetof(JitState, CPSR_nzcv)], ~flag_mask);
}
} else {
Xbyak::Reg32 to_store = reg_alloc.UseScratchGpr(args[0]).cvt32();
code->shl(to_store, flag_bit);
code->and_(MJitStateCpsr(), ~flag_mask);
code->or_(MJitStateCpsr(), to_store);
code->and_(dword[r15 + offsetof(JitState, CPSR_nzcv)], ~flag_mask);
code->or_(dword[r15 + offsetof(JitState, CPSR_nzcv)], to_store);
}
}
void EmitX64::EmitOrQFlag(RegAlloc& reg_alloc, IR::Block&, IR::Inst* inst) {
constexpr size_t flag_bit = 27;
constexpr u32 flag_mask = 1u << flag_bit;
auto args = reg_alloc.GetArgumentInfo(inst);
if (args[0].IsImmediate()) {
if (args[0].GetImmediateU1())
code->or_(MJitStateCpsr(), flag_mask);
code->mov(dword[r15 + offsetof(JitState, CPSR_q)], 1);
} else {
Xbyak::Reg32 to_store = reg_alloc.UseScratchGpr(args[0]).cvt32();
Xbyak::Reg8 to_store = reg_alloc.UseGpr(args[0]).cvt8();
code->shl(to_store, flag_bit);
code->or_(MJitStateCpsr(), to_store);
code->or_(code->byte[r15 + offsetof(JitState, CPSR_q)], to_store);
}
}
void EmitX64::EmitGetGEFlags(RegAlloc& reg_alloc, IR::Block&, IR::Inst* inst) {
Xbyak::Reg32 result = reg_alloc.ScratchGpr().cvt32();
Xbyak::Reg32 tmp;
if (code->DoesCpuSupport(Xbyak::util::Cpu::tBMI2)) {
tmp = reg_alloc.ScratchGpr().cvt32();
code->mov(tmp, 0x01010101);
}
code->mov(result, MJitStateCpsr());
code->shr(result, 16);
if (code->DoesCpuSupport(Xbyak::util::Cpu::tBMI2)) {
code->pdep(result, result, tmp);
} else {
code->and_(result, 0xF);
code->imul(result, result, 0x00204081);
code->and_(result, 0x01010101);
}
code->imul(result, result, 0xFF);
Xbyak::Xmm result = reg_alloc.ScratchXmm();
code->movd(result, dword[r15 + offsetof(JitState, CPSR_ge)]);
reg_alloc.DefineValue(inst, result);
}
void EmitX64::EmitSetGEFlags(RegAlloc& reg_alloc, IR::Block&, IR::Inst* inst) {
constexpr size_t flag_bit = 16;
constexpr u32 flag_mask = 0xFu << flag_bit;
auto args = reg_alloc.GetArgumentInfo(inst);
ASSERT(!args[0].IsImmediate());
Xbyak::Reg32 to_store = reg_alloc.UseScratchGpr(args[0]).cvt32();
if (code->DoesCpuSupport(Xbyak::util::Cpu::tBMI2)) {
Xbyak::Reg32 tmp = reg_alloc.ScratchGpr().cvt32();
code->mov(tmp, 0x80808080);
code->pext(to_store, to_store, tmp);
if (args[0].IsInXmm()) {
Xbyak::Xmm to_store = reg_alloc.UseXmm(args[0]);
code->movd(dword[r15 + offsetof(JitState, CPSR_ge)], to_store);
} else {
code->and_(to_store, 0x80808080);
code->imul(to_store, to_store, 0x00204081);
code->shr(to_store, 28);
Xbyak::Reg32 to_store = reg_alloc.UseGpr(args[0]).cvt32();
code->mov(dword[r15 + offsetof(JitState, CPSR_ge)], to_store);
}
code->shl(to_store, flag_bit);
code->and_(MJitStateCpsr(), ~flag_mask);
code->or_(MJitStateCpsr(), to_store);
}
void EmitX64::EmitBXWritePC(RegAlloc& reg_alloc, IR::Block&, IR::Inst* inst) {
void EmitX64::EmitSetGEFlagsCompressed(RegAlloc& reg_alloc, IR::Block&, IR::Inst* inst) {
auto args = reg_alloc.GetArgumentInfo(inst);
if (args[0].IsImmediate()) {
u32 imm = args[0].GetImmediateU32();
u32 ge = 0;
ge |= Common::Bit<19>(imm) ? 0xFF000000 : 0;
ge |= Common::Bit<18>(imm) ? 0x00FF0000 : 0;
ge |= Common::Bit<17>(imm) ? 0x0000FF00 : 0;
ge |= Common::Bit<16>(imm) ? 0x000000FF : 0;
code->mov(dword[r15 + offsetof(JitState, CPSR_ge)], ge);
} else if (code->DoesCpuSupport(Xbyak::util::Cpu::tBMI2)) {
Xbyak::Reg32 a = reg_alloc.UseScratchGpr(args[0]).cvt32();
Xbyak::Reg32 b = reg_alloc.ScratchGpr().cvt32();
code->mov(b, 0x01010101);
code->shr(a, 16);
code->pdep(a, a, b);
code->imul(a, a, 0xFF);
code->mov(dword[r15 + offsetof(JitState, CPSR_ge)], a);
} else {
Xbyak::Reg32 a = reg_alloc.UseScratchGpr(args[0]).cvt32();
code->shr(a, 16);
code->and_(a, 0xF);
code->imul(a, a, 0x00204081);
code->and_(a, 0x01010101);
code->imul(a, a, 0xFF);
code->mov(dword[r15 + offsetof(JitState, CPSR_ge)], a);
}
}
void EmitX64::EmitBXWritePC(RegAlloc& reg_alloc, IR::Block& block, IR::Inst* inst) {
auto args = reg_alloc.GetArgumentInfo(inst);
auto& arg = args[0];
const u32 T_bit = 1 << 5;
// Pseudocode:
// if (new_pc & 1) {
// new_pc &= 0xFFFFFFFE;
@@ -402,42 +464,45 @@ void EmitX64::EmitBXWritePC(RegAlloc& reg_alloc, IR::Block&, IR::Inst* inst) {
// new_pc &= 0xFFFFFFFC;
// cpsr.T = false;
// }
// We rely on the fact we disallow EFlag from changing within a block.
if (arg.IsImmediate()) {
u32 new_pc = arg.GetImmediateU32();
if (Common::Bit<0>(new_pc)) {
new_pc &= 0xFFFFFFFE;
code->mov(MJitStateReg(Arm::Reg::PC), new_pc);
code->or_(MJitStateCpsr(), T_bit);
} else {
new_pc &= 0xFFFFFFFC;
code->mov(MJitStateReg(Arm::Reg::PC), new_pc);
code->and_(MJitStateCpsr(), ~T_bit);
}
u32 mask = Common::Bit<0>(new_pc) ? 0xFFFFFFFE : 0xFFFFFFFC;
u32 et = 0;
et |= block.Location().EFlag() ? 2 : 0;
et |= Common::Bit<0>(new_pc) ? 1 : 0;
code->mov(MJitStateReg(Arm::Reg::PC), new_pc & mask);
code->mov(dword[r15 + offsetof(JitState, CPSR_et)], et);
} else {
using Xbyak::util::ptr;
if (block.Location().EFlag()) {
Xbyak::Reg32 new_pc = reg_alloc.UseScratchGpr(arg).cvt32();
Xbyak::Reg32 mask = reg_alloc.ScratchGpr().cvt32();
Xbyak::Reg32 et = reg_alloc.ScratchGpr().cvt32();
Xbyak::Reg64 new_pc = reg_alloc.UseScratchGpr(arg);
Xbyak::Reg64 tmp1 = reg_alloc.ScratchGpr();
Xbyak::Reg64 tmp2 = reg_alloc.ScratchGpr();
code->mov(mask, new_pc);
code->and_(mask, 1);
code->lea(et, ptr[mask.cvt64() + 2]);
code->mov(dword[r15 + offsetof(JitState, CPSR_et)], et);
code->lea(mask, ptr[mask.cvt64() + mask.cvt64() * 1 - 4]); // mask = pc & 1 ? 0xFFFFFFFE : 0xFFFFFFFC
code->and_(new_pc, mask);
code->mov(MJitStateReg(Arm::Reg::PC), new_pc);
} else {
Xbyak::Reg32 new_pc = reg_alloc.UseScratchGpr(arg).cvt32();
Xbyak::Reg32 mask = reg_alloc.ScratchGpr().cvt32();
code->mov(tmp1, MJitStateCpsr());
code->mov(tmp2, tmp1);
code->and_(tmp2, u32(~T_bit)); // CPSR.T = 0
code->or_(tmp1, u32(T_bit)); // CPSR.T = 1
code->test(new_pc, u32(1));
code->cmove(tmp1, tmp2); // CPSR.T = pc & 1
code->mov(MJitStateCpsr(), tmp1);
code->lea(tmp2, ptr[new_pc + new_pc * 1]);
code->or_(tmp2, u32(0xFFFFFFFC)); // tmp2 = pc & 1 ? 0xFFFFFFFE : 0xFFFFFFFC
code->and_(new_pc, tmp2);
code->mov(MJitStateReg(Arm::Reg::PC), new_pc);
code->mov(mask, new_pc);
code->and_(mask, 1);
code->mov(dword[r15 + offsetof(JitState, CPSR_et)], mask);
code->lea(mask, ptr[mask.cvt64() + mask.cvt64() * 1 - 4]); // mask = pc & 1 ? 0xFFFFFFFE : 0xFFFFFFFC
code->and_(new_pc, mask);
code->mov(MJitStateReg(Arm::Reg::PC), new_pc);
}
}
}
void EmitX64::EmitCallSupervisor(RegAlloc& reg_alloc, IR::Block&, IR::Inst* inst) {
using namespace Xbyak::util;
reg_alloc.HostCall(nullptr);
code->SwitchMxcsrOnExit();
@@ -480,57 +545,51 @@ void EmitX64::EmitSetFpscr(RegAlloc& reg_alloc, IR::Block&, IR::Inst* inst) {
}
void EmitX64::EmitGetFpscrNZCV(RegAlloc& reg_alloc, IR::Block&, IR::Inst* inst) {
using namespace Xbyak::util;
Xbyak::Reg32 result = reg_alloc.ScratchGpr().cvt32();
code->mov(result, dword[r15 + offsetof(JitState, FPSCR_nzcv)]);
reg_alloc.DefineValue(inst, result);
}
void EmitX64::EmitSetFpscrNZCV(RegAlloc& reg_alloc, IR::Block&, IR::Inst* inst) {
using namespace Xbyak::util;
auto args = reg_alloc.GetArgumentInfo(inst);
Xbyak::Reg32 value = reg_alloc.UseGpr(args[0]).cvt32();
code->mov(dword[r15 + offsetof(JitState, FPSCR_nzcv)], value);
}
void EmitX64::EmitPushRSB(RegAlloc& reg_alloc, IR::Block&, IR::Inst* inst) {
void EmitX64::PushRSBHelper(Xbyak::Reg64 loc_desc_reg, Xbyak::Reg64 index_reg, u64 target_hash) {
using namespace Xbyak::util;
auto args = reg_alloc.GetArgumentInfo(inst);
ASSERT(args[0].IsImmediate());
u64 unique_hash_of_target = args[0].GetImmediateU64();
auto iter = block_descriptors.find(unique_hash_of_target);
auto iter = block_descriptors.find(target_hash);
CodePtr target_code_ptr = iter != block_descriptors.end()
? iter->second.entrypoint
: code->GetReturnFromRunCodeAddress();
Xbyak::Reg64 code_ptr_reg = reg_alloc.ScratchGpr({HostLoc::RCX});
Xbyak::Reg64 loc_desc_reg = reg_alloc.ScratchGpr();
Xbyak::Reg32 index_reg = reg_alloc.ScratchGpr().cvt32();
code->mov(index_reg.cvt32(), dword[r15 + offsetof(JitState, rsb_ptr)]);
code->mov(index_reg, dword[r15 + offsetof(JitState, rsb_ptr)]);
code->add(index_reg, 1);
code->and_(index_reg, u32(JitState::RSBSize - 1));
code->mov(loc_desc_reg, target_hash);
code->mov(loc_desc_reg, unique_hash_of_target);
patch_information[unique_hash_of_target].mov_rcx.emplace_back(code->getCurr());
patch_information[target_hash].mov_rcx.emplace_back(code->getCurr());
EmitPatchMovRcx(target_code_ptr);
Xbyak::Label label;
for (size_t i = 0; i < JitState::RSBSize; ++i) {
code->cmp(loc_desc_reg, qword[r15 + offsetof(JitState, rsb_location_descriptors) + i * sizeof(u64)]);
code->je(label, code->T_SHORT);
}
code->mov(qword[r15 + index_reg * 8 + offsetof(JitState, rsb_location_descriptors)], loc_desc_reg);
code->mov(qword[r15 + index_reg * 8 + offsetof(JitState, rsb_codeptrs)], rcx);
code->mov(dword[r15 + offsetof(JitState, rsb_ptr)], index_reg);
code->mov(qword[r15 + index_reg.cvt64() * 8 + offsetof(JitState, rsb_location_descriptors)], loc_desc_reg);
code->mov(qword[r15 + index_reg.cvt64() * 8 + offsetof(JitState, rsb_codeptrs)], code_ptr_reg);
code->L(label);
code->add(index_reg.cvt32(), 1);
code->and_(index_reg.cvt32(), u32(JitState::RSBPtrMask));
code->mov(dword[r15 + offsetof(JitState, rsb_ptr)], index_reg.cvt32());
}
void EmitX64::EmitPushRSB(RegAlloc& reg_alloc, IR::Block&, IR::Inst* inst) {
auto args = reg_alloc.GetArgumentInfo(inst);
ASSERT(args[0].IsImmediate());
u64 unique_hash_of_target = args[0].GetImmediateU64();
reg_alloc.ScratchGpr({HostLoc::RCX});
Xbyak::Reg64 loc_desc_reg = reg_alloc.ScratchGpr();
Xbyak::Reg64 index_reg = reg_alloc.ScratchGpr();
PushRSBHelper(loc_desc_reg, index_reg, unique_hash_of_target);
}
void EmitX64::EmitGetCarryFromOp(RegAlloc&, IR::Block&, IR::Inst*) {
@@ -2189,7 +2248,6 @@ void EmitX64::EmitPackedSelect(RegAlloc& reg_alloc, IR::Block&, IR::Inst* inst)
}
static void DenormalsAreZero32(BlockOfCode* code, Xbyak::Xmm xmm_value, Xbyak::Reg32 gpr_scratch) {
using namespace Xbyak::util;
Xbyak::Label end;
// We need to report back whether we've found a denormal on input.
@@ -2206,7 +2264,6 @@ static void DenormalsAreZero32(BlockOfCode* code, Xbyak::Xmm xmm_value, Xbyak::R
}
static void DenormalsAreZero64(BlockOfCode* code, Xbyak::Xmm xmm_value, Xbyak::Reg64 gpr_scratch) {
using namespace Xbyak::util;
Xbyak::Label end;
auto mask = code->MConst(f64_non_sign_mask);
@@ -2225,7 +2282,6 @@ static void DenormalsAreZero64(BlockOfCode* code, Xbyak::Xmm xmm_value, Xbyak::R
}
static void FlushToZero32(BlockOfCode* code, Xbyak::Xmm xmm_value, Xbyak::Reg32 gpr_scratch) {
using namespace Xbyak::util;
Xbyak::Label end;
code->movd(gpr_scratch, xmm_value);
@@ -2239,7 +2295,6 @@ static void FlushToZero32(BlockOfCode* code, Xbyak::Xmm xmm_value, Xbyak::Reg32
}
static void FlushToZero64(BlockOfCode* code, Xbyak::Xmm xmm_value, Xbyak::Reg64 gpr_scratch) {
using namespace Xbyak::util;
Xbyak::Label end;
auto mask = code->MConst(f64_non_sign_mask);
@@ -2479,7 +2534,6 @@ static void SetFpscrNzcvFromFlags(BlockOfCode* code, RegAlloc& reg_alloc) {
reg_alloc.ScratchGpr({HostLoc::RCX}); // shifting requires use of cl
Xbyak::Reg32 nzcv = reg_alloc.ScratchGpr().cvt32();
using namespace Xbyak::util;
code->mov(nzcv, 0x28630000);
code->sete(cl);
@@ -2798,14 +2852,10 @@ void EmitX64::EmitFPU32ToDouble(RegAlloc& reg_alloc, IR::Block&, IR::Inst* inst)
void EmitX64::EmitClearExclusive(RegAlloc&, IR::Block&, IR::Inst*) {
using namespace Xbyak::util;
code->mov(code->byte[r15 + offsetof(JitState, exclusive_state)], u8(0));
}
void EmitX64::EmitSetExclusive(RegAlloc& reg_alloc, IR::Block&, IR::Inst* inst) {
using namespace Xbyak::util;
auto args = reg_alloc.GetArgumentInfo(inst);
ASSERT(args[1].IsImmediate());
Xbyak::Reg32 address = reg_alloc.UseGpr(args[0]).cvt32();
@@ -2824,7 +2874,6 @@ static void ReadMemory(BlockOfCode* code, RegAlloc& reg_alloc, IR::Inst* inst, U
return;
}
using namespace Xbyak::util;
reg_alloc.UseScratch(args[0], ABI_PARAM1);
@@ -2878,7 +2927,6 @@ static void WriteMemory(BlockOfCode* code, RegAlloc& reg_alloc, IR::Inst* inst,
return;
}
using namespace Xbyak::util;
reg_alloc.ScratchGpr({ABI_RETURN});
reg_alloc.UseScratch(args[0], ABI_PARAM1);
@@ -2965,7 +3013,6 @@ static void ExclusiveWrite(BlockOfCode* code, RegAlloc& reg_alloc, IR::Inst* ins
Xbyak::Reg32 passed = reg_alloc.ScratchGpr().cvt32();
Xbyak::Reg32 tmp = code->ABI_RETURN.cvt32(); // Use one of the unusued HostCall registers.
using namespace Xbyak::util;
Xbyak::Label end;
code->mov(passed, u32(1));
@@ -3268,18 +3315,15 @@ void EmitX64::EmitCoprocStoreWords(RegAlloc& reg_alloc, IR::Block&, IR::Inst* in
}
void EmitX64::EmitAddCycles(size_t cycles) {
using namespace Xbyak::util;
ASSERT(cycles < std::numeric_limits<u32>::max());
code->sub(qword[r15 + offsetof(JitState, cycles_remaining)], static_cast<u32>(cycles));
}
static Xbyak::Label EmitCond(BlockOfCode* code, Arm::Cond cond) {
using namespace Xbyak::util;
Xbyak::Label label;
const Xbyak::Reg32 cpsr = eax;
code->mov(cpsr, MJitStateCpsr());
code->mov(cpsr, dword[r15 + offsetof(JitState, CPSR_nzcv)]);
constexpr size_t n_shift = 31;
constexpr size_t z_shift = 30;
@@ -3424,22 +3468,16 @@ void EmitX64::EmitTerminal(IR::Term::ReturnToDispatch, IR::LocationDescriptor) {
code->ReturnFromRunCode();
}
void EmitX64::EmitTerminal(IR::Term::LinkBlock terminal, IR::LocationDescriptor initial_location) {
using namespace Xbyak::util;
static u32 CalculateCpsr_et(const IR::LocationDescriptor& desc) {
u32 et = 0;
et |= desc.EFlag() ? 2 : 0;
et |= desc.TFlag() ? 1 : 0;
return et;
}
if (terminal.next.TFlag() != initial_location.TFlag()) {
if (terminal.next.TFlag()) {
code->or_(MJitStateCpsr(), u32(1 << 5));
} else {
code->and_(MJitStateCpsr(), u32(~(1 << 5)));
}
}
if (terminal.next.EFlag() != initial_location.EFlag()) {
if (terminal.next.EFlag()) {
code->or_(MJitStateCpsr(), u32(1 << 9));
} else {
code->and_(MJitStateCpsr(), u32(~(1 << 9)));
}
void EmitX64::EmitTerminal(IR::Term::LinkBlock terminal, IR::LocationDescriptor initial_location) {
if (CalculateCpsr_et(terminal.next) != CalculateCpsr_et(initial_location)) {
code->mov(dword[r15 + offsetof(JitState, CPSR_et)], CalculateCpsr_et(terminal.next));
}
code->cmp(qword[r15 + offsetof(JitState, cycles_remaining)], 0);
@@ -3450,27 +3488,21 @@ void EmitX64::EmitTerminal(IR::Term::LinkBlock terminal, IR::LocationDescriptor
} else {
EmitPatchJg(terminal.next);
}
Xbyak::Label dest;
code->jmp(dest, Xbyak::CodeGenerator::T_NEAR);
code->SwitchToFarCode();
code->align(16);
code->L(dest);
code->mov(MJitStateReg(Arm::Reg::PC), terminal.next.PC());
code->ForceReturnFromRunCode(); // TODO: Check cycles, Properly do a link
PushRSBHelper(rax, rbx, terminal.next.UniqueHash());
code->ForceReturnFromRunCode();
code->SwitchToNearCode();
}
void EmitX64::EmitTerminal(IR::Term::LinkBlockFast terminal, IR::LocationDescriptor initial_location) {
using namespace Xbyak::util;
if (terminal.next.TFlag() != initial_location.TFlag()) {
if (terminal.next.TFlag()) {
code->or_(MJitStateCpsr(), u32(1 << 5));
} else {
code->and_(MJitStateCpsr(), u32(~(1 << 5)));
}
}
if (terminal.next.EFlag() != initial_location.EFlag()) {
if (terminal.next.EFlag()) {
code->or_(MJitStateCpsr(), u32(1 << 9));
} else {
code->and_(MJitStateCpsr(), u32(~(1 << 9)));
}
if (CalculateCpsr_et(terminal.next) != CalculateCpsr_et(initial_location)) {
code->mov(dword[r15 + offsetof(JitState, CPSR_et)], CalculateCpsr_et(terminal.next));
}
patch_information[terminal.next.UniqueHash()].jmp.emplace_back(code->getCurr());
@@ -3482,23 +3514,21 @@ void EmitX64::EmitTerminal(IR::Term::LinkBlockFast terminal, IR::LocationDescrip
}
void EmitX64::EmitTerminal(IR::Term::PopRSBHint, IR::LocationDescriptor) {
using namespace Xbyak::util;
// This calculation has to match up with IREmitter::PushRSB
code->mov(ebx, MJitStateCpsr());
// TODO: Optimization is available here based on known state of FPSCR_mode and CPSR_et.
code->mov(ecx, MJitStateReg(Arm::Reg::PC));
code->and_(ebx, u32((1 << 5) | (1 << 9)));
code->shr(ebx, 2);
code->or_(ebx, dword[r15 + offsetof(JitState, FPSCR_mode)]);
code->shl(rbx, 32);
code->shl(rcx, 32);
code->mov(ebx, dword[r15 + offsetof(JitState, FPSCR_mode)]);
code->or_(ebx, dword[r15 + offsetof(JitState, CPSR_et)]);
code->or_(rbx, rcx);
code->mov(rax, reinterpret_cast<u64>(code->GetReturnFromRunCodeAddress()));
for (size_t i = 0; i < JitState::RSBSize; ++i) {
code->cmp(rbx, qword[r15 + offsetof(JitState, rsb_location_descriptors) + i * sizeof(u64)]);
code->cmove(rax, qword[r15 + offsetof(JitState, rsb_codeptrs) + i * sizeof(u64)]);
}
code->mov(eax, dword[r15 + offsetof(JitState, rsb_ptr)]);
code->sub(eax, 1);
code->and_(eax, u32(JitState::RSBPtrMask));
code->mov(dword[r15 + offsetof(JitState, rsb_ptr)], eax);
code->cmp(rbx, qword[r15 + offsetof(JitState, rsb_location_descriptors) + rax * sizeof(u64)]);
code->jne(code->GetReturnFromRunCodeAddress());
code->mov(rax, qword[r15 + offsetof(JitState, rsb_codeptrs) + rax * sizeof(u64)]);
code->jmp(rax);
}
@@ -3510,8 +3540,6 @@ void EmitX64::EmitTerminal(IR::Term::If terminal, IR::LocationDescriptor initial
}
void EmitX64::EmitTerminal(IR::Term::CheckHalt terminal, IR::LocationDescriptor initial_location) {
using namespace Xbyak::util;
code->cmp(code->byte[r15 + offsetof(JitState, halt_requested)], u8(0));
code->jne(code->GetForceReturnFromRunCodeAddress());
EmitTerminal(terminal.else_, initial_location);