Optimization: Implement Return Stack Buffer

This commit is contained in:
MerryMage
2016-08-13 00:10:23 +01:00
parent 8e68e6fdd9
commit 960d14d18e
18 changed files with 167 additions and 31 deletions

View File

@@ -15,7 +15,7 @@ using namespace Gen;
namespace Dynarmic {
namespace BackendX64 {
BlockOfCode::BlockOfCode() {
BlockOfCode::BlockOfCode() : Gen::XCodeBlock() {
AllocCodeSpace(128 * 1024 * 1024);
ClearCache(false);
}
@@ -29,6 +29,7 @@ void BlockOfCode::ClearCache(bool poison_memory) {
GenConstants();
GenRunCode();
GenReturnFromRunCode();
}
size_t BlockOfCode::RunCode(JitState* jit_state, CodePtr basic_block, size_t cycles_to_run) const {
@@ -41,11 +42,7 @@ size_t BlockOfCode::RunCode(JitState* jit_state, CodePtr basic_block, size_t cyc
}
void BlockOfCode::ReturnFromRunCode(bool MXCSR_switch) {
if (MXCSR_switch)
SwitchMxcsrOnExit();
ABI_PopRegistersAndAdjustStack(ABI_ALL_CALLEE_SAVED, 8);
RET();
JMP(MXCSR_switch ? return_from_run_code : return_from_run_code_without_mxcsr_switch, true);
}
void BlockOfCode::GenConstants() {
@@ -80,6 +77,17 @@ void BlockOfCode::GenRunCode() {
JMPptr(R(ABI_PARAM2));
}
void BlockOfCode::GenReturnFromRunCode() {
return_from_run_code = GetCodePtr();
SwitchMxcsrOnExit();
return_from_run_code_without_mxcsr_switch = GetCodePtr();
ABI_PopRegistersAndAdjustStack(ABI_ALL_CALLEE_SAVED, 8);
RET();
}
void BlockOfCode::SwitchMxcsrOnEntry() {
STMXCSR(MDisp(R15, offsetof(JitState, save_host_MXCSR)));
LDMXCSR(MDisp(R15, offsetof(JitState, guest_MXCSR)));

View File

@@ -6,6 +6,8 @@
#pragma once
#include <array>
#include "backend_x64/jitstate.h"
#include "common/common_types.h"
#include "common/x64/emitter.h"
@@ -51,6 +53,10 @@ public:
return Gen::M(const_FloatPenultimatePositiveDenormal64);
}
CodePtr GetReturnFromRunCodeAddress() const {
return return_from_run_code;
}
private:
const u8* const_FloatNegativeZero32;
const u8* const_FloatNaN32;
@@ -64,6 +70,10 @@ private:
using RunCodeFuncType = void(*)(JitState*, CodePtr);
RunCodeFuncType run_code;
void GenRunCode();
CodePtr return_from_run_code;
CodePtr return_from_run_code_without_mxcsr_switch;
void GenReturnFromRunCode();
};
} // namespace BackendX64

View File

@@ -67,6 +67,7 @@ EmitX64::BlockDescriptor EmitX64::Emit(const Arm::LocationDescriptor descriptor,
code->INT3();
const CodePtr code_ptr = code->GetCodePtr();
basic_blocks[descriptor].code_ptr = code_ptr;
unique_hash_to_code_ptr[descriptor.UniqueHash()] = code_ptr;
EmitCondPrelude(block.cond, block.cond_failed, block.location);
@@ -328,6 +329,27 @@ void EmitX64::EmitCallSupervisor(IR::Block&, IR::Inst* inst) {
code->SwitchMxcsrOnEntry();
}
void EmitX64::EmitPushRSB(IR::Block&, IR::Inst* inst) {
ASSERT(inst->GetArg(0).IsImmediate());
u64 imm64 = inst->GetArg(0).GetU64();
X64Reg tmp = reg_alloc.ScratchRegister({HostLoc::RCX});
X64Reg rsb_index = reg_alloc.ScratchRegister(any_gpr);
u64 code_ptr = unique_hash_to_code_ptr.find(imm64) != unique_hash_to_code_ptr.end()
? u64(unique_hash_to_code_ptr[imm64])
: u64(code->GetReturnFromRunCodeAddress());
code->MOV(32, R(rsb_index), MDisp(R15, offsetof(JitState, rsb_ptr)));
code->AND(32, R(rsb_index), Imm32(u32(JitState::RSBSize - 1)));
code->MOV(64, R(tmp), Imm64(imm64));
code->MOV(64, MComplex(R15, rsb_index, SCALE_1, offsetof(JitState, rsb_location_descriptors)), R(tmp));
patch_unique_hash_locations[imm64].emplace_back(code->GetCodePtr());
code->MOV(64, R(tmp), Imm64(code_ptr)); // This line has to match up with EmitX64::Patch.
code->MOV(64, MComplex(R15, rsb_index, SCALE_1, offsetof(JitState, rsb_codeptrs)), R(tmp));
code->ADD(32, R(rsb_index), Imm32(1));
code->MOV(32, MDisp(R15, offsetof(JitState, rsb_ptr)), R(rsb_index));
}
void EmitX64::EmitGetCarryFromOp(IR::Block&, IR::Inst*) {
ASSERT_MSG(0, "should never happen");
}
@@ -1696,7 +1718,22 @@ void EmitX64::EmitTerminalLinkBlockFast(IR::Term::LinkBlockFast terminal, Arm::L
}
void EmitX64::EmitTerminalPopRSBHint(IR::Term::PopRSBHint, Arm::LocationDescriptor initial_location) {
EmitTerminalReturnToDispatch({}, initial_location); // TODO: Implement RSB
// This calculation has to match up with IREmitter::PushRSB
code->MOV(32, R(RBX), MJitStateCpsr());
code->MOV(32, R(RCX), MJitStateReg(Arm::Reg::PC));
code->AND(32, R(RBX), Imm32((1 << 5) | (1 << 9)));
code->SHL(32, R(RBX), Imm8(2));
code->OR(32, R(RBX), MDisp(R15, offsetof(JitState, guest_FPSCR_mode)));
code->SHR(64, R(RBX), Imm8(32));
code->OR(64, R(RBX), R(RCX));
code->MOV(64, R(RAX), Imm64(u64(code->GetReturnFromRunCodeAddress())));
for (size_t i = 0; i < JitState::RSBSize; ++i) {
code->CMP(64, R(RBX), MDisp(R15, int(offsetof(JitState, rsb_location_descriptors) + i * sizeof(u64))));
code->CMOVcc(64, RAX, MDisp(R15, int(offsetof(JitState, rsb_codeptrs) + i * sizeof(u64))), CC_E);
}
code->SUB(32, MDisp(R15, offsetof(JitState, rsb_ptr)), Imm32(1));
code->JMPptr(R(RAX));
}
void EmitX64::EmitTerminalIf(IR::Term::If terminal, Arm::LocationDescriptor initial_location) {
@@ -1716,6 +1753,11 @@ void EmitX64::Patch(Arm::LocationDescriptor desc, CodePtr bb) {
ASSERT(code->GetCodePtr() - location == 6);
}
for (CodePtr location : patch_unique_hash_locations[desc.UniqueHash()]) {
code->SetCodePtr(const_cast<u8*>(location));
code->MOV(64, R(RCX), Imm64(u64(bb)));
}
code->SetCodePtr(save_code_ptr);
}

View File

@@ -74,6 +74,8 @@ private:
BlockOfCode* code;
UserCallbacks cb;
Jit* jit_interface;
std::unordered_map<u64, CodePtr> unique_hash_to_code_ptr;
std::unordered_map<u64, std::vector<CodePtr>> patch_unique_hash_locations;
std::unordered_map<Arm::LocationDescriptor, BlockDescriptor, Arm::LocationDescriptorHash> basic_blocks;
std::unordered_map<Arm::LocationDescriptor, std::vector<CodePtr>, Arm::LocationDescriptorHash> patch_jg_locations;
};

View File

@@ -29,10 +29,15 @@ namespace Dynarmic {
using namespace BackendX64;
struct Jit::Impl {
Impl(Jit* jit, UserCallbacks callbacks) : emitter(&block_of_code, callbacks, jit), callbacks(callbacks) {}
Impl(Jit* jit, UserCallbacks callbacks)
: block_of_code()
, jit_state(&block_of_code)
, emitter(&block_of_code, callbacks, jit)
, callbacks(callbacks)
{}
JitState jit_state{};
BlockOfCode block_of_code{};
BlockOfCode block_of_code;
JitState jit_state;
EmitX64 emitter;
const UserCallbacks callbacks;
@@ -41,7 +46,7 @@ struct Jit::Impl {
bool TFlag = Common::Bit<5>(jit_state.Cpsr);
bool EFlag = Common::Bit<9>(jit_state.Cpsr);
Arm::LocationDescriptor descriptor{pc, TFlag, EFlag, jit_state.guest_FPSCR_flags};
Arm::LocationDescriptor descriptor{pc, TFlag, EFlag, jit_state.guest_FPSCR_mode};
CodePtr code_ptr = GetBasicBlock(descriptor).code_ptr;
return block_of_code.RunCode(&jit_state, code_ptr, cycle_count);
@@ -121,11 +126,12 @@ void Jit::ClearCache(bool poison_memory) {
ASSERT(!is_executing);
impl->block_of_code.ClearCache(poison_memory);
impl->emitter.ClearCache();
impl->jit_state.ResetRSB(&impl->block_of_code);
}
void Jit::Reset() {
ASSERT(!is_executing);
impl->jit_state = {};
impl->jit_state = JitState(&impl->block_of_code);
}
void Jit::HaltExecution() {

View File

@@ -4,14 +4,22 @@
* General Public License version 2 or any later version.
*/
#include "backend_x64/block_of_code.h"
#include "backend_x64/jitstate.h"
#include "common/assert.h"
#include "common/bit_util.h"
#include "common/common_types.h"
#include "frontend/arm_types.h"
namespace Dynarmic {
namespace BackendX64 {
void JitState::ResetRSB(BlockOfCode* code) {
for (auto& value : rsb_codeptrs) {
value = u64(code->GetReturnFromRunCodeAddress());
}
}
/**
* Comparing MXCSR and FPSCR
* =========================
@@ -68,14 +76,16 @@ namespace BackendX64 {
*/
// NZCV; QC (ASIMD only), AHP; DN, FZ, RMode, Stride; SBZP; Len; trap enables; cumulative bits
constexpr u32 FPSCR_MASK = 0b1111'00'111111'0'111'10011111'00000000;
constexpr u32 FPSCR_MODE_MASK = Arm::LocationDescriptor::FPSCR_MODE_MASK;
constexpr u32 FPSCR_NZCV_MASK = 0xF0000000;
u32 JitState::Fpscr() const {
ASSERT((guest_FPSCR_flags & ~FPSCR_MASK) == 0);
ASSERT((guest_FPSCR_mode & ~FPSCR_MODE_MASK) == 0);
ASSERT((guest_FPSCR_nzcv & ~FPSCR_NZCV_MASK) == 0);
ASSERT((FPSCR_IDC & ~(1 << 7)) == 0);
ASSERT((FPSCR_UFC & ~(1 << 3)) == 0);
u32 FPSCR = guest_FPSCR_flags;
u32 FPSCR = guest_FPSCR_mode | guest_FPSCR_nzcv;
FPSCR |= (guest_MXCSR & 0b0000000000001); // IOC = IE
FPSCR |= (guest_MXCSR & 0b0000000111100) >> 1; // IXC, UFC, OFC, DZC = PE, UE, OE, ZE
FPSCR |= FPSCR_IDC;
@@ -86,7 +96,8 @@ u32 JitState::Fpscr() const {
void JitState::SetFpscr(u32 FPSCR) {
old_FPSCR = FPSCR;
guest_FPSCR_flags = FPSCR & FPSCR_MASK;
guest_FPSCR_mode = FPSCR & FPSCR_MODE_MASK;
guest_FPSCR_nzcv = FPSCR & FPSCR_NZCV_MASK;
guest_MXCSR = 0;
// Exception masks / enables
@@ -114,6 +125,5 @@ void JitState::SetFpscr(u32 FPSCR) {
}
}
} // namespace BackendX64
} // namespace Dynarmic

View File

@@ -13,9 +13,13 @@
namespace Dynarmic {
namespace BackendX64 {
class BlockOfCode;
constexpr size_t SpillCount = 32;
struct JitState {
JitState(BlockOfCode* code) { ResetRSB(code); }
u32 Cpsr = 0;
std::array<u32, 16> Reg{}; // Current register file.
// TODO: Mode-specific register sets unimplemented.
@@ -34,9 +38,16 @@ struct JitState {
u32 exclusive_state = 0;
u32 exclusive_address = 0;
static constexpr size_t RSBSize = 4; // MUST be a power of 2.
u32 rsb_ptr = 0;
std::array<u64, RSBSize> rsb_location_descriptors;
std::array<u64, RSBSize> rsb_codeptrs;
void ResetRSB(BlockOfCode* code);
u32 FPSCR_IDC = 0;
u32 FPSCR_UFC = 0;
u32 guest_FPSCR_flags = 0;
u32 guest_FPSCR_mode = 0;
u32 guest_FPSCR_nzcv = 0;
u32 old_FPSCR = 0;
u32 Fpscr() const;
void SetFpscr(u32 FPSCR);

View File

@@ -45,7 +45,7 @@ static Gen::X64Reg HostLocToX64(HostLoc loc) {
}
static Gen::OpArg SpillToOpArg(HostLoc loc) {
static_assert(std::is_same<decltype(JitState{}.Spill[0]), u64&>::value, "Spill must be u64");
static_assert(std::is_same<decltype(JitState{nullptr}.Spill[0]), u64&>::value, "Spill must be u64");
DEBUG_ASSERT(HostLocIsSpill(loc));
size_t i = static_cast<size_t>(loc) - static_cast<size_t>(HostLoc::FirstSpill);