mirror of
https://git.suyu.dev/suyu/dynarmic.git
synced 2026-03-05 10:16:30 +00:00
Backend: Create "backend" folder
similar to the "frontend" folder
This commit is contained in:
1267
src/backend/x64/a32_emit_x64.cpp
Normal file
1267
src/backend/x64/a32_emit_x64.cpp
Normal file
File diff suppressed because it is too large
Load Diff
87
src/backend/x64/a32_emit_x64.h
Normal file
87
src/backend/x64/a32_emit_x64.h
Normal file
@@ -0,0 +1,87 @@
|
||||
/* This file is part of the dynarmic project.
|
||||
* Copyright (c) 2016 MerryMage
|
||||
* This software may be used and distributed according to the terms of the GNU
|
||||
* General Public License version 2 or any later version.
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <boost/optional.hpp>
|
||||
|
||||
#include "backend/x64/a32_jitstate.h"
|
||||
#include "backend/x64/block_range_information.h"
|
||||
#include "backend/x64/emit_x64.h"
|
||||
#include "dynarmic/A32/a32.h"
|
||||
#include "dynarmic/A32/config.h"
|
||||
#include "frontend/A32/location_descriptor.h"
|
||||
#include "frontend/ir/terminal.h"
|
||||
|
||||
namespace Dynarmic::BackendX64 {
|
||||
|
||||
class RegAlloc;
|
||||
|
||||
struct A32EmitContext final : public EmitContext {
|
||||
A32EmitContext(RegAlloc& reg_alloc, IR::Block& block);
|
||||
A32::LocationDescriptor Location() const;
|
||||
FP::RoundingMode FPSCR_RMode() const override;
|
||||
u32 FPCR() const override;
|
||||
bool FPSCR_FTZ() const override;
|
||||
bool FPSCR_DN() const override;
|
||||
};
|
||||
|
||||
class A32EmitX64 final : public EmitX64 {
|
||||
public:
|
||||
A32EmitX64(BlockOfCode& code, A32::UserConfig config, A32::Jit* jit_interface);
|
||||
~A32EmitX64() override;
|
||||
|
||||
/**
|
||||
* Emit host machine code for a basic block with intermediate representation `ir`.
|
||||
* @note ir is modified.
|
||||
*/
|
||||
BlockDescriptor Emit(IR::Block& ir);
|
||||
|
||||
void ClearCache() override;
|
||||
|
||||
void InvalidateCacheRanges(const boost::icl::interval_set<u32>& ranges);
|
||||
|
||||
protected:
|
||||
const A32::UserConfig config;
|
||||
A32::Jit* jit_interface;
|
||||
BlockRangeInformation<u32> block_ranges;
|
||||
|
||||
const void* read_memory_8;
|
||||
const void* read_memory_16;
|
||||
const void* read_memory_32;
|
||||
const void* read_memory_64;
|
||||
const void* write_memory_8;
|
||||
const void* write_memory_16;
|
||||
const void* write_memory_32;
|
||||
const void* write_memory_64;
|
||||
void GenMemoryAccessors();
|
||||
|
||||
// Microinstruction emitters
|
||||
#define OPCODE(...)
|
||||
#define A32OPC(name, type, ...) void EmitA32##name(A32EmitContext& ctx, IR::Inst* inst);
|
||||
#define A64OPC(...)
|
||||
#include "frontend/ir/opcodes.inc"
|
||||
#undef OPCODE
|
||||
#undef A32OPC
|
||||
#undef A64OPC
|
||||
|
||||
// Terminal instruction emitters
|
||||
void EmitTerminalImpl(IR::Term::Interpret terminal, IR::LocationDescriptor initial_location) override;
|
||||
void EmitTerminalImpl(IR::Term::ReturnToDispatch terminal, IR::LocationDescriptor initial_location) override;
|
||||
void EmitTerminalImpl(IR::Term::LinkBlock terminal, IR::LocationDescriptor initial_location) override;
|
||||
void EmitTerminalImpl(IR::Term::LinkBlockFast terminal, IR::LocationDescriptor initial_location) override;
|
||||
void EmitTerminalImpl(IR::Term::PopRSBHint terminal, IR::LocationDescriptor initial_location) override;
|
||||
void EmitTerminalImpl(IR::Term::If terminal, IR::LocationDescriptor initial_location) override;
|
||||
void EmitTerminalImpl(IR::Term::CheckBit terminal, IR::LocationDescriptor initial_location) override;
|
||||
void EmitTerminalImpl(IR::Term::CheckHalt terminal, IR::LocationDescriptor initial_location) override;
|
||||
|
||||
// Patching
|
||||
void EmitPatchJg(const IR::LocationDescriptor& target_desc, CodePtr target_code_ptr = nullptr) override;
|
||||
void EmitPatchJmp(const IR::LocationDescriptor& target_desc, CodePtr target_code_ptr = nullptr) override;
|
||||
void EmitPatchMovRcx(CodePtr target_code_ptr = nullptr) override;
|
||||
};
|
||||
|
||||
} // namespace Dynarmic::BackendX64
|
||||
300
src/backend/x64/a32_interface.cpp
Normal file
300
src/backend/x64/a32_interface.cpp
Normal file
@@ -0,0 +1,300 @@
|
||||
/* This file is part of the dynarmic project.
|
||||
* Copyright (c) 2016 MerryMage
|
||||
* This software may be used and distributed according to the terms of the GNU
|
||||
* General Public License version 2 or any later version.
|
||||
*/
|
||||
|
||||
#include <memory>
|
||||
|
||||
#include <boost/icl/interval_set.hpp>
|
||||
#include <fmt/format.h>
|
||||
|
||||
#include "backend/x64/a32_emit_x64.h"
|
||||
#include "backend/x64/a32_jitstate.h"
|
||||
#include "backend/x64/block_of_code.h"
|
||||
#include "backend/x64/callback.h"
|
||||
#include "backend/x64/devirtualize.h"
|
||||
#include "backend/x64/jitstate_info.h"
|
||||
#include "common/assert.h"
|
||||
#include "common/common_types.h"
|
||||
#include "common/llvm_disassemble.h"
|
||||
#include "common/scope_exit.h"
|
||||
#include "dynarmic/A32/a32.h"
|
||||
#include "dynarmic/A32/context.h"
|
||||
#include "frontend/A32/translate/translate.h"
|
||||
#include "frontend/ir/basic_block.h"
|
||||
#include "frontend/ir/location_descriptor.h"
|
||||
#include "ir_opt/passes.h"
|
||||
|
||||
namespace Dynarmic::A32 {
|
||||
|
||||
using namespace BackendX64;
|
||||
|
||||
static RunCodeCallbacks GenRunCodeCallbacks(A32::UserCallbacks* cb, CodePtr (*LookupBlock)(void* lookup_block_arg), void* arg) {
|
||||
return RunCodeCallbacks{
|
||||
std::make_unique<ArgCallback>(LookupBlock, reinterpret_cast<u64>(arg)),
|
||||
std::make_unique<ArgCallback>(Devirtualize<&A32::UserCallbacks::AddTicks>(cb)),
|
||||
std::make_unique<ArgCallback>(Devirtualize<&A32::UserCallbacks::GetTicksRemaining>(cb)),
|
||||
};
|
||||
}
|
||||
|
||||
struct Jit::Impl {
|
||||
Impl(Jit* jit, A32::UserConfig config)
|
||||
: block_of_code(GenRunCodeCallbacks(config.callbacks, &GetCurrentBlock, this), JitStateInfo{jit_state})
|
||||
, emitter(block_of_code, config, jit)
|
||||
, config(config)
|
||||
, jit_interface(jit)
|
||||
{}
|
||||
|
||||
A32JitState jit_state;
|
||||
BlockOfCode block_of_code;
|
||||
A32EmitX64 emitter;
|
||||
|
||||
const A32::UserConfig config;
|
||||
|
||||
// Requests made during execution to invalidate the cache are queued up here.
|
||||
size_t invalid_cache_generation = 0;
|
||||
boost::icl::interval_set<u32> invalid_cache_ranges;
|
||||
bool invalidate_entire_cache = false;
|
||||
|
||||
void Execute() {
|
||||
const u32 new_rsb_ptr = (jit_state.rsb_ptr - 1) & A32JitState::RSBPtrMask;
|
||||
if (jit_state.GetUniqueHash() == jit_state.rsb_location_descriptors[new_rsb_ptr]) {
|
||||
jit_state.rsb_ptr = new_rsb_ptr;
|
||||
block_of_code.RunCodeFrom(&jit_state, reinterpret_cast<CodePtr>(jit_state.rsb_codeptrs[new_rsb_ptr]));
|
||||
} else {
|
||||
block_of_code.RunCode(&jit_state);
|
||||
}
|
||||
}
|
||||
|
||||
std::string Disassemble(const IR::LocationDescriptor& descriptor) {
|
||||
auto block = GetBasicBlock(descriptor);
|
||||
std::string result = fmt::format("address: {}\nsize: {} bytes\n", block.entrypoint, block.size);
|
||||
result += Common::DisassembleX64(block.entrypoint, reinterpret_cast<const char*>(block.entrypoint) + block.size);
|
||||
return result;
|
||||
}
|
||||
|
||||
void PerformCacheInvalidation() {
|
||||
if (invalidate_entire_cache) {
|
||||
jit_state.ResetRSB();
|
||||
block_of_code.ClearCache();
|
||||
emitter.ClearCache();
|
||||
|
||||
invalid_cache_ranges.clear();
|
||||
invalidate_entire_cache = false;
|
||||
invalid_cache_generation++;
|
||||
return;
|
||||
}
|
||||
|
||||
if (invalid_cache_ranges.empty()) {
|
||||
return;
|
||||
}
|
||||
|
||||
jit_state.ResetRSB();
|
||||
emitter.InvalidateCacheRanges(invalid_cache_ranges);
|
||||
invalid_cache_ranges.clear();
|
||||
invalid_cache_generation++;
|
||||
}
|
||||
|
||||
void RequestCacheInvalidation() {
|
||||
if (jit_interface->is_executing) {
|
||||
jit_state.halt_requested = true;
|
||||
return;
|
||||
}
|
||||
|
||||
PerformCacheInvalidation();
|
||||
}
|
||||
|
||||
private:
|
||||
Jit* jit_interface;
|
||||
|
||||
static CodePtr GetCurrentBlock(void* this_voidptr) {
|
||||
Jit::Impl& this_ = *static_cast<Jit::Impl*>(this_voidptr);
|
||||
A32JitState& jit_state = this_.jit_state;
|
||||
|
||||
u32 pc = jit_state.Reg[15];
|
||||
A32::PSR cpsr{jit_state.Cpsr()};
|
||||
A32::FPSCR fpscr{jit_state.FPSCR_mode};
|
||||
A32::LocationDescriptor descriptor{pc, cpsr, fpscr};
|
||||
|
||||
return this_.GetBasicBlock(descriptor).entrypoint;
|
||||
}
|
||||
|
||||
A32EmitX64::BlockDescriptor GetBasicBlock(IR::LocationDescriptor descriptor) {
|
||||
auto block = emitter.GetBasicBlock(descriptor);
|
||||
if (block)
|
||||
return *block;
|
||||
|
||||
constexpr size_t MINIMUM_REMAINING_CODESIZE = 1 * 1024 * 1024;
|
||||
if (block_of_code.SpaceRemaining() < MINIMUM_REMAINING_CODESIZE) {
|
||||
invalidate_entire_cache = true;
|
||||
PerformCacheInvalidation();
|
||||
}
|
||||
|
||||
IR::Block ir_block = A32::Translate(A32::LocationDescriptor{descriptor}, [this](u32 vaddr) { return config.callbacks->MemoryReadCode(vaddr); });
|
||||
Optimization::A32GetSetElimination(ir_block);
|
||||
Optimization::DeadCodeElimination(ir_block);
|
||||
Optimization::A32ConstantMemoryReads(ir_block, config.callbacks);
|
||||
Optimization::ConstantPropagation(ir_block);
|
||||
Optimization::DeadCodeElimination(ir_block);
|
||||
Optimization::VerificationPass(ir_block);
|
||||
return emitter.Emit(ir_block);
|
||||
}
|
||||
};
|
||||
|
||||
Jit::Jit(UserConfig config) : impl(std::make_unique<Impl>(this, config)) {}
|
||||
|
||||
Jit::~Jit() {}
|
||||
|
||||
void Jit::Run() {
|
||||
ASSERT(!is_executing);
|
||||
is_executing = true;
|
||||
SCOPE_EXIT { this->is_executing = false; };
|
||||
|
||||
impl->jit_state.halt_requested = false;
|
||||
|
||||
impl->Execute();
|
||||
|
||||
impl->PerformCacheInvalidation();
|
||||
}
|
||||
|
||||
void Jit::ClearCache() {
|
||||
impl->invalidate_entire_cache = true;
|
||||
impl->RequestCacheInvalidation();
|
||||
}
|
||||
|
||||
void Jit::InvalidateCacheRange(std::uint32_t start_address, std::size_t length) {
|
||||
impl->invalid_cache_ranges.add(boost::icl::discrete_interval<u32>::closed(start_address, static_cast<u32>(start_address + length - 1)));
|
||||
impl->RequestCacheInvalidation();
|
||||
}
|
||||
|
||||
void Jit::Reset() {
|
||||
ASSERT(!is_executing);
|
||||
impl->jit_state = {};
|
||||
}
|
||||
|
||||
void Jit::HaltExecution() {
|
||||
impl->jit_state.halt_requested = true;
|
||||
}
|
||||
|
||||
std::array<u32, 16>& Jit::Regs() {
|
||||
return impl->jit_state.Reg;
|
||||
}
|
||||
const std::array<u32, 16>& Jit::Regs() const {
|
||||
return impl->jit_state.Reg;
|
||||
}
|
||||
|
||||
std::array<u32, 64>& Jit::ExtRegs() {
|
||||
return impl->jit_state.ExtReg;
|
||||
}
|
||||
|
||||
const std::array<u32, 64>& Jit::ExtRegs() const {
|
||||
return impl->jit_state.ExtReg;
|
||||
}
|
||||
|
||||
u32 Jit::Cpsr() const {
|
||||
return impl->jit_state.Cpsr();
|
||||
}
|
||||
|
||||
void Jit::SetCpsr(u32 value) {
|
||||
return impl->jit_state.SetCpsr(value);
|
||||
}
|
||||
|
||||
u32 Jit::Fpscr() const {
|
||||
return impl->jit_state.Fpscr();
|
||||
}
|
||||
|
||||
void Jit::SetFpscr(u32 value) {
|
||||
return impl->jit_state.SetFpscr(value);
|
||||
}
|
||||
|
||||
Context Jit::SaveContext() const {
|
||||
Context ctx;
|
||||
SaveContext(ctx);
|
||||
return ctx;
|
||||
}
|
||||
|
||||
struct Context::Impl {
|
||||
A32JitState jit_state;
|
||||
size_t invalid_cache_generation;
|
||||
};
|
||||
|
||||
Context::Context() : impl(std::make_unique<Context::Impl>()) { impl->jit_state.ResetRSB(); }
|
||||
Context::~Context() = default;
|
||||
Context::Context(const Context& ctx) : impl(std::make_unique<Context::Impl>(*ctx.impl)) {}
|
||||
Context::Context(Context&& ctx) : impl(std::move(ctx.impl)) {}
|
||||
Context& Context::operator=(const Context& ctx) {
|
||||
*impl = *ctx.impl;
|
||||
return *this;
|
||||
}
|
||||
Context& Context::operator=(Context&& ctx) {
|
||||
impl = std::move(ctx.impl);
|
||||
return *this;
|
||||
}
|
||||
|
||||
std::array<std::uint32_t, 16>& Context::Regs() {
|
||||
return impl->jit_state.Reg;
|
||||
}
|
||||
const std::array<std::uint32_t, 16>& Context::Regs() const {
|
||||
return impl->jit_state.Reg;
|
||||
}
|
||||
std::array<std::uint32_t, 64>& Context::ExtRegs() {
|
||||
return impl->jit_state.ExtReg;
|
||||
}
|
||||
const std::array<std::uint32_t, 64>& Context::ExtRegs() const {
|
||||
return impl->jit_state.ExtReg;
|
||||
}
|
||||
|
||||
/// View and modify CPSR.
|
||||
std::uint32_t Context::Cpsr() const {
|
||||
return impl->jit_state.Cpsr();
|
||||
}
|
||||
void Context::SetCpsr(std::uint32_t value) {
|
||||
impl->jit_state.SetCpsr(value);
|
||||
}
|
||||
|
||||
/// View and modify FPSCR.
|
||||
std::uint32_t Context::Fpscr() const {
|
||||
return impl->jit_state.Fpscr();
|
||||
}
|
||||
void Context::SetFpscr(std::uint32_t value) {
|
||||
return impl->jit_state.SetFpscr(value);
|
||||
}
|
||||
|
||||
void TransferJitState(A32JitState& dest, const A32JitState& src, bool reset_rsb) {
|
||||
dest.CPSR_ge = src.CPSR_ge;
|
||||
dest.CPSR_et = src.CPSR_et;
|
||||
dest.CPSR_q = src.CPSR_q;
|
||||
dest.CPSR_nzcv = src.CPSR_nzcv;
|
||||
dest.CPSR_jaifm = src.CPSR_jaifm;
|
||||
dest.Reg = src.Reg;
|
||||
dest.ExtReg = src.ExtReg;
|
||||
dest.guest_MXCSR = src.guest_MXCSR;
|
||||
dest.FPSCR_IDC = src.FPSCR_IDC;
|
||||
dest.FPSCR_UFC = src.FPSCR_UFC;
|
||||
dest.FPSCR_mode = src.FPSCR_mode;
|
||||
dest.FPSCR_nzcv = src.FPSCR_nzcv;
|
||||
if (reset_rsb) {
|
||||
dest.ResetRSB();
|
||||
} else {
|
||||
dest.rsb_ptr = src.rsb_ptr;
|
||||
dest.rsb_location_descriptors = src.rsb_location_descriptors;
|
||||
dest.rsb_codeptrs = src.rsb_codeptrs;
|
||||
}
|
||||
}
|
||||
|
||||
void Jit::SaveContext(Context& ctx) const {
|
||||
TransferJitState(ctx.impl->jit_state, impl->jit_state, false);
|
||||
ctx.impl->invalid_cache_generation = impl->invalid_cache_generation;
|
||||
}
|
||||
|
||||
void Jit::LoadContext(const Context& ctx) {
|
||||
bool reset_rsb = ctx.impl->invalid_cache_generation != impl->invalid_cache_generation;
|
||||
TransferJitState(impl->jit_state, ctx.impl->jit_state, reset_rsb);
|
||||
}
|
||||
|
||||
std::string Jit::Disassemble(const IR::LocationDescriptor& descriptor) {
|
||||
return impl->Disassemble(descriptor);
|
||||
}
|
||||
|
||||
} // namespace Dynarmic::A32
|
||||
203
src/backend/x64/a32_jitstate.cpp
Normal file
203
src/backend/x64/a32_jitstate.cpp
Normal file
@@ -0,0 +1,203 @@
|
||||
/* This file is part of the dynarmic project.
|
||||
* Copyright (c) 2016 MerryMage
|
||||
* This software may be used and distributed according to the terms of the GNU
|
||||
* General Public License version 2 or any later version.
|
||||
*/
|
||||
|
||||
#include "backend/x64/a32_jitstate.h"
|
||||
#include "backend/x64/block_of_code.h"
|
||||
#include "common/assert.h"
|
||||
#include "common/bit_util.h"
|
||||
#include "common/common_types.h"
|
||||
#include "frontend/A32/location_descriptor.h"
|
||||
|
||||
namespace Dynarmic::BackendX64 {
|
||||
|
||||
/**
|
||||
* CPSR Bits
|
||||
* =========
|
||||
*
|
||||
* ARM CPSR flags
|
||||
* --------------
|
||||
* N bit 31 Negative flag
|
||||
* Z bit 30 Zero flag
|
||||
* C bit 29 Carry flag
|
||||
* V bit 28 oVerflow flag
|
||||
* Q bit 27 Saturation flag
|
||||
* J bit 24 Jazelle instruction set flag
|
||||
* GE bits 16-19 Greater than or Equal flags
|
||||
* E bit 9 Data Endianness flag
|
||||
* A bit 8 Disable imprecise Aborts
|
||||
* I bit 7 Disable IRQ interrupts
|
||||
* F bit 6 Disable FIQ interrupts
|
||||
* T bit 5 Thumb instruction set flag
|
||||
* M bits 0-4 Processor Mode bits
|
||||
*
|
||||
* x64 LAHF+SETO flags
|
||||
* -------------------
|
||||
* SF bit 15 Sign flag
|
||||
* ZF bit 14 Zero flag
|
||||
* AF bit 12 Auxiliary flag
|
||||
* PF bit 10 Parity flag
|
||||
* CF bit 8 Carry flag
|
||||
* OF bit 0 Overflow flag
|
||||
*/
|
||||
|
||||
u32 A32JitState::Cpsr() const {
|
||||
ASSERT((CPSR_nzcv & ~0xF0000000) == 0);
|
||||
ASSERT((CPSR_q & ~1) == 0);
|
||||
ASSERT((CPSR_et & ~3) == 0);
|
||||
ASSERT((CPSR_jaifm & ~0x010001DF) == 0);
|
||||
|
||||
u32 cpsr = 0;
|
||||
|
||||
// NZCV flags
|
||||
cpsr |= CPSR_nzcv;
|
||||
// Q flag
|
||||
cpsr |= CPSR_q ? 1 << 27 : 0;
|
||||
// GE flags
|
||||
cpsr |= Common::Bit<31>(CPSR_ge) ? 1 << 19 : 0;
|
||||
cpsr |= Common::Bit<23>(CPSR_ge) ? 1 << 18 : 0;
|
||||
cpsr |= Common::Bit<15>(CPSR_ge) ? 1 << 17 : 0;
|
||||
cpsr |= Common::Bit<7>(CPSR_ge) ? 1 << 16 : 0;
|
||||
// E flag, T flag
|
||||
cpsr |= Common::Bit<1>(CPSR_et) ? 1 << 9 : 0;
|
||||
cpsr |= Common::Bit<0>(CPSR_et) ? 1 << 5 : 0;
|
||||
// Other flags
|
||||
cpsr |= CPSR_jaifm;
|
||||
|
||||
return cpsr;
|
||||
}
|
||||
|
||||
void A32JitState::SetCpsr(u32 cpsr) {
|
||||
// NZCV flags
|
||||
CPSR_nzcv = cpsr & 0xF0000000;
|
||||
// Q flag
|
||||
CPSR_q = Common::Bit<27>(cpsr) ? 1 : 0;
|
||||
// GE flags
|
||||
CPSR_ge = 0;
|
||||
CPSR_ge |= Common::Bit<19>(cpsr) ? 0xFF000000 : 0;
|
||||
CPSR_ge |= Common::Bit<18>(cpsr) ? 0x00FF0000 : 0;
|
||||
CPSR_ge |= Common::Bit<17>(cpsr) ? 0x0000FF00 : 0;
|
||||
CPSR_ge |= Common::Bit<16>(cpsr) ? 0x000000FF : 0;
|
||||
// E flag, T flag
|
||||
CPSR_et = 0;
|
||||
CPSR_et |= Common::Bit<9>(cpsr) ? 2 : 0;
|
||||
CPSR_et |= Common::Bit<5>(cpsr) ? 1 : 0;
|
||||
// Other flags
|
||||
CPSR_jaifm = cpsr & 0x07F0FDDF;
|
||||
}
|
||||
|
||||
void A32JitState::ResetRSB() {
|
||||
rsb_location_descriptors.fill(0xFFFFFFFFFFFFFFFFull);
|
||||
rsb_codeptrs.fill(0);
|
||||
}
|
||||
|
||||
/**
|
||||
* Comparing MXCSR and FPSCR
|
||||
* =========================
|
||||
*
|
||||
* SSE MXCSR exception flags
|
||||
* -------------------------
|
||||
* PE bit 5 Precision Flag
|
||||
* UE bit 4 Underflow Flag
|
||||
* OE bit 3 Overflow Flag
|
||||
* ZE bit 2 Divide By Zero Flag
|
||||
* DE bit 1 Denormal Flag // Appears to only be set when MXCSR.DAZ = 0
|
||||
* IE bit 0 Invalid Operation Flag
|
||||
*
|
||||
* VFP FPSCR cumulative exception bits
|
||||
* -----------------------------------
|
||||
* IDC bit 7 Input Denormal cumulative exception bit // Only ever set when FPSCR.FTZ = 1
|
||||
* IXC bit 4 Inexact cumulative exception bit
|
||||
* UFC bit 3 Underflow cumulative exception bit
|
||||
* OFC bit 2 Overflow cumulative exception bit
|
||||
* DZC bit 1 Division by Zero cumulative exception bit
|
||||
* IOC bit 0 Invalid Operation cumulative exception bit
|
||||
*
|
||||
* SSE MSCSR exception masks
|
||||
* -------------------------
|
||||
* PM bit 12 Precision Mask
|
||||
* UM bit 11 Underflow Mask
|
||||
* OM bit 10 Overflow Mask
|
||||
* ZM bit 9 Divide By Zero Mask
|
||||
* DM bit 8 Denormal Mask
|
||||
* IM bit 7 Invalid Operation Mask
|
||||
*
|
||||
* VFP FPSCR exception trap enables
|
||||
* --------------------------------
|
||||
* IDE bit 15 Input Denormal exception trap enable
|
||||
* IXE bit 12 Inexact exception trap enable
|
||||
* UFE bit 11 Underflow exception trap enable
|
||||
* OFE bit 10 Overflow exception trap enable
|
||||
* DZE bit 9 Division by Zero exception trap enable
|
||||
* IOE bit 8 Invalid Operation exception trap enable
|
||||
*
|
||||
* SSE MXCSR mode bits
|
||||
* -------------------
|
||||
* FZ bit 15 Flush To Zero
|
||||
* DAZ bit 6 Denormals Are Zero
|
||||
* RN bits 13-14 Round to {0 = Nearest, 1 = Negative, 2 = Positive, 3 = Zero}
|
||||
*
|
||||
* VFP FPSCR mode bits
|
||||
* -------------------
|
||||
* DN bit 25 Default NaN
|
||||
* FZ bit 24 Flush to Zero
|
||||
* RMode bits 22-23 Round to {0 = Nearest, 1 = Positive, 2 = Negative, 3 = Zero}
|
||||
* Stride bits 20-21 Vector stride
|
||||
* Len bits 16-18 Vector length
|
||||
*/
|
||||
|
||||
// NZCV; QC (ASMID only), AHP; DN, FZ, RMode, Stride; SBZP; Len; trap enables; cumulative bits
|
||||
constexpr u32 FPSCR_MODE_MASK = A32::LocationDescriptor::FPSCR_MODE_MASK;
|
||||
constexpr u32 FPSCR_NZCV_MASK = 0xF0000000;
|
||||
|
||||
u32 A32JitState::Fpscr() const {
|
||||
ASSERT((FPSCR_mode & ~FPSCR_MODE_MASK) == 0);
|
||||
ASSERT((FPSCR_nzcv & ~FPSCR_NZCV_MASK) == 0);
|
||||
ASSERT((FPSCR_IDC & ~(1 << 7)) == 0);
|
||||
ASSERT((FPSCR_UFC & ~(1 << 3)) == 0);
|
||||
|
||||
u32 FPSCR = FPSCR_mode | FPSCR_nzcv;
|
||||
FPSCR |= (guest_MXCSR & 0b0000000000001); // IOC = IE
|
||||
FPSCR |= (guest_MXCSR & 0b0000000111100) >> 1; // IXC, UFC, OFC, DZC = PE, UE, OE, ZE
|
||||
FPSCR |= FPSCR_IDC;
|
||||
FPSCR |= FPSCR_UFC;
|
||||
FPSCR |= fpsr_exc;
|
||||
|
||||
return FPSCR;
|
||||
}
|
||||
|
||||
void A32JitState::SetFpscr(u32 FPSCR) {
|
||||
old_FPSCR = FPSCR;
|
||||
FPSCR_mode = FPSCR & FPSCR_MODE_MASK;
|
||||
FPSCR_nzcv = FPSCR & FPSCR_NZCV_MASK;
|
||||
guest_MXCSR = 0;
|
||||
|
||||
// Exception masks / enables
|
||||
guest_MXCSR |= 0x00001f80; // mask all
|
||||
//guest_MXCSR |= (~FPSCR >> 1) & 0b0000010000000; // IM = ~IOE
|
||||
//guest_MXCSR |= (~FPSCR >> 7) & 0b0000100000000; // DM = ~IDE
|
||||
//guest_MXCSR |= (~FPSCR ) & 0b1111000000000; // PM, UM, OM, ZM = ~IXE, ~UFE, ~OFE, ~DZE
|
||||
|
||||
// RMode
|
||||
const std::array<u32, 4> MXCSR_RMode {0x0, 0x4000, 0x2000, 0x6000};
|
||||
guest_MXCSR |= MXCSR_RMode[(FPSCR >> 22) & 0x3];
|
||||
|
||||
// Cumulative flags IDC, IOC, IXC, UFC, OFC, DZC
|
||||
FPSCR_IDC = 0;
|
||||
FPSCR_UFC = 0;
|
||||
fpsr_exc = FPSCR & 0x9F;
|
||||
|
||||
if (Common::Bit<24>(FPSCR)) {
|
||||
// VFP Flush to Zero
|
||||
//guest_MXCSR |= (1 << 15); // SSE Flush to Zero
|
||||
//guest_MXCSR |= (1 << 6); // SSE Denormals are Zero
|
||||
}
|
||||
}
|
||||
|
||||
u64 A32JitState::GetUniqueHash() const {
|
||||
return CPSR_et | FPSCR_mode | (static_cast<u64>(Reg[15]) << 32);
|
||||
}
|
||||
|
||||
} // namespace Dynarmic::BackendX64
|
||||
88
src/backend/x64/a32_jitstate.h
Normal file
88
src/backend/x64/a32_jitstate.h
Normal file
@@ -0,0 +1,88 @@
|
||||
/* This file is part of the dynarmic project.
|
||||
* Copyright (c) 2016 MerryMage
|
||||
* This software may be used and distributed according to the terms of the GNU
|
||||
* General Public License version 2 or any later version.
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <array>
|
||||
|
||||
#include <xbyak.h>
|
||||
|
||||
#include "common/common_types.h"
|
||||
|
||||
namespace Dynarmic::BackendX64 {
|
||||
|
||||
class BlockOfCode;
|
||||
|
||||
#ifdef _MSC_VER
|
||||
#pragma warning(push)
|
||||
#pragma warning(disable:4324) // Structure was padded due to alignment specifier
|
||||
#endif
|
||||
|
||||
struct A32JitState {
|
||||
using ProgramCounterType = u32;
|
||||
|
||||
A32JitState() { ResetRSB(); }
|
||||
|
||||
std::array<u32, 16> Reg{}; // Current register file.
|
||||
// TODO: Mode-specific register sets unimplemented.
|
||||
|
||||
u32 CPSR_et = 0;
|
||||
u32 CPSR_ge = 0;
|
||||
u32 CPSR_q = 0;
|
||||
u32 CPSR_nzcv = 0;
|
||||
u32 CPSR_jaifm = 0;
|
||||
|
||||
u32 Cpsr() const;
|
||||
void SetCpsr(u32 cpsr);
|
||||
|
||||
alignas(u64) std::array<u32, 64> ExtReg{}; // Extension registers.
|
||||
|
||||
static constexpr size_t SpillCount = 64;
|
||||
std::array<u64, SpillCount> Spill{}; // Spill.
|
||||
static Xbyak::Address GetSpillLocationFromIndex(size_t i) {
|
||||
using namespace Xbyak::util;
|
||||
return qword[r15 + offsetof(A32JitState, Spill) + i * sizeof(u64)];
|
||||
}
|
||||
|
||||
// For internal use (See: BlockOfCode::RunCode)
|
||||
u32 guest_MXCSR = 0x00001f80;
|
||||
u32 save_host_MXCSR = 0;
|
||||
s64 cycles_to_run = 0;
|
||||
s64 cycles_remaining = 0;
|
||||
bool halt_requested = false;
|
||||
|
||||
// Exclusive state
|
||||
static constexpr u32 RESERVATION_GRANULE_MASK = 0xFFFFFFF8;
|
||||
u32 exclusive_state = 0;
|
||||
u32 exclusive_address = 0;
|
||||
|
||||
static constexpr size_t RSBSize = 8; // MUST be a power of 2.
|
||||
static constexpr size_t RSBPtrMask = RSBSize - 1;
|
||||
u32 rsb_ptr = 0;
|
||||
std::array<u64, RSBSize> rsb_location_descriptors;
|
||||
std::array<u64, RSBSize> rsb_codeptrs;
|
||||
void ResetRSB();
|
||||
|
||||
u32 fpsr_exc = 0;
|
||||
u32 fpsr_qc = 0; // Dummy value
|
||||
u32 FPSCR_IDC = 0;
|
||||
u32 FPSCR_UFC = 0;
|
||||
u32 FPSCR_mode = 0;
|
||||
u32 FPSCR_nzcv = 0;
|
||||
u32 old_FPSCR = 0;
|
||||
u32 Fpscr() const;
|
||||
void SetFpscr(u32 FPSCR);
|
||||
|
||||
u64 GetUniqueHash() const;
|
||||
};
|
||||
|
||||
#ifdef _MSC_VER
|
||||
#pragma warning(pop)
|
||||
#endif
|
||||
|
||||
using CodePtr = const void*;
|
||||
|
||||
} // namespace Dynarmic::BackendX64
|
||||
1119
src/backend/x64/a64_emit_x64.cpp
Normal file
1119
src/backend/x64/a64_emit_x64.cpp
Normal file
File diff suppressed because it is too large
Load Diff
93
src/backend/x64/a64_emit_x64.h
Normal file
93
src/backend/x64/a64_emit_x64.h
Normal file
@@ -0,0 +1,93 @@
|
||||
/* This file is part of the dynarmic project.
|
||||
* Copyright (c) 2016 MerryMage
|
||||
* This software may be used and distributed according to the terms of the GNU
|
||||
* General Public License version 2 or any later version.
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <map>
|
||||
#include <tuple>
|
||||
|
||||
#include "backend/x64/a64_jitstate.h"
|
||||
#include "backend/x64/block_range_information.h"
|
||||
#include "backend/x64/emit_x64.h"
|
||||
#include "dynarmic/A64/a64.h"
|
||||
#include "dynarmic/A64/config.h"
|
||||
#include "frontend/A64/location_descriptor.h"
|
||||
#include "frontend/ir/terminal.h"
|
||||
|
||||
namespace Dynarmic::BackendX64 {
|
||||
|
||||
class RegAlloc;
|
||||
|
||||
struct A64EmitContext final : public EmitContext {
|
||||
A64EmitContext(const A64::UserConfig& conf, RegAlloc& reg_alloc, IR::Block& block);
|
||||
A64::LocationDescriptor Location() const;
|
||||
FP::RoundingMode FPSCR_RMode() const override;
|
||||
u32 FPCR() const override;
|
||||
bool FPSCR_FTZ() const override;
|
||||
bool FPSCR_DN() const override;
|
||||
bool AccurateNaN() const override;
|
||||
|
||||
const A64::UserConfig& conf;
|
||||
};
|
||||
|
||||
class A64EmitX64 final : public EmitX64 {
|
||||
public:
|
||||
A64EmitX64(BlockOfCode& code, A64::UserConfig conf, A64::Jit* jit_interface);
|
||||
~A64EmitX64() override;
|
||||
|
||||
/**
|
||||
* Emit host machine code for a basic block with intermediate representation `ir`.
|
||||
* @note ir is modified.
|
||||
*/
|
||||
BlockDescriptor Emit(IR::Block& ir);
|
||||
|
||||
void ClearCache() override;
|
||||
|
||||
void InvalidateCacheRanges(const boost::icl::interval_set<u64>& ranges);
|
||||
|
||||
protected:
|
||||
const A64::UserConfig conf;
|
||||
A64::Jit* jit_interface;
|
||||
BlockRangeInformation<u64> block_ranges;
|
||||
|
||||
void (*memory_read_128)();
|
||||
void (*memory_write_128)();
|
||||
void GenMemory128Accessors();
|
||||
|
||||
std::map<std::tuple<size_t, int, int>, void(*)()> read_fallbacks;
|
||||
std::map<std::tuple<size_t, int, int>, void(*)()> write_fallbacks;
|
||||
void GenFastmemFallbacks();
|
||||
|
||||
void EmitDirectPageTableMemoryRead(A64EmitContext& ctx, IR::Inst* inst, size_t bitsize);
|
||||
void EmitDirectPageTableMemoryWrite(A64EmitContext& ctx, IR::Inst* inst, size_t bitsize);
|
||||
void EmitExclusiveWrite(A64EmitContext& ctx, IR::Inst* inst, size_t bitsize);
|
||||
|
||||
// Microinstruction emitters
|
||||
#define OPCODE(...)
|
||||
#define A32OPC(...)
|
||||
#define A64OPC(name, type, ...) void EmitA64##name(A64EmitContext& ctx, IR::Inst* inst);
|
||||
#include "frontend/ir/opcodes.inc"
|
||||
#undef OPCODE
|
||||
#undef A32OPC
|
||||
#undef A64OPC
|
||||
|
||||
// Terminal instruction emitters
|
||||
void EmitTerminalImpl(IR::Term::Interpret terminal, IR::LocationDescriptor initial_location) override;
|
||||
void EmitTerminalImpl(IR::Term::ReturnToDispatch terminal, IR::LocationDescriptor initial_location) override;
|
||||
void EmitTerminalImpl(IR::Term::LinkBlock terminal, IR::LocationDescriptor initial_location) override;
|
||||
void EmitTerminalImpl(IR::Term::LinkBlockFast terminal, IR::LocationDescriptor initial_location) override;
|
||||
void EmitTerminalImpl(IR::Term::PopRSBHint terminal, IR::LocationDescriptor initial_location) override;
|
||||
void EmitTerminalImpl(IR::Term::If terminal, IR::LocationDescriptor initial_location) override;
|
||||
void EmitTerminalImpl(IR::Term::CheckBit terminal, IR::LocationDescriptor initial_location) override;
|
||||
void EmitTerminalImpl(IR::Term::CheckHalt terminal, IR::LocationDescriptor initial_location) override;
|
||||
|
||||
// Patching
|
||||
void EmitPatchJg(const IR::LocationDescriptor& target_desc, CodePtr target_code_ptr = nullptr) override;
|
||||
void EmitPatchJmp(const IR::LocationDescriptor& target_desc, CodePtr target_code_ptr = nullptr) override;
|
||||
void EmitPatchMovRcx(CodePtr target_code_ptr = nullptr) override;
|
||||
};
|
||||
|
||||
} // namespace Dynarmic::BackendX64
|
||||
65
src/backend/x64/a64_exclusive_monitor.cpp
Normal file
65
src/backend/x64/a64_exclusive_monitor.cpp
Normal file
@@ -0,0 +1,65 @@
|
||||
/* This file is part of the dynarmic project.
|
||||
* Copyright (c) 2018 MerryMage
|
||||
* This software may be used and distributed according to the terms of the GNU
|
||||
* General Public License version 2 or any later version.
|
||||
*/
|
||||
|
||||
#include <algorithm>
|
||||
|
||||
#include <dynarmic/A64/exclusive_monitor.h>
|
||||
#include "common/assert.h"
|
||||
|
||||
namespace Dynarmic {
|
||||
namespace A64 {
|
||||
|
||||
ExclusiveMonitor::ExclusiveMonitor(size_t processor_count) : exclusive_addresses(processor_count, INVALID_EXCLUSIVE_ADDRESS) {
|
||||
Unlock();
|
||||
}
|
||||
|
||||
size_t ExclusiveMonitor::GetProcessorCount() const {
|
||||
return exclusive_addresses.size();
|
||||
}
|
||||
|
||||
void ExclusiveMonitor::Mark(size_t processor_id, VAddr address, size_t size) {
|
||||
ASSERT(size <= 16);
|
||||
const VAddr masked_address = address & RESERVATION_GRANULE_MASK;
|
||||
|
||||
Lock();
|
||||
exclusive_addresses[processor_id] = masked_address;
|
||||
Unlock();
|
||||
}
|
||||
|
||||
void ExclusiveMonitor::Lock() {
|
||||
while (is_locked.test_and_set()) {}
|
||||
}
|
||||
|
||||
void ExclusiveMonitor::Unlock() {
|
||||
is_locked.clear();
|
||||
}
|
||||
|
||||
bool ExclusiveMonitor::CheckAndClear(size_t processor_id, VAddr address, size_t size) {
|
||||
ASSERT(size <= 16);
|
||||
const VAddr masked_address = address & RESERVATION_GRANULE_MASK;
|
||||
|
||||
Lock();
|
||||
if (exclusive_addresses[processor_id] != masked_address) {
|
||||
Unlock();
|
||||
return false;
|
||||
}
|
||||
|
||||
for (VAddr& other_address : exclusive_addresses) {
|
||||
if (other_address == masked_address) {
|
||||
other_address = INVALID_EXCLUSIVE_ADDRESS;
|
||||
}
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
void ExclusiveMonitor::Clear() {
|
||||
Lock();
|
||||
std::fill(exclusive_addresses.begin(), exclusive_addresses.end(), INVALID_EXCLUSIVE_ADDRESS);
|
||||
Unlock();
|
||||
}
|
||||
|
||||
} // namespace A64
|
||||
} // namespace Dynarmic
|
||||
358
src/backend/x64/a64_interface.cpp
Normal file
358
src/backend/x64/a64_interface.cpp
Normal file
@@ -0,0 +1,358 @@
|
||||
/* This file is part of the dynarmic project.
|
||||
* Copyright (c) 2016 MerryMage
|
||||
* This software may be used and distributed according to the terms of the GNU
|
||||
* General Public License version 2 or any later version.
|
||||
*/
|
||||
|
||||
#include <cstring>
|
||||
#include <memory>
|
||||
|
||||
#include <boost/icl/interval_set.hpp>
|
||||
|
||||
#include "backend/x64/a64_emit_x64.h"
|
||||
#include "backend/x64/a64_jitstate.h"
|
||||
#include "backend/x64/block_of_code.h"
|
||||
#include "backend/x64/devirtualize.h"
|
||||
#include "backend/x64/jitstate_info.h"
|
||||
#include "common/assert.h"
|
||||
#include "common/llvm_disassemble.h"
|
||||
#include "common/scope_exit.h"
|
||||
#include "dynarmic/A64/a64.h"
|
||||
#include "frontend/A64/translate/translate.h"
|
||||
#include "frontend/ir/basic_block.h"
|
||||
#include "ir_opt/passes.h"
|
||||
|
||||
namespace Dynarmic::A64 {
|
||||
|
||||
using namespace BackendX64;
|
||||
|
||||
static RunCodeCallbacks GenRunCodeCallbacks(A64::UserCallbacks* cb, CodePtr (*LookupBlock)(void* lookup_block_arg), void* arg) {
|
||||
return RunCodeCallbacks{
|
||||
std::make_unique<ArgCallback>(LookupBlock, reinterpret_cast<u64>(arg)),
|
||||
std::make_unique<ArgCallback>(Devirtualize<&A64::UserCallbacks::AddTicks>(cb)),
|
||||
std::make_unique<ArgCallback>(Devirtualize<&A64::UserCallbacks::GetTicksRemaining>(cb)),
|
||||
};
|
||||
}
|
||||
|
||||
struct Jit::Impl final {
|
||||
public:
|
||||
Impl(Jit* jit, UserConfig conf)
|
||||
: conf(conf)
|
||||
, block_of_code(GenRunCodeCallbacks(conf.callbacks, &GetCurrentBlockThunk, this), JitStateInfo{jit_state})
|
||||
, emitter(block_of_code, conf, jit)
|
||||
{
|
||||
ASSERT(conf.page_table_address_space_bits >= 12 && conf.page_table_address_space_bits <= 64);
|
||||
}
|
||||
|
||||
~Impl() = default;
|
||||
|
||||
void Run() {
|
||||
ASSERT(!is_executing);
|
||||
is_executing = true;
|
||||
SCOPE_EXIT { this->is_executing = false; };
|
||||
jit_state.halt_requested = false;
|
||||
|
||||
// TODO: Check code alignment
|
||||
|
||||
const u32 new_rsb_ptr = (jit_state.rsb_ptr - 1) & A64JitState::RSBPtrMask;
|
||||
if (jit_state.GetUniqueHash() == jit_state.rsb_location_descriptors[new_rsb_ptr]) {
|
||||
jit_state.rsb_ptr = new_rsb_ptr;
|
||||
block_of_code.RunCodeFrom(&jit_state, reinterpret_cast<CodePtr>(jit_state.rsb_codeptrs[new_rsb_ptr]));
|
||||
} else {
|
||||
block_of_code.RunCode(&jit_state);
|
||||
}
|
||||
|
||||
PerformRequestedCacheInvalidation();
|
||||
}
|
||||
|
||||
void ClearCache() {
|
||||
invalidate_entire_cache = true;
|
||||
RequestCacheInvalidation();
|
||||
}
|
||||
|
||||
void InvalidateCacheRange(u64 start_address, size_t length) {
|
||||
const auto end_address = static_cast<u64>(start_address + length - 1);
|
||||
const auto range = boost::icl::discrete_interval<u64>::closed(start_address, end_address);
|
||||
invalid_cache_ranges.add(range);
|
||||
RequestCacheInvalidation();
|
||||
}
|
||||
|
||||
void Reset() {
|
||||
ASSERT(!is_executing);
|
||||
jit_state = {};
|
||||
}
|
||||
|
||||
void HaltExecution() {
|
||||
jit_state.halt_requested = true;
|
||||
}
|
||||
|
||||
u64 GetSP() const {
|
||||
return jit_state.sp;
|
||||
}
|
||||
|
||||
void SetSP(u64 value) {
|
||||
jit_state.sp = value;
|
||||
}
|
||||
|
||||
u64 GetPC() const {
|
||||
return jit_state.pc;
|
||||
}
|
||||
|
||||
void SetPC(u64 value) {
|
||||
jit_state.pc = value;
|
||||
}
|
||||
|
||||
u64 GetRegister(size_t index) const {
|
||||
if (index == 31)
|
||||
return GetSP();
|
||||
return jit_state.reg.at(index);
|
||||
}
|
||||
|
||||
void SetRegister(size_t index, u64 value) {
|
||||
if (index == 31)
|
||||
return SetSP(value);
|
||||
jit_state.reg.at(index) = value;
|
||||
}
|
||||
|
||||
std::array<u64, 31> GetRegisters() const {
|
||||
return jit_state.reg;
|
||||
}
|
||||
|
||||
void SetRegisters(const std::array<u64, 31>& value) {
|
||||
jit_state.reg = value;
|
||||
}
|
||||
|
||||
Vector GetVector(size_t index) const {
|
||||
return {jit_state.vec.at(index * 2), jit_state.vec.at(index * 2 + 1)};
|
||||
}
|
||||
|
||||
void SetVector(size_t index, Vector value) {
|
||||
jit_state.vec.at(index * 2) = value[0];
|
||||
jit_state.vec.at(index * 2 + 1) = value[1];
|
||||
}
|
||||
|
||||
std::array<Vector, 32> GetVectors() const {
|
||||
std::array<Vector, 32> ret;
|
||||
static_assert(sizeof(ret) == sizeof(jit_state.vec));
|
||||
std::memcpy(ret.data(), jit_state.vec.data(), sizeof(jit_state.vec));
|
||||
return ret;
|
||||
}
|
||||
|
||||
void SetVectors(const std::array<Vector, 32>& value) {
|
||||
static_assert(sizeof(value) == sizeof(jit_state.vec));
|
||||
std::memcpy(jit_state.vec.data(), value.data(), sizeof(jit_state.vec));
|
||||
}
|
||||
|
||||
u32 GetFpcr() const {
|
||||
return jit_state.GetFpcr();
|
||||
}
|
||||
|
||||
void SetFpcr(u32 value) {
|
||||
jit_state.SetFpcr(value);
|
||||
}
|
||||
|
||||
u32 GetFpsr() const {
|
||||
return jit_state.GetFpsr();
|
||||
}
|
||||
|
||||
void SetFpsr(u32 value) {
|
||||
jit_state.SetFpsr(value);
|
||||
}
|
||||
|
||||
u32 GetPstate() const {
|
||||
return jit_state.GetPstate();
|
||||
}
|
||||
|
||||
void SetPstate(u32 value) {
|
||||
jit_state.SetPstate(value);
|
||||
}
|
||||
|
||||
void ClearExclusiveState() {
|
||||
jit_state.exclusive_state = 0;
|
||||
}
|
||||
|
||||
bool IsExecuting() const {
|
||||
return is_executing;
|
||||
}
|
||||
|
||||
std::string Disassemble() const {
|
||||
return Common::DisassembleX64(block_of_code.GetCodeBegin(), block_of_code.getCurr());
|
||||
}
|
||||
|
||||
private:
|
||||
static CodePtr GetCurrentBlockThunk(void* thisptr) {
|
||||
Jit::Impl* this_ = static_cast<Jit::Impl*>(thisptr);
|
||||
return this_->GetCurrentBlock();
|
||||
}
|
||||
|
||||
CodePtr GetCurrentBlock() {
|
||||
IR::LocationDescriptor current_location{jit_state.GetUniqueHash()};
|
||||
|
||||
if (auto block = emitter.GetBasicBlock(current_location))
|
||||
return block->entrypoint;
|
||||
|
||||
constexpr size_t MINIMUM_REMAINING_CODESIZE = 1 * 1024 * 1024;
|
||||
if (block_of_code.SpaceRemaining() < MINIMUM_REMAINING_CODESIZE) {
|
||||
// Immediately evacuate cache
|
||||
invalidate_entire_cache = true;
|
||||
PerformRequestedCacheInvalidation();
|
||||
}
|
||||
|
||||
// JIT Compile
|
||||
const auto get_code = [this](u64 vaddr) { return conf.callbacks->MemoryReadCode(vaddr); };
|
||||
IR::Block ir_block = A64::Translate(A64::LocationDescriptor{current_location}, get_code, {conf.define_unpredictable_behaviour});
|
||||
Optimization::A64CallbackConfigPass(ir_block, conf);
|
||||
Optimization::A64GetSetElimination(ir_block);
|
||||
Optimization::DeadCodeElimination(ir_block);
|
||||
Optimization::A64MergeInterpretBlocksPass(ir_block, conf.callbacks);
|
||||
// printf("%s\n", IR::DumpBlock(ir_block).c_str());
|
||||
Optimization::VerificationPass(ir_block);
|
||||
return emitter.Emit(ir_block).entrypoint;
|
||||
}
|
||||
|
||||
void RequestCacheInvalidation() {
|
||||
if (is_executing) {
|
||||
jit_state.halt_requested = true;
|
||||
return;
|
||||
}
|
||||
|
||||
PerformRequestedCacheInvalidation();
|
||||
}
|
||||
|
||||
void PerformRequestedCacheInvalidation() {
|
||||
if (!invalidate_entire_cache && invalid_cache_ranges.empty()) {
|
||||
return;
|
||||
}
|
||||
|
||||
jit_state.ResetRSB();
|
||||
if (invalidate_entire_cache) {
|
||||
block_of_code.ClearCache();
|
||||
emitter.ClearCache();
|
||||
} else {
|
||||
emitter.InvalidateCacheRanges(invalid_cache_ranges);
|
||||
}
|
||||
invalid_cache_ranges.clear();
|
||||
invalidate_entire_cache = false;
|
||||
}
|
||||
|
||||
bool is_executing = false;
|
||||
|
||||
UserConfig conf;
|
||||
A64JitState jit_state;
|
||||
BlockOfCode block_of_code;
|
||||
A64EmitX64 emitter;
|
||||
|
||||
bool invalidate_entire_cache = false;
|
||||
boost::icl::interval_set<u64> invalid_cache_ranges;
|
||||
};
|
||||
|
||||
Jit::Jit(UserConfig conf)
|
||||
: impl(std::make_unique<Jit::Impl>(this, conf)) {}
|
||||
|
||||
Jit::~Jit() = default;
|
||||
|
||||
void Jit::Run() {
|
||||
impl->Run();
|
||||
}
|
||||
|
||||
void Jit::ClearCache() {
|
||||
impl->ClearCache();
|
||||
}
|
||||
|
||||
void Jit::InvalidateCacheRange(u64 start_address, size_t length) {
|
||||
impl->InvalidateCacheRange(start_address, length);
|
||||
}
|
||||
|
||||
void Jit::Reset() {
|
||||
impl->Reset();
|
||||
}
|
||||
|
||||
void Jit::HaltExecution() {
|
||||
impl->HaltExecution();
|
||||
}
|
||||
|
||||
u64 Jit::GetSP() const {
|
||||
return impl->GetSP();
|
||||
}
|
||||
|
||||
void Jit::SetSP(u64 value) {
|
||||
impl->SetSP(value);
|
||||
}
|
||||
|
||||
u64 Jit::GetPC() const {
|
||||
return impl->GetPC();
|
||||
}
|
||||
|
||||
void Jit::SetPC(u64 value) {
|
||||
impl->SetPC(value);
|
||||
}
|
||||
|
||||
u64 Jit::GetRegister(size_t index) const {
|
||||
return impl->GetRegister(index);
|
||||
}
|
||||
|
||||
void Jit::SetRegister(size_t index, u64 value) {
|
||||
impl->SetRegister(index, value);
|
||||
}
|
||||
|
||||
std::array<u64, 31> Jit::GetRegisters() const {
|
||||
return impl->GetRegisters();
|
||||
}
|
||||
|
||||
void Jit::SetRegisters(const std::array<u64, 31>& value) {
|
||||
impl->SetRegisters(value);
|
||||
}
|
||||
|
||||
Vector Jit::GetVector(size_t index) const {
|
||||
return impl->GetVector(index);
|
||||
}
|
||||
|
||||
void Jit::SetVector(size_t index, Vector value) {
|
||||
impl->SetVector(index, value);
|
||||
}
|
||||
|
||||
std::array<Vector, 32> Jit::GetVectors() const {
|
||||
return impl->GetVectors();
|
||||
}
|
||||
|
||||
void Jit::SetVectors(const std::array<Vector, 32>& value) {
|
||||
impl->SetVectors(value);
|
||||
}
|
||||
|
||||
u32 Jit::GetFpcr() const {
|
||||
return impl->GetFpcr();
|
||||
}
|
||||
|
||||
void Jit::SetFpcr(u32 value) {
|
||||
impl->SetFpcr(value);
|
||||
}
|
||||
|
||||
u32 Jit::GetFpsr() const {
|
||||
return impl->GetFpsr();
|
||||
}
|
||||
|
||||
void Jit::SetFpsr(u32 value) {
|
||||
impl->SetFpsr(value);
|
||||
}
|
||||
|
||||
u32 Jit::GetPstate() const {
|
||||
return impl->GetPstate();
|
||||
}
|
||||
|
||||
void Jit::SetPstate(u32 value) {
|
||||
impl->SetPstate(value);
|
||||
}
|
||||
|
||||
void Jit::ClearExclusiveState() {
|
||||
impl->ClearExclusiveState();
|
||||
}
|
||||
|
||||
bool Jit::IsExecuting() const {
|
||||
return impl->IsExecuting();
|
||||
}
|
||||
|
||||
std::string Jit::Disassemble() const {
|
||||
return impl->Disassemble();
|
||||
}
|
||||
|
||||
} // namespace Dynarmic::A64
|
||||
121
src/backend/x64/a64_jitstate.cpp
Normal file
121
src/backend/x64/a64_jitstate.cpp
Normal file
@@ -0,0 +1,121 @@
|
||||
/* This file is part of the dynarmic project.
|
||||
* Copyright (c) 2016 MerryMage
|
||||
* This software may be used and distributed according to the terms of the GNU
|
||||
* General Public License version 2 or any later version.
|
||||
*/
|
||||
|
||||
#include "backend/x64/a64_jitstate.h"
|
||||
#include "common/bit_util.h"
|
||||
#include "frontend/A64/location_descriptor.h"
|
||||
|
||||
namespace Dynarmic::BackendX64 {
|
||||
|
||||
u64 A64JitState::GetUniqueHash() const {
|
||||
u64 fpcr_u64 = static_cast<u64>(fpcr & A64::LocationDescriptor::FPCR_MASK) << 37;
|
||||
u64 pc_u64 = pc & A64::LocationDescriptor::PC_MASK;
|
||||
return pc_u64 | fpcr_u64;
|
||||
}
|
||||
|
||||
/**
|
||||
* Comparing MXCSR and FPCR
|
||||
* ========================
|
||||
*
|
||||
* SSE MSCSR exception masks
|
||||
* -------------------------
|
||||
* PM bit 12 Precision Mask
|
||||
* UM bit 11 Underflow Mask
|
||||
* OM bit 10 Overflow Mask
|
||||
* ZM bit 9 Divide By Zero Mask
|
||||
* DM bit 8 Denormal Mask
|
||||
* IM bit 7 Invalid Operation Mask
|
||||
*
|
||||
* A64 FPCR exception trap enables
|
||||
* -------------------------------
|
||||
* IDE bit 15 Input Denormal exception trap enable
|
||||
* IXE bit 12 Inexact exception trap enable
|
||||
* UFE bit 11 Underflow exception trap enable
|
||||
* OFE bit 10 Overflow exception trap enable
|
||||
* DZE bit 9 Division by Zero exception trap enable
|
||||
* IOE bit 8 Invalid Operation exception trap enable
|
||||
*
|
||||
* SSE MXCSR mode bits
|
||||
* -------------------
|
||||
* FZ bit 15 Flush To Zero
|
||||
* DAZ bit 6 Denormals Are Zero
|
||||
* RN bits 13-14 Round to {0 = Nearest, 1 = Negative, 2 = Positive, 3 = Zero}
|
||||
*
|
||||
* A64 FPCR mode bits
|
||||
* ------------------
|
||||
* AHP bit 26 Alternative half-precision
|
||||
* DN bit 25 Default NaN
|
||||
* FZ bit 24 Flush to Zero
|
||||
* RMode bits 22-23 Round to {0 = Nearest, 1 = Positive, 2 = Negative, 3 = Zero}
|
||||
* FZ16 bit 19 Flush to Zero for half-precision
|
||||
*/
|
||||
|
||||
constexpr u32 FPCR_MASK = 0x07C89F00;
|
||||
|
||||
u32 A64JitState::GetFpcr() const {
|
||||
return fpcr;
|
||||
}
|
||||
|
||||
void A64JitState::SetFpcr(u32 value) {
|
||||
fpcr = value & FPCR_MASK;
|
||||
|
||||
guest_MXCSR &= 0x0000003D;
|
||||
guest_MXCSR |= 0x00001f80; // Mask all exceptions
|
||||
|
||||
// RMode
|
||||
const std::array<u32, 4> MXCSR_RMode {0x0, 0x4000, 0x2000, 0x6000};
|
||||
guest_MXCSR |= MXCSR_RMode[(value >> 22) & 0x3];
|
||||
|
||||
if (Common::Bit<24>(value)) {
|
||||
guest_MXCSR |= (1 << 15); // SSE Flush to Zero
|
||||
guest_MXCSR |= (1 << 6); // SSE Denormals are Zero
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Comparing MXCSR and FPSR
|
||||
* ========================
|
||||
*
|
||||
* SSE MXCSR exception flags
|
||||
* -------------------------
|
||||
* PE bit 5 Precision Flag
|
||||
* UE bit 4 Underflow Flag
|
||||
* OE bit 3 Overflow Flag
|
||||
* ZE bit 2 Divide By Zero Flag
|
||||
* DE bit 1 Denormal Flag // Appears to only be set when MXCSR.DAZ = 0
|
||||
* IE bit 0 Invalid Operation Flag
|
||||
*
|
||||
* A64 FPSR cumulative exception bits
|
||||
* ----------------------------------
|
||||
* QC bit 27 Cumulative saturation bit
|
||||
* IDC bit 7 Input Denormal cumulative exception bit // Only ever set when FPCR.FTZ = 1
|
||||
* IXC bit 4 Inexact cumulative exception bit
|
||||
* UFC bit 3 Underflow cumulative exception bit
|
||||
* OFC bit 2 Overflow cumulative exception bit
|
||||
* DZC bit 1 Division by Zero cumulative exception bit
|
||||
* IOC bit 0 Invalid Operation cumulative exception bit
|
||||
*/
|
||||
|
||||
u32 A64JitState::GetFpsr() const {
|
||||
u32 fpsr = 0;
|
||||
fpsr |= (guest_MXCSR & 0b0000000000001); // IOC = IE
|
||||
fpsr |= (guest_MXCSR & 0b0000000111100) >> 1; // IXC, UFC, OFC, DZC = PE, UE, OE, ZE
|
||||
fpsr |= FPSCR_IDC;
|
||||
fpsr |= FPSCR_UFC;
|
||||
fpsr |= fpsr_exc;
|
||||
fpsr |= (fpsr_qc & 1) << 27;
|
||||
return fpsr;
|
||||
}
|
||||
|
||||
void A64JitState::SetFpsr(u32 value) {
|
||||
guest_MXCSR &= ~0x0000003D;
|
||||
FPSCR_IDC = 0;
|
||||
FPSCR_UFC = 0;
|
||||
fpsr_qc = (value >> 27) & 1;
|
||||
fpsr_exc = value & 0x9F;
|
||||
}
|
||||
|
||||
} // namespace Dynarmic::BackendX64
|
||||
93
src/backend/x64/a64_jitstate.h
Normal file
93
src/backend/x64/a64_jitstate.h
Normal file
@@ -0,0 +1,93 @@
|
||||
/* This file is part of the dynarmic project.
|
||||
* Copyright (c) 2016 MerryMage
|
||||
* This software may be used and distributed according to the terms of the GNU
|
||||
* General Public License version 2 or any later version.
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <array>
|
||||
|
||||
#include <xbyak.h>
|
||||
|
||||
#include "common/common_types.h"
|
||||
|
||||
namespace Dynarmic::BackendX64 {
|
||||
|
||||
class BlockOfCode;
|
||||
|
||||
#ifdef _MSC_VER
|
||||
#pragma warning(push)
|
||||
#pragma warning(disable:4324) // Structure was padded due to alignment specifier
|
||||
#endif
|
||||
|
||||
struct A64JitState {
|
||||
using ProgramCounterType = u64;
|
||||
|
||||
A64JitState() { ResetRSB(); }
|
||||
|
||||
std::array<u64, 31> reg{};
|
||||
u64 sp = 0;
|
||||
u64 pc = 0;
|
||||
|
||||
u32 CPSR_nzcv = 0;
|
||||
|
||||
u32 GetPstate() const {
|
||||
return CPSR_nzcv;
|
||||
}
|
||||
void SetPstate(u32 new_pstate) {
|
||||
CPSR_nzcv = new_pstate & 0xF0000000;
|
||||
}
|
||||
|
||||
alignas(16) std::array<u64, 64> vec{}; // Extension registers.
|
||||
|
||||
static constexpr size_t SpillCount = 64;
|
||||
alignas(16) std::array<std::array<u64, 2>, SpillCount> spill{}; // Spill.
|
||||
static Xbyak::Address GetSpillLocationFromIndex(size_t i) {
|
||||
using namespace Xbyak::util;
|
||||
return xword[r15 + offsetof(A64JitState, spill) + i * sizeof(u64) * 2];
|
||||
}
|
||||
|
||||
// For internal use (See: BlockOfCode::RunCode)
|
||||
u32 guest_MXCSR = 0x00001f80;
|
||||
u32 save_host_MXCSR = 0;
|
||||
s64 cycles_to_run = 0;
|
||||
s64 cycles_remaining = 0;
|
||||
bool halt_requested = false;
|
||||
bool check_bit = false;
|
||||
|
||||
// Exclusive state
|
||||
static constexpr u64 RESERVATION_GRANULE_MASK = 0xFFFF'FFFF'FFFF'FFF0ull;
|
||||
u8 exclusive_state = 0;
|
||||
u64 exclusive_address = 0;
|
||||
|
||||
static constexpr size_t RSBSize = 8; // MUST be a power of 2.
|
||||
static constexpr size_t RSBPtrMask = RSBSize - 1;
|
||||
u32 rsb_ptr = 0;
|
||||
std::array<u64, RSBSize> rsb_location_descriptors;
|
||||
std::array<u64, RSBSize> rsb_codeptrs;
|
||||
void ResetRSB() {
|
||||
rsb_location_descriptors.fill(0xFFFFFFFFFFFFFFFFull);
|
||||
rsb_codeptrs.fill(0);
|
||||
}
|
||||
|
||||
u32 fpsr_exc = 0;
|
||||
u32 fpsr_qc = 0;
|
||||
u32 FPSCR_IDC = 0;
|
||||
u32 FPSCR_UFC = 0;
|
||||
u32 fpcr = 0;
|
||||
u32 GetFpcr() const;
|
||||
u32 GetFpsr() const;
|
||||
void SetFpcr(u32 new_fpcr);
|
||||
void SetFpsr(u32 new_fpcr);
|
||||
|
||||
u64 GetUniqueHash() const;
|
||||
};
|
||||
|
||||
#ifdef _MSC_VER
|
||||
#pragma warning(pop)
|
||||
#endif
|
||||
|
||||
using CodePtr = const void*;
|
||||
|
||||
} // namespace Dynarmic::BackendX64
|
||||
144
src/backend/x64/abi.cpp
Normal file
144
src/backend/x64/abi.cpp
Normal file
@@ -0,0 +1,144 @@
|
||||
// Copyright (C) 2003 Dolphin Project.
|
||||
|
||||
// This program is free software: you can redistribute it and/or modify
|
||||
// it under the terms of the GNU General Public License as published by
|
||||
// the Free Software Foundation, version 2.0 or later versions.
|
||||
|
||||
// This program is distributed in the hope that it will be useful,
|
||||
// but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
// GNU General Public License 2.0 for more details.
|
||||
|
||||
// A copy of the GPL 2.0 should have been included with the program.
|
||||
// If not, see http://www.gnu.org/licenses/
|
||||
|
||||
// 24th August 2016: This code was modified for Dynarmic.
|
||||
|
||||
#include <algorithm>
|
||||
#include <vector>
|
||||
|
||||
#include <xbyak.h>
|
||||
|
||||
#include "backend/x64/abi.h"
|
||||
#include "common/common_types.h"
|
||||
#include "common/iterator_util.h"
|
||||
|
||||
namespace Dynarmic::BackendX64 {
|
||||
|
||||
constexpr size_t GPR_SIZE = 8;
|
||||
constexpr size_t XMM_SIZE = 16;
|
||||
|
||||
struct FrameInfo {
|
||||
size_t stack_subtraction = 0;
|
||||
size_t xmm_offset = 0;
|
||||
};
|
||||
|
||||
static FrameInfo CalculateFrameInfo(size_t num_gprs, size_t num_xmms, size_t frame_size) {
|
||||
FrameInfo frame_info = {};
|
||||
|
||||
size_t rsp_alignment = 8; // We are always 8-byte aligned initially
|
||||
rsp_alignment -= num_gprs * GPR_SIZE;
|
||||
|
||||
if (num_xmms > 0) {
|
||||
frame_info.stack_subtraction = rsp_alignment & 0xF;
|
||||
frame_info.stack_subtraction += num_xmms * XMM_SIZE;
|
||||
}
|
||||
|
||||
size_t xmm_base = frame_info.stack_subtraction;
|
||||
|
||||
frame_info.stack_subtraction += frame_size;
|
||||
frame_info.stack_subtraction += ABI_SHADOW_SPACE;
|
||||
|
||||
rsp_alignment -= frame_info.stack_subtraction;
|
||||
frame_info.stack_subtraction += rsp_alignment & 0xF;
|
||||
|
||||
frame_info.xmm_offset = frame_info.stack_subtraction - xmm_base;
|
||||
|
||||
return frame_info;
|
||||
}
|
||||
|
||||
template<typename RegisterArrayT>
|
||||
void ABI_PushRegistersAndAdjustStack(Xbyak::CodeGenerator& code, size_t frame_size, const RegisterArrayT& regs) {
|
||||
using namespace Xbyak::util;
|
||||
|
||||
const size_t num_gprs = std::count_if(regs.begin(), regs.end(), HostLocIsGPR);
|
||||
const size_t num_xmms = std::count_if(regs.begin(), regs.end(), HostLocIsXMM);
|
||||
|
||||
FrameInfo frame_info = CalculateFrameInfo(num_gprs, num_xmms, frame_size);
|
||||
|
||||
for (HostLoc gpr : regs) {
|
||||
if (HostLocIsGPR(gpr)) {
|
||||
code.push(HostLocToReg64(gpr));
|
||||
}
|
||||
}
|
||||
|
||||
if (frame_info.stack_subtraction != 0) {
|
||||
code.sub(rsp, u32(frame_info.stack_subtraction));
|
||||
}
|
||||
|
||||
size_t xmm_offset = frame_info.xmm_offset;
|
||||
for (HostLoc xmm : regs) {
|
||||
if (HostLocIsXMM(xmm)) {
|
||||
code.movaps(code.xword[rsp + xmm_offset], HostLocToXmm(xmm));
|
||||
xmm_offset += XMM_SIZE;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
template<typename RegisterArrayT>
|
||||
void ABI_PopRegistersAndAdjustStack(Xbyak::CodeGenerator& code, size_t frame_size, const RegisterArrayT& regs) {
|
||||
using namespace Xbyak::util;
|
||||
|
||||
const size_t num_gprs = std::count_if(regs.begin(), regs.end(), HostLocIsGPR);
|
||||
const size_t num_xmms = std::count_if(regs.begin(), regs.end(), HostLocIsXMM);
|
||||
|
||||
FrameInfo frame_info = CalculateFrameInfo(num_gprs, num_xmms, frame_size);
|
||||
|
||||
size_t xmm_offset = frame_info.xmm_offset;
|
||||
for (HostLoc xmm : regs) {
|
||||
if (HostLocIsXMM(xmm)) {
|
||||
code.movaps(HostLocToXmm(xmm), code.xword[rsp + xmm_offset]);
|
||||
xmm_offset += XMM_SIZE;
|
||||
}
|
||||
}
|
||||
|
||||
if (frame_info.stack_subtraction != 0) {
|
||||
code.add(rsp, u32(frame_info.stack_subtraction));
|
||||
}
|
||||
|
||||
for (HostLoc gpr : Common::Reverse(regs)) {
|
||||
if (HostLocIsGPR(gpr)) {
|
||||
code.pop(HostLocToReg64(gpr));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void ABI_PushCalleeSaveRegistersAndAdjustStack(Xbyak::CodeGenerator& code, size_t frame_size) {
|
||||
ABI_PushRegistersAndAdjustStack(code, frame_size, ABI_ALL_CALLEE_SAVE);
|
||||
}
|
||||
|
||||
void ABI_PopCalleeSaveRegistersAndAdjustStack(Xbyak::CodeGenerator& code, size_t frame_size) {
|
||||
ABI_PopRegistersAndAdjustStack(code, frame_size, ABI_ALL_CALLEE_SAVE);
|
||||
}
|
||||
|
||||
void ABI_PushCallerSaveRegistersAndAdjustStack(Xbyak::CodeGenerator& code, size_t frame_size) {
|
||||
ABI_PushRegistersAndAdjustStack(code, frame_size, ABI_ALL_CALLER_SAVE);
|
||||
}
|
||||
|
||||
void ABI_PopCallerSaveRegistersAndAdjustStack(Xbyak::CodeGenerator& code, size_t frame_size) {
|
||||
ABI_PopRegistersAndAdjustStack(code, frame_size, ABI_ALL_CALLER_SAVE);
|
||||
}
|
||||
|
||||
void ABI_PushCallerSaveRegistersAndAdjustStackExcept(Xbyak::CodeGenerator& code, HostLoc exception) {
|
||||
std::vector<HostLoc> regs;
|
||||
std::remove_copy(ABI_ALL_CALLER_SAVE.begin(), ABI_ALL_CALLER_SAVE.end(), std::back_inserter(regs), exception);
|
||||
ABI_PushRegistersAndAdjustStack(code, 0, regs);
|
||||
}
|
||||
|
||||
void ABI_PopCallerSaveRegistersAndAdjustStackExcept(Xbyak::CodeGenerator& code, HostLoc exception) {
|
||||
std::vector<HostLoc> regs;
|
||||
std::remove_copy(ABI_ALL_CALLER_SAVE.begin(), ABI_ALL_CALLER_SAVE.end(), std::back_inserter(regs), exception);
|
||||
ABI_PopRegistersAndAdjustStack(code, 0, regs);
|
||||
}
|
||||
|
||||
} // namespace Dynarmic::BackendX64
|
||||
122
src/backend/x64/abi.h
Normal file
122
src/backend/x64/abi.h
Normal file
@@ -0,0 +1,122 @@
|
||||
/* This file is part of the dynarmic project.
|
||||
* Copyright (c) 2016 MerryMage
|
||||
* This software may be used and distributed according to the terms of the GNU
|
||||
* General Public License version 2 or any later version.
|
||||
*/
|
||||
#pragma once
|
||||
|
||||
#include <array>
|
||||
|
||||
#include "backend/x64/hostloc.h"
|
||||
|
||||
namespace Dynarmic::BackendX64 {
|
||||
|
||||
#ifdef _WIN32
|
||||
|
||||
constexpr HostLoc ABI_RETURN = HostLoc::RAX;
|
||||
|
||||
constexpr HostLoc ABI_PARAM1 = HostLoc::RCX;
|
||||
constexpr HostLoc ABI_PARAM2 = HostLoc::RDX;
|
||||
constexpr HostLoc ABI_PARAM3 = HostLoc::R8;
|
||||
constexpr HostLoc ABI_PARAM4 = HostLoc::R9;
|
||||
|
||||
constexpr std::array<HostLoc, 13> ABI_ALL_CALLER_SAVE = {
|
||||
HostLoc::RAX,
|
||||
HostLoc::RCX,
|
||||
HostLoc::RDX,
|
||||
HostLoc::R8,
|
||||
HostLoc::R9,
|
||||
HostLoc::R10,
|
||||
HostLoc::R11,
|
||||
HostLoc::XMM0,
|
||||
HostLoc::XMM1,
|
||||
HostLoc::XMM2,
|
||||
HostLoc::XMM3,
|
||||
HostLoc::XMM4,
|
||||
HostLoc::XMM5,
|
||||
};
|
||||
|
||||
constexpr std::array<HostLoc, 18> ABI_ALL_CALLEE_SAVE = {
|
||||
HostLoc::RBX,
|
||||
HostLoc::RSI,
|
||||
HostLoc::RDI,
|
||||
HostLoc::RBP,
|
||||
HostLoc::R12,
|
||||
HostLoc::R13,
|
||||
HostLoc::R14,
|
||||
HostLoc::R15,
|
||||
HostLoc::XMM6,
|
||||
HostLoc::XMM7,
|
||||
HostLoc::XMM8,
|
||||
HostLoc::XMM9,
|
||||
HostLoc::XMM10,
|
||||
HostLoc::XMM11,
|
||||
HostLoc::XMM12,
|
||||
HostLoc::XMM13,
|
||||
HostLoc::XMM14,
|
||||
HostLoc::XMM15,
|
||||
};
|
||||
|
||||
constexpr size_t ABI_SHADOW_SPACE = 32; // bytes
|
||||
|
||||
#else
|
||||
|
||||
constexpr HostLoc ABI_RETURN = HostLoc::RAX;
|
||||
|
||||
constexpr HostLoc ABI_PARAM1 = HostLoc::RDI;
|
||||
constexpr HostLoc ABI_PARAM2 = HostLoc::RSI;
|
||||
constexpr HostLoc ABI_PARAM3 = HostLoc::RDX;
|
||||
constexpr HostLoc ABI_PARAM4 = HostLoc::RCX;
|
||||
|
||||
constexpr std::array<HostLoc, 25> ABI_ALL_CALLER_SAVE = {
|
||||
HostLoc::RAX,
|
||||
HostLoc::RCX,
|
||||
HostLoc::RDX,
|
||||
HostLoc::RDI,
|
||||
HostLoc::RSI,
|
||||
HostLoc::R8,
|
||||
HostLoc::R9,
|
||||
HostLoc::R10,
|
||||
HostLoc::R11,
|
||||
HostLoc::XMM0,
|
||||
HostLoc::XMM1,
|
||||
HostLoc::XMM2,
|
||||
HostLoc::XMM3,
|
||||
HostLoc::XMM4,
|
||||
HostLoc::XMM5,
|
||||
HostLoc::XMM6,
|
||||
HostLoc::XMM7,
|
||||
HostLoc::XMM8,
|
||||
HostLoc::XMM9,
|
||||
HostLoc::XMM10,
|
||||
HostLoc::XMM11,
|
||||
HostLoc::XMM12,
|
||||
HostLoc::XMM13,
|
||||
HostLoc::XMM14,
|
||||
HostLoc::XMM15,
|
||||
};
|
||||
|
||||
constexpr std::array<HostLoc, 6> ABI_ALL_CALLEE_SAVE = {
|
||||
HostLoc::RBX,
|
||||
HostLoc::RBP,
|
||||
HostLoc::R12,
|
||||
HostLoc::R13,
|
||||
HostLoc::R14,
|
||||
HostLoc::R15,
|
||||
};
|
||||
|
||||
constexpr size_t ABI_SHADOW_SPACE = 0; // bytes
|
||||
|
||||
#endif
|
||||
|
||||
static_assert(ABI_ALL_CALLER_SAVE.size() + ABI_ALL_CALLEE_SAVE.size() == 31, "Invalid total number of registers");
|
||||
|
||||
void ABI_PushCalleeSaveRegistersAndAdjustStack(Xbyak::CodeGenerator& code, size_t frame_size = 0);
|
||||
void ABI_PopCalleeSaveRegistersAndAdjustStack(Xbyak::CodeGenerator& code, size_t frame_size = 0);
|
||||
void ABI_PushCallerSaveRegistersAndAdjustStack(Xbyak::CodeGenerator& code, size_t frame_size = 0);
|
||||
void ABI_PopCallerSaveRegistersAndAdjustStack(Xbyak::CodeGenerator& code, size_t frame_size = 0);
|
||||
|
||||
void ABI_PushCallerSaveRegistersAndAdjustStackExcept(Xbyak::CodeGenerator& code, HostLoc exception);
|
||||
void ABI_PopCallerSaveRegistersAndAdjustStackExcept(Xbyak::CodeGenerator& code, HostLoc exception);
|
||||
|
||||
} // namespace Dynarmic::BackendX64
|
||||
266
src/backend/x64/block_of_code.cpp
Normal file
266
src/backend/x64/block_of_code.cpp
Normal file
@@ -0,0 +1,266 @@
|
||||
/* This file is part of the dynarmic project.
|
||||
* Copyright (c) 2016 MerryMage
|
||||
* This software may be used and distributed according to the terms of the GNU
|
||||
* General Public License version 2 or any later version.
|
||||
*/
|
||||
|
||||
#include <array>
|
||||
#include <cstring>
|
||||
#include <limits>
|
||||
|
||||
#include <xbyak.h>
|
||||
|
||||
#include "backend/x64/a32_jitstate.h"
|
||||
#include "backend/x64/abi.h"
|
||||
#include "backend/x64/block_of_code.h"
|
||||
#include "common/assert.h"
|
||||
|
||||
namespace Dynarmic::BackendX64 {
|
||||
|
||||
#ifdef _WIN32
|
||||
const Xbyak::Reg64 BlockOfCode::ABI_RETURN = Xbyak::util::rax;
|
||||
const Xbyak::Reg64 BlockOfCode::ABI_PARAM1 = Xbyak::util::rcx;
|
||||
const Xbyak::Reg64 BlockOfCode::ABI_PARAM2 = Xbyak::util::rdx;
|
||||
const Xbyak::Reg64 BlockOfCode::ABI_PARAM3 = Xbyak::util::r8;
|
||||
const Xbyak::Reg64 BlockOfCode::ABI_PARAM4 = Xbyak::util::r9;
|
||||
const std::array<Xbyak::Reg64, 4> BlockOfCode::ABI_PARAMS = {BlockOfCode::ABI_PARAM1, BlockOfCode::ABI_PARAM2, BlockOfCode::ABI_PARAM3, BlockOfCode::ABI_PARAM4};
|
||||
#else
|
||||
const Xbyak::Reg64 BlockOfCode::ABI_RETURN = Xbyak::util::rax;
|
||||
const Xbyak::Reg64 BlockOfCode::ABI_RETURN2 = Xbyak::util::rdx;
|
||||
const Xbyak::Reg64 BlockOfCode::ABI_PARAM1 = Xbyak::util::rdi;
|
||||
const Xbyak::Reg64 BlockOfCode::ABI_PARAM2 = Xbyak::util::rsi;
|
||||
const Xbyak::Reg64 BlockOfCode::ABI_PARAM3 = Xbyak::util::rdx;
|
||||
const Xbyak::Reg64 BlockOfCode::ABI_PARAM4 = Xbyak::util::rcx;
|
||||
const Xbyak::Reg64 BlockOfCode::ABI_PARAM5 = Xbyak::util::r8;
|
||||
const Xbyak::Reg64 BlockOfCode::ABI_PARAM6 = Xbyak::util::r9;
|
||||
const std::array<Xbyak::Reg64, 6> BlockOfCode::ABI_PARAMS = {BlockOfCode::ABI_PARAM1, BlockOfCode::ABI_PARAM2, BlockOfCode::ABI_PARAM3, BlockOfCode::ABI_PARAM4, BlockOfCode::ABI_PARAM5, BlockOfCode::ABI_PARAM6};
|
||||
#endif
|
||||
|
||||
constexpr size_t TOTAL_CODE_SIZE = 128 * 1024 * 1024;
|
||||
constexpr size_t FAR_CODE_OFFSET = 100 * 1024 * 1024;
|
||||
constexpr size_t CONSTANT_POOL_SIZE = 2 * 1024 * 1024;
|
||||
|
||||
BlockOfCode::BlockOfCode(RunCodeCallbacks cb, JitStateInfo jsi)
|
||||
: Xbyak::CodeGenerator(TOTAL_CODE_SIZE)
|
||||
, cb(std::move(cb))
|
||||
, jsi(jsi)
|
||||
, constant_pool(*this, CONSTANT_POOL_SIZE)
|
||||
{
|
||||
GenRunCode();
|
||||
exception_handler.Register(*this);
|
||||
}
|
||||
|
||||
void BlockOfCode::PreludeComplete() {
|
||||
prelude_complete = true;
|
||||
near_code_begin = getCurr();
|
||||
far_code_begin = getCurr() + FAR_CODE_OFFSET;
|
||||
ClearCache();
|
||||
}
|
||||
|
||||
void BlockOfCode::ClearCache() {
|
||||
ASSERT(prelude_complete);
|
||||
in_far_code = false;
|
||||
near_code_ptr = near_code_begin;
|
||||
far_code_ptr = far_code_begin;
|
||||
SetCodePtr(near_code_begin);
|
||||
}
|
||||
|
||||
size_t BlockOfCode::SpaceRemaining() const {
|
||||
ASSERT(prelude_complete);
|
||||
// This function provides an underestimate of near-code-size but that's okay.
|
||||
// (Why? The maximum size of near code should be measured from near_code_begin, not top_.)
|
||||
// These are offsets from Xbyak::CodeArray::top_.
|
||||
std::size_t far_code_offset, near_code_offset;
|
||||
if (in_far_code) {
|
||||
near_code_offset = static_cast<const u8*>(near_code_ptr) - getCode();
|
||||
far_code_offset = getCurr() - getCode();
|
||||
} else {
|
||||
near_code_offset = getCurr() - getCode();
|
||||
far_code_offset = static_cast<const u8*>(far_code_ptr) - getCode();
|
||||
}
|
||||
if (far_code_offset > TOTAL_CODE_SIZE)
|
||||
return 0;
|
||||
if (near_code_offset > FAR_CODE_OFFSET)
|
||||
return 0;
|
||||
return std::min(TOTAL_CODE_SIZE - far_code_offset, FAR_CODE_OFFSET - near_code_offset);
|
||||
}
|
||||
|
||||
void BlockOfCode::RunCode(void* jit_state) const {
|
||||
run_code(jit_state);
|
||||
}
|
||||
|
||||
void BlockOfCode::RunCodeFrom(void* jit_state, CodePtr code_ptr) const {
|
||||
run_code_from(jit_state, code_ptr);
|
||||
}
|
||||
|
||||
void BlockOfCode::ReturnFromRunCode(bool mxcsr_already_exited) {
|
||||
size_t index = 0;
|
||||
if (mxcsr_already_exited)
|
||||
index |= MXCSR_ALREADY_EXITED;
|
||||
jmp(return_from_run_code[index]);
|
||||
}
|
||||
|
||||
void BlockOfCode::ForceReturnFromRunCode(bool mxcsr_already_exited) {
|
||||
size_t index = FORCE_RETURN;
|
||||
if (mxcsr_already_exited)
|
||||
index |= MXCSR_ALREADY_EXITED;
|
||||
jmp(return_from_run_code[index]);
|
||||
}
|
||||
|
||||
void BlockOfCode::GenRunCode() {
|
||||
Xbyak::Label loop, enter_mxcsr_then_loop;
|
||||
|
||||
align();
|
||||
run_code_from = getCurr<RunCodeFromFuncType>();
|
||||
|
||||
ABI_PushCalleeSaveRegistersAndAdjustStack(*this);
|
||||
|
||||
mov(r15, ABI_PARAM1);
|
||||
mov(r14, ABI_PARAM2); // save temporarily in non-volatile register
|
||||
|
||||
cb.GetTicksRemaining->EmitCall(*this);
|
||||
mov(qword[r15 + jsi.offsetof_cycles_to_run], ABI_RETURN);
|
||||
mov(qword[r15 + jsi.offsetof_cycles_remaining], ABI_RETURN);
|
||||
|
||||
SwitchMxcsrOnEntry();
|
||||
jmp(r14);
|
||||
|
||||
align();
|
||||
run_code = getCurr<RunCodeFuncType>();
|
||||
|
||||
// This serves two purposes:
|
||||
// 1. It saves all the registers we as a callee need to save.
|
||||
// 2. It aligns the stack so that the code the JIT emits can assume
|
||||
// that the stack is appropriately aligned for CALLs.
|
||||
ABI_PushCalleeSaveRegistersAndAdjustStack(*this);
|
||||
|
||||
mov(r15, ABI_PARAM1);
|
||||
|
||||
cb.GetTicksRemaining->EmitCall(*this);
|
||||
mov(qword[r15 + jsi.offsetof_cycles_to_run], ABI_RETURN);
|
||||
mov(qword[r15 + jsi.offsetof_cycles_remaining], ABI_RETURN);
|
||||
|
||||
L(enter_mxcsr_then_loop);
|
||||
SwitchMxcsrOnEntry();
|
||||
L(loop);
|
||||
cb.LookupBlock->EmitCall(*this);
|
||||
|
||||
jmp(ABI_RETURN);
|
||||
|
||||
// Return from run code variants
|
||||
const auto emit_return_from_run_code = [this, &loop, &enter_mxcsr_then_loop](bool mxcsr_already_exited, bool force_return){
|
||||
if (!force_return) {
|
||||
cmp(qword[r15 + jsi.offsetof_cycles_remaining], 0);
|
||||
jg(mxcsr_already_exited ? enter_mxcsr_then_loop : loop);
|
||||
}
|
||||
|
||||
if (!mxcsr_already_exited) {
|
||||
SwitchMxcsrOnExit();
|
||||
}
|
||||
|
||||
cb.AddTicks->EmitCall(*this, [this](RegList param) {
|
||||
mov(param[0], qword[r15 + jsi.offsetof_cycles_to_run]);
|
||||
sub(param[0], qword[r15 + jsi.offsetof_cycles_remaining]);
|
||||
});
|
||||
|
||||
ABI_PopCalleeSaveRegistersAndAdjustStack(*this);
|
||||
ret();
|
||||
};
|
||||
|
||||
align();
|
||||
return_from_run_code[0] = getCurr<const void*>();
|
||||
emit_return_from_run_code(false, false);
|
||||
|
||||
align();
|
||||
return_from_run_code[MXCSR_ALREADY_EXITED] = getCurr<const void*>();
|
||||
emit_return_from_run_code(true, false);
|
||||
|
||||
align();
|
||||
return_from_run_code[FORCE_RETURN] = getCurr<const void*>();
|
||||
emit_return_from_run_code(false, true);
|
||||
|
||||
align();
|
||||
return_from_run_code[MXCSR_ALREADY_EXITED | FORCE_RETURN] = getCurr<const void*>();
|
||||
emit_return_from_run_code(true, true);
|
||||
}
|
||||
|
||||
void BlockOfCode::SwitchMxcsrOnEntry() {
|
||||
stmxcsr(dword[r15 + jsi.offsetof_save_host_MXCSR]);
|
||||
ldmxcsr(dword[r15 + jsi.offsetof_guest_MXCSR]);
|
||||
}
|
||||
|
||||
void BlockOfCode::SwitchMxcsrOnExit() {
|
||||
stmxcsr(dword[r15 + jsi.offsetof_guest_MXCSR]);
|
||||
ldmxcsr(dword[r15 + jsi.offsetof_save_host_MXCSR]);
|
||||
}
|
||||
|
||||
void BlockOfCode::UpdateTicks() {
|
||||
cb.AddTicks->EmitCall(*this, [this](RegList param) {
|
||||
mov(param[0], qword[r15 + jsi.offsetof_cycles_to_run]);
|
||||
sub(param[0], qword[r15 + jsi.offsetof_cycles_remaining]);
|
||||
});
|
||||
|
||||
cb.GetTicksRemaining->EmitCall(*this);
|
||||
mov(qword[r15 + jsi.offsetof_cycles_to_run], ABI_RETURN);
|
||||
mov(qword[r15 + jsi.offsetof_cycles_remaining], ABI_RETURN);
|
||||
}
|
||||
|
||||
Xbyak::Address BlockOfCode::MConst(const Xbyak::AddressFrame& frame, u64 lower, u64 upper) {
|
||||
return constant_pool.GetConstant(frame, lower, upper);
|
||||
}
|
||||
|
||||
void BlockOfCode::SwitchToFarCode() {
|
||||
ASSERT(prelude_complete);
|
||||
ASSERT(!in_far_code);
|
||||
in_far_code = true;
|
||||
near_code_ptr = getCurr();
|
||||
SetCodePtr(far_code_ptr);
|
||||
|
||||
ASSERT_MSG(near_code_ptr < far_code_begin, "Near code has overwritten far code!");
|
||||
}
|
||||
|
||||
void BlockOfCode::SwitchToNearCode() {
|
||||
ASSERT(prelude_complete);
|
||||
ASSERT(in_far_code);
|
||||
in_far_code = false;
|
||||
far_code_ptr = getCurr();
|
||||
SetCodePtr(near_code_ptr);
|
||||
}
|
||||
|
||||
CodePtr BlockOfCode::GetCodeBegin() const {
|
||||
return near_code_begin;
|
||||
}
|
||||
|
||||
void* BlockOfCode::AllocateFromCodeSpace(size_t alloc_size) {
|
||||
if (size_ + alloc_size >= maxSize_) {
|
||||
throw Xbyak::Error(Xbyak::ERR_CODE_IS_TOO_BIG);
|
||||
}
|
||||
|
||||
void* ret = getCurr<void*>();
|
||||
size_ += alloc_size;
|
||||
memset(ret, 0, alloc_size);
|
||||
return ret;
|
||||
}
|
||||
|
||||
void BlockOfCode::SetCodePtr(CodePtr code_ptr) {
|
||||
// The "size" defines where top_, the insertion point, is.
|
||||
size_t required_size = reinterpret_cast<const u8*>(code_ptr) - getCode();
|
||||
setSize(required_size);
|
||||
}
|
||||
|
||||
void BlockOfCode::EnsurePatchLocationSize(CodePtr begin, size_t size) {
|
||||
size_t current_size = getCurr<const u8*>() - reinterpret_cast<const u8*>(begin);
|
||||
ASSERT(current_size <= size);
|
||||
nop(size - current_size);
|
||||
}
|
||||
|
||||
bool BlockOfCode::DoesCpuSupport(Xbyak::util::Cpu::Type type) const {
|
||||
#ifdef DYNARMIC_ENABLE_CPU_FEATURE_DETECTION
|
||||
return cpu_info.has(type);
|
||||
#else
|
||||
(void)type;
|
||||
return false;
|
||||
#endif
|
||||
}
|
||||
|
||||
} // namespace Dynarmic::BackendX64
|
||||
165
src/backend/x64/block_of_code.h
Normal file
165
src/backend/x64/block_of_code.h
Normal file
@@ -0,0 +1,165 @@
|
||||
/* This file is part of the dynarmic project.
|
||||
* Copyright (c) 2016 MerryMage
|
||||
* This software may be used and distributed according to the terms of the GNU
|
||||
* General Public License version 2 or any later version.
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <array>
|
||||
#include <memory>
|
||||
#include <type_traits>
|
||||
|
||||
#include <xbyak.h>
|
||||
#include <xbyak_util.h>
|
||||
|
||||
#include "backend/x64/callback.h"
|
||||
#include "backend/x64/constant_pool.h"
|
||||
#include "backend/x64/jitstate_info.h"
|
||||
#include "common/common_types.h"
|
||||
|
||||
namespace Dynarmic::BackendX64 {
|
||||
|
||||
using CodePtr = const void*;
|
||||
|
||||
struct RunCodeCallbacks {
|
||||
std::unique_ptr<Callback> LookupBlock;
|
||||
std::unique_ptr<Callback> AddTicks;
|
||||
std::unique_ptr<Callback> GetTicksRemaining;
|
||||
};
|
||||
|
||||
class BlockOfCode final : public Xbyak::CodeGenerator {
|
||||
public:
|
||||
BlockOfCode(RunCodeCallbacks cb, JitStateInfo jsi);
|
||||
/// Call when external emitters have finished emitting their preludes.
|
||||
void PreludeComplete();
|
||||
|
||||
/// Clears this block of code and resets code pointer to beginning.
|
||||
void ClearCache();
|
||||
/// Calculates how much space is remaining to use. This is the minimum of near code and far code.
|
||||
size_t SpaceRemaining() const;
|
||||
|
||||
/// Runs emulated code.
|
||||
void RunCode(void* jit_state) const;
|
||||
/// Runs emulated code from code_ptr.
|
||||
void RunCodeFrom(void* jit_state, CodePtr code_ptr) const;
|
||||
/// Code emitter: Returns to dispatcher
|
||||
void ReturnFromRunCode(bool mxcsr_already_exited = false);
|
||||
/// Code emitter: Returns to dispatcher, forces return to host
|
||||
void ForceReturnFromRunCode(bool mxcsr_already_exited = false);
|
||||
/// Code emitter: Makes guest MXCSR the current MXCSR
|
||||
void SwitchMxcsrOnEntry();
|
||||
/// Code emitter: Makes saved host MXCSR the current MXCSR
|
||||
void SwitchMxcsrOnExit();
|
||||
/// Code emitter: Updates cycles remaining my calling cb.AddTicks and cb.GetTicksRemaining
|
||||
/// @note this clobbers ABI callee-save registers
|
||||
void UpdateTicks();
|
||||
|
||||
/// Code emitter: Calls the function
|
||||
template <typename FunctionPointer>
|
||||
void CallFunction(FunctionPointer fn) {
|
||||
static_assert(std::is_pointer<FunctionPointer>() && std::is_function<std::remove_pointer_t<FunctionPointer>>(),
|
||||
"Supplied type must be a pointer to a function");
|
||||
|
||||
const u64 address = reinterpret_cast<u64>(fn);
|
||||
const u64 distance = address - (getCurr<u64>() + 5);
|
||||
|
||||
if (distance >= 0x0000000080000000ULL && distance < 0xFFFFFFFF80000000ULL) {
|
||||
// Far call
|
||||
mov(rax, address);
|
||||
call(rax);
|
||||
} else {
|
||||
call(fn);
|
||||
}
|
||||
}
|
||||
|
||||
Xbyak::Address MConst(const Xbyak::AddressFrame& frame, u64 lower, u64 upper = 0);
|
||||
|
||||
/// Far code sits far away from the near code. Execution remains primarily in near code.
|
||||
/// "Cold" / Rarely executed instructions sit in far code, so the CPU doesn't fetch them unless necessary.
|
||||
void SwitchToFarCode();
|
||||
void SwitchToNearCode();
|
||||
|
||||
CodePtr GetCodeBegin() const;
|
||||
|
||||
const void* GetReturnFromRunCodeAddress() const {
|
||||
return return_from_run_code[0];
|
||||
}
|
||||
|
||||
const void* GetForceReturnFromRunCodeAddress() const {
|
||||
return return_from_run_code[FORCE_RETURN];
|
||||
}
|
||||
|
||||
void int3() { db(0xCC); }
|
||||
|
||||
/// Allocate memory of `size` bytes from the same block of memory the code is in.
|
||||
/// This is useful for objects that need to be placed close to or within code.
|
||||
/// The lifetime of this memory is the same as the code around it.
|
||||
void* AllocateFromCodeSpace(size_t size);
|
||||
|
||||
void SetCodePtr(CodePtr code_ptr);
|
||||
void EnsurePatchLocationSize(CodePtr begin, size_t size);
|
||||
|
||||
// ABI registers
|
||||
#ifdef _WIN32
|
||||
static const Xbyak::Reg64 ABI_RETURN;
|
||||
static const Xbyak::Reg64 ABI_PARAM1;
|
||||
static const Xbyak::Reg64 ABI_PARAM2;
|
||||
static const Xbyak::Reg64 ABI_PARAM3;
|
||||
static const Xbyak::Reg64 ABI_PARAM4;
|
||||
static const std::array<Xbyak::Reg64, 4> ABI_PARAMS;
|
||||
#else
|
||||
static const Xbyak::Reg64 ABI_RETURN;
|
||||
static const Xbyak::Reg64 ABI_RETURN2;
|
||||
static const Xbyak::Reg64 ABI_PARAM1;
|
||||
static const Xbyak::Reg64 ABI_PARAM2;
|
||||
static const Xbyak::Reg64 ABI_PARAM3;
|
||||
static const Xbyak::Reg64 ABI_PARAM4;
|
||||
static const Xbyak::Reg64 ABI_PARAM5;
|
||||
static const Xbyak::Reg64 ABI_PARAM6;
|
||||
static const std::array<Xbyak::Reg64, 6> ABI_PARAMS;
|
||||
#endif
|
||||
|
||||
bool DoesCpuSupport(Xbyak::util::Cpu::Type type) const;
|
||||
|
||||
JitStateInfo GetJitStateInfo() const { return jsi; }
|
||||
|
||||
private:
|
||||
RunCodeCallbacks cb;
|
||||
JitStateInfo jsi;
|
||||
|
||||
bool prelude_complete = false;
|
||||
CodePtr near_code_begin;
|
||||
CodePtr far_code_begin;
|
||||
|
||||
ConstantPool constant_pool;
|
||||
|
||||
bool in_far_code = false;
|
||||
CodePtr near_code_ptr;
|
||||
CodePtr far_code_ptr;
|
||||
|
||||
using RunCodeFuncType = void(*)(void*);
|
||||
using RunCodeFromFuncType = void(*)(void*, CodePtr);
|
||||
RunCodeFuncType run_code = nullptr;
|
||||
RunCodeFromFuncType run_code_from = nullptr;
|
||||
static constexpr size_t MXCSR_ALREADY_EXITED = 1 << 0;
|
||||
static constexpr size_t FORCE_RETURN = 1 << 1;
|
||||
std::array<const void*, 4> return_from_run_code;
|
||||
void GenRunCode();
|
||||
|
||||
class ExceptionHandler final {
|
||||
public:
|
||||
ExceptionHandler();
|
||||
~ExceptionHandler();
|
||||
|
||||
void Register(BlockOfCode& code);
|
||||
private:
|
||||
struct Impl;
|
||||
std::unique_ptr<Impl> impl;
|
||||
};
|
||||
ExceptionHandler exception_handler;
|
||||
|
||||
Xbyak::util::Cpu cpu_info;
|
||||
};
|
||||
|
||||
} // namespace Dynarmic::BackendX64
|
||||
45
src/backend/x64/block_range_information.cpp
Normal file
45
src/backend/x64/block_range_information.cpp
Normal file
@@ -0,0 +1,45 @@
|
||||
/* This file is part of the dynarmic project.
|
||||
* Copyright (c) 2018 MerryMage
|
||||
* This software may be used and distributed according to the terms of the GNU
|
||||
* General Public License version 2 or any later version.
|
||||
*/
|
||||
|
||||
#include <unordered_set>
|
||||
|
||||
#include <boost/icl/interval_map.hpp>
|
||||
#include <boost/icl/interval_set.hpp>
|
||||
|
||||
#include "backend/x64/block_range_information.h"
|
||||
#include "common/common_types.h"
|
||||
|
||||
namespace Dynarmic::BackendX64 {
|
||||
|
||||
template <typename ProgramCounterType>
|
||||
void BlockRangeInformation<ProgramCounterType>::AddRange(boost::icl::discrete_interval<ProgramCounterType> range, IR::LocationDescriptor location) {
|
||||
block_ranges.add(std::make_pair(range, std::set<IR::LocationDescriptor>{location}));
|
||||
}
|
||||
|
||||
template <typename ProgramCounterType>
|
||||
void BlockRangeInformation<ProgramCounterType>::ClearCache() {
|
||||
block_ranges.clear();
|
||||
}
|
||||
|
||||
template <typename ProgramCounterType>
|
||||
std::unordered_set<IR::LocationDescriptor> BlockRangeInformation<ProgramCounterType>::InvalidateRanges(const boost::icl::interval_set<ProgramCounterType>& ranges) {
|
||||
std::unordered_set<IR::LocationDescriptor> erase_locations;
|
||||
for (auto invalidate_interval : ranges) {
|
||||
auto pair = block_ranges.equal_range(invalidate_interval);
|
||||
for (auto it = pair.first; it != pair.second; ++it) {
|
||||
for (const auto &descriptor : it->second) {
|
||||
erase_locations.insert(descriptor);
|
||||
}
|
||||
}
|
||||
}
|
||||
// TODO: EFFICIENCY: Remove ranges that are to be erased.
|
||||
return erase_locations;
|
||||
}
|
||||
|
||||
template class BlockRangeInformation<u32>;
|
||||
template class BlockRangeInformation<u64>;
|
||||
|
||||
} // namespace Dynarmic::BackendX64
|
||||
29
src/backend/x64/block_range_information.h
Normal file
29
src/backend/x64/block_range_information.h
Normal file
@@ -0,0 +1,29 @@
|
||||
/* This file is part of the dynarmic project.
|
||||
* Copyright (c) 2018 MerryMage
|
||||
* This software may be used and distributed according to the terms of the GNU
|
||||
* General Public License version 2 or any later version.
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <unordered_set>
|
||||
|
||||
#include <boost/icl/interval_map.hpp>
|
||||
#include <boost/icl/interval_set.hpp>
|
||||
|
||||
#include "frontend/ir/location_descriptor.h"
|
||||
|
||||
namespace Dynarmic::BackendX64 {
|
||||
|
||||
template <typename ProgramCounterType>
|
||||
class BlockRangeInformation {
|
||||
public:
|
||||
void AddRange(boost::icl::discrete_interval<ProgramCounterType> range, IR::LocationDescriptor location);
|
||||
void ClearCache();
|
||||
std::unordered_set<IR::LocationDescriptor> InvalidateRanges(const boost::icl::interval_set<ProgramCounterType>& ranges);
|
||||
|
||||
private:
|
||||
boost::icl::interval_map<ProgramCounterType, std::set<IR::LocationDescriptor>> block_ranges;
|
||||
};
|
||||
|
||||
} // namespace Dynarmic::BackendX64
|
||||
41
src/backend/x64/callback.cpp
Normal file
41
src/backend/x64/callback.cpp
Normal file
@@ -0,0 +1,41 @@
|
||||
/* This file is part of the dynarmic project.
|
||||
* Copyright (c) 2018 MerryMage
|
||||
* This software may be used and distributed according to the terms of the GNU
|
||||
* General Public License version 2 or any later version.
|
||||
*/
|
||||
|
||||
#include "backend/x64/callback.h"
|
||||
#include "backend/x64/block_of_code.h"
|
||||
|
||||
namespace Dynarmic::BackendX64 {
|
||||
|
||||
Callback::~Callback() = default;
|
||||
|
||||
void SimpleCallback::EmitCall(BlockOfCode& code, std::function<void(RegList)> l) const {
|
||||
l({code.ABI_PARAM1, code.ABI_PARAM2, code.ABI_PARAM3, code.ABI_PARAM4});
|
||||
code.CallFunction(fn);
|
||||
}
|
||||
|
||||
void SimpleCallback::EmitCallWithReturnPointer(BlockOfCode& code, std::function<void(Xbyak::Reg64, RegList)> l) const {
|
||||
l(code.ABI_PARAM1, {code.ABI_PARAM2, code.ABI_PARAM3, code.ABI_PARAM4});
|
||||
code.CallFunction(fn);
|
||||
}
|
||||
|
||||
void ArgCallback::EmitCall(BlockOfCode& code, std::function<void(RegList)> l) const {
|
||||
l({code.ABI_PARAM2, code.ABI_PARAM3, code.ABI_PARAM4});
|
||||
code.mov(code.ABI_PARAM1, arg);
|
||||
code.CallFunction(fn);
|
||||
}
|
||||
|
||||
void ArgCallback::EmitCallWithReturnPointer(BlockOfCode& code, std::function<void(Xbyak::Reg64, RegList)> l) const {
|
||||
#if defined(WIN32) && !defined(__MINGW64__)
|
||||
l(code.ABI_PARAM2, {code.ABI_PARAM3, code.ABI_PARAM4});
|
||||
code.mov(code.ABI_PARAM1, arg);
|
||||
#else
|
||||
l(code.ABI_PARAM1, {code.ABI_PARAM3, code.ABI_PARAM4});
|
||||
code.mov(code.ABI_PARAM2, arg);
|
||||
#endif
|
||||
code.CallFunction(fn);
|
||||
}
|
||||
|
||||
} // namespace Dynarmic::BackendX64
|
||||
55
src/backend/x64/callback.h
Normal file
55
src/backend/x64/callback.h
Normal file
@@ -0,0 +1,55 @@
|
||||
/* This file is part of the dynarmic project.
|
||||
* Copyright (c) 2018 MerryMage
|
||||
* This software may be used and distributed according to the terms of the GNU
|
||||
* General Public License version 2 or any later version.
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <functional>
|
||||
#include <vector>
|
||||
|
||||
#include <xbyak.h>
|
||||
|
||||
#include "common/common_types.h"
|
||||
|
||||
namespace Dynarmic::BackendX64 {
|
||||
|
||||
using RegList = std::vector<Xbyak::Reg64>;
|
||||
|
||||
class BlockOfCode;
|
||||
|
||||
class Callback {
|
||||
public:
|
||||
virtual ~Callback();
|
||||
|
||||
virtual void EmitCall(BlockOfCode& code, std::function<void(RegList)> fn = [](RegList){}) const = 0;
|
||||
virtual void EmitCallWithReturnPointer(BlockOfCode& code, std::function<void(Xbyak::Reg64, RegList)> fn) const = 0;
|
||||
};
|
||||
|
||||
class SimpleCallback final : public Callback {
|
||||
public:
|
||||
template <typename Function>
|
||||
SimpleCallback(Function fn) : fn(reinterpret_cast<void(*)()>(fn)) {}
|
||||
|
||||
void EmitCall(BlockOfCode& code, std::function<void(RegList)> fn = [](RegList){}) const override;
|
||||
void EmitCallWithReturnPointer(BlockOfCode& code, std::function<void(Xbyak::Reg64, RegList)> fn) const override;
|
||||
|
||||
private:
|
||||
void (*fn)();
|
||||
};
|
||||
|
||||
class ArgCallback final : public Callback {
|
||||
public:
|
||||
template <typename Function>
|
||||
ArgCallback(Function fn, u64 arg) : fn(reinterpret_cast<void(*)()>(fn)), arg(arg) {}
|
||||
|
||||
void EmitCall(BlockOfCode& code, std::function<void(RegList)> fn = [](RegList){}) const override;
|
||||
void EmitCallWithReturnPointer(BlockOfCode& code, std::function<void(Xbyak::Reg64, RegList)> fn) const override;
|
||||
|
||||
private:
|
||||
void (*fn)();
|
||||
u64 arg;
|
||||
};
|
||||
|
||||
} // namespace Dynarmic::BackendX64
|
||||
35
src/backend/x64/constant_pool.cpp
Normal file
35
src/backend/x64/constant_pool.cpp
Normal file
@@ -0,0 +1,35 @@
|
||||
/* This file is part of the dynarmic project.
|
||||
* Copyright (c) 2016 MerryMage
|
||||
* This software may be used and distributed according to the terms of the GNU
|
||||
* General Public License version 2 or any later version.
|
||||
*/
|
||||
|
||||
#include <cstring>
|
||||
|
||||
#include "backend/x64/block_of_code.h"
|
||||
#include "backend/x64/constant_pool.h"
|
||||
#include "common/assert.h"
|
||||
|
||||
namespace Dynarmic::BackendX64 {
|
||||
|
||||
ConstantPool::ConstantPool(BlockOfCode& code, size_t size) : code(code), pool_size(size) {
|
||||
code.int3();
|
||||
code.align(align_size);
|
||||
pool_begin = reinterpret_cast<u8*>(code.AllocateFromCodeSpace(size));
|
||||
current_pool_ptr = pool_begin;
|
||||
}
|
||||
|
||||
Xbyak::Address ConstantPool::GetConstant(const Xbyak::AddressFrame& frame, u64 lower, u64 upper) {
|
||||
const auto constant = std::make_tuple(lower, upper);
|
||||
auto iter = constant_info.find(constant);
|
||||
if (iter == constant_info.end()) {
|
||||
ASSERT(static_cast<size_t>(current_pool_ptr - pool_begin) < pool_size);
|
||||
std::memcpy(current_pool_ptr, &lower, sizeof(u64));
|
||||
std::memcpy(current_pool_ptr + sizeof(u64), &upper, sizeof(u64));
|
||||
iter = constant_info.emplace(constant, current_pool_ptr).first;
|
||||
current_pool_ptr += align_size;
|
||||
}
|
||||
return frame[code.rip + iter->second];
|
||||
}
|
||||
|
||||
} // namespace Dynarmic::BackendX64
|
||||
40
src/backend/x64/constant_pool.h
Normal file
40
src/backend/x64/constant_pool.h
Normal file
@@ -0,0 +1,40 @@
|
||||
/* This file is part of the dynarmic project.
|
||||
* Copyright (c) 2016 MerryMage
|
||||
* This software may be used and distributed according to the terms of the GNU
|
||||
* General Public License version 2 or any later version.
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <map>
|
||||
|
||||
#include <xbyak.h>
|
||||
|
||||
#include "common/common_types.h"
|
||||
|
||||
namespace Dynarmic::BackendX64 {
|
||||
|
||||
class BlockOfCode;
|
||||
|
||||
/// ConstantPool allocates a block of memory from BlockOfCode.
|
||||
/// It places constants into this block of memory, returning the address
|
||||
/// of the memory location where the constant is placed. If the constant
|
||||
/// already exists, its memory location is reused.
|
||||
class ConstantPool final {
|
||||
public:
|
||||
ConstantPool(BlockOfCode& code, size_t size);
|
||||
|
||||
Xbyak::Address GetConstant(const Xbyak::AddressFrame& frame, u64 lower, u64 upper = 0);
|
||||
|
||||
private:
|
||||
static constexpr size_t align_size = 16; // bytes
|
||||
|
||||
std::map<std::tuple<u64, u64>, void*> constant_info;
|
||||
|
||||
BlockOfCode& code;
|
||||
size_t pool_size;
|
||||
u8* pool_begin;
|
||||
u8* current_pool_ptr;
|
||||
};
|
||||
|
||||
} // namespace Dynarmic::BackendX64
|
||||
82
src/backend/x64/devirtualize.h
Normal file
82
src/backend/x64/devirtualize.h
Normal file
@@ -0,0 +1,82 @@
|
||||
/* This file is part of the dynarmic project.
|
||||
* Copyright (c) 2018 MerryMage
|
||||
* This software may be used and distributed according to the terms of the GNU
|
||||
* General Public License version 2 or any later version.
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <cstring>
|
||||
#include <memory>
|
||||
|
||||
#include "backend/x64/callback.h"
|
||||
#include "common/assert.h"
|
||||
#include "common/cast_util.h"
|
||||
#include "common/common_types.h"
|
||||
#include "common/mp/function_info.h"
|
||||
|
||||
namespace Dynarmic {
|
||||
namespace BackendX64 {
|
||||
|
||||
namespace impl {
|
||||
|
||||
template <typename FunctionType, FunctionType mfp>
|
||||
struct ThunkBuilder;
|
||||
|
||||
template <typename C, typename R, typename... Args, R(C::*mfp)(Args...)>
|
||||
struct ThunkBuilder<R(C::*)(Args...), mfp> {
|
||||
static R Thunk(C* this_, Args... args) {
|
||||
return (this_->*mfp)(std::forward<Args>(args)...);
|
||||
}
|
||||
};
|
||||
|
||||
} // namespace impl
|
||||
|
||||
template<auto mfp>
|
||||
ArgCallback DevirtualizeGeneric(Common::mp::class_type_t<decltype(mfp)>* this_) {
|
||||
return ArgCallback{&impl::ThunkBuilder<decltype(mfp), mfp>::Thunk, reinterpret_cast<u64>(this_)};
|
||||
}
|
||||
|
||||
template<auto mfp>
|
||||
ArgCallback DevirtualizeWindows(Common::mp::class_type_t<decltype(mfp)>* this_) {
|
||||
static_assert(sizeof(mfp) == 8);
|
||||
return ArgCallback{Common::BitCast<u64>(mfp), reinterpret_cast<u64>(this_)};
|
||||
}
|
||||
|
||||
template<auto mfp>
|
||||
ArgCallback DevirtualizeItanium(Common::mp::class_type_t<decltype(mfp)>* this_) {
|
||||
struct MemberFunctionPointer {
|
||||
/// For a non-virtual function, this is a simple function pointer.
|
||||
/// For a virtual function, it is (1 + virtual table offset in bytes).
|
||||
u64 ptr;
|
||||
/// The required adjustment to `this`, prior to the call.
|
||||
u64 adj;
|
||||
} mfp_struct = Common::BitCast<MemberFunctionPointer>(mfp);
|
||||
|
||||
static_assert(sizeof(MemberFunctionPointer) == 16);
|
||||
static_assert(sizeof(MemberFunctionPointer) == sizeof(mfp));
|
||||
|
||||
u64 fn_ptr = mfp_struct.ptr;
|
||||
u64 this_ptr = reinterpret_cast<u64>(this_) + mfp_struct.adj;
|
||||
if (mfp_struct.ptr & 1) {
|
||||
u64 vtable = Common::BitCastPointee<u64>(this_ptr);
|
||||
fn_ptr = Common::BitCastPointee<u64>(vtable + fn_ptr - 1);
|
||||
}
|
||||
return ArgCallback{fn_ptr, this_ptr};
|
||||
}
|
||||
|
||||
template<auto mfp>
|
||||
ArgCallback Devirtualize(Common::mp::class_type_t<decltype(mfp)>* this_) {
|
||||
#if defined(__APPLE__) || defined(linux) || defined(__linux) || defined(__linux__)
|
||||
return DevirtualizeItanium<mfp>(this_);
|
||||
#elif defined(__MINGW64__)
|
||||
return DevirtualizeItanium<mfp>(this_);
|
||||
#elif defined(_WIN32)
|
||||
return DevirtualizeWindows<mfp>(this_);
|
||||
#else
|
||||
return DevirtualizeGeneric<mfp>(this_);
|
||||
#endif
|
||||
}
|
||||
|
||||
} // namespace BackendX64
|
||||
} // namespace Dynarmic
|
||||
343
src/backend/x64/emit_x64.cpp
Normal file
343
src/backend/x64/emit_x64.cpp
Normal file
@@ -0,0 +1,343 @@
|
||||
/* This file is part of the dynarmic project.
|
||||
* Copyright (c) 2016 MerryMage
|
||||
* This software may be used and distributed according to the terms of the GNU
|
||||
* General Public License version 2 or any later version.
|
||||
*/
|
||||
|
||||
#include <unordered_map>
|
||||
|
||||
#include "backend/x64/block_of_code.h"
|
||||
#include "backend/x64/emit_x64.h"
|
||||
#include "common/assert.h"
|
||||
#include "common/bit_util.h"
|
||||
#include "common/common_types.h"
|
||||
#include "common/variant_util.h"
|
||||
#include "frontend/ir/basic_block.h"
|
||||
#include "frontend/ir/microinstruction.h"
|
||||
#include "frontend/ir/opcodes.h"
|
||||
|
||||
// TODO: Have ARM flags in host flags and not have them use up GPR registers unless necessary.
|
||||
// TODO: Actually implement that proper instruction selector you've always wanted to sweetheart.
|
||||
|
||||
namespace Dynarmic::BackendX64 {
|
||||
|
||||
using namespace Xbyak::util;
|
||||
|
||||
EmitContext::EmitContext(RegAlloc& reg_alloc, IR::Block& block)
|
||||
: reg_alloc(reg_alloc), block(block) {}
|
||||
|
||||
void EmitContext::EraseInstruction(IR::Inst* inst) {
|
||||
block.Instructions().erase(inst);
|
||||
inst->ClearArgs();
|
||||
}
|
||||
|
||||
EmitX64::EmitX64(BlockOfCode& code)
|
||||
: code(code) {}
|
||||
|
||||
EmitX64::~EmitX64() = default;
|
||||
|
||||
boost::optional<typename EmitX64::BlockDescriptor> EmitX64::GetBasicBlock(IR::LocationDescriptor descriptor) const {
|
||||
auto iter = block_descriptors.find(descriptor);
|
||||
if (iter == block_descriptors.end())
|
||||
return boost::none;
|
||||
return iter->second;
|
||||
}
|
||||
|
||||
void EmitX64::EmitVoid(EmitContext&, IR::Inst*) {
|
||||
}
|
||||
|
||||
void EmitX64::EmitBreakpoint(EmitContext&, IR::Inst*) {
|
||||
code.int3();
|
||||
}
|
||||
|
||||
void EmitX64::EmitIdentity(EmitContext& ctx, IR::Inst* inst) {
|
||||
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
|
||||
if (!args[0].IsImmediate()) {
|
||||
ctx.reg_alloc.DefineValue(inst, args[0]);
|
||||
}
|
||||
}
|
||||
|
||||
void EmitX64::PushRSBHelper(Xbyak::Reg64 loc_desc_reg, Xbyak::Reg64 index_reg, IR::LocationDescriptor target) {
|
||||
using namespace Xbyak::util;
|
||||
|
||||
auto iter = block_descriptors.find(target);
|
||||
CodePtr target_code_ptr = iter != block_descriptors.end()
|
||||
? iter->second.entrypoint
|
||||
: code.GetReturnFromRunCodeAddress();
|
||||
|
||||
code.mov(index_reg.cvt32(), dword[r15 + code.GetJitStateInfo().offsetof_rsb_ptr]);
|
||||
|
||||
code.mov(loc_desc_reg, target.Value());
|
||||
|
||||
patch_information[target].mov_rcx.emplace_back(code.getCurr());
|
||||
EmitPatchMovRcx(target_code_ptr);
|
||||
|
||||
code.mov(qword[r15 + index_reg * 8 + code.GetJitStateInfo().offsetof_rsb_location_descriptors], loc_desc_reg);
|
||||
code.mov(qword[r15 + index_reg * 8 + code.GetJitStateInfo().offsetof_rsb_codeptrs], rcx);
|
||||
|
||||
code.add(index_reg.cvt32(), 1);
|
||||
code.and_(index_reg.cvt32(), u32(code.GetJitStateInfo().rsb_ptr_mask));
|
||||
code.mov(dword[r15 + code.GetJitStateInfo().offsetof_rsb_ptr], index_reg.cvt32());
|
||||
}
|
||||
|
||||
void EmitX64::EmitPushRSB(EmitContext& ctx, IR::Inst* inst) {
|
||||
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
|
||||
ASSERT(args[0].IsImmediate());
|
||||
u64 unique_hash_of_target = args[0].GetImmediateU64();
|
||||
|
||||
ctx.reg_alloc.ScratchGpr({HostLoc::RCX});
|
||||
Xbyak::Reg64 loc_desc_reg = ctx.reg_alloc.ScratchGpr();
|
||||
Xbyak::Reg64 index_reg = ctx.reg_alloc.ScratchGpr();
|
||||
|
||||
PushRSBHelper(loc_desc_reg, index_reg, IR::LocationDescriptor{unique_hash_of_target});
|
||||
}
|
||||
|
||||
void EmitX64::EmitGetCarryFromOp(EmitContext&, IR::Inst*) {
|
||||
ASSERT_MSG(false, "should never happen");
|
||||
}
|
||||
|
||||
void EmitX64::EmitGetOverflowFromOp(EmitContext&, IR::Inst*) {
|
||||
ASSERT_MSG(false, "should never happen");
|
||||
}
|
||||
|
||||
void EmitX64::EmitGetGEFromOp(EmitContext&, IR::Inst*) {
|
||||
ASSERT_MSG(false, "should never happen");
|
||||
}
|
||||
|
||||
void EmitX64::EmitGetNZCVFromOp(EmitContext& ctx, IR::Inst* inst) {
|
||||
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
|
||||
|
||||
const int bitsize = [&]{
|
||||
switch (args[0].GetType()) {
|
||||
case IR::Type::U8:
|
||||
return 8;
|
||||
case IR::Type::U16:
|
||||
return 16;
|
||||
case IR::Type::U32:
|
||||
return 32;
|
||||
case IR::Type::U64:
|
||||
return 64;
|
||||
default:
|
||||
ASSERT_MSG(false, "Unreachable");
|
||||
return 0;
|
||||
}
|
||||
}();
|
||||
|
||||
Xbyak::Reg64 nzcv = ctx.reg_alloc.ScratchGpr({HostLoc::RAX});
|
||||
Xbyak::Reg value = ctx.reg_alloc.UseGpr(args[0]).changeBit(bitsize);
|
||||
code.cmp(value, 0);
|
||||
code.lahf();
|
||||
code.seto(code.al);
|
||||
ctx.reg_alloc.DefineValue(inst, nzcv);
|
||||
}
|
||||
|
||||
void EmitX64::EmitNZCVFromPackedFlags(EmitContext& ctx, IR::Inst* inst) {
|
||||
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
|
||||
|
||||
if (args[0].IsImmediate()) {
|
||||
Xbyak::Reg32 nzcv = ctx.reg_alloc.ScratchGpr().cvt32();
|
||||
u32 value = 0;
|
||||
value |= Common::Bit<31>(args[0].GetImmediateU32()) ? (1 << 15) : 0;
|
||||
value |= Common::Bit<30>(args[0].GetImmediateU32()) ? (1 << 14) : 0;
|
||||
value |= Common::Bit<29>(args[0].GetImmediateU32()) ? (1 << 8) : 0;
|
||||
value |= Common::Bit<28>(args[0].GetImmediateU32()) ? (1 << 0) : 0;
|
||||
code.mov(nzcv, value);
|
||||
ctx.reg_alloc.DefineValue(inst, nzcv);
|
||||
} else {
|
||||
Xbyak::Reg32 nzcv = ctx.reg_alloc.UseScratchGpr(args[0]).cvt32();
|
||||
// TODO: Optimize
|
||||
code.shr(nzcv, 28);
|
||||
code.imul(nzcv, nzcv, 0b00010000'10000001);
|
||||
code.and_(nzcv.cvt8(), 1);
|
||||
ctx.reg_alloc.DefineValue(inst, nzcv);
|
||||
}
|
||||
}
|
||||
|
||||
void EmitX64::EmitAddCycles(size_t cycles) {
|
||||
ASSERT(cycles < std::numeric_limits<u32>::max());
|
||||
code.sub(qword[r15 + code.GetJitStateInfo().offsetof_cycles_remaining], static_cast<u32>(cycles));
|
||||
}
|
||||
|
||||
Xbyak::Label EmitX64::EmitCond(IR::Cond cond) {
|
||||
Xbyak::Label label;
|
||||
|
||||
const Xbyak::Reg32 cpsr = eax;
|
||||
code.mov(cpsr, dword[r15 + code.GetJitStateInfo().offsetof_CPSR_nzcv]);
|
||||
|
||||
constexpr size_t n_shift = 31;
|
||||
constexpr size_t z_shift = 30;
|
||||
constexpr size_t c_shift = 29;
|
||||
constexpr size_t v_shift = 28;
|
||||
constexpr u32 n_mask = 1u << n_shift;
|
||||
constexpr u32 z_mask = 1u << z_shift;
|
||||
constexpr u32 c_mask = 1u << c_shift;
|
||||
constexpr u32 v_mask = 1u << v_shift;
|
||||
|
||||
switch (cond) {
|
||||
case IR::Cond::EQ: //z
|
||||
code.test(cpsr, z_mask);
|
||||
code.jnz(label);
|
||||
break;
|
||||
case IR::Cond::NE: //!z
|
||||
code.test(cpsr, z_mask);
|
||||
code.jz(label);
|
||||
break;
|
||||
case IR::Cond::CS: //c
|
||||
code.test(cpsr, c_mask);
|
||||
code.jnz(label);
|
||||
break;
|
||||
case IR::Cond::CC: //!c
|
||||
code.test(cpsr, c_mask);
|
||||
code.jz(label);
|
||||
break;
|
||||
case IR::Cond::MI: //n
|
||||
code.test(cpsr, n_mask);
|
||||
code.jnz(label);
|
||||
break;
|
||||
case IR::Cond::PL: //!n
|
||||
code.test(cpsr, n_mask);
|
||||
code.jz(label);
|
||||
break;
|
||||
case IR::Cond::VS: //v
|
||||
code.test(cpsr, v_mask);
|
||||
code.jnz(label);
|
||||
break;
|
||||
case IR::Cond::VC: //!v
|
||||
code.test(cpsr, v_mask);
|
||||
code.jz(label);
|
||||
break;
|
||||
case IR::Cond::HI: { //c & !z
|
||||
code.and_(cpsr, z_mask | c_mask);
|
||||
code.cmp(cpsr, c_mask);
|
||||
code.je(label);
|
||||
break;
|
||||
}
|
||||
case IR::Cond::LS: { //!c | z
|
||||
code.and_(cpsr, z_mask | c_mask);
|
||||
code.cmp(cpsr, c_mask);
|
||||
code.jne(label);
|
||||
break;
|
||||
}
|
||||
case IR::Cond::GE: { // n == v
|
||||
code.and_(cpsr, n_mask | v_mask);
|
||||
code.jz(label);
|
||||
code.cmp(cpsr, n_mask | v_mask);
|
||||
code.je(label);
|
||||
break;
|
||||
}
|
||||
case IR::Cond::LT: { // n != v
|
||||
Xbyak::Label fail;
|
||||
code.and_(cpsr, n_mask | v_mask);
|
||||
code.jz(fail);
|
||||
code.cmp(cpsr, n_mask | v_mask);
|
||||
code.jne(label);
|
||||
code.L(fail);
|
||||
break;
|
||||
}
|
||||
case IR::Cond::GT: { // !z & (n == v)
|
||||
const Xbyak::Reg32 tmp1 = ebx;
|
||||
const Xbyak::Reg32 tmp2 = esi;
|
||||
code.mov(tmp1, cpsr);
|
||||
code.mov(tmp2, cpsr);
|
||||
code.shr(tmp1, n_shift);
|
||||
code.shr(tmp2, v_shift);
|
||||
code.shr(cpsr, z_shift);
|
||||
code.xor_(tmp1, tmp2);
|
||||
code.or_(tmp1, cpsr);
|
||||
code.test(tmp1, 1);
|
||||
code.jz(label);
|
||||
break;
|
||||
}
|
||||
case IR::Cond::LE: { // z | (n != v)
|
||||
const Xbyak::Reg32 tmp1 = ebx;
|
||||
const Xbyak::Reg32 tmp2 = esi;
|
||||
code.mov(tmp1, cpsr);
|
||||
code.mov(tmp2, cpsr);
|
||||
code.shr(tmp1, n_shift);
|
||||
code.shr(tmp2, v_shift);
|
||||
code.shr(cpsr, z_shift);
|
||||
code.xor_(tmp1, tmp2);
|
||||
code.or_(tmp1, cpsr);
|
||||
code.test(tmp1, 1);
|
||||
code.jnz(label);
|
||||
break;
|
||||
}
|
||||
default:
|
||||
ASSERT_MSG(false, "Unknown cond {}", static_cast<size_t>(cond));
|
||||
break;
|
||||
}
|
||||
|
||||
return label;
|
||||
}
|
||||
|
||||
void EmitX64::EmitCondPrelude(const IR::Block& block) {
|
||||
if (block.GetCondition() == IR::Cond::AL) {
|
||||
ASSERT(!block.HasConditionFailedLocation());
|
||||
return;
|
||||
}
|
||||
|
||||
ASSERT(block.HasConditionFailedLocation());
|
||||
|
||||
Xbyak::Label pass = EmitCond(block.GetCondition());
|
||||
EmitAddCycles(block.ConditionFailedCycleCount());
|
||||
EmitTerminal(IR::Term::LinkBlock{block.ConditionFailedLocation()}, block.Location());
|
||||
code.L(pass);
|
||||
}
|
||||
|
||||
void EmitX64::EmitTerminal(IR::Terminal terminal, IR::LocationDescriptor initial_location) {
|
||||
Common::VisitVariant<void>(terminal, [this, &initial_location](auto x) {
|
||||
using T = std::decay_t<decltype(x)>;
|
||||
if constexpr (!std::is_same_v<T, IR::Term::Invalid>) {
|
||||
this->EmitTerminalImpl(x, initial_location);
|
||||
} else {
|
||||
ASSERT_MSG(false, "Invalid terminal");
|
||||
}
|
||||
});
|
||||
}
|
||||
|
||||
void EmitX64::Patch(const IR::LocationDescriptor& desc, CodePtr bb) {
|
||||
const CodePtr save_code_ptr = code.getCurr();
|
||||
const PatchInformation& patch_info = patch_information[desc];
|
||||
|
||||
for (CodePtr location : patch_info.jg) {
|
||||
code.SetCodePtr(location);
|
||||
EmitPatchJg(desc, bb);
|
||||
}
|
||||
|
||||
for (CodePtr location : patch_info.jmp) {
|
||||
code.SetCodePtr(location);
|
||||
EmitPatchJmp(desc, bb);
|
||||
}
|
||||
|
||||
for (CodePtr location : patch_info.mov_rcx) {
|
||||
code.SetCodePtr(location);
|
||||
EmitPatchMovRcx(bb);
|
||||
}
|
||||
|
||||
code.SetCodePtr(save_code_ptr);
|
||||
}
|
||||
|
||||
void EmitX64::Unpatch(const IR::LocationDescriptor& desc) {
|
||||
Patch(desc, nullptr);
|
||||
}
|
||||
|
||||
void EmitX64::ClearCache() {
|
||||
block_descriptors.clear();
|
||||
patch_information.clear();
|
||||
}
|
||||
|
||||
void EmitX64::InvalidateBasicBlocks(const std::unordered_set<IR::LocationDescriptor>& locations) {
|
||||
for (const auto &descriptor : locations) {
|
||||
auto it = block_descriptors.find(descriptor);
|
||||
if (it == block_descriptors.end()) {
|
||||
continue;
|
||||
}
|
||||
|
||||
if (patch_information.count(descriptor)) {
|
||||
Unpatch(descriptor);
|
||||
}
|
||||
block_descriptors.erase(it);
|
||||
}
|
||||
}
|
||||
|
||||
} // namespace Dynarmic::BackendX64
|
||||
122
src/backend/x64/emit_x64.h
Normal file
122
src/backend/x64/emit_x64.h
Normal file
@@ -0,0 +1,122 @@
|
||||
/* This file is part of the dynarmic project.
|
||||
* Copyright (c) 2016 MerryMage
|
||||
* This software may be used and distributed according to the terms of the GNU
|
||||
* General Public License version 2 or any later version.
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <array>
|
||||
#include <type_traits>
|
||||
#include <unordered_map>
|
||||
#include <unordered_set>
|
||||
#include <vector>
|
||||
|
||||
#include <boost/optional.hpp>
|
||||
|
||||
#include <xbyak_util.h>
|
||||
|
||||
#include "backend/x64/reg_alloc.h"
|
||||
#include "common/address_range.h"
|
||||
#include "common/bit_util.h"
|
||||
#include "common/fp/rounding_mode.h"
|
||||
#include "frontend/ir/location_descriptor.h"
|
||||
#include "frontend/ir/terminal.h"
|
||||
|
||||
namespace Dynarmic::IR {
|
||||
class Block;
|
||||
class Inst;
|
||||
} // namespace Dynarmic::IR
|
||||
|
||||
namespace Dynarmic::BackendX64 {
|
||||
|
||||
class BlockOfCode;
|
||||
|
||||
using A64FullVectorWidth = std::integral_constant<size_t, 128>;
|
||||
|
||||
// Array alias that always sizes itself according to the given type T
|
||||
// relative to the size of a vector register. e.g. T = u32 would result
|
||||
// in a std::array<u32, 4>.
|
||||
template <typename T>
|
||||
using VectorArray = std::array<T, A64FullVectorWidth::value / Common::BitSize<T>()>;
|
||||
|
||||
struct EmitContext {
|
||||
EmitContext(RegAlloc& reg_alloc, IR::Block& block);
|
||||
|
||||
void EraseInstruction(IR::Inst* inst);
|
||||
|
||||
virtual FP::RoundingMode FPSCR_RMode() const = 0;
|
||||
virtual u32 FPCR() const = 0;
|
||||
virtual bool FPSCR_FTZ() const = 0;
|
||||
virtual bool FPSCR_DN() const = 0;
|
||||
virtual bool AccurateNaN() const { return true; }
|
||||
|
||||
RegAlloc& reg_alloc;
|
||||
IR::Block& block;
|
||||
};
|
||||
|
||||
class EmitX64 {
|
||||
public:
|
||||
struct BlockDescriptor {
|
||||
CodePtr entrypoint; // Entrypoint of emitted code
|
||||
size_t size; // Length in bytes of emitted code
|
||||
};
|
||||
|
||||
EmitX64(BlockOfCode& code);
|
||||
virtual ~EmitX64();
|
||||
|
||||
/// Looks up an emitted host block in the cache.
|
||||
boost::optional<BlockDescriptor> GetBasicBlock(IR::LocationDescriptor descriptor) const;
|
||||
|
||||
/// Empties the entire cache.
|
||||
virtual void ClearCache();
|
||||
|
||||
/// Invalidates a selection of basic blocks.
|
||||
void InvalidateBasicBlocks(const std::unordered_set<IR::LocationDescriptor>& locations);
|
||||
|
||||
protected:
|
||||
// Microinstruction emitters
|
||||
#define OPCODE(name, type, ...) void Emit##name(EmitContext& ctx, IR::Inst* inst);
|
||||
#define A32OPC(...)
|
||||
#define A64OPC(...)
|
||||
#include "frontend/ir/opcodes.inc"
|
||||
#undef OPCODE
|
||||
#undef A32OPC
|
||||
#undef A64OPC
|
||||
|
||||
// Helpers
|
||||
void EmitAddCycles(size_t cycles);
|
||||
Xbyak::Label EmitCond(IR::Cond cond);
|
||||
void EmitCondPrelude(const IR::Block& block);
|
||||
void PushRSBHelper(Xbyak::Reg64 loc_desc_reg, Xbyak::Reg64 index_reg, IR::LocationDescriptor target);
|
||||
|
||||
// Terminal instruction emitters
|
||||
void EmitTerminal(IR::Terminal terminal, IR::LocationDescriptor initial_location);
|
||||
virtual void EmitTerminalImpl(IR::Term::Interpret terminal, IR::LocationDescriptor initial_location) = 0;
|
||||
virtual void EmitTerminalImpl(IR::Term::ReturnToDispatch terminal, IR::LocationDescriptor initial_location) = 0;
|
||||
virtual void EmitTerminalImpl(IR::Term::LinkBlock terminal, IR::LocationDescriptor initial_location) = 0;
|
||||
virtual void EmitTerminalImpl(IR::Term::LinkBlockFast terminal, IR::LocationDescriptor initial_location) = 0;
|
||||
virtual void EmitTerminalImpl(IR::Term::PopRSBHint terminal, IR::LocationDescriptor initial_location) = 0;
|
||||
virtual void EmitTerminalImpl(IR::Term::If terminal, IR::LocationDescriptor initial_location) = 0;
|
||||
virtual void EmitTerminalImpl(IR::Term::CheckBit terminal, IR::LocationDescriptor initial_location) = 0;
|
||||
virtual void EmitTerminalImpl(IR::Term::CheckHalt terminal, IR::LocationDescriptor initial_location) = 0;
|
||||
|
||||
// Patching
|
||||
struct PatchInformation {
|
||||
std::vector<CodePtr> jg;
|
||||
std::vector<CodePtr> jmp;
|
||||
std::vector<CodePtr> mov_rcx;
|
||||
};
|
||||
void Patch(const IR::LocationDescriptor& target_desc, CodePtr target_code_ptr);
|
||||
void Unpatch(const IR::LocationDescriptor& target_desc);
|
||||
virtual void EmitPatchJg(const IR::LocationDescriptor& target_desc, CodePtr target_code_ptr = nullptr) = 0;
|
||||
virtual void EmitPatchJmp(const IR::LocationDescriptor& target_desc, CodePtr target_code_ptr = nullptr) = 0;
|
||||
virtual void EmitPatchMovRcx(CodePtr target_code_ptr = nullptr) = 0;
|
||||
|
||||
// State
|
||||
BlockOfCode& code;
|
||||
std::unordered_map<IR::LocationDescriptor, BlockDescriptor> block_descriptors;
|
||||
std::unordered_map<IR::LocationDescriptor, PatchInformation> patch_information;
|
||||
};
|
||||
|
||||
} // namespace Dynarmic::BackendX64
|
||||
77
src/backend/x64/emit_x64_aes.cpp
Normal file
77
src/backend/x64/emit_x64_aes.cpp
Normal file
@@ -0,0 +1,77 @@
|
||||
/* This file is part of the dynarmic project.
|
||||
* Copyright (c) 2018 MerryMage
|
||||
* This software may be used and distributed according to the terms of the GNU
|
||||
* General Public License version 2 or any later version.
|
||||
*/
|
||||
|
||||
#include "backend/x64/abi.h"
|
||||
#include "backend/x64/block_of_code.h"
|
||||
#include "backend/x64/emit_x64.h"
|
||||
#include "common/common_types.h"
|
||||
#include "common/crypto/aes.h"
|
||||
#include "frontend/ir/microinstruction.h"
|
||||
#include "frontend/ir/opcodes.h"
|
||||
|
||||
namespace Dynarmic::BackendX64 {
|
||||
|
||||
using namespace Xbyak::util;
|
||||
namespace AES = Common::Crypto::AES;
|
||||
|
||||
using AESFn = void(AES::State&, const AES::State&);
|
||||
|
||||
static void EmitAESFunction(RegAlloc::ArgumentInfo args, EmitContext& ctx, BlockOfCode& code,
|
||||
IR::Inst* inst, AESFn fn) {
|
||||
constexpr u32 stack_space = static_cast<u32>(sizeof(AES::State)) * 2;
|
||||
const Xbyak::Xmm input = ctx.reg_alloc.UseXmm(args[0]);
|
||||
const Xbyak::Xmm result = ctx.reg_alloc.ScratchXmm();
|
||||
ctx.reg_alloc.EndOfAllocScope();
|
||||
|
||||
ctx.reg_alloc.HostCall(nullptr);
|
||||
code.sub(rsp, stack_space + ABI_SHADOW_SPACE);
|
||||
code.lea(code.ABI_PARAM1, ptr[rsp + ABI_SHADOW_SPACE]);
|
||||
code.lea(code.ABI_PARAM2, ptr[rsp + ABI_SHADOW_SPACE + sizeof(AES::State)]);
|
||||
|
||||
code.movaps(xword[code.ABI_PARAM2], input);
|
||||
|
||||
code.CallFunction(fn);
|
||||
|
||||
code.movaps(result, xword[rsp + ABI_SHADOW_SPACE]);
|
||||
|
||||
// Free memory
|
||||
code.add(rsp, stack_space + ABI_SHADOW_SPACE);
|
||||
|
||||
ctx.reg_alloc.DefineValue(inst, result);
|
||||
}
|
||||
|
||||
void EmitX64::EmitAESDecryptSingleRound(EmitContext& ctx, IR::Inst* inst) {
|
||||
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
|
||||
|
||||
EmitAESFunction(args, ctx, code, inst, AES::DecryptSingleRound);
|
||||
}
|
||||
|
||||
void EmitX64::EmitAESEncryptSingleRound(EmitContext& ctx, IR::Inst* inst) {
|
||||
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
|
||||
|
||||
EmitAESFunction(args, ctx, code, inst, AES::EncryptSingleRound);
|
||||
}
|
||||
|
||||
void EmitX64::EmitAESInverseMixColumns(EmitContext& ctx, IR::Inst* inst) {
|
||||
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
|
||||
|
||||
if (code.DoesCpuSupport(Xbyak::util::Cpu::tAESNI)) {
|
||||
const Xbyak::Xmm data = ctx.reg_alloc.UseScratchXmm(args[0]);
|
||||
|
||||
code.aesimc(data, data);
|
||||
|
||||
ctx.reg_alloc.DefineValue(inst, data);
|
||||
} else {
|
||||
EmitAESFunction(args, ctx, code, inst, AES::InverseMixColumns);
|
||||
}
|
||||
}
|
||||
|
||||
void EmitX64::EmitAESMixColumns(EmitContext& ctx, IR::Inst* inst) {
|
||||
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
|
||||
EmitAESFunction(args, ctx, code, inst, AES::MixColumns);
|
||||
}
|
||||
|
||||
} // namespace Dynarmic::BackendX64
|
||||
77
src/backend/x64/emit_x64_crc32.cpp
Normal file
77
src/backend/x64/emit_x64_crc32.cpp
Normal file
@@ -0,0 +1,77 @@
|
||||
/* This file is part of the dynarmic project.
|
||||
* Copyright (c) 2018 MerryMage
|
||||
* This software may be used and distributed according to the terms of the GNU
|
||||
* General Public icense version 2 or any later version.
|
||||
*/
|
||||
|
||||
#include <array>
|
||||
#include <climits>
|
||||
|
||||
#include "backend/x64/block_of_code.h"
|
||||
#include "backend/x64/emit_x64.h"
|
||||
#include "common/common_types.h"
|
||||
#include "common/crypto/crc32.h"
|
||||
#include "frontend/ir/microinstruction.h"
|
||||
#include "frontend/ir/opcodes.h"
|
||||
|
||||
namespace Dynarmic::BackendX64 {
|
||||
|
||||
using namespace Xbyak::util;
|
||||
namespace CRC32 = Common::Crypto::CRC32;
|
||||
|
||||
static void EmitCRC32Castagnoli(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst, const int data_size) {
|
||||
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
|
||||
|
||||
if (code.DoesCpuSupport(Xbyak::util::Cpu::tSSE42)) {
|
||||
const Xbyak::Reg32 crc = ctx.reg_alloc.UseScratchGpr(args[0]).cvt32();
|
||||
const Xbyak::Reg value = ctx.reg_alloc.UseGpr(args[1]).changeBit(data_size);
|
||||
code.crc32(crc, value);
|
||||
ctx.reg_alloc.DefineValue(inst, crc);
|
||||
} else {
|
||||
ctx.reg_alloc.HostCall(inst, args[0], args[1], {});
|
||||
code.mov(code.ABI_PARAM3, data_size / CHAR_BIT);
|
||||
code.CallFunction(&CRC32::ComputeCRC32Castagnoli);
|
||||
}
|
||||
}
|
||||
|
||||
static void EmitCRC32ISO(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst, const int data_size) {
|
||||
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
|
||||
|
||||
ctx.reg_alloc.HostCall(inst, args[0], args[1], {});
|
||||
code.mov(code.ABI_PARAM3, data_size / CHAR_BIT);
|
||||
code.CallFunction(&CRC32::ComputeCRC32ISO);
|
||||
}
|
||||
|
||||
void EmitX64::EmitCRC32Castagnoli8(EmitContext& ctx, IR::Inst* inst) {
|
||||
EmitCRC32Castagnoli(code, ctx, inst, 8);
|
||||
}
|
||||
|
||||
void EmitX64::EmitCRC32Castagnoli16(EmitContext& ctx, IR::Inst* inst) {
|
||||
EmitCRC32Castagnoli(code, ctx, inst, 16);
|
||||
}
|
||||
|
||||
void EmitX64::EmitCRC32Castagnoli32(EmitContext& ctx, IR::Inst* inst) {
|
||||
EmitCRC32Castagnoli(code, ctx, inst, 32);
|
||||
}
|
||||
|
||||
void EmitX64::EmitCRC32Castagnoli64(EmitContext& ctx, IR::Inst* inst) {
|
||||
EmitCRC32Castagnoli(code, ctx, inst, 64);
|
||||
}
|
||||
|
||||
void EmitX64::EmitCRC32ISO8(EmitContext& ctx, IR::Inst* inst) {
|
||||
EmitCRC32ISO(code, ctx, inst, 8);
|
||||
}
|
||||
|
||||
void EmitX64::EmitCRC32ISO16(EmitContext& ctx, IR::Inst* inst) {
|
||||
EmitCRC32ISO(code, ctx, inst, 16);
|
||||
}
|
||||
|
||||
void EmitX64::EmitCRC32ISO32(EmitContext& ctx, IR::Inst* inst) {
|
||||
EmitCRC32ISO(code, ctx, inst, 32);
|
||||
}
|
||||
|
||||
void EmitX64::EmitCRC32ISO64(EmitContext& ctx, IR::Inst* inst) {
|
||||
EmitCRC32ISO(code, ctx, inst, 64);
|
||||
}
|
||||
|
||||
} // namespace Dynarmic::BackendX64
|
||||
1416
src/backend/x64/emit_x64_data_processing.cpp
Normal file
1416
src/backend/x64/emit_x64_data_processing.cpp
Normal file
File diff suppressed because it is too large
Load Diff
1265
src/backend/x64/emit_x64_floating_point.cpp
Normal file
1265
src/backend/x64/emit_x64_floating_point.cpp
Normal file
File diff suppressed because it is too large
Load Diff
704
src/backend/x64/emit_x64_packed.cpp
Normal file
704
src/backend/x64/emit_x64_packed.cpp
Normal file
@@ -0,0 +1,704 @@
|
||||
/* This file is part of the dynarmic project.
|
||||
* Copyright (c) 2016 MerryMage
|
||||
* This software may be used and distributed according to the terms of the GNU
|
||||
* General Public License version 2 or any later version.
|
||||
*/
|
||||
|
||||
#include "backend/x64/block_of_code.h"
|
||||
#include "backend/x64/emit_x64.h"
|
||||
#include "common/assert.h"
|
||||
#include "common/common_types.h"
|
||||
#include "frontend/ir/basic_block.h"
|
||||
#include "frontend/ir/microinstruction.h"
|
||||
#include "frontend/ir/opcodes.h"
|
||||
|
||||
namespace Dynarmic::BackendX64 {
|
||||
|
||||
using namespace Xbyak::util;
|
||||
|
||||
void EmitX64::EmitPackedAddU8(EmitContext& ctx, IR::Inst* inst) {
|
||||
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
|
||||
auto ge_inst = inst->GetAssociatedPseudoOperation(IR::Opcode::GetGEFromOp);
|
||||
|
||||
Xbyak::Xmm xmm_a = ctx.reg_alloc.UseScratchXmm(args[0]);
|
||||
Xbyak::Xmm xmm_b = ctx.reg_alloc.UseXmm(args[1]);
|
||||
|
||||
code.paddb(xmm_a, xmm_b);
|
||||
|
||||
if (ge_inst) {
|
||||
Xbyak::Xmm xmm_ge = ctx.reg_alloc.ScratchXmm();
|
||||
Xbyak::Xmm ones = ctx.reg_alloc.ScratchXmm();
|
||||
|
||||
code.pcmpeqb(ones, ones);
|
||||
|
||||
code.movdqa(xmm_ge, xmm_a);
|
||||
code.pminub(xmm_ge, xmm_b);
|
||||
code.pcmpeqb(xmm_ge, xmm_b);
|
||||
code.pxor(xmm_ge, ones);
|
||||
|
||||
ctx.reg_alloc.DefineValue(ge_inst, xmm_ge);
|
||||
ctx.EraseInstruction(ge_inst);
|
||||
}
|
||||
|
||||
ctx.reg_alloc.DefineValue(inst, xmm_a);
|
||||
}
|
||||
|
||||
void EmitX64::EmitPackedAddS8(EmitContext& ctx, IR::Inst* inst) {
|
||||
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
|
||||
auto ge_inst = inst->GetAssociatedPseudoOperation(IR::Opcode::GetGEFromOp);
|
||||
|
||||
Xbyak::Xmm xmm_a = ctx.reg_alloc.UseScratchXmm(args[0]);
|
||||
Xbyak::Xmm xmm_b = ctx.reg_alloc.UseXmm(args[1]);
|
||||
|
||||
if (ge_inst) {
|
||||
Xbyak::Xmm saturated_sum = ctx.reg_alloc.ScratchXmm();
|
||||
Xbyak::Xmm xmm_ge = ctx.reg_alloc.ScratchXmm();
|
||||
|
||||
code.pxor(xmm_ge, xmm_ge);
|
||||
code.movdqa(saturated_sum, xmm_a);
|
||||
code.paddsb(saturated_sum, xmm_b);
|
||||
code.pcmpgtb(xmm_ge, saturated_sum);
|
||||
code.pcmpeqb(saturated_sum, saturated_sum);
|
||||
code.pxor(xmm_ge, saturated_sum);
|
||||
|
||||
ctx.reg_alloc.DefineValue(ge_inst, xmm_ge);
|
||||
ctx.EraseInstruction(ge_inst);
|
||||
}
|
||||
|
||||
code.paddb(xmm_a, xmm_b);
|
||||
|
||||
ctx.reg_alloc.DefineValue(inst, xmm_a);
|
||||
}
|
||||
|
||||
void EmitX64::EmitPackedAddU16(EmitContext& ctx, IR::Inst* inst) {
|
||||
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
|
||||
auto ge_inst = inst->GetAssociatedPseudoOperation(IR::Opcode::GetGEFromOp);
|
||||
|
||||
Xbyak::Xmm xmm_a = ctx.reg_alloc.UseScratchXmm(args[0]);
|
||||
Xbyak::Xmm xmm_b = ctx.reg_alloc.UseXmm(args[1]);
|
||||
|
||||
code.paddw(xmm_a, xmm_b);
|
||||
|
||||
if (ge_inst) {
|
||||
if (code.DoesCpuSupport(Xbyak::util::Cpu::tSSE41)) {
|
||||
Xbyak::Xmm xmm_ge = ctx.reg_alloc.ScratchXmm();
|
||||
Xbyak::Xmm ones = ctx.reg_alloc.ScratchXmm();
|
||||
|
||||
code.pcmpeqb(ones, ones);
|
||||
|
||||
code.movdqa(xmm_ge, xmm_a);
|
||||
code.pminuw(xmm_ge, xmm_b);
|
||||
code.pcmpeqw(xmm_ge, xmm_b);
|
||||
code.pxor(xmm_ge, ones);
|
||||
|
||||
ctx.reg_alloc.DefineValue(ge_inst, xmm_ge);
|
||||
ctx.EraseInstruction(ge_inst);
|
||||
} else {
|
||||
Xbyak::Xmm tmp_a = ctx.reg_alloc.ScratchXmm();
|
||||
Xbyak::Xmm tmp_b = ctx.reg_alloc.ScratchXmm();
|
||||
|
||||
// !(b <= a+b) == b > a+b
|
||||
code.movdqa(tmp_a, xmm_a);
|
||||
code.movdqa(tmp_b, xmm_b);
|
||||
code.paddw(tmp_a, code.MConst(xword, 0x80008000));
|
||||
code.paddw(tmp_b, code.MConst(xword, 0x80008000));
|
||||
code.pcmpgtw(tmp_b, tmp_a); // *Signed* comparison!
|
||||
|
||||
ctx.reg_alloc.DefineValue(ge_inst, tmp_b);
|
||||
ctx.EraseInstruction(ge_inst);
|
||||
}
|
||||
}
|
||||
|
||||
ctx.reg_alloc.DefineValue(inst, xmm_a);
|
||||
}
|
||||
|
||||
void EmitX64::EmitPackedAddS16(EmitContext& ctx, IR::Inst* inst) {
|
||||
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
|
||||
auto ge_inst = inst->GetAssociatedPseudoOperation(IR::Opcode::GetGEFromOp);
|
||||
|
||||
Xbyak::Xmm xmm_a = ctx.reg_alloc.UseScratchXmm(args[0]);
|
||||
Xbyak::Xmm xmm_b = ctx.reg_alloc.UseXmm(args[1]);
|
||||
|
||||
if (ge_inst) {
|
||||
Xbyak::Xmm saturated_sum = ctx.reg_alloc.ScratchXmm();
|
||||
Xbyak::Xmm xmm_ge = ctx.reg_alloc.ScratchXmm();
|
||||
|
||||
code.pxor(xmm_ge, xmm_ge);
|
||||
code.movdqa(saturated_sum, xmm_a);
|
||||
code.paddsw(saturated_sum, xmm_b);
|
||||
code.pcmpgtw(xmm_ge, saturated_sum);
|
||||
code.pcmpeqw(saturated_sum, saturated_sum);
|
||||
code.pxor(xmm_ge, saturated_sum);
|
||||
|
||||
ctx.reg_alloc.DefineValue(ge_inst, xmm_ge);
|
||||
ctx.EraseInstruction(ge_inst);
|
||||
}
|
||||
|
||||
code.paddw(xmm_a, xmm_b);
|
||||
|
||||
ctx.reg_alloc.DefineValue(inst, xmm_a);
|
||||
}
|
||||
|
||||
void EmitX64::EmitPackedSubU8(EmitContext& ctx, IR::Inst* inst) {
|
||||
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
|
||||
auto ge_inst = inst->GetAssociatedPseudoOperation(IR::Opcode::GetGEFromOp);
|
||||
|
||||
Xbyak::Xmm xmm_a = ctx.reg_alloc.UseScratchXmm(args[0]);
|
||||
Xbyak::Xmm xmm_b = ctx.reg_alloc.UseXmm(args[1]);
|
||||
|
||||
if (ge_inst) {
|
||||
Xbyak::Xmm xmm_ge = ctx.reg_alloc.ScratchXmm();
|
||||
|
||||
code.movdqa(xmm_ge, xmm_a);
|
||||
code.pmaxub(xmm_ge, xmm_b);
|
||||
code.pcmpeqb(xmm_ge, xmm_a);
|
||||
|
||||
ctx.reg_alloc.DefineValue(ge_inst, xmm_ge);
|
||||
ctx.EraseInstruction(ge_inst);
|
||||
}
|
||||
|
||||
code.psubb(xmm_a, xmm_b);
|
||||
|
||||
ctx.reg_alloc.DefineValue(inst, xmm_a);
|
||||
}
|
||||
|
||||
void EmitX64::EmitPackedSubS8(EmitContext& ctx, IR::Inst* inst) {
|
||||
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
|
||||
auto ge_inst = inst->GetAssociatedPseudoOperation(IR::Opcode::GetGEFromOp);
|
||||
|
||||
Xbyak::Xmm xmm_a = ctx.reg_alloc.UseScratchXmm(args[0]);
|
||||
Xbyak::Xmm xmm_b = ctx.reg_alloc.UseXmm(args[1]);
|
||||
|
||||
if (ge_inst) {
|
||||
Xbyak::Xmm saturated_sum = ctx.reg_alloc.ScratchXmm();
|
||||
Xbyak::Xmm xmm_ge = ctx.reg_alloc.ScratchXmm();
|
||||
|
||||
code.pxor(xmm_ge, xmm_ge);
|
||||
code.movdqa(saturated_sum, xmm_a);
|
||||
code.psubsb(saturated_sum, xmm_b);
|
||||
code.pcmpgtb(xmm_ge, saturated_sum);
|
||||
code.pcmpeqb(saturated_sum, saturated_sum);
|
||||
code.pxor(xmm_ge, saturated_sum);
|
||||
|
||||
ctx.reg_alloc.DefineValue(ge_inst, xmm_ge);
|
||||
ctx.EraseInstruction(ge_inst);
|
||||
}
|
||||
|
||||
code.psubb(xmm_a, xmm_b);
|
||||
|
||||
ctx.reg_alloc.DefineValue(inst, xmm_a);
|
||||
}
|
||||
|
||||
void EmitX64::EmitPackedSubU16(EmitContext& ctx, IR::Inst* inst) {
|
||||
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
|
||||
auto ge_inst = inst->GetAssociatedPseudoOperation(IR::Opcode::GetGEFromOp);
|
||||
|
||||
if (!ge_inst) {
|
||||
Xbyak::Xmm xmm_a = ctx.reg_alloc.UseScratchXmm(args[0]);
|
||||
Xbyak::Xmm xmm_b = ctx.reg_alloc.UseXmm(args[1]);
|
||||
|
||||
code.psubw(xmm_a, xmm_b);
|
||||
|
||||
ctx.reg_alloc.DefineValue(inst, xmm_a);
|
||||
return;
|
||||
}
|
||||
|
||||
if (code.DoesCpuSupport(Xbyak::util::Cpu::tSSE41)) {
|
||||
Xbyak::Xmm xmm_a = ctx.reg_alloc.UseScratchXmm(args[0]);
|
||||
Xbyak::Xmm xmm_b = ctx.reg_alloc.UseXmm(args[1]);
|
||||
Xbyak::Xmm xmm_ge = ctx.reg_alloc.ScratchXmm();
|
||||
|
||||
code.movdqa(xmm_ge, xmm_a);
|
||||
code.pmaxuw(xmm_ge, xmm_b); // Requires SSE 4.1
|
||||
code.pcmpeqw(xmm_ge, xmm_a);
|
||||
|
||||
code.psubw(xmm_a, xmm_b);
|
||||
|
||||
ctx.reg_alloc.DefineValue(ge_inst, xmm_ge);
|
||||
ctx.EraseInstruction(ge_inst);
|
||||
ctx.reg_alloc.DefineValue(inst, xmm_a);
|
||||
return;
|
||||
}
|
||||
|
||||
Xbyak::Xmm xmm_a = ctx.reg_alloc.UseScratchXmm(args[0]);
|
||||
Xbyak::Xmm xmm_b = ctx.reg_alloc.UseScratchXmm(args[1]);
|
||||
Xbyak::Xmm xmm_ge = ctx.reg_alloc.ScratchXmm();
|
||||
Xbyak::Xmm ones = ctx.reg_alloc.ScratchXmm();
|
||||
|
||||
// (a >= b) == !(b > a)
|
||||
code.pcmpeqb(ones, ones);
|
||||
code.paddw(xmm_a, code.MConst(xword, 0x80008000));
|
||||
code.paddw(xmm_b, code.MConst(xword, 0x80008000));
|
||||
code.movdqa(xmm_ge, xmm_b);
|
||||
code.pcmpgtw(xmm_ge, xmm_a); // *Signed* comparison!
|
||||
code.pxor(xmm_ge, ones);
|
||||
|
||||
code.psubw(xmm_a, xmm_b);
|
||||
|
||||
ctx.reg_alloc.DefineValue(ge_inst, xmm_ge);
|
||||
ctx.EraseInstruction(ge_inst);
|
||||
ctx.reg_alloc.DefineValue(inst, xmm_a);
|
||||
}
|
||||
|
||||
void EmitX64::EmitPackedSubS16(EmitContext& ctx, IR::Inst* inst) {
|
||||
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
|
||||
auto ge_inst = inst->GetAssociatedPseudoOperation(IR::Opcode::GetGEFromOp);
|
||||
|
||||
Xbyak::Xmm xmm_a = ctx.reg_alloc.UseScratchXmm(args[0]);
|
||||
Xbyak::Xmm xmm_b = ctx.reg_alloc.UseXmm(args[1]);
|
||||
|
||||
if (ge_inst) {
|
||||
Xbyak::Xmm saturated_diff = ctx.reg_alloc.ScratchXmm();
|
||||
Xbyak::Xmm xmm_ge = ctx.reg_alloc.ScratchXmm();
|
||||
|
||||
code.pxor(xmm_ge, xmm_ge);
|
||||
code.movdqa(saturated_diff, xmm_a);
|
||||
code.psubsw(saturated_diff, xmm_b);
|
||||
code.pcmpgtw(xmm_ge, saturated_diff);
|
||||
code.pcmpeqw(saturated_diff, saturated_diff);
|
||||
code.pxor(xmm_ge, saturated_diff);
|
||||
|
||||
ctx.reg_alloc.DefineValue(ge_inst, xmm_ge);
|
||||
ctx.EraseInstruction(ge_inst);
|
||||
}
|
||||
|
||||
code.psubw(xmm_a, xmm_b);
|
||||
|
||||
ctx.reg_alloc.DefineValue(inst, xmm_a);
|
||||
}
|
||||
|
||||
void EmitX64::EmitPackedHalvingAddU8(EmitContext& ctx, IR::Inst* inst) {
|
||||
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
|
||||
|
||||
if (args[0].IsInXmm() || args[1].IsInXmm()) {
|
||||
Xbyak::Xmm xmm_a = ctx.reg_alloc.UseScratchXmm(args[0]);
|
||||
Xbyak::Xmm xmm_b = ctx.reg_alloc.UseScratchXmm(args[1]);
|
||||
Xbyak::Xmm ones = ctx.reg_alloc.ScratchXmm();
|
||||
|
||||
// Since,
|
||||
// pavg(a, b) == (a + b + 1) >> 1
|
||||
// Therefore,
|
||||
// ~pavg(~a, ~b) == (a + b) >> 1
|
||||
|
||||
code.pcmpeqb(ones, ones);
|
||||
code.pxor(xmm_a, ones);
|
||||
code.pxor(xmm_b, ones);
|
||||
code.pavgb(xmm_a, xmm_b);
|
||||
code.pxor(xmm_a, ones);
|
||||
|
||||
ctx.reg_alloc.DefineValue(inst, xmm_a);
|
||||
} else {
|
||||
Xbyak::Reg32 reg_a = ctx.reg_alloc.UseScratchGpr(args[0]).cvt32();
|
||||
Xbyak::Reg32 reg_b = ctx.reg_alloc.UseGpr(args[1]).cvt32();
|
||||
Xbyak::Reg32 xor_a_b = ctx.reg_alloc.ScratchGpr().cvt32();
|
||||
Xbyak::Reg32 and_a_b = reg_a;
|
||||
Xbyak::Reg32 result = reg_a;
|
||||
|
||||
// This relies on the equality x+y == ((x&y) << 1) + (x^y).
|
||||
// Note that x^y always contains the LSB of the result.
|
||||
// Since we want to calculate (x+y)/2, we can instead calculate (x&y) + ((x^y)>>1).
|
||||
// We mask by 0x7F to remove the LSB so that it doesn't leak into the field below.
|
||||
|
||||
code.mov(xor_a_b, reg_a);
|
||||
code.and_(and_a_b, reg_b);
|
||||
code.xor_(xor_a_b, reg_b);
|
||||
code.shr(xor_a_b, 1);
|
||||
code.and_(xor_a_b, 0x7F7F7F7F);
|
||||
code.add(result, xor_a_b);
|
||||
|
||||
ctx.reg_alloc.DefineValue(inst, result);
|
||||
}
|
||||
}
|
||||
|
||||
void EmitX64::EmitPackedHalvingAddU16(EmitContext& ctx, IR::Inst* inst) {
|
||||
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
|
||||
|
||||
if (args[0].IsInXmm() || args[1].IsInXmm()) {
|
||||
Xbyak::Xmm xmm_a = ctx.reg_alloc.UseScratchXmm(args[0]);
|
||||
Xbyak::Xmm xmm_b = ctx.reg_alloc.UseXmm(args[1]);
|
||||
Xbyak::Xmm tmp = ctx.reg_alloc.ScratchXmm();
|
||||
|
||||
code.movdqa(tmp, xmm_a);
|
||||
code.pand(xmm_a, xmm_b);
|
||||
code.pxor(tmp, xmm_b);
|
||||
code.psrlw(tmp, 1);
|
||||
code.paddw(xmm_a, tmp);
|
||||
|
||||
ctx.reg_alloc.DefineValue(inst, xmm_a);
|
||||
} else {
|
||||
Xbyak::Reg32 reg_a = ctx.reg_alloc.UseScratchGpr(args[0]).cvt32();
|
||||
Xbyak::Reg32 reg_b = ctx.reg_alloc.UseGpr(args[1]).cvt32();
|
||||
Xbyak::Reg32 xor_a_b = ctx.reg_alloc.ScratchGpr().cvt32();
|
||||
Xbyak::Reg32 and_a_b = reg_a;
|
||||
Xbyak::Reg32 result = reg_a;
|
||||
|
||||
// This relies on the equality x+y == ((x&y) << 1) + (x^y).
|
||||
// Note that x^y always contains the LSB of the result.
|
||||
// Since we want to calculate (x+y)/2, we can instead calculate (x&y) + ((x^y)>>1).
|
||||
// We mask by 0x7FFF to remove the LSB so that it doesn't leak into the field below.
|
||||
|
||||
code.mov(xor_a_b, reg_a);
|
||||
code.and_(and_a_b, reg_b);
|
||||
code.xor_(xor_a_b, reg_b);
|
||||
code.shr(xor_a_b, 1);
|
||||
code.and_(xor_a_b, 0x7FFF7FFF);
|
||||
code.add(result, xor_a_b);
|
||||
|
||||
ctx.reg_alloc.DefineValue(inst, result);
|
||||
}
|
||||
}
|
||||
|
||||
void EmitX64::EmitPackedHalvingAddS8(EmitContext& ctx, IR::Inst* inst) {
|
||||
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
|
||||
|
||||
Xbyak::Reg32 reg_a = ctx.reg_alloc.UseScratchGpr(args[0]).cvt32();
|
||||
Xbyak::Reg32 reg_b = ctx.reg_alloc.UseGpr(args[1]).cvt32();
|
||||
Xbyak::Reg32 xor_a_b = ctx.reg_alloc.ScratchGpr().cvt32();
|
||||
Xbyak::Reg32 and_a_b = reg_a;
|
||||
Xbyak::Reg32 result = reg_a;
|
||||
Xbyak::Reg32 carry = ctx.reg_alloc.ScratchGpr().cvt32();
|
||||
|
||||
// This relies on the equality x+y == ((x&y) << 1) + (x^y).
|
||||
// Note that x^y always contains the LSB of the result.
|
||||
// Since we want to calculate (x+y)/2, we can instead calculate (x&y) + ((x^y)>>1).
|
||||
// We mask by 0x7F to remove the LSB so that it doesn't leak into the field below.
|
||||
// carry propagates the sign bit from (x^y)>>1 upwards by one.
|
||||
|
||||
code.mov(xor_a_b, reg_a);
|
||||
code.and_(and_a_b, reg_b);
|
||||
code.xor_(xor_a_b, reg_b);
|
||||
code.mov(carry, xor_a_b);
|
||||
code.and_(carry, 0x80808080);
|
||||
code.shr(xor_a_b, 1);
|
||||
code.and_(xor_a_b, 0x7F7F7F7F);
|
||||
code.add(result, xor_a_b);
|
||||
code.xor_(result, carry);
|
||||
|
||||
ctx.reg_alloc.DefineValue(inst, result);
|
||||
}
|
||||
|
||||
void EmitX64::EmitPackedHalvingAddS16(EmitContext& ctx, IR::Inst* inst) {
|
||||
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
|
||||
|
||||
Xbyak::Xmm xmm_a = ctx.reg_alloc.UseScratchXmm(args[0]);
|
||||
Xbyak::Xmm xmm_b = ctx.reg_alloc.UseXmm(args[1]);
|
||||
Xbyak::Xmm tmp = ctx.reg_alloc.ScratchXmm();
|
||||
|
||||
// This relies on the equality x+y == ((x&y) << 1) + (x^y).
|
||||
// Note that x^y always contains the LSB of the result.
|
||||
// Since we want to calculate (x+y)/2, we can instead calculate (x&y) + ((x^y)>>>1).
|
||||
// The arithmetic shift right makes this signed.
|
||||
|
||||
code.movdqa(tmp, xmm_a);
|
||||
code.pand(xmm_a, xmm_b);
|
||||
code.pxor(tmp, xmm_b);
|
||||
code.psraw(tmp, 1);
|
||||
code.paddw(xmm_a, tmp);
|
||||
|
||||
ctx.reg_alloc.DefineValue(inst, xmm_a);
|
||||
}
|
||||
|
||||
void EmitX64::EmitPackedHalvingSubU8(EmitContext& ctx, IR::Inst* inst) {
|
||||
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
|
||||
|
||||
Xbyak::Reg32 minuend = ctx.reg_alloc.UseScratchGpr(args[0]).cvt32();
|
||||
Xbyak::Reg32 subtrahend = ctx.reg_alloc.UseScratchGpr(args[1]).cvt32();
|
||||
|
||||
// This relies on the equality x-y == (x^y) - (((x^y)&y) << 1).
|
||||
// Note that x^y always contains the LSB of the result.
|
||||
// Since we want to calculate (x+y)/2, we can instead calculate ((x^y)>>1) - ((x^y)&y).
|
||||
|
||||
code.xor_(minuend, subtrahend);
|
||||
code.and_(subtrahend, minuend);
|
||||
code.shr(minuend, 1);
|
||||
|
||||
// At this point,
|
||||
// minuend := (a^b) >> 1
|
||||
// subtrahend := (a^b) & b
|
||||
|
||||
// We must now perform a partitioned subtraction.
|
||||
// We can do this because minuend contains 7 bit fields.
|
||||
// We use the extra bit in minuend as a bit to borrow from; we set this bit.
|
||||
// We invert this bit at the end as this tells us if that bit was borrowed from.
|
||||
code.or_(minuend, 0x80808080);
|
||||
code.sub(minuend, subtrahend);
|
||||
code.xor_(minuend, 0x80808080);
|
||||
|
||||
// minuend now contains the desired result.
|
||||
ctx.reg_alloc.DefineValue(inst, minuend);
|
||||
}
|
||||
|
||||
void EmitX64::EmitPackedHalvingSubS8(EmitContext& ctx, IR::Inst* inst) {
|
||||
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
|
||||
|
||||
Xbyak::Reg32 minuend = ctx.reg_alloc.UseScratchGpr(args[0]).cvt32();
|
||||
Xbyak::Reg32 subtrahend = ctx.reg_alloc.UseScratchGpr(args[1]).cvt32();
|
||||
|
||||
Xbyak::Reg32 carry = ctx.reg_alloc.ScratchGpr().cvt32();
|
||||
|
||||
// This relies on the equality x-y == (x^y) - (((x^y)&y) << 1).
|
||||
// Note that x^y always contains the LSB of the result.
|
||||
// Since we want to calculate (x-y)/2, we can instead calculate ((x^y)>>1) - ((x^y)&y).
|
||||
|
||||
code.xor_(minuend, subtrahend);
|
||||
code.and_(subtrahend, minuend);
|
||||
code.mov(carry, minuend);
|
||||
code.and_(carry, 0x80808080);
|
||||
code.shr(minuend, 1);
|
||||
|
||||
// At this point,
|
||||
// minuend := (a^b) >> 1
|
||||
// subtrahend := (a^b) & b
|
||||
// carry := (a^b) & 0x80808080
|
||||
|
||||
// We must now perform a partitioned subtraction.
|
||||
// We can do this because minuend contains 7 bit fields.
|
||||
// We use the extra bit in minuend as a bit to borrow from; we set this bit.
|
||||
// We invert this bit at the end as this tells us if that bit was borrowed from.
|
||||
// We then sign extend the result into this bit.
|
||||
code.or_(minuend, 0x80808080);
|
||||
code.sub(minuend, subtrahend);
|
||||
code.xor_(minuend, 0x80808080);
|
||||
code.xor_(minuend, carry);
|
||||
|
||||
ctx.reg_alloc.DefineValue(inst, minuend);
|
||||
}
|
||||
|
||||
void EmitX64::EmitPackedHalvingSubU16(EmitContext& ctx, IR::Inst* inst) {
|
||||
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
|
||||
|
||||
Xbyak::Xmm minuend = ctx.reg_alloc.UseScratchXmm(args[0]);
|
||||
Xbyak::Xmm subtrahend = ctx.reg_alloc.UseScratchXmm(args[1]);
|
||||
|
||||
// This relies on the equality x-y == (x^y) - (((x^y)&y) << 1).
|
||||
// Note that x^y always contains the LSB of the result.
|
||||
// Since we want to calculate (x-y)/2, we can instead calculate ((x^y)>>1) - ((x^y)&y).
|
||||
|
||||
code.pxor(minuend, subtrahend);
|
||||
code.pand(subtrahend, minuend);
|
||||
code.psrlw(minuend, 1);
|
||||
|
||||
// At this point,
|
||||
// minuend := (a^b) >> 1
|
||||
// subtrahend := (a^b) & b
|
||||
|
||||
code.psubw(minuend, subtrahend);
|
||||
|
||||
ctx.reg_alloc.DefineValue(inst, minuend);
|
||||
}
|
||||
|
||||
void EmitX64::EmitPackedHalvingSubS16(EmitContext& ctx, IR::Inst* inst) {
|
||||
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
|
||||
|
||||
Xbyak::Xmm minuend = ctx.reg_alloc.UseScratchXmm(args[0]);
|
||||
Xbyak::Xmm subtrahend = ctx.reg_alloc.UseScratchXmm(args[1]);
|
||||
|
||||
// This relies on the equality x-y == (x^y) - (((x^y)&y) << 1).
|
||||
// Note that x^y always contains the LSB of the result.
|
||||
// Since we want to calculate (x-y)/2, we can instead calculate ((x^y)>>>1) - ((x^y)&y).
|
||||
|
||||
code.pxor(minuend, subtrahend);
|
||||
code.pand(subtrahend, minuend);
|
||||
code.psraw(minuend, 1);
|
||||
|
||||
// At this point,
|
||||
// minuend := (a^b) >>> 1
|
||||
// subtrahend := (a^b) & b
|
||||
|
||||
code.psubw(minuend, subtrahend);
|
||||
|
||||
ctx.reg_alloc.DefineValue(inst, minuend);
|
||||
}
|
||||
|
||||
void EmitPackedSubAdd(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst, bool hi_is_sum, bool is_signed, bool is_halving) {
|
||||
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
|
||||
auto ge_inst = inst->GetAssociatedPseudoOperation(IR::Opcode::GetGEFromOp);
|
||||
|
||||
Xbyak::Reg32 reg_a_hi = ctx.reg_alloc.UseScratchGpr(args[0]).cvt32();
|
||||
Xbyak::Reg32 reg_b_hi = ctx.reg_alloc.UseScratchGpr(args[1]).cvt32();
|
||||
Xbyak::Reg32 reg_a_lo = ctx.reg_alloc.ScratchGpr().cvt32();
|
||||
Xbyak::Reg32 reg_b_lo = ctx.reg_alloc.ScratchGpr().cvt32();
|
||||
Xbyak::Reg32 reg_sum, reg_diff;
|
||||
|
||||
if (is_signed) {
|
||||
code.movsx(reg_a_lo, reg_a_hi.cvt16());
|
||||
code.movsx(reg_b_lo, reg_b_hi.cvt16());
|
||||
code.sar(reg_a_hi, 16);
|
||||
code.sar(reg_b_hi, 16);
|
||||
} else {
|
||||
code.movzx(reg_a_lo, reg_a_hi.cvt16());
|
||||
code.movzx(reg_b_lo, reg_b_hi.cvt16());
|
||||
code.shr(reg_a_hi, 16);
|
||||
code.shr(reg_b_hi, 16);
|
||||
}
|
||||
|
||||
if (hi_is_sum) {
|
||||
code.sub(reg_a_lo, reg_b_hi);
|
||||
code.add(reg_a_hi, reg_b_lo);
|
||||
reg_diff = reg_a_lo;
|
||||
reg_sum = reg_a_hi;
|
||||
} else {
|
||||
code.add(reg_a_lo, reg_b_hi);
|
||||
code.sub(reg_a_hi, reg_b_lo);
|
||||
reg_diff = reg_a_hi;
|
||||
reg_sum = reg_a_lo;
|
||||
}
|
||||
|
||||
if (ge_inst) {
|
||||
// The reg_b registers are no longer required.
|
||||
Xbyak::Reg32 ge_sum = reg_b_hi;
|
||||
Xbyak::Reg32 ge_diff = reg_b_lo;
|
||||
|
||||
code.mov(ge_sum, reg_sum);
|
||||
code.mov(ge_diff, reg_diff);
|
||||
|
||||
if (!is_signed) {
|
||||
code.shl(ge_sum, 15);
|
||||
code.sar(ge_sum, 31);
|
||||
} else {
|
||||
code.not_(ge_sum);
|
||||
code.sar(ge_sum, 31);
|
||||
}
|
||||
code.not_(ge_diff);
|
||||
code.sar(ge_diff, 31);
|
||||
code.and_(ge_sum, hi_is_sum ? 0xFFFF0000 : 0x0000FFFF);
|
||||
code.and_(ge_diff, hi_is_sum ? 0x0000FFFF : 0xFFFF0000);
|
||||
code.or_(ge_sum, ge_diff);
|
||||
|
||||
ctx.reg_alloc.DefineValue(ge_inst, ge_sum);
|
||||
ctx.EraseInstruction(ge_inst);
|
||||
}
|
||||
|
||||
if (is_halving) {
|
||||
code.shl(reg_a_lo, 15);
|
||||
code.shr(reg_a_hi, 1);
|
||||
} else {
|
||||
code.shl(reg_a_lo, 16);
|
||||
}
|
||||
|
||||
// reg_a_lo now contains the low word and reg_a_hi now contains the high word.
|
||||
// Merge them.
|
||||
code.shld(reg_a_hi, reg_a_lo, 16);
|
||||
|
||||
ctx.reg_alloc.DefineValue(inst, reg_a_hi);
|
||||
}
|
||||
|
||||
void EmitX64::EmitPackedAddSubU16(EmitContext& ctx, IR::Inst* inst) {
|
||||
EmitPackedSubAdd(code, ctx, inst, true, false, false);
|
||||
}
|
||||
|
||||
void EmitX64::EmitPackedAddSubS16(EmitContext& ctx, IR::Inst* inst) {
|
||||
EmitPackedSubAdd(code, ctx, inst, true, true, false);
|
||||
}
|
||||
|
||||
void EmitX64::EmitPackedSubAddU16(EmitContext& ctx, IR::Inst* inst) {
|
||||
EmitPackedSubAdd(code, ctx, inst, false, false, false);
|
||||
}
|
||||
|
||||
void EmitX64::EmitPackedSubAddS16(EmitContext& ctx, IR::Inst* inst) {
|
||||
EmitPackedSubAdd(code, ctx, inst, false, true, false);
|
||||
}
|
||||
|
||||
void EmitX64::EmitPackedHalvingAddSubU16(EmitContext& ctx, IR::Inst* inst) {
|
||||
EmitPackedSubAdd(code, ctx, inst, true, false, true);
|
||||
}
|
||||
|
||||
void EmitX64::EmitPackedHalvingAddSubS16(EmitContext& ctx, IR::Inst* inst) {
|
||||
EmitPackedSubAdd(code, ctx, inst, true, true, true);
|
||||
}
|
||||
|
||||
void EmitX64::EmitPackedHalvingSubAddU16(EmitContext& ctx, IR::Inst* inst) {
|
||||
EmitPackedSubAdd(code, ctx, inst, false, false, true);
|
||||
}
|
||||
|
||||
void EmitX64::EmitPackedHalvingSubAddS16(EmitContext& ctx, IR::Inst* inst) {
|
||||
EmitPackedSubAdd(code, ctx, inst, false, true, true);
|
||||
}
|
||||
|
||||
static void EmitPackedOperation(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst, void (Xbyak::CodeGenerator::*fn)(const Xbyak::Mmx& mmx, const Xbyak::Operand&)) {
|
||||
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
|
||||
|
||||
Xbyak::Xmm xmm_a = ctx.reg_alloc.UseScratchXmm(args[0]);
|
||||
Xbyak::Xmm xmm_b = ctx.reg_alloc.UseXmm(args[1]);
|
||||
|
||||
(code.*fn)(xmm_a, xmm_b);
|
||||
|
||||
ctx.reg_alloc.DefineValue(inst, xmm_a);
|
||||
}
|
||||
|
||||
void EmitX64::EmitPackedSaturatedAddU8(EmitContext& ctx, IR::Inst* inst) {
|
||||
EmitPackedOperation(code, ctx, inst, &Xbyak::CodeGenerator::paddusb);
|
||||
}
|
||||
|
||||
void EmitX64::EmitPackedSaturatedAddS8(EmitContext& ctx, IR::Inst* inst) {
|
||||
EmitPackedOperation(code, ctx, inst, &Xbyak::CodeGenerator::paddsb);
|
||||
}
|
||||
|
||||
void EmitX64::EmitPackedSaturatedSubU8(EmitContext& ctx, IR::Inst* inst) {
|
||||
EmitPackedOperation(code, ctx, inst, &Xbyak::CodeGenerator::psubusb);
|
||||
}
|
||||
|
||||
void EmitX64::EmitPackedSaturatedSubS8(EmitContext& ctx, IR::Inst* inst) {
|
||||
EmitPackedOperation(code, ctx, inst, &Xbyak::CodeGenerator::psubsb);
|
||||
}
|
||||
|
||||
void EmitX64::EmitPackedSaturatedAddU16(EmitContext& ctx, IR::Inst* inst) {
|
||||
EmitPackedOperation(code, ctx, inst, &Xbyak::CodeGenerator::paddusw);
|
||||
}
|
||||
|
||||
void EmitX64::EmitPackedSaturatedAddS16(EmitContext& ctx, IR::Inst* inst) {
|
||||
EmitPackedOperation(code, ctx, inst, &Xbyak::CodeGenerator::paddsw);
|
||||
}
|
||||
|
||||
void EmitX64::EmitPackedSaturatedSubU16(EmitContext& ctx, IR::Inst* inst) {
|
||||
EmitPackedOperation(code, ctx, inst, &Xbyak::CodeGenerator::psubusw);
|
||||
}
|
||||
|
||||
void EmitX64::EmitPackedSaturatedSubS16(EmitContext& ctx, IR::Inst* inst) {
|
||||
EmitPackedOperation(code, ctx, inst, &Xbyak::CodeGenerator::psubsw);
|
||||
}
|
||||
|
||||
void EmitX64::EmitPackedAbsDiffSumS8(EmitContext& ctx, IR::Inst* inst) {
|
||||
EmitPackedOperation(code, ctx, inst, &Xbyak::CodeGenerator::psadbw);
|
||||
}
|
||||
|
||||
void EmitX64::EmitPackedSelect(EmitContext& ctx, IR::Inst* inst) {
|
||||
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
|
||||
|
||||
size_t num_args_in_xmm = args[0].IsInXmm() + args[1].IsInXmm() + args[2].IsInXmm();
|
||||
|
||||
if (num_args_in_xmm >= 2) {
|
||||
Xbyak::Xmm ge = ctx.reg_alloc.UseScratchXmm(args[0]);
|
||||
Xbyak::Xmm to = ctx.reg_alloc.UseXmm(args[1]);
|
||||
Xbyak::Xmm from = ctx.reg_alloc.UseScratchXmm(args[2]);
|
||||
|
||||
code.pand(from, ge);
|
||||
code.pandn(ge, to);
|
||||
code.por(from, ge);
|
||||
|
||||
ctx.reg_alloc.DefineValue(inst, from);
|
||||
} else if (code.DoesCpuSupport(Xbyak::util::Cpu::tBMI1)) {
|
||||
Xbyak::Reg32 ge = ctx.reg_alloc.UseGpr(args[0]).cvt32();
|
||||
Xbyak::Reg32 to = ctx.reg_alloc.UseScratchGpr(args[1]).cvt32();
|
||||
Xbyak::Reg32 from = ctx.reg_alloc.UseScratchGpr(args[2]).cvt32();
|
||||
|
||||
code.and_(from, ge);
|
||||
code.andn(to, ge, to);
|
||||
code.or_(from, to);
|
||||
|
||||
ctx.reg_alloc.DefineValue(inst, from);
|
||||
} else {
|
||||
Xbyak::Reg32 ge = ctx.reg_alloc.UseScratchGpr(args[0]).cvt32();
|
||||
Xbyak::Reg32 to = ctx.reg_alloc.UseGpr(args[1]).cvt32();
|
||||
Xbyak::Reg32 from = ctx.reg_alloc.UseScratchGpr(args[2]).cvt32();
|
||||
|
||||
code.and_(from, ge);
|
||||
code.not_(ge);
|
||||
code.and_(ge, to);
|
||||
code.or_(from, ge);
|
||||
|
||||
ctx.reg_alloc.DefineValue(inst, from);
|
||||
}
|
||||
}
|
||||
|
||||
} // namespace Dynarmic::BackendX64
|
||||
264
src/backend/x64/emit_x64_saturation.cpp
Normal file
264
src/backend/x64/emit_x64_saturation.cpp
Normal file
@@ -0,0 +1,264 @@
|
||||
/* This file is part of the dynarmic project.
|
||||
* Copyright (c) 2016 MerryMage
|
||||
* This software may be used and distributed according to the terms of the GNU
|
||||
* General Public License version 2 or any later version.
|
||||
*/
|
||||
|
||||
#include <limits>
|
||||
|
||||
#include "backend/x64/block_of_code.h"
|
||||
#include "backend/x64/emit_x64.h"
|
||||
#include "common/assert.h"
|
||||
#include "common/bit_util.h"
|
||||
#include "common/common_types.h"
|
||||
#include "common/mp/integer.h"
|
||||
#include "frontend/ir/basic_block.h"
|
||||
#include "frontend/ir/microinstruction.h"
|
||||
#include "frontend/ir/opcodes.h"
|
||||
|
||||
namespace Dynarmic::BackendX64 {
|
||||
|
||||
using namespace Xbyak::util;
|
||||
namespace mp = Dynarmic::Common::mp;
|
||||
|
||||
namespace {
|
||||
|
||||
enum class Op {
|
||||
Add,
|
||||
Sub,
|
||||
};
|
||||
|
||||
template<Op op, size_t size>
|
||||
void EmitSignedSaturatedOp(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst) {
|
||||
auto overflow_inst = inst->GetAssociatedPseudoOperation(IR::Opcode::GetOverflowFromOp);
|
||||
|
||||
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
|
||||
|
||||
Xbyak::Reg result = ctx.reg_alloc.UseScratchGpr(args[0]);
|
||||
Xbyak::Reg addend = ctx.reg_alloc.UseGpr(args[1]);
|
||||
Xbyak::Reg overflow = ctx.reg_alloc.ScratchGpr();
|
||||
|
||||
result.setBit(size);
|
||||
addend.setBit(size);
|
||||
overflow.setBit(size);
|
||||
|
||||
constexpr u64 int_max = static_cast<u64>(std::numeric_limits<mp::signed_integer_of_size<size>>::max());
|
||||
if constexpr (size < 64) {
|
||||
code.xor_(overflow.cvt32(), overflow.cvt32());
|
||||
code.bt(result.cvt32(), size - 1);
|
||||
code.adc(overflow.cvt32(), int_max);
|
||||
} else {
|
||||
code.mov(overflow, int_max);
|
||||
code.bt(result, 63);
|
||||
code.adc(overflow, 0);
|
||||
}
|
||||
|
||||
// overflow now contains 0x7F... if a was positive, or 0x80... if a was negative
|
||||
|
||||
if constexpr (op == Op::Add) {
|
||||
code.add(result, addend);
|
||||
} else {
|
||||
code.sub(result, addend);
|
||||
}
|
||||
|
||||
if constexpr (size < 64) {
|
||||
code.cmovo(result.cvt32(), overflow.cvt32());
|
||||
} else {
|
||||
code.cmovo(result, overflow);
|
||||
}
|
||||
|
||||
if (overflow_inst) {
|
||||
code.seto(overflow.cvt8());
|
||||
|
||||
ctx.reg_alloc.DefineValue(overflow_inst, overflow);
|
||||
ctx.EraseInstruction(overflow_inst);
|
||||
}
|
||||
|
||||
ctx.reg_alloc.DefineValue(inst, result);
|
||||
}
|
||||
|
||||
template<Op op, size_t size>
|
||||
void EmitUnsignedSaturatedOp(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst) {
|
||||
auto overflow_inst = inst->GetAssociatedPseudoOperation(IR::Opcode::GetOverflowFromOp);
|
||||
|
||||
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
|
||||
|
||||
Xbyak::Reg op_result = ctx.reg_alloc.UseScratchGpr(args[0]);
|
||||
Xbyak::Reg addend = ctx.reg_alloc.UseScratchGpr(args[1]);
|
||||
|
||||
op_result.setBit(size);
|
||||
addend.setBit(size);
|
||||
|
||||
if constexpr (op == Op::Add) {
|
||||
code.add(op_result, addend);
|
||||
} else {
|
||||
code.sub(op_result, addend);
|
||||
}
|
||||
|
||||
constexpr u64 boundary = op == Op::Add ? std::numeric_limits<mp::unsigned_integer_of_size<size>>::max()
|
||||
: 0;
|
||||
code.mov(addend, boundary);
|
||||
|
||||
if constexpr (size < 64) {
|
||||
code.cmovae(addend.cvt32(), op_result.cvt32());
|
||||
} else {
|
||||
code.cmovae(addend, op_result);
|
||||
}
|
||||
|
||||
if (overflow_inst) {
|
||||
Xbyak::Reg overflow = ctx.reg_alloc.ScratchGpr();
|
||||
code.setb(overflow.cvt8());
|
||||
|
||||
ctx.reg_alloc.DefineValue(overflow_inst, overflow);
|
||||
ctx.EraseInstruction(overflow_inst);
|
||||
}
|
||||
|
||||
ctx.reg_alloc.DefineValue(inst, addend);
|
||||
}
|
||||
|
||||
} // anonymous namespace
|
||||
|
||||
void EmitX64::EmitSignedSaturatedAdd8(EmitContext& ctx, IR::Inst* inst) {
|
||||
EmitSignedSaturatedOp<Op::Add, 8>(code, ctx, inst);
|
||||
}
|
||||
|
||||
void EmitX64::EmitSignedSaturatedAdd16(EmitContext& ctx, IR::Inst* inst) {
|
||||
EmitSignedSaturatedOp<Op::Add, 16>(code, ctx, inst);
|
||||
}
|
||||
|
||||
void EmitX64::EmitSignedSaturatedAdd32(EmitContext& ctx, IR::Inst* inst) {
|
||||
EmitSignedSaturatedOp<Op::Add, 32>(code, ctx, inst);
|
||||
}
|
||||
|
||||
void EmitX64::EmitSignedSaturatedAdd64(EmitContext& ctx, IR::Inst* inst) {
|
||||
EmitSignedSaturatedOp<Op::Add, 64>(code, ctx, inst);
|
||||
}
|
||||
|
||||
void EmitX64::EmitSignedSaturatedSub8(EmitContext& ctx, IR::Inst* inst) {
|
||||
EmitSignedSaturatedOp<Op::Sub, 8>(code, ctx, inst);
|
||||
}
|
||||
|
||||
void EmitX64::EmitSignedSaturatedSub16(EmitContext& ctx, IR::Inst* inst) {
|
||||
EmitSignedSaturatedOp<Op::Sub, 16>(code, ctx, inst);
|
||||
}
|
||||
|
||||
void EmitX64::EmitSignedSaturatedSub32(EmitContext& ctx, IR::Inst* inst) {
|
||||
EmitSignedSaturatedOp<Op::Sub, 32>(code, ctx, inst);
|
||||
}
|
||||
|
||||
void EmitX64::EmitSignedSaturatedSub64(EmitContext& ctx, IR::Inst* inst) {
|
||||
EmitSignedSaturatedOp<Op::Sub, 64>(code, ctx, inst);
|
||||
}
|
||||
|
||||
void EmitX64::EmitSignedSaturation(EmitContext& ctx, IR::Inst* inst) {
|
||||
auto overflow_inst = inst->GetAssociatedPseudoOperation(IR::Opcode::GetOverflowFromOp);
|
||||
|
||||
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
|
||||
size_t N = args[1].GetImmediateU8();
|
||||
ASSERT(N >= 1 && N <= 32);
|
||||
|
||||
if (N == 32) {
|
||||
if (overflow_inst) {
|
||||
auto no_overflow = IR::Value(false);
|
||||
overflow_inst->ReplaceUsesWith(no_overflow);
|
||||
}
|
||||
ctx.reg_alloc.DefineValue(inst, args[0]);
|
||||
return;
|
||||
}
|
||||
|
||||
u32 mask = (1u << N) - 1;
|
||||
u32 positive_saturated_value = (1u << (N - 1)) - 1;
|
||||
u32 negative_saturated_value = 1u << (N - 1);
|
||||
u32 sext_negative_satured_value = Common::SignExtend(N, negative_saturated_value);
|
||||
|
||||
Xbyak::Reg32 result = ctx.reg_alloc.ScratchGpr().cvt32();
|
||||
Xbyak::Reg32 reg_a = ctx.reg_alloc.UseGpr(args[0]).cvt32();
|
||||
Xbyak::Reg32 overflow = ctx.reg_alloc.ScratchGpr().cvt32();
|
||||
Xbyak::Reg32 tmp = ctx.reg_alloc.ScratchGpr().cvt32();
|
||||
|
||||
// overflow now contains a value between 0 and mask if it was originally between {negative,positive}_saturated_value.
|
||||
code.lea(overflow, code.ptr[reg_a.cvt64() + negative_saturated_value]);
|
||||
|
||||
// Put the appropriate saturated value in result
|
||||
code.cmp(reg_a, positive_saturated_value);
|
||||
code.mov(tmp, positive_saturated_value);
|
||||
code.mov(result, sext_negative_satured_value);
|
||||
code.cmovg(result, tmp);
|
||||
|
||||
// Do the saturation
|
||||
code.cmp(overflow, mask);
|
||||
code.cmovbe(result, reg_a);
|
||||
|
||||
if (overflow_inst) {
|
||||
code.seta(overflow.cvt8());
|
||||
|
||||
ctx.reg_alloc.DefineValue(overflow_inst, overflow);
|
||||
ctx.EraseInstruction(overflow_inst);
|
||||
}
|
||||
|
||||
ctx.reg_alloc.DefineValue(inst, result);
|
||||
}
|
||||
|
||||
void EmitX64::EmitUnsignedSaturatedAdd8(EmitContext& ctx, IR::Inst* inst) {
|
||||
EmitUnsignedSaturatedOp<Op::Add, 8>(code, ctx, inst);
|
||||
}
|
||||
|
||||
void EmitX64::EmitUnsignedSaturatedAdd16(EmitContext& ctx, IR::Inst* inst) {
|
||||
EmitUnsignedSaturatedOp<Op::Add, 16>(code, ctx, inst);
|
||||
}
|
||||
|
||||
void EmitX64::EmitUnsignedSaturatedAdd32(EmitContext& ctx, IR::Inst* inst) {
|
||||
EmitUnsignedSaturatedOp<Op::Add, 32>(code, ctx, inst);
|
||||
}
|
||||
|
||||
void EmitX64::EmitUnsignedSaturatedAdd64(EmitContext& ctx, IR::Inst* inst) {
|
||||
EmitUnsignedSaturatedOp<Op::Add, 64>(code, ctx, inst);
|
||||
}
|
||||
|
||||
void EmitX64::EmitUnsignedSaturatedSub8(EmitContext& ctx, IR::Inst* inst) {
|
||||
EmitUnsignedSaturatedOp<Op::Sub, 8>(code, ctx, inst);
|
||||
}
|
||||
|
||||
void EmitX64::EmitUnsignedSaturatedSub16(EmitContext& ctx, IR::Inst* inst) {
|
||||
EmitUnsignedSaturatedOp<Op::Sub, 16>(code, ctx, inst);
|
||||
}
|
||||
|
||||
void EmitX64::EmitUnsignedSaturatedSub32(EmitContext& ctx, IR::Inst* inst) {
|
||||
EmitUnsignedSaturatedOp<Op::Sub, 32>(code, ctx, inst);
|
||||
}
|
||||
|
||||
void EmitX64::EmitUnsignedSaturatedSub64(EmitContext& ctx, IR::Inst* inst) {
|
||||
EmitUnsignedSaturatedOp<Op::Sub, 64>(code, ctx, inst);
|
||||
}
|
||||
|
||||
void EmitX64::EmitUnsignedSaturation(EmitContext& ctx, IR::Inst* inst) {
|
||||
auto overflow_inst = inst->GetAssociatedPseudoOperation(IR::Opcode::GetOverflowFromOp);
|
||||
|
||||
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
|
||||
size_t N = args[1].GetImmediateU8();
|
||||
ASSERT(N <= 31);
|
||||
|
||||
u32 saturated_value = (1u << N) - 1;
|
||||
|
||||
Xbyak::Reg32 result = ctx.reg_alloc.ScratchGpr().cvt32();
|
||||
Xbyak::Reg32 reg_a = ctx.reg_alloc.UseGpr(args[0]).cvt32();
|
||||
Xbyak::Reg32 overflow = ctx.reg_alloc.ScratchGpr().cvt32();
|
||||
|
||||
// Pseudocode: result = clamp(reg_a, 0, saturated_value);
|
||||
code.xor_(overflow, overflow);
|
||||
code.cmp(reg_a, saturated_value);
|
||||
code.mov(result, saturated_value);
|
||||
code.cmovle(result, overflow);
|
||||
code.cmovbe(result, reg_a);
|
||||
|
||||
if (overflow_inst) {
|
||||
code.seta(overflow.cvt8());
|
||||
|
||||
ctx.reg_alloc.DefineValue(overflow_inst, overflow);
|
||||
ctx.EraseInstruction(overflow_inst);
|
||||
}
|
||||
|
||||
ctx.reg_alloc.DefineValue(inst, result);
|
||||
}
|
||||
|
||||
} // namespace Dynarmic::BackendX64
|
||||
23
src/backend/x64/emit_x64_sm4.cpp
Normal file
23
src/backend/x64/emit_x64_sm4.cpp
Normal file
@@ -0,0 +1,23 @@
|
||||
/* This file is part of the dynarmic project.
|
||||
* Copyright (c) 2018 MerryMage
|
||||
* This software may be used and distributed according to the terms of the GNU
|
||||
* General Public icense version 2 or any later version.
|
||||
*/
|
||||
|
||||
#include "backend/x64/block_of_code.h"
|
||||
#include "backend/x64/emit_x64.h"
|
||||
#include "common/common_types.h"
|
||||
#include "common/crypto/sm4.h"
|
||||
#include "frontend/ir/microinstruction.h"
|
||||
#include "frontend/ir/opcodes.h"
|
||||
|
||||
namespace Dynarmic::BackendX64 {
|
||||
|
||||
void EmitX64::EmitSM4AccessSubstitutionBox(EmitContext& ctx, IR::Inst* inst) {
|
||||
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
|
||||
|
||||
ctx.reg_alloc.HostCall(inst, args[0]);
|
||||
code.CallFunction(&Common::Crypto::SM4::AccessSubstitutionBox);
|
||||
}
|
||||
|
||||
} // namespace Dynarmic::BackendX64
|
||||
3001
src/backend/x64/emit_x64_vector.cpp
Normal file
3001
src/backend/x64/emit_x64_vector.cpp
Normal file
File diff suppressed because it is too large
Load Diff
1164
src/backend/x64/emit_x64_vector_floating_point.cpp
Normal file
1164
src/backend/x64/emit_x64_vector_floating_point.cpp
Normal file
File diff suppressed because it is too large
Load Diff
21
src/backend/x64/exception_handler_generic.cpp
Normal file
21
src/backend/x64/exception_handler_generic.cpp
Normal file
@@ -0,0 +1,21 @@
|
||||
/* This file is part of the dynarmic project.
|
||||
* Copyright (c) 2016 MerryMage
|
||||
* This software may be used and distributed according to the terms of the GNU
|
||||
* General Public License version 2 or any later version.
|
||||
*/
|
||||
|
||||
#include "backend/x64/block_of_code.h"
|
||||
|
||||
namespace Dynarmic::BackendX64 {
|
||||
|
||||
struct BlockOfCode::ExceptionHandler::Impl final {
|
||||
};
|
||||
|
||||
BlockOfCode::ExceptionHandler::ExceptionHandler() = default;
|
||||
BlockOfCode::ExceptionHandler::~ExceptionHandler() = default;
|
||||
|
||||
void BlockOfCode::ExceptionHandler::Register(BlockOfCode&) {
|
||||
// Do nothing
|
||||
}
|
||||
|
||||
} // namespace Dynarmic::BackendX64
|
||||
202
src/backend/x64/exception_handler_windows.cpp
Normal file
202
src/backend/x64/exception_handler_windows.cpp
Normal file
@@ -0,0 +1,202 @@
|
||||
/* This file is part of the dynarmic project.
|
||||
* Copyright (c) 2016 MerryMage
|
||||
* This software may be used and distributed according to the terms of the GNU
|
||||
* General Public License version 2 or any later version.
|
||||
*/
|
||||
|
||||
#include <cstring>
|
||||
#include <vector>
|
||||
|
||||
#define WIN32_LEAN_AND_MEAN
|
||||
#include <windows.h>
|
||||
|
||||
#include "backend/x64/block_of_code.h"
|
||||
#include "common/assert.h"
|
||||
#include "common/common_types.h"
|
||||
|
||||
using UBYTE = u8;
|
||||
|
||||
enum UNWIND_REGISTER_CODES {
|
||||
UWRC_RAX,
|
||||
UWRC_RCX,
|
||||
UWRC_RDX,
|
||||
UWRC_RBX,
|
||||
UWRC_RSP,
|
||||
UWRC_RBP,
|
||||
UWRC_RSI,
|
||||
UWRC_RDI,
|
||||
UWRC_R8,
|
||||
UWRC_R9,
|
||||
UWRC_R10,
|
||||
UWRC_R11,
|
||||
UWRC_R12,
|
||||
UWRC_R13,
|
||||
UWRC_R14,
|
||||
UWRC_R15,
|
||||
};
|
||||
|
||||
enum UNWIND_OPCODE {
|
||||
UWOP_PUSH_NONVOL = 0,
|
||||
UWOP_ALLOC_LARGE = 1,
|
||||
UWOP_ALLOC_SMALL = 2,
|
||||
UWOP_SET_FPREG = 3,
|
||||
UWOP_SAVE_NONVOL = 4,
|
||||
UWOP_SAVE_NONVOL_FAR = 5,
|
||||
UWOP_SAVE_XMM128 = 8,
|
||||
UWOP_SAVE_XMM128_FAR = 9,
|
||||
UWOP_PUSH_MACHFRAME = 10,
|
||||
};
|
||||
|
||||
union UNWIND_CODE {
|
||||
struct {
|
||||
UBYTE CodeOffset;
|
||||
UBYTE UnwindOp : 4;
|
||||
UBYTE OpInfo : 4;
|
||||
} code;
|
||||
USHORT FrameOffset;
|
||||
};
|
||||
|
||||
// UNWIND_INFO is a tail-padded structure
|
||||
struct UNWIND_INFO {
|
||||
UBYTE Version : 3;
|
||||
UBYTE Flags : 5;
|
||||
UBYTE SizeOfProlog;
|
||||
UBYTE CountOfCodes;
|
||||
UBYTE FrameRegister : 4;
|
||||
UBYTE FrameOffset : 4;
|
||||
// UNWIND_CODE UnwindCode[];
|
||||
// With Flags == 0 there are no additional fields.
|
||||
};
|
||||
|
||||
namespace Dynarmic {
|
||||
namespace BackendX64 {
|
||||
|
||||
struct PrologueInformation {
|
||||
std::vector<UNWIND_CODE> unwind_code;
|
||||
size_t number_of_unwind_code_entries;
|
||||
u8 prolog_size;
|
||||
};
|
||||
|
||||
static PrologueInformation GetPrologueInformation() {
|
||||
PrologueInformation ret;
|
||||
|
||||
const auto next_entry = [&]() -> UNWIND_CODE& {
|
||||
ret.unwind_code.emplace_back();
|
||||
return ret.unwind_code.back();
|
||||
};
|
||||
const auto push_nonvol = [&](u8 offset, UNWIND_REGISTER_CODES reg) {
|
||||
auto& entry = next_entry();
|
||||
entry.code.CodeOffset = offset;
|
||||
entry.code.UnwindOp = UWOP_PUSH_NONVOL;
|
||||
entry.code.OpInfo = reg;
|
||||
};
|
||||
const auto alloc_large = [&](u8 offset, size_t size) {
|
||||
ASSERT(size % 8 == 0);
|
||||
size /= 8;
|
||||
|
||||
auto& entry = next_entry();
|
||||
entry.code.CodeOffset = offset;
|
||||
entry.code.UnwindOp = UWOP_ALLOC_LARGE;
|
||||
if (size <= 0xFFFF) {
|
||||
entry.code.OpInfo = 0;
|
||||
auto& size_entry = next_entry();
|
||||
size_entry.FrameOffset = static_cast<USHORT>(size);
|
||||
} else {
|
||||
entry.code.OpInfo = 1;
|
||||
auto& size_entry_1 = next_entry();
|
||||
size_entry_1.FrameOffset = static_cast<USHORT>(size);
|
||||
auto& size_entry_2 = next_entry();
|
||||
size_entry_2.FrameOffset = static_cast<USHORT>(size >> 16);
|
||||
}
|
||||
};
|
||||
const auto save_xmm128 = [&](u8 offset, u8 reg, size_t frame_offset) {
|
||||
ASSERT(frame_offset % 16 == 0);
|
||||
|
||||
auto& entry = next_entry();
|
||||
entry.code.CodeOffset = offset;
|
||||
entry.code.UnwindOp = UWOP_SAVE_XMM128;
|
||||
entry.code.OpInfo = reg;
|
||||
auto& offset_entry = next_entry();
|
||||
offset_entry.FrameOffset = static_cast<USHORT>(frame_offset / 16);
|
||||
};
|
||||
|
||||
// This is a list of operations that occur in the prologue.
|
||||
// The debugger uses this information to retrieve register values and
|
||||
// to calculate the size of the stack frame.
|
||||
ret.prolog_size = 89;
|
||||
save_xmm128(89, 15, 0xB0); // +050 44 0F 29 BC 24 B0 00 00 00 movaps xmmword ptr [rsp+0B0h],xmm15
|
||||
save_xmm128(80, 14, 0xA0); // +047 44 0F 29 B4 24 A0 00 00 00 movaps xmmword ptr [rsp+0A0h],xmm14
|
||||
save_xmm128(71, 13, 0x90); // +03E 44 0F 29 AC 24 90 00 00 00 movaps xmmword ptr [rsp+90h],xmm13
|
||||
save_xmm128(62, 12, 0x80); // +035 44 0F 29 A4 24 80 00 00 00 movaps xmmword ptr [rsp+80h],xmm12
|
||||
save_xmm128(53, 11, 0x70); // +02F 44 0F 29 5C 24 70 movaps xmmword ptr [rsp+70h],xmm11
|
||||
save_xmm128(47, 10, 0x60); // +029 44 0F 29 54 24 60 movaps xmmword ptr [rsp+60h],xmm10
|
||||
save_xmm128(41, 9, 0x50); // +023 44 0F 29 4C 24 50 movaps xmmword ptr [rsp+50h],xmm9
|
||||
save_xmm128(35, 8, 0x40); // +01D 44 0F 29 44 24 40 movaps xmmword ptr [rsp+40h],xmm8
|
||||
save_xmm128(29, 7, 0x30); // +018 0F 29 7C 24 30 movaps xmmword ptr [rsp+30h],xmm7
|
||||
save_xmm128(24, 6, 0x20); // +013 0F 29 74 24 20 movaps xmmword ptr [rsp+20h],xmm6
|
||||
alloc_large(19, 0xC8); // +00C 48 81 EC C8 00 00 00 sub rsp,0C8h
|
||||
push_nonvol(12, UWRC_R15); // +00A 41 57 push r15
|
||||
push_nonvol(10, UWRC_R14); // +008 41 56 push r14
|
||||
push_nonvol(8, UWRC_R13); // +006 41 55 push r13
|
||||
push_nonvol(6, UWRC_R12); // +004 41 54 push r12
|
||||
push_nonvol(4, UWRC_RBP); // +003 55 push rbp
|
||||
push_nonvol(3, UWRC_RDI); // +002 57 push rdi
|
||||
push_nonvol(2, UWRC_RSI); // +001 56 push rsi
|
||||
push_nonvol(1, UWRC_RBX); // +000 53 push rbx
|
||||
|
||||
ret.number_of_unwind_code_entries = ret.unwind_code.size();
|
||||
|
||||
// The Windows API requires the size of the unwind_code array
|
||||
// to be a multiple of two for alignment reasons.
|
||||
if (ret.unwind_code.size() % 2 == 1) {
|
||||
auto& last_entry = next_entry();
|
||||
last_entry.FrameOffset = 0;
|
||||
}
|
||||
ASSERT(ret.unwind_code.size() % 2 == 0);
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
struct BlockOfCode::ExceptionHandler::Impl final {
|
||||
Impl(RUNTIME_FUNCTION* rfuncs_, const u8* base_ptr) : rfuncs(rfuncs_) {
|
||||
RtlAddFunctionTable(rfuncs, 1, reinterpret_cast<DWORD64>(base_ptr));
|
||||
}
|
||||
|
||||
~Impl() {
|
||||
RtlDeleteFunctionTable(rfuncs);
|
||||
}
|
||||
|
||||
private:
|
||||
RUNTIME_FUNCTION* rfuncs = nullptr;
|
||||
};
|
||||
|
||||
BlockOfCode::ExceptionHandler::ExceptionHandler() = default;
|
||||
BlockOfCode::ExceptionHandler::~ExceptionHandler() = default;
|
||||
|
||||
void BlockOfCode::ExceptionHandler::Register(BlockOfCode& code) {
|
||||
const auto prolog_info = GetPrologueInformation();
|
||||
|
||||
code.align(16);
|
||||
UNWIND_INFO* unwind_info = static_cast<UNWIND_INFO*>(code.AllocateFromCodeSpace(sizeof(UNWIND_INFO)));
|
||||
unwind_info->Version = 1;
|
||||
unwind_info->Flags = 0; // No special exception handling required.
|
||||
unwind_info->SizeOfProlog = prolog_info.prolog_size;
|
||||
unwind_info->CountOfCodes = static_cast<UBYTE>(prolog_info.number_of_unwind_code_entries);
|
||||
unwind_info->FrameRegister = 0; // No frame register present
|
||||
unwind_info->FrameOffset = 0; // Unused because FrameRegister == 0
|
||||
// UNWIND_INFO::UnwindCode field:
|
||||
const size_t size_of_unwind_code = sizeof(UNWIND_CODE) * prolog_info.unwind_code.size();
|
||||
UNWIND_CODE* unwind_code = static_cast<UNWIND_CODE*>(code.AllocateFromCodeSpace(size_of_unwind_code));
|
||||
memcpy(unwind_code, prolog_info.unwind_code.data(), size_of_unwind_code);
|
||||
|
||||
code.align(16);
|
||||
RUNTIME_FUNCTION* rfuncs = static_cast<RUNTIME_FUNCTION*>(code.AllocateFromCodeSpace(sizeof(RUNTIME_FUNCTION)));
|
||||
rfuncs->BeginAddress = static_cast<DWORD>(reinterpret_cast<u8*>(code.run_code) - code.getCode());
|
||||
rfuncs->EndAddress = static_cast<DWORD>(code.maxSize_);
|
||||
rfuncs->UnwindData = static_cast<DWORD>(reinterpret_cast<u8*>(unwind_info) - code.getCode());
|
||||
|
||||
impl = std::make_unique<Impl>(rfuncs, code.getCode());
|
||||
}
|
||||
|
||||
} // namespace BackendX64
|
||||
} // namespace Dynarmic
|
||||
23
src/backend/x64/hostloc.cpp
Normal file
23
src/backend/x64/hostloc.cpp
Normal file
@@ -0,0 +1,23 @@
|
||||
/* This file is part of the dynarmic project.
|
||||
* Copyright (c) 2016 MerryMage
|
||||
* This software may be used and distributed according to the terms of the GNU
|
||||
* General Public License version 2 or any later version.
|
||||
*/
|
||||
|
||||
#include <xbyak.h>
|
||||
|
||||
#include "backend/x64/hostloc.h"
|
||||
|
||||
namespace Dynarmic::BackendX64 {
|
||||
|
||||
Xbyak::Reg64 HostLocToReg64(HostLoc loc) {
|
||||
ASSERT(HostLocIsGPR(loc));
|
||||
return Xbyak::Reg64(static_cast<int>(loc));
|
||||
}
|
||||
|
||||
Xbyak::Xmm HostLocToXmm(HostLoc loc) {
|
||||
ASSERT(HostLocIsXMM(loc));
|
||||
return Xbyak::Xmm(static_cast<int>(loc) - static_cast<int>(HostLoc::XMM0));
|
||||
}
|
||||
|
||||
} // namespace Dynarmic::BackendX64
|
||||
125
src/backend/x64/hostloc.h
Normal file
125
src/backend/x64/hostloc.h
Normal file
@@ -0,0 +1,125 @@
|
||||
/* This file is part of the dynarmic project.
|
||||
* Copyright (c) 2016 MerryMage
|
||||
* This software may be used and distributed according to the terms of the GNU
|
||||
* General Public License version 2 or any later version.
|
||||
*/
|
||||
#pragma once
|
||||
|
||||
#include <xbyak.h>
|
||||
|
||||
#include "common/assert.h"
|
||||
#include "common/common_types.h"
|
||||
|
||||
namespace Dynarmic::BackendX64 {
|
||||
|
||||
enum class HostLoc {
|
||||
// Ordering of the registers is intentional. See also: HostLocToX64.
|
||||
RAX, RCX, RDX, RBX, RSP, RBP, RSI, RDI, R8, R9, R10, R11, R12, R13, R14, R15,
|
||||
XMM0, XMM1, XMM2, XMM3, XMM4, XMM5, XMM6, XMM7,
|
||||
XMM8, XMM9, XMM10, XMM11, XMM12, XMM13, XMM14, XMM15,
|
||||
CF, PF, AF, ZF, SF, OF,
|
||||
FirstSpill,
|
||||
};
|
||||
|
||||
constexpr size_t NonSpillHostLocCount = static_cast<size_t>(HostLoc::FirstSpill);
|
||||
|
||||
inline bool HostLocIsGPR(HostLoc reg) {
|
||||
return reg >= HostLoc::RAX && reg <= HostLoc::R15;
|
||||
}
|
||||
|
||||
inline bool HostLocIsXMM(HostLoc reg) {
|
||||
return reg >= HostLoc::XMM0 && reg <= HostLoc::XMM15;
|
||||
}
|
||||
|
||||
inline bool HostLocIsRegister(HostLoc reg) {
|
||||
return HostLocIsGPR(reg) || HostLocIsXMM(reg);
|
||||
}
|
||||
|
||||
inline bool HostLocIsFlag(HostLoc reg) {
|
||||
return reg >= HostLoc::CF && reg <= HostLoc::OF;
|
||||
}
|
||||
|
||||
inline HostLoc HostLocRegIdx(int idx) {
|
||||
ASSERT(idx >= 0 && idx <= 15);
|
||||
return static_cast<HostLoc>(idx);
|
||||
}
|
||||
|
||||
inline HostLoc HostLocXmmIdx(int idx) {
|
||||
ASSERT(idx >= 0 && idx <= 15);
|
||||
return static_cast<HostLoc>(static_cast<size_t>(HostLoc::XMM0) + idx);
|
||||
}
|
||||
|
||||
inline HostLoc HostLocSpill(size_t i) {
|
||||
return static_cast<HostLoc>(static_cast<size_t>(HostLoc::FirstSpill) + i);
|
||||
}
|
||||
|
||||
inline bool HostLocIsSpill(HostLoc reg) {
|
||||
return reg >= HostLoc::FirstSpill;
|
||||
}
|
||||
|
||||
inline size_t HostLocBitWidth(HostLoc loc) {
|
||||
if (HostLocIsGPR(loc))
|
||||
return 64;
|
||||
if (HostLocIsXMM(loc))
|
||||
return 128;
|
||||
if (HostLocIsSpill(loc))
|
||||
return 128;
|
||||
if (HostLocIsFlag(loc))
|
||||
return 1;
|
||||
UNREACHABLE();
|
||||
}
|
||||
|
||||
using HostLocList = std::initializer_list<HostLoc>;
|
||||
|
||||
// RSP is preserved for function calls
|
||||
// R15 contains the JitState pointer
|
||||
const HostLocList any_gpr = {
|
||||
HostLoc::RAX,
|
||||
HostLoc::RBX,
|
||||
HostLoc::RCX,
|
||||
HostLoc::RDX,
|
||||
HostLoc::RSI,
|
||||
HostLoc::RDI,
|
||||
HostLoc::RBP,
|
||||
HostLoc::R8,
|
||||
HostLoc::R9,
|
||||
HostLoc::R10,
|
||||
HostLoc::R11,
|
||||
HostLoc::R12,
|
||||
HostLoc::R13,
|
||||
HostLoc::R14,
|
||||
};
|
||||
|
||||
// XMM0 is reserved for use by instructions that implicitly use it as an argument
|
||||
const HostLocList any_xmm = {
|
||||
HostLoc::XMM1,
|
||||
HostLoc::XMM2,
|
||||
HostLoc::XMM3,
|
||||
HostLoc::XMM4,
|
||||
HostLoc::XMM5,
|
||||
HostLoc::XMM6,
|
||||
HostLoc::XMM7,
|
||||
HostLoc::XMM8,
|
||||
HostLoc::XMM9,
|
||||
HostLoc::XMM10,
|
||||
HostLoc::XMM11,
|
||||
HostLoc::XMM12,
|
||||
HostLoc::XMM13,
|
||||
HostLoc::XMM14,
|
||||
HostLoc::XMM15,
|
||||
};
|
||||
|
||||
Xbyak::Reg64 HostLocToReg64(HostLoc loc);
|
||||
Xbyak::Xmm HostLocToXmm(HostLoc loc);
|
||||
|
||||
template <typename JitStateType>
|
||||
Xbyak::Address SpillToOpArg(HostLoc loc) {
|
||||
ASSERT(HostLocIsSpill(loc));
|
||||
|
||||
size_t i = static_cast<size_t>(loc) - static_cast<size_t>(HostLoc::FirstSpill);
|
||||
ASSERT_MSG(i < JitStateType::SpillCount, "Spill index greater than number of available spill locations");
|
||||
|
||||
return JitStateType::GetSpillLocationFromIndex(i);
|
||||
}
|
||||
|
||||
} // namespace Dynarmic::BackendX64
|
||||
48
src/backend/x64/jitstate_info.h
Normal file
48
src/backend/x64/jitstate_info.h
Normal file
@@ -0,0 +1,48 @@
|
||||
/* This file is part of the dynarmic project.
|
||||
* Copyright (c) 2016 MerryMage
|
||||
* This software may be used and distributed according to the terms of the GNU
|
||||
* General Public License version 2 or any later version.
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <cstddef>
|
||||
|
||||
#include "common/common_types.h"
|
||||
|
||||
namespace Dynarmic::BackendX64 {
|
||||
|
||||
struct JitStateInfo {
|
||||
template <typename JitStateType>
|
||||
JitStateInfo(const JitStateType&)
|
||||
: offsetof_cycles_remaining(offsetof(JitStateType, cycles_remaining))
|
||||
, offsetof_cycles_to_run(offsetof(JitStateType, cycles_to_run))
|
||||
, offsetof_save_host_MXCSR(offsetof(JitStateType, save_host_MXCSR))
|
||||
, offsetof_guest_MXCSR(offsetof(JitStateType, guest_MXCSR))
|
||||
, offsetof_rsb_ptr(offsetof(JitStateType, rsb_ptr))
|
||||
, rsb_ptr_mask(JitStateType::RSBPtrMask)
|
||||
, offsetof_rsb_location_descriptors(offsetof(JitStateType, rsb_location_descriptors))
|
||||
, offsetof_rsb_codeptrs(offsetof(JitStateType, rsb_codeptrs))
|
||||
, offsetof_CPSR_nzcv(offsetof(JitStateType, CPSR_nzcv))
|
||||
, offsetof_FPSCR_IDC(offsetof(JitStateType, FPSCR_IDC))
|
||||
, offsetof_FPSCR_UFC(offsetof(JitStateType, FPSCR_UFC))
|
||||
, offsetof_fpsr_exc(offsetof(JitStateType, fpsr_exc))
|
||||
, offsetof_fpsr_qc(offsetof(JitStateType, fpsr_qc))
|
||||
{}
|
||||
|
||||
const size_t offsetof_cycles_remaining;
|
||||
const size_t offsetof_cycles_to_run;
|
||||
const size_t offsetof_save_host_MXCSR;
|
||||
const size_t offsetof_guest_MXCSR;
|
||||
const size_t offsetof_rsb_ptr;
|
||||
const size_t rsb_ptr_mask;
|
||||
const size_t offsetof_rsb_location_descriptors;
|
||||
const size_t offsetof_rsb_codeptrs;
|
||||
const size_t offsetof_CPSR_nzcv;
|
||||
const size_t offsetof_FPSCR_IDC;
|
||||
const size_t offsetof_FPSCR_UFC;
|
||||
const size_t offsetof_fpsr_exc;
|
||||
const size_t offsetof_fpsr_qc;
|
||||
};
|
||||
|
||||
} // namespace Dynarmic::BackendX64
|
||||
78
src/backend/x64/oparg.h
Normal file
78
src/backend/x64/oparg.h
Normal file
@@ -0,0 +1,78 @@
|
||||
/* This file is part of the dynarmic project.
|
||||
* Copyright (c) 2016 MerryMage
|
||||
* This software may be used and distributed according to the terms of the GNU
|
||||
* General Public License version 2 or any later version.
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <xbyak.h>
|
||||
|
||||
#include "common/assert.h"
|
||||
|
||||
namespace Dynarmic::BackendX64 {
|
||||
|
||||
struct OpArg {
|
||||
OpArg() : type(Type::Operand), inner_operand() {}
|
||||
/* implicit */ OpArg(const Xbyak::Address& address) : type(Type::Address), inner_address(address) {}
|
||||
/* implicit */ OpArg(const Xbyak::Reg& reg) : type(Type::Reg), inner_reg(reg) {}
|
||||
|
||||
Xbyak::Operand& operator*() {
|
||||
switch (type) {
|
||||
case Type::Address:
|
||||
return inner_address;
|
||||
case Type::Operand:
|
||||
return inner_operand;
|
||||
case Type::Reg:
|
||||
return inner_reg;
|
||||
}
|
||||
ASSERT_MSG(false, "Unreachable");
|
||||
}
|
||||
|
||||
void setBit(int bits) {
|
||||
switch (type) {
|
||||
case Type::Address:
|
||||
inner_address.setBit(bits);
|
||||
return;
|
||||
case Type::Operand:
|
||||
inner_operand.setBit(bits);
|
||||
return;
|
||||
case Type::Reg:
|
||||
switch (bits) {
|
||||
case 8:
|
||||
inner_reg = inner_reg.cvt8();
|
||||
return;
|
||||
case 16:
|
||||
inner_reg = inner_reg.cvt16();
|
||||
return;
|
||||
case 32:
|
||||
inner_reg = inner_reg.cvt32();
|
||||
return;
|
||||
case 64:
|
||||
inner_reg = inner_reg.cvt64();
|
||||
return;
|
||||
default:
|
||||
ASSERT_MSG(false, "Invalid bits");
|
||||
return;
|
||||
}
|
||||
}
|
||||
ASSERT_MSG(false, "Unreachable");
|
||||
}
|
||||
|
||||
private:
|
||||
enum class Type {
|
||||
Operand,
|
||||
Address,
|
||||
Reg,
|
||||
};
|
||||
|
||||
Type type;
|
||||
|
||||
union {
|
||||
Xbyak::Operand inner_operand;
|
||||
Xbyak::Address inner_address;
|
||||
Xbyak::Reg inner_reg;
|
||||
};
|
||||
};
|
||||
|
||||
} // namespace Dynarmic::BackendX64
|
||||
684
src/backend/x64/reg_alloc.cpp
Normal file
684
src/backend/x64/reg_alloc.cpp
Normal file
@@ -0,0 +1,684 @@
|
||||
/* This file is part of the dynarmic project.
|
||||
* Copyright (c) 2016 MerryMage
|
||||
* This software may be used and distributed according to the terms of the GNU
|
||||
* General Public License version 2 or any later version.
|
||||
*/
|
||||
|
||||
#include <algorithm>
|
||||
#include <numeric>
|
||||
#include <utility>
|
||||
|
||||
#include <fmt/ostream.h>
|
||||
#include <xbyak.h>
|
||||
|
||||
#include "backend/x64/abi.h"
|
||||
#include "backend/x64/reg_alloc.h"
|
||||
#include "common/assert.h"
|
||||
|
||||
namespace Dynarmic::BackendX64 {
|
||||
|
||||
static u64 ImmediateToU64(const IR::Value& imm) {
|
||||
switch (imm.GetType()) {
|
||||
case IR::Type::U1:
|
||||
return u64(imm.GetU1());
|
||||
case IR::Type::U8:
|
||||
return u64(imm.GetU8());
|
||||
case IR::Type::U16:
|
||||
return u64(imm.GetU16());
|
||||
case IR::Type::U32:
|
||||
return u64(imm.GetU32());
|
||||
case IR::Type::U64:
|
||||
return u64(imm.GetU64());
|
||||
default:
|
||||
ASSERT_MSG(false, "This should never happen.");
|
||||
}
|
||||
}
|
||||
|
||||
static bool CanExchange(HostLoc a, HostLoc b) {
|
||||
return HostLocIsGPR(a) && HostLocIsGPR(b);
|
||||
}
|
||||
|
||||
// Minimum number of bits required to represent a type
|
||||
static size_t GetBitWidth(IR::Type type) {
|
||||
switch (type) {
|
||||
case IR::Type::A32Reg:
|
||||
case IR::Type::A32ExtReg:
|
||||
case IR::Type::A64Reg:
|
||||
case IR::Type::A64Vec:
|
||||
case IR::Type::CoprocInfo:
|
||||
case IR::Type::Cond:
|
||||
case IR::Type::Void:
|
||||
case IR::Type::Table:
|
||||
ASSERT_MSG(false, "Type {} cannot be represented at runtime", type);
|
||||
return 0;
|
||||
case IR::Type::Opaque:
|
||||
ASSERT_MSG(false, "Not a concrete type");
|
||||
return 0;
|
||||
case IR::Type::U1:
|
||||
return 8;
|
||||
case IR::Type::U8:
|
||||
return 8;
|
||||
case IR::Type::U16:
|
||||
return 16;
|
||||
case IR::Type::U32:
|
||||
return 32;
|
||||
case IR::Type::U64:
|
||||
return 64;
|
||||
case IR::Type::U128:
|
||||
return 128;
|
||||
case IR::Type::NZCVFlags:
|
||||
return 32; // TODO: Update to 16 when flags optimization is done
|
||||
}
|
||||
UNREACHABLE();
|
||||
return 0;
|
||||
}
|
||||
|
||||
static bool IsValuelessType(IR::Type type) {
|
||||
switch (type) {
|
||||
case IR::Type::Table:
|
||||
return true;
|
||||
default:
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
bool HostLocInfo::IsLocked() const {
|
||||
return is_being_used_count > 0;
|
||||
}
|
||||
|
||||
bool HostLocInfo::IsEmpty() const {
|
||||
return is_being_used_count == 0 && values.empty();
|
||||
}
|
||||
|
||||
bool HostLocInfo::IsLastUse() const {
|
||||
return is_being_used_count == 0 && current_references == 1 && accumulated_uses + 1 == total_uses;
|
||||
}
|
||||
|
||||
void HostLocInfo::ReadLock() {
|
||||
ASSERT(!is_scratch);
|
||||
is_being_used_count++;
|
||||
}
|
||||
|
||||
void HostLocInfo::WriteLock() {
|
||||
ASSERT(is_being_used_count == 0);
|
||||
is_being_used_count++;
|
||||
is_scratch = true;
|
||||
}
|
||||
|
||||
void HostLocInfo::AddArgReference() {
|
||||
current_references++;
|
||||
ASSERT(accumulated_uses + current_references <= total_uses);
|
||||
}
|
||||
|
||||
void HostLocInfo::ReleaseOne() {
|
||||
is_being_used_count--;
|
||||
is_scratch = false;
|
||||
|
||||
if (current_references == 0)
|
||||
return;
|
||||
|
||||
accumulated_uses++;
|
||||
current_references--;
|
||||
|
||||
if (current_references == 0)
|
||||
ReleaseAll();
|
||||
}
|
||||
|
||||
void HostLocInfo::ReleaseAll() {
|
||||
accumulated_uses += current_references;
|
||||
current_references = 0;
|
||||
|
||||
ASSERT(total_uses == std::accumulate(values.begin(), values.end(), size_t(0), [](size_t sum, IR::Inst* inst) { return sum + inst->UseCount(); }));
|
||||
|
||||
if (total_uses == accumulated_uses) {
|
||||
values.clear();
|
||||
accumulated_uses = 0;
|
||||
total_uses = 0;
|
||||
max_bit_width = 0;
|
||||
}
|
||||
|
||||
is_being_used_count = 0;
|
||||
is_scratch = false;
|
||||
}
|
||||
|
||||
bool HostLocInfo::ContainsValue(const IR::Inst* inst) const {
|
||||
return std::find(values.begin(), values.end(), inst) != values.end();
|
||||
}
|
||||
|
||||
size_t HostLocInfo::GetMaxBitWidth() const {
|
||||
return max_bit_width;
|
||||
}
|
||||
|
||||
void HostLocInfo::AddValue(IR::Inst* inst) {
|
||||
values.push_back(inst);
|
||||
total_uses += inst->UseCount();
|
||||
max_bit_width = std::max(max_bit_width, GetBitWidth(inst->GetType()));
|
||||
}
|
||||
|
||||
IR::Type Argument::GetType() const {
|
||||
return value.GetType();
|
||||
}
|
||||
|
||||
bool Argument::IsImmediate() const {
|
||||
return value.IsImmediate();
|
||||
}
|
||||
|
||||
bool Argument::IsVoid() const {
|
||||
return GetType() == IR::Type::Void;
|
||||
}
|
||||
|
||||
bool Argument::FitsInImmediateU32() const {
|
||||
if (!IsImmediate())
|
||||
return false;
|
||||
u64 imm = ImmediateToU64(value);
|
||||
return imm < 0x100000000;
|
||||
}
|
||||
|
||||
bool Argument::FitsInImmediateS32() const {
|
||||
if (!IsImmediate())
|
||||
return false;
|
||||
s64 imm = static_cast<s64>(ImmediateToU64(value));
|
||||
return -s64(0x80000000) <= imm && imm <= s64(0x7FFFFFFF);
|
||||
}
|
||||
|
||||
bool Argument::GetImmediateU1() const {
|
||||
return value.GetU1();
|
||||
}
|
||||
|
||||
u8 Argument::GetImmediateU8() const {
|
||||
u64 imm = ImmediateToU64(value);
|
||||
ASSERT(imm < 0x100);
|
||||
return u8(imm);
|
||||
}
|
||||
|
||||
u16 Argument::GetImmediateU16() const {
|
||||
u64 imm = ImmediateToU64(value);
|
||||
ASSERT(imm < 0x10000);
|
||||
return u16(imm);
|
||||
}
|
||||
|
||||
u32 Argument::GetImmediateU32() const {
|
||||
u64 imm = ImmediateToU64(value);
|
||||
ASSERT(imm < 0x100000000);
|
||||
return u32(imm);
|
||||
}
|
||||
|
||||
u64 Argument::GetImmediateS32() const {
|
||||
ASSERT(FitsInImmediateS32());
|
||||
u64 imm = ImmediateToU64(value);
|
||||
return imm;
|
||||
}
|
||||
|
||||
u64 Argument::GetImmediateU64() const {
|
||||
return ImmediateToU64(value);
|
||||
}
|
||||
|
||||
IR::Cond Argument::GetImmediateCond() const {
|
||||
ASSERT(IsImmediate() && GetType() == IR::Type::Cond);
|
||||
return value.GetCond();
|
||||
}
|
||||
|
||||
bool Argument::IsInGpr() const {
|
||||
if (IsImmediate())
|
||||
return false;
|
||||
return HostLocIsGPR(*reg_alloc.ValueLocation(value.GetInst()));
|
||||
}
|
||||
|
||||
bool Argument::IsInXmm() const {
|
||||
if (IsImmediate())
|
||||
return false;
|
||||
return HostLocIsXMM(*reg_alloc.ValueLocation(value.GetInst()));
|
||||
}
|
||||
|
||||
bool Argument::IsInMemory() const {
|
||||
if (IsImmediate())
|
||||
return false;
|
||||
return HostLocIsSpill(*reg_alloc.ValueLocation(value.GetInst()));
|
||||
}
|
||||
|
||||
RegAlloc::ArgumentInfo RegAlloc::GetArgumentInfo(IR::Inst* inst) {
|
||||
ArgumentInfo ret = {Argument{*this}, Argument{*this}, Argument{*this}, Argument{*this}};
|
||||
for (size_t i = 0; i < inst->NumArgs(); i++) {
|
||||
const IR::Value& arg = inst->GetArg(i);
|
||||
ret[i].value = arg;
|
||||
if (!arg.IsImmediate() && !IsValuelessType(arg.GetType())) {
|
||||
ASSERT_MSG(ValueLocation(arg.GetInst()), "argument must already been defined");
|
||||
LocInfo(*ValueLocation(arg.GetInst())).AddArgReference();
|
||||
}
|
||||
}
|
||||
return ret;
|
||||
}
|
||||
|
||||
Xbyak::Reg64 RegAlloc::UseGpr(Argument& arg) {
|
||||
ASSERT(!arg.allocated);
|
||||
arg.allocated = true;
|
||||
return HostLocToReg64(UseImpl(arg.value, any_gpr));
|
||||
}
|
||||
|
||||
Xbyak::Xmm RegAlloc::UseXmm(Argument& arg) {
|
||||
ASSERT(!arg.allocated);
|
||||
arg.allocated = true;
|
||||
return HostLocToXmm(UseImpl(arg.value, any_xmm));
|
||||
}
|
||||
|
||||
OpArg RegAlloc::UseOpArg(Argument& arg) {
|
||||
return UseGpr(arg);
|
||||
}
|
||||
|
||||
void RegAlloc::Use(Argument& arg, HostLoc host_loc) {
|
||||
ASSERT(!arg.allocated);
|
||||
arg.allocated = true;
|
||||
UseImpl(arg.value, {host_loc});
|
||||
}
|
||||
|
||||
Xbyak::Reg64 RegAlloc::UseScratchGpr(Argument& arg) {
|
||||
ASSERT(!arg.allocated);
|
||||
arg.allocated = true;
|
||||
return HostLocToReg64(UseScratchImpl(arg.value, any_gpr));
|
||||
}
|
||||
|
||||
Xbyak::Xmm RegAlloc::UseScratchXmm(Argument& arg) {
|
||||
ASSERT(!arg.allocated);
|
||||
arg.allocated = true;
|
||||
return HostLocToXmm(UseScratchImpl(arg.value, any_xmm));
|
||||
}
|
||||
|
||||
void RegAlloc::UseScratch(Argument& arg, HostLoc host_loc) {
|
||||
ASSERT(!arg.allocated);
|
||||
arg.allocated = true;
|
||||
UseScratchImpl(arg.value, {host_loc});
|
||||
}
|
||||
|
||||
void RegAlloc::DefineValue(IR::Inst* inst, const Xbyak::Reg& reg) {
|
||||
ASSERT(reg.getKind() == Xbyak::Operand::XMM || reg.getKind() == Xbyak::Operand::REG);
|
||||
HostLoc hostloc = static_cast<HostLoc>(reg.getIdx() + static_cast<size_t>(reg.getKind() == Xbyak::Operand::XMM ? HostLoc::XMM0 : HostLoc::RAX));
|
||||
DefineValueImpl(inst, hostloc);
|
||||
}
|
||||
|
||||
void RegAlloc::DefineValue(IR::Inst* inst, Argument& arg) {
|
||||
ASSERT(!arg.allocated);
|
||||
arg.allocated = true;
|
||||
DefineValueImpl(inst, arg.value);
|
||||
}
|
||||
|
||||
void RegAlloc::Release(const Xbyak::Reg& reg) {
|
||||
ASSERT(reg.getKind() == Xbyak::Operand::XMM || reg.getKind() == Xbyak::Operand::REG);
|
||||
const HostLoc hostloc = static_cast<HostLoc>(reg.getIdx() + static_cast<size_t>(reg.getKind() == Xbyak::Operand::XMM ? HostLoc::XMM0 : HostLoc::RAX));
|
||||
LocInfo(hostloc).ReleaseOne();
|
||||
}
|
||||
|
||||
Xbyak::Reg64 RegAlloc::ScratchGpr(HostLocList desired_locations) {
|
||||
return HostLocToReg64(ScratchImpl(desired_locations));
|
||||
}
|
||||
|
||||
Xbyak::Xmm RegAlloc::ScratchXmm(HostLocList desired_locations) {
|
||||
return HostLocToXmm(ScratchImpl(desired_locations));
|
||||
}
|
||||
|
||||
HostLoc RegAlloc::UseImpl(IR::Value use_value, HostLocList desired_locations) {
|
||||
if (use_value.IsImmediate()) {
|
||||
return LoadImmediate(use_value, ScratchImpl(desired_locations));
|
||||
}
|
||||
|
||||
const IR::Inst* use_inst = use_value.GetInst();
|
||||
const HostLoc current_location = *ValueLocation(use_inst);
|
||||
const size_t max_bit_width = LocInfo(current_location).GetMaxBitWidth();
|
||||
|
||||
const bool can_use_current_location = std::find(desired_locations.begin(), desired_locations.end(), current_location) != desired_locations.end();
|
||||
if (can_use_current_location) {
|
||||
LocInfo(current_location).ReadLock();
|
||||
return current_location;
|
||||
}
|
||||
|
||||
if (LocInfo(current_location).IsLocked()) {
|
||||
return UseScratchImpl(use_value, desired_locations);
|
||||
}
|
||||
|
||||
const HostLoc destination_location = SelectARegister(desired_locations);
|
||||
if (max_bit_width > HostLocBitWidth(destination_location)) {
|
||||
return UseScratchImpl(use_value, desired_locations);
|
||||
} else if (CanExchange(destination_location, current_location)) {
|
||||
Exchange(destination_location, current_location);
|
||||
} else {
|
||||
MoveOutOfTheWay(destination_location);
|
||||
Move(destination_location, current_location);
|
||||
}
|
||||
LocInfo(destination_location).ReadLock();
|
||||
return destination_location;
|
||||
}
|
||||
|
||||
HostLoc RegAlloc::UseScratchImpl(IR::Value use_value, HostLocList desired_locations) {
|
||||
if (use_value.IsImmediate()) {
|
||||
return LoadImmediate(use_value, ScratchImpl(desired_locations));
|
||||
}
|
||||
|
||||
const IR::Inst* use_inst = use_value.GetInst();
|
||||
const HostLoc current_location = *ValueLocation(use_inst);
|
||||
const size_t bit_width = GetBitWidth(use_inst->GetType());
|
||||
|
||||
const bool can_use_current_location = std::find(desired_locations.begin(), desired_locations.end(), current_location) != desired_locations.end();
|
||||
if (can_use_current_location && !LocInfo(current_location).IsLocked()) {
|
||||
if (!LocInfo(current_location).IsLastUse()) {
|
||||
MoveOutOfTheWay(current_location);
|
||||
}
|
||||
LocInfo(current_location).WriteLock();
|
||||
return current_location;
|
||||
}
|
||||
|
||||
const HostLoc destination_location = SelectARegister(desired_locations);
|
||||
MoveOutOfTheWay(destination_location);
|
||||
CopyToScratch(bit_width, destination_location, current_location);
|
||||
LocInfo(destination_location).WriteLock();
|
||||
return destination_location;
|
||||
}
|
||||
|
||||
HostLoc RegAlloc::ScratchImpl(HostLocList desired_locations) {
|
||||
HostLoc location = SelectARegister(desired_locations);
|
||||
MoveOutOfTheWay(location);
|
||||
LocInfo(location).WriteLock();
|
||||
return location;
|
||||
}
|
||||
|
||||
void RegAlloc::HostCall(IR::Inst* result_def, boost::optional<Argument&> arg0, boost::optional<Argument&> arg1, boost::optional<Argument&> arg2, boost::optional<Argument&> arg3) {
|
||||
constexpr size_t args_count = 4;
|
||||
constexpr std::array<HostLoc, args_count> args_hostloc = { ABI_PARAM1, ABI_PARAM2, ABI_PARAM3, ABI_PARAM4 };
|
||||
const std::array<boost::optional<Argument&>, args_count> args = { arg0, arg1, arg2, arg3 };
|
||||
|
||||
static const std::vector<HostLoc> other_caller_save = [args_hostloc]() {
|
||||
std::vector<HostLoc> ret(ABI_ALL_CALLER_SAVE.begin(), ABI_ALL_CALLER_SAVE.end());
|
||||
|
||||
ret.erase(std::find(ret.begin(), ret.end(), ABI_RETURN));
|
||||
for (auto hostloc : args_hostloc)
|
||||
ret.erase(std::find(ret.begin(), ret.end(), hostloc));
|
||||
|
||||
return ret;
|
||||
}();
|
||||
|
||||
ScratchGpr({ABI_RETURN});
|
||||
if (result_def) {
|
||||
DefineValueImpl(result_def, ABI_RETURN);
|
||||
}
|
||||
|
||||
for (size_t i = 0; i < args_count; i++) {
|
||||
if (args[i]) {
|
||||
UseScratch(*args[i], args_hostloc[i]);
|
||||
#if defined(__llvm__) && !defined(_WIN32)
|
||||
// LLVM puts the burden of zero-extension of 8 and 16 bit values on the caller instead of the callee
|
||||
Xbyak::Reg64 reg = HostLocToReg64(args_hostloc[i]);
|
||||
switch (args[i]->GetType()) {
|
||||
case IR::Type::U8:
|
||||
code.movzx(reg.cvt32(), reg.cvt8());
|
||||
break;
|
||||
case IR::Type::U16:
|
||||
code.movzx(reg.cvt32(), reg.cvt16());
|
||||
break;
|
||||
default:
|
||||
break; // Nothing needs to be done
|
||||
}
|
||||
#endif
|
||||
}
|
||||
}
|
||||
|
||||
for (size_t i = 0; i < args_count; i++) {
|
||||
if (!args[i]) {
|
||||
// TODO: Force spill
|
||||
ScratchGpr({args_hostloc[i]});
|
||||
}
|
||||
}
|
||||
|
||||
for (HostLoc caller_saved : other_caller_save) {
|
||||
ScratchImpl({caller_saved});
|
||||
}
|
||||
}
|
||||
|
||||
void RegAlloc::EndOfAllocScope() {
|
||||
for (auto& iter : hostloc_info) {
|
||||
iter.ReleaseAll();
|
||||
}
|
||||
}
|
||||
|
||||
void RegAlloc::AssertNoMoreUses() {
|
||||
ASSERT(std::all_of(hostloc_info.begin(), hostloc_info.end(), [](const auto& i) { return i.IsEmpty(); }));
|
||||
}
|
||||
|
||||
HostLoc RegAlloc::SelectARegister(HostLocList desired_locations) const {
|
||||
std::vector<HostLoc> candidates = desired_locations;
|
||||
|
||||
// Find all locations that have not been allocated..
|
||||
auto allocated_locs = std::partition(candidates.begin(), candidates.end(), [this](auto loc){
|
||||
return !this->LocInfo(loc).IsLocked();
|
||||
});
|
||||
candidates.erase(allocated_locs, candidates.end());
|
||||
ASSERT_MSG(!candidates.empty(), "All candidate registers have already been allocated");
|
||||
|
||||
// Selects the best location out of the available locations.
|
||||
// TODO: Actually do LRU or something. Currently we just try to pick something without a value if possible.
|
||||
|
||||
std::partition(candidates.begin(), candidates.end(), [this](auto loc){
|
||||
return this->LocInfo(loc).IsEmpty();
|
||||
});
|
||||
|
||||
return candidates.front();
|
||||
}
|
||||
|
||||
boost::optional<HostLoc> RegAlloc::ValueLocation(const IR::Inst* value) const {
|
||||
for (size_t i = 0; i < hostloc_info.size(); i++)
|
||||
if (hostloc_info[i].ContainsValue(value))
|
||||
return static_cast<HostLoc>(i);
|
||||
|
||||
return boost::none;
|
||||
}
|
||||
|
||||
void RegAlloc::DefineValueImpl(IR::Inst* def_inst, HostLoc host_loc) {
|
||||
ASSERT_MSG(!ValueLocation(def_inst), "def_inst has already been defined");
|
||||
LocInfo(host_loc).AddValue(def_inst);
|
||||
}
|
||||
|
||||
void RegAlloc::DefineValueImpl(IR::Inst* def_inst, const IR::Value& use_inst) {
|
||||
ASSERT_MSG(!ValueLocation(def_inst), "def_inst has already been defined");
|
||||
|
||||
if (use_inst.IsImmediate()) {
|
||||
HostLoc location = ScratchImpl(any_gpr);
|
||||
DefineValueImpl(def_inst, location);
|
||||
LoadImmediate(use_inst, location);
|
||||
return;
|
||||
}
|
||||
|
||||
ASSERT_MSG(ValueLocation(use_inst.GetInst()), "use_inst must already be defined");
|
||||
HostLoc location = *ValueLocation(use_inst.GetInst());
|
||||
DefineValueImpl(def_inst, location);
|
||||
}
|
||||
|
||||
HostLoc RegAlloc::LoadImmediate(IR::Value imm, HostLoc host_loc) {
|
||||
ASSERT_MSG(imm.IsImmediate(), "imm is not an immediate");
|
||||
|
||||
if (HostLocIsGPR(host_loc)) {
|
||||
Xbyak::Reg64 reg = HostLocToReg64(host_loc);
|
||||
u64 imm_value = ImmediateToU64(imm);
|
||||
if (imm_value == 0)
|
||||
code.xor_(reg.cvt32(), reg.cvt32());
|
||||
else
|
||||
code.mov(reg, imm_value);
|
||||
return host_loc;
|
||||
}
|
||||
|
||||
if (HostLocIsXMM(host_loc)) {
|
||||
Xbyak::Xmm reg = HostLocToXmm(host_loc);
|
||||
u64 imm_value = ImmediateToU64(imm);
|
||||
if (imm_value == 0)
|
||||
code.pxor(reg, reg);
|
||||
else
|
||||
code.movdqa(reg, code.MConst(code.xword, imm_value)); // TODO: movaps/movapd more appropriate sometimes
|
||||
return host_loc;
|
||||
}
|
||||
|
||||
UNREACHABLE();
|
||||
}
|
||||
|
||||
void RegAlloc::Move(HostLoc to, HostLoc from) {
|
||||
const size_t bit_width = LocInfo(from).GetMaxBitWidth();
|
||||
|
||||
ASSERT(LocInfo(to).IsEmpty() && !LocInfo(from).IsLocked());
|
||||
ASSERT(bit_width <= HostLocBitWidth(to));
|
||||
|
||||
if (LocInfo(from).IsEmpty()) {
|
||||
return;
|
||||
}
|
||||
|
||||
EmitMove(bit_width, to, from);
|
||||
|
||||
LocInfo(to) = std::exchange(LocInfo(from), {});
|
||||
}
|
||||
|
||||
void RegAlloc::CopyToScratch(size_t bit_width, HostLoc to, HostLoc from) {
|
||||
ASSERT(LocInfo(to).IsEmpty() && !LocInfo(from).IsEmpty());
|
||||
|
||||
EmitMove(bit_width, to, from);
|
||||
}
|
||||
|
||||
void RegAlloc::Exchange(HostLoc a, HostLoc b) {
|
||||
ASSERT(!LocInfo(a).IsLocked() && !LocInfo(b).IsLocked());
|
||||
ASSERT(LocInfo(a).GetMaxBitWidth() <= HostLocBitWidth(b));
|
||||
ASSERT(LocInfo(b).GetMaxBitWidth() <= HostLocBitWidth(a));
|
||||
|
||||
if (LocInfo(a).IsEmpty()) {
|
||||
Move(a, b);
|
||||
return;
|
||||
}
|
||||
|
||||
if (LocInfo(b).IsEmpty()) {
|
||||
Move(b, a);
|
||||
return;
|
||||
}
|
||||
|
||||
EmitExchange(a, b);
|
||||
|
||||
std::swap(LocInfo(a), LocInfo(b));
|
||||
}
|
||||
|
||||
void RegAlloc::MoveOutOfTheWay(HostLoc reg) {
|
||||
ASSERT(!LocInfo(reg).IsLocked());
|
||||
if (!LocInfo(reg).IsEmpty()) {
|
||||
SpillRegister(reg);
|
||||
}
|
||||
}
|
||||
|
||||
void RegAlloc::SpillRegister(HostLoc loc) {
|
||||
ASSERT_MSG(HostLocIsRegister(loc), "Only registers can be spilled");
|
||||
ASSERT_MSG(!LocInfo(loc).IsEmpty(), "There is no need to spill unoccupied registers");
|
||||
ASSERT_MSG(!LocInfo(loc).IsLocked(), "Registers that have been allocated must not be spilt");
|
||||
|
||||
HostLoc new_loc = FindFreeSpill();
|
||||
Move(new_loc, loc);
|
||||
}
|
||||
|
||||
HostLoc RegAlloc::FindFreeSpill() const {
|
||||
for (size_t i = static_cast<size_t>(HostLoc::FirstSpill); i < hostloc_info.size(); i++) {
|
||||
HostLoc loc = static_cast<HostLoc>(i);
|
||||
if (LocInfo(loc).IsEmpty())
|
||||
return loc;
|
||||
}
|
||||
|
||||
ASSERT_MSG(false, "All spill locations are full");
|
||||
}
|
||||
|
||||
HostLocInfo& RegAlloc::LocInfo(HostLoc loc) {
|
||||
ASSERT(loc != HostLoc::RSP && loc != HostLoc::R15);
|
||||
return hostloc_info[static_cast<size_t>(loc)];
|
||||
}
|
||||
|
||||
const HostLocInfo& RegAlloc::LocInfo(HostLoc loc) const {
|
||||
ASSERT(loc != HostLoc::RSP && loc != HostLoc::R15);
|
||||
return hostloc_info[static_cast<size_t>(loc)];
|
||||
}
|
||||
|
||||
void RegAlloc::EmitMove(size_t bit_width, HostLoc to, HostLoc from) {
|
||||
if (HostLocIsXMM(to) && HostLocIsXMM(from)) {
|
||||
code.movaps(HostLocToXmm(to), HostLocToXmm(from));
|
||||
} else if (HostLocIsGPR(to) && HostLocIsGPR(from)) {
|
||||
ASSERT(bit_width != 128);
|
||||
if (bit_width == 64) {
|
||||
code.mov(HostLocToReg64(to), HostLocToReg64(from));
|
||||
} else {
|
||||
code.mov(HostLocToReg64(to).cvt32(), HostLocToReg64(from).cvt32());
|
||||
}
|
||||
} else if (HostLocIsXMM(to) && HostLocIsGPR(from)) {
|
||||
ASSERT(bit_width != 128);
|
||||
if (bit_width == 64) {
|
||||
code.movq(HostLocToXmm(to), HostLocToReg64(from));
|
||||
} else {
|
||||
code.movd(HostLocToXmm(to), HostLocToReg64(from).cvt32());
|
||||
}
|
||||
} else if (HostLocIsGPR(to) && HostLocIsXMM(from)) {
|
||||
ASSERT(bit_width != 128);
|
||||
if (bit_width == 64) {
|
||||
code.movq(HostLocToReg64(to), HostLocToXmm(from));
|
||||
} else {
|
||||
code.movd(HostLocToReg64(to).cvt32(), HostLocToXmm(from));
|
||||
}
|
||||
} else if (HostLocIsXMM(to) && HostLocIsSpill(from)) {
|
||||
Xbyak::Address spill_addr = spill_to_addr(from);
|
||||
ASSERT(spill_addr.getBit() >= bit_width);
|
||||
switch (bit_width) {
|
||||
case 128:
|
||||
code.movaps(HostLocToXmm(to), spill_addr);
|
||||
break;
|
||||
case 64:
|
||||
code.movsd(HostLocToXmm(to), spill_addr);
|
||||
break;
|
||||
case 32:
|
||||
case 16:
|
||||
case 8:
|
||||
code.movss(HostLocToXmm(to), spill_addr);
|
||||
break;
|
||||
default:
|
||||
UNREACHABLE();
|
||||
}
|
||||
} else if (HostLocIsSpill(to) && HostLocIsXMM(from)) {
|
||||
Xbyak::Address spill_addr = spill_to_addr(to);
|
||||
ASSERT(spill_addr.getBit() >= bit_width);
|
||||
switch (bit_width) {
|
||||
case 128:
|
||||
code.movaps(spill_addr, HostLocToXmm(from));
|
||||
break;
|
||||
case 64:
|
||||
code.movsd(spill_addr, HostLocToXmm(from));
|
||||
break;
|
||||
case 32:
|
||||
case 16:
|
||||
case 8:
|
||||
code.movss(spill_addr, HostLocToXmm(from));
|
||||
break;
|
||||
default:
|
||||
UNREACHABLE();
|
||||
}
|
||||
} else if (HostLocIsGPR(to) && HostLocIsSpill(from)) {
|
||||
ASSERT(bit_width != 128);
|
||||
if (bit_width == 64) {
|
||||
code.mov(HostLocToReg64(to), spill_to_addr(from));
|
||||
} else {
|
||||
code.mov(HostLocToReg64(to).cvt32(), spill_to_addr(from));
|
||||
}
|
||||
} else if (HostLocIsSpill(to) && HostLocIsGPR(from)) {
|
||||
ASSERT(bit_width != 128);
|
||||
if (bit_width == 64) {
|
||||
code.mov(spill_to_addr(to), HostLocToReg64(from));
|
||||
} else {
|
||||
code.mov(spill_to_addr(to), HostLocToReg64(from).cvt32());
|
||||
}
|
||||
} else {
|
||||
ASSERT_MSG(false, "Invalid RegAlloc::EmitMove");
|
||||
}
|
||||
}
|
||||
|
||||
void RegAlloc::EmitExchange(HostLoc a, HostLoc b) {
|
||||
if (HostLocIsGPR(a) && HostLocIsGPR(b)) {
|
||||
code.xchg(HostLocToReg64(a), HostLocToReg64(b));
|
||||
} else if (HostLocIsXMM(a) && HostLocIsXMM(b)) {
|
||||
ASSERT_MSG(false, "Check your code: Exchanging XMM registers is unnecessary");
|
||||
} else {
|
||||
ASSERT_MSG(false, "Invalid RegAlloc::EmitExchange");
|
||||
}
|
||||
}
|
||||
|
||||
} // namespace Dynarmic::BackendX64
|
||||
159
src/backend/x64/reg_alloc.h
Normal file
159
src/backend/x64/reg_alloc.h
Normal file
@@ -0,0 +1,159 @@
|
||||
/* This file is part of the dynarmic project.
|
||||
* Copyright (c) 2016 MerryMage
|
||||
* This software may be used and distributed according to the terms of the GNU
|
||||
* General Public License version 2 or any later version.
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <array>
|
||||
#include <functional>
|
||||
#include <utility>
|
||||
#include <vector>
|
||||
|
||||
#include <boost/optional.hpp>
|
||||
#include <xbyak.h>
|
||||
|
||||
#include "backend/x64/block_of_code.h"
|
||||
#include "backend/x64/hostloc.h"
|
||||
#include "backend/x64/oparg.h"
|
||||
#include "common/common_types.h"
|
||||
#include "frontend/ir/cond.h"
|
||||
#include "frontend/ir/microinstruction.h"
|
||||
#include "frontend/ir/value.h"
|
||||
|
||||
namespace Dynarmic::BackendX64 {
|
||||
|
||||
class RegAlloc;
|
||||
|
||||
struct HostLocInfo {
|
||||
public:
|
||||
bool IsLocked() const;
|
||||
bool IsEmpty() const;
|
||||
bool IsLastUse() const;
|
||||
|
||||
void ReadLock();
|
||||
void WriteLock();
|
||||
void AddArgReference();
|
||||
void ReleaseOne();
|
||||
void ReleaseAll();
|
||||
|
||||
bool ContainsValue(const IR::Inst* inst) const;
|
||||
size_t GetMaxBitWidth() const;
|
||||
|
||||
void AddValue(IR::Inst* inst);
|
||||
|
||||
private:
|
||||
// Current instruction state
|
||||
size_t is_being_used_count = 0;
|
||||
bool is_scratch = false;
|
||||
|
||||
// Block state
|
||||
size_t current_references = 0;
|
||||
size_t accumulated_uses = 0;
|
||||
size_t total_uses = 0;
|
||||
|
||||
// Value state
|
||||
std::vector<IR::Inst*> values;
|
||||
size_t max_bit_width = 0;
|
||||
};
|
||||
|
||||
struct Argument {
|
||||
public:
|
||||
IR::Type GetType() const;
|
||||
bool IsImmediate() const;
|
||||
bool IsVoid() const;
|
||||
|
||||
bool FitsInImmediateU32() const;
|
||||
bool FitsInImmediateS32() const;
|
||||
|
||||
bool GetImmediateU1() const;
|
||||
u8 GetImmediateU8() const;
|
||||
u16 GetImmediateU16() const;
|
||||
u32 GetImmediateU32() const;
|
||||
u64 GetImmediateS32() const;
|
||||
u64 GetImmediateU64() const;
|
||||
IR::Cond GetImmediateCond() const;
|
||||
|
||||
/// Is this value currently in a GPR?
|
||||
bool IsInGpr() const;
|
||||
/// Is this value currently in a XMM?
|
||||
bool IsInXmm() const;
|
||||
/// Is this value currently in memory?
|
||||
bool IsInMemory() const;
|
||||
|
||||
private:
|
||||
friend class RegAlloc;
|
||||
explicit Argument(RegAlloc& reg_alloc) : reg_alloc(reg_alloc) {}
|
||||
|
||||
bool allocated = false;
|
||||
RegAlloc& reg_alloc;
|
||||
IR::Value value;
|
||||
};
|
||||
|
||||
class RegAlloc final {
|
||||
public:
|
||||
using ArgumentInfo = std::array<Argument, IR::max_arg_count>;
|
||||
|
||||
explicit RegAlloc(BlockOfCode& code, size_t num_spills, std::function<Xbyak::Address(HostLoc)> spill_to_addr)
|
||||
: hostloc_info(NonSpillHostLocCount + num_spills), code(code), spill_to_addr(std::move(spill_to_addr)) {}
|
||||
|
||||
ArgumentInfo GetArgumentInfo(IR::Inst* inst);
|
||||
|
||||
Xbyak::Reg64 UseGpr(Argument& arg);
|
||||
Xbyak::Xmm UseXmm(Argument& arg);
|
||||
OpArg UseOpArg(Argument& arg);
|
||||
void Use(Argument& arg, HostLoc host_loc);
|
||||
|
||||
Xbyak::Reg64 UseScratchGpr(Argument& arg);
|
||||
Xbyak::Xmm UseScratchXmm(Argument& arg);
|
||||
void UseScratch(Argument& arg, HostLoc host_loc);
|
||||
|
||||
void DefineValue(IR::Inst* inst, const Xbyak::Reg& reg);
|
||||
void DefineValue(IR::Inst* inst, Argument& arg);
|
||||
|
||||
void Release(const Xbyak::Reg& reg);
|
||||
|
||||
Xbyak::Reg64 ScratchGpr(HostLocList desired_locations = any_gpr);
|
||||
Xbyak::Xmm ScratchXmm(HostLocList desired_locations = any_xmm);
|
||||
|
||||
void HostCall(IR::Inst* result_def = nullptr, boost::optional<Argument&> arg0 = {}, boost::optional<Argument&> arg1 = {}, boost::optional<Argument&> arg2 = {}, boost::optional<Argument&> arg3 = {});
|
||||
|
||||
// TODO: Values in host flags
|
||||
|
||||
void EndOfAllocScope();
|
||||
|
||||
void AssertNoMoreUses();
|
||||
|
||||
private:
|
||||
friend struct Argument;
|
||||
|
||||
HostLoc SelectARegister(HostLocList desired_locations) const;
|
||||
boost::optional<HostLoc> ValueLocation(const IR::Inst* value) const;
|
||||
|
||||
HostLoc UseImpl(IR::Value use_value, HostLocList desired_locations);
|
||||
HostLoc UseScratchImpl(IR::Value use_value, HostLocList desired_locations);
|
||||
HostLoc ScratchImpl(HostLocList desired_locations);
|
||||
void DefineValueImpl(IR::Inst* def_inst, HostLoc host_loc);
|
||||
void DefineValueImpl(IR::Inst* def_inst, const IR::Value& use_inst);
|
||||
|
||||
HostLoc LoadImmediate(IR::Value imm, HostLoc reg);
|
||||
void Move(HostLoc to, HostLoc from);
|
||||
void CopyToScratch(size_t bit_width, HostLoc to, HostLoc from);
|
||||
void Exchange(HostLoc a, HostLoc b);
|
||||
void MoveOutOfTheWay(HostLoc reg);
|
||||
|
||||
void SpillRegister(HostLoc loc);
|
||||
HostLoc FindFreeSpill() const;
|
||||
|
||||
std::vector<HostLocInfo> hostloc_info;
|
||||
HostLocInfo& LocInfo(HostLoc loc);
|
||||
const HostLocInfo& LocInfo(HostLoc loc) const;
|
||||
|
||||
BlockOfCode& code;
|
||||
std::function<Xbyak::Address(HostLoc)> spill_to_addr;
|
||||
void EmitMove(size_t bit_width, HostLoc to, HostLoc from);
|
||||
void EmitExchange(HostLoc a, HostLoc b);
|
||||
};
|
||||
|
||||
} // namespace Dynarmic::BackendX64
|
||||
Reference in New Issue
Block a user