mirror of
https://git.suyu.dev/suyu/dynarmic.git
synced 2026-03-08 06:06:29 +00:00
More Speed
This commit is contained in:
@@ -14,36 +14,22 @@
|
||||
namespace Dynarmic {
|
||||
namespace BackendX64 {
|
||||
|
||||
// TODO: Just turn this into a function that indexes a std::array.
|
||||
// Lookup table from each general-purpose HostLoc (RAX through R14) to the
// Gen::X64Reg of the same name. Other lines in this view read it through
// hostloc_to_x64.at(...).
// R15 has no entry here; SpillToOpArg in this file uses Gen::R15 as the base
// register for spill-slot operands.
// NOTE(review): in this diff view the table appears next to HostLocToX64(),
// which performs the same conversion with a cast — presumably the table is the
// removed side of the change; confirm against the repository.
const static std::map<HostLoc, Gen::X64Reg> hostloc_to_x64 = {
|
||||
{ HostLoc::RAX, Gen::RAX },
|
||||
{ HostLoc::RBX, Gen::RBX },
|
||||
{ HostLoc::RCX, Gen::RCX },
|
||||
{ HostLoc::RDX, Gen::RDX },
|
||||
{ HostLoc::RSI, Gen::RSI },
|
||||
{ HostLoc::RDI, Gen::RDI },
|
||||
{ HostLoc::RBP, Gen::RBP },
|
||||
{ HostLoc::RSP, Gen::RSP },
|
||||
{ HostLoc::R8, Gen::R8 },
|
||||
{ HostLoc::R9, Gen::R9 },
|
||||
{ HostLoc::R10, Gen::R10 },
|
||||
{ HostLoc::R11, Gen::R11 },
|
||||
{ HostLoc::R12, Gen::R12 },
|
||||
{ HostLoc::R13, Gen::R13 },
|
||||
{ HostLoc::R14, Gen::R14 },
|
||||
};
|
||||
// Converts a register HostLoc into the matching Gen::X64Reg by casting the
// enum value directly; no table lookup is involved.
//
// loc: must satisfy HostLocIsRegister(loc). This is checked only through
//      DEBUG_ASSERT.
// Returns: loc reinterpreted as a Gen::X64Reg.
// NOTE(review): correctness depends on HostLoc and Gen::X64Reg sharing numeric
// values for registers, as the inline comment states; neither enum is visible
// here — confirm, and consider a static_assert where both are declared.
static Gen::X64Reg HostLocToX64(HostLoc loc) {
|
||||
DEBUG_ASSERT(HostLocIsRegister(loc));
|
||||
// HostLoc is ordered such that the numbers line up.
|
||||
return static_cast<Gen::X64Reg>(loc);
|
||||
}
|
||||
|
||||
// Builds the memory operand that addresses the spill slot for a spill HostLoc.
//
// loc: must satisfy HostLocIsSpill(loc).
// Returns: a displacement operand off Gen::R15 at
//          offsetof(JitState, Spill) + index * sizeof(u32), where index is
//          loc's position relative to HostLoc::FirstSpill.
// NOTE(review): R15 presumably holds the JitState pointer while generated code
// runs — confirm against the code that sets it up.
static Gen::OpArg SpillToOpArg(HostLoc loc) {
|
||||
// NOTE(review): the ASSERT and DEBUG_ASSERT below check the same condition;
// they look like the before/after forms of one line in this diff view —
// confirm which one the file actually contains.
ASSERT(HostLocIsSpill(loc));
|
||||
DEBUG_ASSERT(HostLocIsSpill(loc));
|
||||
|
||||
// Zero-based slot index of this spill location.
size_t i = static_cast<size_t>(loc) - static_cast<size_t>(HostLoc::FirstSpill);
|
||||
return Gen::MDisp(Gen::R15, static_cast<int>(offsetof(JitState, Spill) + i * sizeof(u32)));
|
||||
}
|
||||
|
||||
Gen::X64Reg RegAlloc::DefRegister(IR::Value* def_value, std::initializer_list<HostLoc> desired_locations) {
|
||||
ASSERT(std::all_of(desired_locations.begin(), desired_locations.end(), HostLocIsRegister));
|
||||
ASSERT_MSG(remaining_uses.find(def_value) == remaining_uses.end(), "def_value has already been defined");
|
||||
ASSERT_MSG(ValueLocations(def_value).empty(), "def_value has already been defined");
|
||||
Gen::X64Reg RegAlloc::DefRegister(IR::Inst* def_inst, std::initializer_list<HostLoc> desired_locations) {
|
||||
DEBUG_ASSERT(std::all_of(desired_locations.begin(), desired_locations.end(), HostLocIsRegister));
|
||||
DEBUG_ASSERT_MSG(ValueLocations(def_inst).empty(), "def_inst has already been defined");
|
||||
|
||||
HostLoc location = SelectARegister(desired_locations);
|
||||
|
||||
@@ -52,43 +38,54 @@ Gen::X64Reg RegAlloc::DefRegister(IR::Value* def_value, std::initializer_list<Ho
|
||||
}
|
||||
|
||||
// Update state
|
||||
hostloc_state[location] = HostLocState::Def;
|
||||
hostloc_to_value[location] = def_value;
|
||||
remaining_uses[def_value] = def_value->NumUses();
|
||||
hostloc_state[static_cast<size_t>(location)] = HostLocState::Def;
|
||||
hostloc_to_inst[static_cast<size_t>(location)] = def_inst;
|
||||
|
||||
return hostloc_to_x64.at(location);
|
||||
return HostLocToX64(location);
|
||||
}
|
||||
|
||||
Gen::X64Reg RegAlloc::UseDefRegister(IR::Value* use_value, IR::Value* def_value, std::initializer_list<HostLoc> desired_locations) {
|
||||
ASSERT(std::all_of(desired_locations.begin(), desired_locations.end(), HostLocIsRegister));
|
||||
ASSERT_MSG(remaining_uses.find(def_value) == remaining_uses.end(), "def_value has already been defined");
|
||||
ASSERT_MSG(ValueLocations(def_value).empty(), "def_value has already been defined");
|
||||
ASSERT_MSG(remaining_uses.find(use_value) != remaining_uses.end(), "use_value has not been defined");
|
||||
ASSERT_MSG(!ValueLocations(use_value).empty(), "use_value has not been defined");
|
||||
// IR::Value overload of UseDefRegister.
// If use_value is not an immediate, forwards to the IR::Inst* overload with
// use_value.GetInst(). Otherwise defines def_inst in a register chosen from
// desired_locations and loads the immediate into that register.
// Returns: the register that was defined for def_inst.
Gen::X64Reg RegAlloc::UseDefRegister(IR::Value use_value, IR::Inst* def_inst, std::initializer_list<HostLoc> desired_locations) {
|
||||
if (!use_value.IsImmediate()) {
|
||||
return UseDefRegister(use_value.GetInst(), def_inst, desired_locations);
|
||||
}
|
||||
|
||||
// NOTE(review): the TODO and the two declarations below refer to def_value,
// which is not a parameter of this signature. They are presumably removed
// lines from the earlier IR::Value* version that this diff view interleaves
// here — confirm against the repository.
// TODO: Optimize the case when this is the last use_value use.
|
||||
Gen::X64Reg use_reg = UseRegister(use_value);
|
||||
Gen::X64Reg def_reg = DefRegister(def_value, desired_locations);
|
||||
// Immediate case: define the destination register and materialise the constant in it.
return LoadImmediateIntoRegister(use_value, DefRegister(def_inst, desired_locations));
|
||||
}
|
||||
|
||||
// IR::Inst* overload of UseDefRegister.
// Obtains a register holding use_inst, defines def_inst in a register chosen
// from desired_locations, and emits a 32-bit MOV that copies the used value
// into the defined register.
//
// Preconditions (checked via DEBUG_ASSERT / DEBUG_ASSERT_MSG only):
//   - every entry of desired_locations is a register;
//   - def_inst has no location yet;
//   - use_inst already has at least one location.
// Returns: the register defined for def_inst.
Gen::X64Reg RegAlloc::UseDefRegister(IR::Inst* use_inst, IR::Inst* def_inst, std::initializer_list<HostLoc> desired_locations) {
|
||||
DEBUG_ASSERT(std::all_of(desired_locations.begin(), desired_locations.end(), HostLocIsRegister));
|
||||
DEBUG_ASSERT_MSG(ValueLocations(def_inst).empty(), "def_inst has already been defined");
|
||||
DEBUG_ASSERT_MSG(!ValueLocations(use_inst).empty(), "use_inst has not been defined");
|
||||
|
||||
// TODO: Optimize the case when this is the last use_inst use.
|
||||
Gen::X64Reg use_reg = UseRegister(use_inst);
|
||||
Gen::X64Reg def_reg = DefRegister(def_inst, desired_locations);
|
||||
// Copy the used value into the newly defined register (32-bit move).
code->MOV(32, Gen::R(def_reg), Gen::R(use_reg));
|
||||
return def_reg;
|
||||
}
|
||||
|
||||
Gen::X64Reg RegAlloc::UseRegister(IR::Value* use_value, std::initializer_list<HostLoc> desired_locations) {
|
||||
ASSERT(std::all_of(desired_locations.begin(), desired_locations.end(), HostLocIsRegister));
|
||||
ASSERT_MSG(remaining_uses.find(use_value) != remaining_uses.end(), "use_value has not been defined");
|
||||
ASSERT_MSG(!ValueLocations(use_value).empty(), "use_value has not been defined");
|
||||
ASSERT_MSG(remaining_uses[use_value] != 0, "use_value ran out of uses. (Use-d an IR::Value* too many times)");
|
||||
// IR::Value overload of UseRegister.
// If use_value is not an immediate, forwards to the IR::Inst* overload with
// use_value.GetInst(). Otherwise allocates a scratch register from
// desired_locations and loads the immediate into it.
// Returns: the register that holds the value.
Gen::X64Reg RegAlloc::UseRegister(IR::Value use_value, std::initializer_list<HostLoc> desired_locations) {
|
||||
if (!use_value.IsImmediate()) {
|
||||
return UseRegister(use_value.GetInst(), desired_locations);
|
||||
}
|
||||
|
||||
// NOTE(review): current_location is not read anywhere in this overload. The
// line is presumably a removed statement from the earlier IR::Value* version
// that this diff view interleaves here — confirm against the repository.
HostLoc current_location = ValueLocations(use_value).front();
|
||||
return LoadImmediateIntoRegister(use_value, ScratchRegister(desired_locations));
|
||||
}
|
||||
|
||||
Gen::X64Reg RegAlloc::UseRegister(IR::Inst* use_inst, std::initializer_list<HostLoc> desired_locations) {
|
||||
DEBUG_ASSERT(std::all_of(desired_locations.begin(), desired_locations.end(), HostLocIsRegister));
|
||||
DEBUG_ASSERT_MSG(!ValueLocations(use_inst).empty(), "use_inst has not been defined");
|
||||
|
||||
HostLoc current_location = ValueLocations(use_inst).front();
|
||||
auto iter = std::find(desired_locations.begin(), desired_locations.end(), current_location);
|
||||
if (iter != desired_locations.end()) {
|
||||
ASSERT(hostloc_state[current_location] == HostLocState::Idle || hostloc_state[current_location] == HostLocState::Use);
|
||||
ASSERT(hostloc_state[static_cast<size_t>(current_location)] == HostLocState::Idle || hostloc_state[static_cast<size_t>(current_location)] == HostLocState::Use);
|
||||
|
||||
// Update state
|
||||
hostloc_state[current_location] = HostLocState::Use;
|
||||
remaining_uses[use_value]--;
|
||||
hostloc_state[static_cast<size_t>(current_location)] = HostLocState::Use;
|
||||
DecrementRemainingUses(use_inst);
|
||||
|
||||
return hostloc_to_x64.at(current_location);
|
||||
return HostLocToX64(current_location);
|
||||
}
|
||||
|
||||
HostLoc new_location = SelectARegister(desired_locations);
|
||||
@@ -98,33 +95,40 @@ Gen::X64Reg RegAlloc::UseRegister(IR::Value* use_value, std::initializer_list<Ho
|
||||
SpillRegister(new_location);
|
||||
}
|
||||
|
||||
code->MOV(32, Gen::R(hostloc_to_x64.at(new_location)), SpillToOpArg(current_location));
|
||||
code->MOV(32, Gen::R(HostLocToX64(new_location)), SpillToOpArg(current_location));
|
||||
|
||||
hostloc_state[new_location] = HostLocState::Use;
|
||||
std::swap(hostloc_to_value[new_location], hostloc_to_value[current_location]);
|
||||
remaining_uses[use_value]--;
|
||||
hostloc_state[static_cast<size_t>(new_location)] = HostLocState::Use;
|
||||
std::swap(hostloc_to_inst[static_cast<size_t>(new_location)], hostloc_to_inst[static_cast<size_t>(current_location)]);
|
||||
DecrementRemainingUses(use_inst);
|
||||
} else if (HostLocIsRegister(current_location)) {
|
||||
ASSERT(hostloc_state[current_location] == HostLocState::Idle);
|
||||
ASSERT(hostloc_state[static_cast<size_t>(current_location)] == HostLocState::Idle);
|
||||
|
||||
code->XCHG(32, Gen::R(hostloc_to_x64.at(new_location)), Gen::R(hostloc_to_x64.at(current_location)));
|
||||
code->XCHG(32, Gen::R(HostLocToX64(new_location)), Gen::R(HostLocToX64(current_location)));
|
||||
|
||||
hostloc_state[new_location] = HostLocState::Use;
|
||||
std::swap(hostloc_to_value[new_location], hostloc_to_value[current_location]);
|
||||
remaining_uses[use_value]--;
|
||||
hostloc_state[static_cast<size_t>(new_location)] = HostLocState::Use;
|
||||
std::swap(hostloc_to_inst[static_cast<size_t>(new_location)], hostloc_to_inst[static_cast<size_t>(current_location)]);
|
||||
DecrementRemainingUses(use_inst);
|
||||
} else {
|
||||
ASSERT_MSG(0, "Invalid current_location");
|
||||
}
|
||||
|
||||
return hostloc_to_x64.at(new_location);
|
||||
return HostLocToX64(new_location);
|
||||
}
|
||||
|
||||
Gen::X64Reg RegAlloc::UseScratchRegister(IR::Value* use_value, std::initializer_list<HostLoc> desired_locations) {
|
||||
ASSERT(std::all_of(desired_locations.begin(), desired_locations.end(), HostLocIsRegister));
|
||||
ASSERT_MSG(remaining_uses.find(use_value) != remaining_uses.end(), "use_value has not been defined");
|
||||
ASSERT_MSG(!ValueLocations(use_value).empty(), "use_value has not been defined");
|
||||
ASSERT_MSG(remaining_uses[use_value] != 0, "use_value ran out of uses. (Use-d an IR::Value* too many times)");
|
||||
// IR::Value overload of UseScratchRegister.
// If use_value is not an immediate, forwards to the IR::Inst* overload with
// use_value.GetInst(). Otherwise allocates a scratch register from
// desired_locations and loads the immediate into it.
// Returns: the scratch register that holds the value.
Gen::X64Reg RegAlloc::UseScratchRegister(IR::Value use_value, std::initializer_list<HostLoc> desired_locations) {
|
||||
if (!use_value.IsImmediate()) {
|
||||
return UseScratchRegister(use_value.GetInst(), desired_locations);
|
||||
}
|
||||
|
||||
// NOTE(review): current_location is not read anywhere in this overload. The
// line is presumably a removed statement from the earlier IR::Value* version
// that this diff view interleaves here — confirm against the repository.
HostLoc current_location = ValueLocations(use_value).front();
|
||||
return LoadImmediateIntoRegister(use_value, ScratchRegister(desired_locations));
|
||||
}
|
||||
|
||||
Gen::X64Reg RegAlloc::UseScratchRegister(IR::Inst* use_inst, std::initializer_list<HostLoc> desired_locations) {
|
||||
DEBUG_ASSERT(std::all_of(desired_locations.begin(), desired_locations.end(), HostLocIsRegister));
|
||||
DEBUG_ASSERT_MSG(!ValueLocations(use_inst).empty(), "use_inst has not been defined");
|
||||
ASSERT_MSG(use_inst->use_count != 0, "use_inst ran out of uses. (Use-d an IR::Inst* too many times)");
|
||||
|
||||
HostLoc current_location = ValueLocations(use_inst).front();
|
||||
HostLoc new_location = SelectARegister(desired_locations);
|
||||
|
||||
if (HostLocIsSpill(current_location)) {
|
||||
@@ -132,34 +136,34 @@ Gen::X64Reg RegAlloc::UseScratchRegister(IR::Value* use_value, std::initializer_
|
||||
SpillRegister(new_location);
|
||||
}
|
||||
|
||||
code->MOV(32, Gen::R(hostloc_to_x64.at(new_location)), SpillToOpArg(current_location));
|
||||
code->MOV(32, Gen::R(HostLocToX64(new_location)), SpillToOpArg(current_location));
|
||||
|
||||
hostloc_state[new_location] = HostLocState::Scratch;
|
||||
remaining_uses[use_value]--;
|
||||
hostloc_state[static_cast<size_t>(new_location)] = HostLocState::Scratch;
|
||||
DecrementRemainingUses(use_inst);
|
||||
} else if (HostLocIsRegister(current_location)) {
|
||||
ASSERT(hostloc_state[current_location] == HostLocState::Idle);
|
||||
ASSERT(hostloc_state[static_cast<size_t>(current_location)] == HostLocState::Idle);
|
||||
|
||||
if (IsRegisterOccupied(new_location)) {
|
||||
SpillRegister(new_location);
|
||||
if (current_location != new_location) {
|
||||
code->MOV(32, Gen::R(hostloc_to_x64.at(new_location)), Gen::R(hostloc_to_x64.at(current_location)));
|
||||
code->MOV(32, Gen::R(HostLocToX64(new_location)), Gen::R(HostLocToX64(current_location)));
|
||||
}
|
||||
} else {
|
||||
code->MOV(32, Gen::R(hostloc_to_x64.at(new_location)), Gen::R(hostloc_to_x64.at(current_location)));
|
||||
code->MOV(32, Gen::R(HostLocToX64(new_location)), Gen::R(HostLocToX64(current_location)));
|
||||
}
|
||||
|
||||
hostloc_state[new_location] = HostLocState::Scratch;
|
||||
remaining_uses[use_value]--;
|
||||
hostloc_state[static_cast<size_t>(new_location)] = HostLocState::Scratch;
|
||||
DecrementRemainingUses(use_inst);
|
||||
} else {
|
||||
ASSERT_MSG(0, "Invalid current_location");
|
||||
}
|
||||
|
||||
return hostloc_to_x64.at(new_location);
|
||||
return HostLocToX64(new_location);
|
||||
}
|
||||
|
||||
|
||||
Gen::X64Reg RegAlloc::ScratchRegister(std::initializer_list<HostLoc> desired_locations) {
|
||||
ASSERT(std::all_of(desired_locations.begin(), desired_locations.end(), HostLocIsRegister));
|
||||
DEBUG_ASSERT(std::all_of(desired_locations.begin(), desired_locations.end(), HostLocIsRegister));
|
||||
|
||||
HostLoc location = SelectARegister(desired_locations);
|
||||
|
||||
@@ -168,12 +172,32 @@ Gen::X64Reg RegAlloc::ScratchRegister(std::initializer_list<HostLoc> desired_loc
|
||||
}
|
||||
|
||||
// Update state
|
||||
hostloc_state[location] = HostLocState::Scratch;
|
||||
hostloc_state[static_cast<size_t>(location)] = HostLocState::Scratch;
|
||||
|
||||
return hostloc_to_x64.at(location);
|
||||
return HostLocToX64(location);
|
||||
}
|
||||
|
||||
void RegAlloc::HostCall(IR::Value* result_def, IR::Value* arg0_use, IR::Value* arg1_use, IR::Value* arg2_use, IR::Value* arg3_use) {
|
||||
// Emits a 32-bit MOV that loads the immediate imm into reg.
//
// imm: must be an immediate (asserted). Types U1, U8 and U32 are handled; each
//      is emitted as a Gen::Imm32 operand.
// reg: destination register.
// Returns: reg, unchanged, so the call can be used inline in a return
//          statement.
// Any other immediate type hits the assertion in the default case.
Gen::X64Reg RegAlloc::LoadImmediateIntoRegister(IR::Value imm, Gen::X64Reg reg) {
|
||||
ASSERT_MSG(imm.IsImmediate(), "imm is not an immediate");
|
||||
|
||||
switch (imm.GetType()) {
|
||||
case IR::Type::U1:
|
||||
code->MOV(32, R(reg), Gen::Imm32(imm.GetU1()));
|
||||
break;
|
||||
case IR::Type::U8:
|
||||
code->MOV(32, R(reg), Gen::Imm32(imm.GetU8()));
|
||||
break;
|
||||
case IR::Type::U32:
|
||||
code->MOV(32, R(reg), Gen::Imm32(imm.GetU32()));
|
||||
break;
|
||||
default:
|
||||
// Unsupported immediate type.
ASSERT_MSG(false, "This should never happen.");
|
||||
}
|
||||
|
||||
return reg;
|
||||
}
|
||||
|
||||
void RegAlloc::HostCall(IR::Inst* result_def, IR::Value arg0_use, IR::Value arg1_use, IR::Value arg2_use, IR::Value arg3_use) {
|
||||
constexpr HostLoc AbiReturn = HostLoc::RAX;
|
||||
#ifdef _WIN32
|
||||
constexpr std::array<HostLoc, 4> AbiArgs = { HostLoc::RCX, HostLoc::RDX, HostLoc::R8, HostLoc::R9 };
|
||||
@@ -185,7 +209,7 @@ void RegAlloc::HostCall(IR::Value* result_def, IR::Value* arg0_use, IR::Value* a
|
||||
constexpr std::array<HostLoc, 4> OtherCallerSave = { HostLoc::R8, HostLoc::R9, HostLoc::R10, HostLoc::R11 };
|
||||
#endif
|
||||
|
||||
const std::array<IR::Value*, 4> args = {arg0_use, arg1_use, arg2_use, arg3_use};
|
||||
const std::array<IR::Value*, 4> args = {&arg0_use, &arg1_use, &arg2_use, &arg3_use};
|
||||
|
||||
// TODO: This works but almost certainly leads to suboptimal generated code.
|
||||
|
||||
@@ -200,8 +224,8 @@ void RegAlloc::HostCall(IR::Value* result_def, IR::Value* arg0_use, IR::Value* a
|
||||
}
|
||||
|
||||
for (size_t i = 0; i < AbiArgs.size(); i++) {
|
||||
if (args[i]) {
|
||||
UseScratchRegister(args[i], {AbiArgs[i]});
|
||||
if (!args[i]->IsEmpty()) {
|
||||
UseScratchRegister(*args[i], {AbiArgs[i]});
|
||||
} else {
|
||||
ScratchRegister({AbiArgs[i]});
|
||||
}
|
||||
@@ -231,36 +255,36 @@ HostLoc RegAlloc::SelectARegister(std::initializer_list<HostLoc> desired_locatio
|
||||
return candidates.front();
|
||||
}
|
||||
|
||||
std::vector<HostLoc> RegAlloc::ValueLocations(IR::Value* value) const {
|
||||
// Collects every host location that currently records `value`.
// Returns: a vector of matching HostLoc entries; empty when value is not
//          recorded anywhere.
std::vector<HostLoc> RegAlloc::ValueLocations(IR::Inst* value) const {
|
||||
std::vector<HostLoc> locations;
|
||||
|
||||
// NOTE(review): two scans appear below, one over hostloc_to_value (key/value
// pairs) and one over hostloc_to_inst (indexed 0..HostLocCount-1). They look
// like the before/after forms of the same loop in this diff view — confirm
// which one the file actually contains.
for (const auto& iter : hostloc_to_value)
|
||||
if (iter.second == value)
|
||||
locations.emplace_back(iter.first);
|
||||
// Index-based scan: the array index is converted back to a HostLoc.
for (size_t i = 0; i < HostLocCount; i++)
|
||||
if (hostloc_to_inst[i] == value)
|
||||
locations.emplace_back(static_cast<HostLoc>(i));
|
||||
|
||||
return locations;
|
||||
}
|
||||
|
||||
// Reports whether a value/instruction is currently recorded for loc.
// Returns: true when the entry for loc exists and is non-null.
// NOTE(review): the two return statements below look like the before/after
// forms of one line in this diff view (map lookup on hostloc_to_value vs.
// size_t-indexed lookup on hostloc_to_inst) — confirm which one the file
// actually contains.
bool RegAlloc::IsRegisterOccupied(HostLoc loc) const {
|
||||
return hostloc_to_value.find(loc) != hostloc_to_value.end() && hostloc_to_value.at(loc) != nullptr;
|
||||
return hostloc_to_inst.at(static_cast<size_t>(loc)) != nullptr;
|
||||
}
|
||||
|
||||
// Reports whether loc is in any state other than HostLocState::Idle.
// Returns: true when the recorded state for loc is not Idle.
// NOTE(review): the two return statements below look like the before/after
// forms of one line in this diff view (map lookup vs. size_t-indexed lookup on
// hostloc_state) — confirm which one the file actually contains.
bool RegAlloc::IsRegisterAllocated(HostLoc loc) const {
|
||||
return hostloc_state.find(loc) != hostloc_state.end() && hostloc_state.at(loc) != HostLocState::Idle;
|
||||
return hostloc_state.at(static_cast<size_t>(loc)) != HostLocState::Idle;
|
||||
}
|
||||
|
||||
// Moves the contents of register loc into a free spill slot and transfers the
// bookkeeping entry for loc to that slot.
//
// Preconditions (asserted): loc is a register, its state is Idle, it is
// occupied, and it is not allocated.
// Effects: emits a 32-bit MOV from the register to the spill slot returned by
// FindFreeSpill(), records the register's value/instruction under the spill
// location, and clears the register's entry.
// NOTE(review): several statements below appear twice, once indexing by
// HostLoc / hostloc_to_value / hostloc_to_x64 and once by size_t /
// hostloc_to_inst / HostLocToX64. They look like before/after pairs in this
// diff view — confirm which form the file actually contains.
void RegAlloc::SpillRegister(HostLoc loc) {
|
||||
ASSERT_MSG(HostLocIsRegister(loc), "Only registers can be spilled");
|
||||
ASSERT_MSG(hostloc_state[loc] == HostLocState::Idle, "Allocated registers cannot be spilled");
|
||||
ASSERT_MSG(hostloc_state[static_cast<size_t>(loc)] == HostLocState::Idle, "Allocated registers cannot be spilled");
|
||||
ASSERT_MSG(IsRegisterOccupied(loc), "There is no need to spill unoccupied registers");
|
||||
ASSERT_MSG(!IsRegisterAllocated(loc), "Registers that have been allocated must not be spilt");
|
||||
|
||||
HostLoc new_loc = FindFreeSpill();
|
||||
|
||||
// Store the register's 32-bit contents into the chosen spill slot.
code->MOV(32, SpillToOpArg(new_loc), Gen::R(hostloc_to_x64.at(loc)));
|
||||
code->MOV(32, SpillToOpArg(new_loc), Gen::R(HostLocToX64(loc)));
|
||||
|
||||
// Hand the bookkeeping entry over to the spill location and free the register.
hostloc_to_value[new_loc] = hostloc_to_value[loc];
|
||||
hostloc_to_value[loc] = nullptr;
|
||||
hostloc_to_inst[static_cast<size_t>(new_loc)] = hostloc_to_inst[static_cast<size_t>(loc)];
|
||||
hostloc_to_inst[static_cast<size_t>(loc)] = nullptr;
|
||||
}
|
||||
|
||||
HostLoc RegAlloc::FindFreeSpill() const {
|
||||
@@ -272,27 +296,25 @@ HostLoc RegAlloc::FindFreeSpill() const {
|
||||
}
|
||||
|
||||
// Ends the current allocation scope: resets all host-location states and
// forgets entries whose value/instruction has no uses left.
// NOTE(review): the state reset and the cleanup loop each appear twice below
// (clear() vs. fill(Idle); hostloc_to_value with remaining_uses vs.
// hostloc_to_inst with use_count). They look like before/after pairs in this
// diff view — confirm which form the file actually contains.
void RegAlloc::EndOfAllocScope() {
|
||||
hostloc_state.clear();
|
||||
hostloc_state.fill(HostLocState::Idle);
|
||||
|
||||
for (auto& iter : hostloc_to_value)
|
||||
if (iter.second && remaining_uses[iter.second] == 0)
|
||||
iter.second = nullptr;
|
||||
// Drop any recorded instruction whose use_count has reached zero.
for (auto& iter : hostloc_to_inst)
|
||||
if (iter && iter->use_count == 0)
|
||||
iter = nullptr;
|
||||
}
|
||||
|
||||
void RegAlloc::DecrementRemainingUses(IR::Value* value) {
|
||||
ASSERT_MSG(remaining_uses.find(value) != remaining_uses.end(), "value does not exist");
|
||||
ASSERT_MSG(remaining_uses[value] > 0, "value doesn't have any remaining uses");
|
||||
remaining_uses[value]--;
|
||||
// Consumes one use of `value` by decrementing its use_count field in place.
// Precondition (asserted): value->use_count is greater than zero.
// NOTE(review): value is dereferenced without a null check; callers presumably
// always pass a valid instruction — confirm.
void RegAlloc::DecrementRemainingUses(IR::Inst* value) {
|
||||
ASSERT_MSG(value->use_count > 0, "value doesn't have any remaining uses");
|
||||
value->use_count--;
|
||||
}
|
||||
|
||||
// Asserts that no host location still records a value/instruction, i.e. every
// entry is null.
// NOTE(review): the two ASSERT lines below check the same property on
// hostloc_to_value (pairs) and hostloc_to_inst (elements). They look like the
// before/after forms of one line in this diff view — confirm which one the
// file actually contains.
void RegAlloc::AssertNoMoreUses() {
|
||||
ASSERT(std::all_of(hostloc_to_value.begin(), hostloc_to_value.end(), [](const auto& pair){ return !pair.second; }));
|
||||
ASSERT(std::all_of(hostloc_to_inst.begin(), hostloc_to_inst.end(), [](const auto& inst){ return !inst; }));
|
||||
}
|
||||
|
||||
// Returns the allocator's bookkeeping to its empty state: no host location
// records anything and every location state is Idle.
// NOTE(review): the three clear() calls and the two fill() calls below look
// like the before and after sides of this function in the diff view
// (container clear vs. fixed-size fill) — confirm which form the file actually
// contains.
void RegAlloc::Reset() {
|
||||
hostloc_to_value.clear();
|
||||
hostloc_state.clear();
|
||||
remaining_uses.clear();
|
||||
hostloc_to_inst.fill(nullptr);
|
||||
hostloc_state.fill(HostLocState::Idle);
|
||||
}
|
||||
|
||||
} // namespace BackendX64
|
||||
|
||||
Reference in New Issue
Block a user