mirror of
https://git.suyu.dev/suyu/dynarmic.git
synced 2026-03-24 08:42:57 +00:00
VFP: Implement VADD.{F32,F64}
This commit is contained in:
@@ -119,6 +119,7 @@ void EmitX64::EmitGetRegister(IR::Block&, IR::Inst* inst) {
|
||||
// Emits a 32-bit load of a guest extended register into a freshly defined XMM
// register that becomes the value of `inst`.
//
// inst: argument 0 carries the extended-register reference; it must name one
//       of the single-precision registers S0..S31.
void EmitX64::EmitGetExtendedRegister32(IR::Block& block, IR::Inst* inst) {
    const Arm::ExtReg reg = inst->GetArg(0).GetExtRegRef();
    // Reject anything outside the S-register range.
    ASSERT(reg >= Arm::ExtReg::S0 && reg <= Arm::ExtReg::S31);

    const X64Reg dest_xmm = reg_alloc.DefRegister(inst, any_xmm);
    // Source operand comes from MJitStateExtReg(reg) -- presumably the
    // register's slot inside JitState; confirm against that helper.
    code->MOVSS(dest_xmm, MJitStateExtReg(reg));
}
|
||||
@@ -1005,6 +1006,108 @@ void EmitX64::EmitByteReverseDual(IR::Block&, IR::Inst* inst) {
|
||||
code->BSWAP(64, result);
|
||||
}
|
||||
|
||||
// Emits code that treats a single-precision denormal input as zero and records
// that fact in JitState::FPSCR_IDC.
//
// SSE does not report input denormals when its own DAZ mode is enabled, so the
// check is done by hand here.
//
// code:        emitter receiving the instructions.
// xmm_value:   XMM register holding the float to inspect; zeroed if denormal.
// gpr_scratch: general-purpose register clobbered by the check.
static void DenormalsAreZero32(XEmitter* code, X64Reg xmm_value, X64Reg gpr_scratch) {
    // Pull the raw bits into the scratch GPR and drop the sign bit.
    code->MOVD_xmm(R(gpr_scratch), xmm_value);
    code->AND(32, R(gpr_scratch), Imm32(0x7FFFFFFF));

    // (abs_bits - 1) <= 0x007FFFFE (unsigned) holds exactly when abs_bits is in
    // [1, 0x007FFFFF]: non-zero with an all-zero exponent field. Zero wraps to
    // 0xFFFFFFFF and is therefore excluded.
    code->SUB(32, R(gpr_scratch), Imm32(1));
    code->CMP(32, R(gpr_scratch), Imm32(0x007FFFFE));
    const auto not_denormal = code->J_CC(CC_A);

    // Denormal path: clear the whole register and store bit 7 into FPSCR_IDC
    // (addressed relative to R15).
    code->PXOR(xmm_value, R(xmm_value));
    code->MOV(32, MDisp(R15, offsetof(JitState, FPSCR_IDC)), Imm32(1 << 7));

    code->SetJumpTarget(not_denormal);
}
|
||||
|
||||
// Double-precision counterpart of the input-denormal check: emits code that
// zeroes `xmm_value` and stores bit 7 into JitState::FPSCR_IDC when the value
// passes the range test below.
//
// code:        emitter receiving the instructions.
// routines:    supplies the 64-bit mask and comparison operands.
// xmm_value:   XMM register holding the double to inspect; zeroed on a hit.
// gpr_scratch: general-purpose register clobbered by the check.
static void DenormalsAreZero64(XEmitter* code, Routines* routines, X64Reg xmm_value, X64Reg gpr_scratch) {
    // Raw bits -> scratch GPR, then mask with MFloatNonSignMask64()
    // (presumably all bits except the sign -- defined outside this view).
    code->MOVQ_xmm(R(gpr_scratch), xmm_value);
    code->AND(64, R(gpr_scratch), routines->MFloatNonSignMask64());

    // Subtract one and compare (unsigned) against
    // MFloatPenultimatePositiveDenormal64(); values above it skip the fix-up.
    code->SUB(64, R(gpr_scratch), Imm32(1));
    code->CMP(64, R(gpr_scratch), routines->MFloatPenultimatePositiveDenormal64());
    const auto not_denormal = code->J_CC(CC_A);

    // Fix-up: clear the register and record the input-denormal flag
    // (addressed relative to R15).
    code->PXOR(xmm_value, R(xmm_value));
    code->MOV(32, MDisp(R15, offsetof(JitState, FPSCR_IDC)), Imm32(1 << 7));

    code->SetJumpTarget(not_denormal);
}
|
||||
|
||||
// Emits code that replaces a single-precision denormal in `xmm_value` with zero
// and stores bit 3 into JitState::FPSCR_UFC when it does so.
//
// code:        emitter receiving the instructions.
// xmm_value:   XMM register holding the float to inspect; zeroed if denormal.
// gpr_scratch: general-purpose register clobbered by the check.
static void FlushToZero32(XEmitter* code, X64Reg xmm_value, X64Reg gpr_scratch) {
    // Raw bits with the sign bit removed.
    code->MOVD_xmm(R(gpr_scratch), xmm_value);
    code->AND(32, R(gpr_scratch), Imm32(0x7FFFFFFF));

    // After subtracting one, anything above 0x007FFFFE (unsigned) is either
    // zero (wrapped to 0xFFFFFFFF) or has a non-zero exponent field; only
    // abs_bits in [1, 0x007FFFFF] falls through to the flush.
    code->SUB(32, R(gpr_scratch), Imm32(1));
    code->CMP(32, R(gpr_scratch), Imm32(0x007FFFFE));
    const auto no_flush = code->J_CC(CC_A);

    // Flush: clear the register and record the underflow flag
    // (addressed relative to R15).
    code->PXOR(xmm_value, R(xmm_value));
    code->MOV(32, MDisp(R15, offsetof(JitState, FPSCR_UFC)), Imm32(1 << 3));

    code->SetJumpTarget(no_flush);
}
|
||||
|
||||
// Double-precision counterpart of the flush-to-zero step: emits code that
// zeroes `xmm_value` and stores bit 3 into JitState::FPSCR_UFC when the value
// passes the range test below.
//
// code:        emitter receiving the instructions.
// routines:    supplies the 64-bit mask and comparison operands.
// xmm_value:   XMM register holding the double to inspect; zeroed on a hit.
// gpr_scratch: general-purpose register clobbered by the check.
static void FlushToZero64(XEmitter* code, Routines* routines, X64Reg xmm_value, X64Reg gpr_scratch) {
    // Raw bits -> scratch GPR, masked with MFloatNonSignMask64()
    // (presumably everything but the sign bit -- defined outside this view).
    code->MOVQ_xmm(R(gpr_scratch), xmm_value);
    code->AND(64, R(gpr_scratch), routines->MFloatNonSignMask64());

    // Subtract one and compare (unsigned) against
    // MFloatPenultimatePositiveDenormal64(); values above it are left alone.
    code->SUB(64, R(gpr_scratch), Imm32(1));
    code->CMP(64, R(gpr_scratch), routines->MFloatPenultimatePositiveDenormal64());
    const auto no_flush = code->J_CC(CC_A);

    // Flush: clear the register and record the underflow flag
    // (addressed relative to R15).
    code->PXOR(xmm_value, R(xmm_value));
    code->MOV(32, MDisp(R15, offsetof(JitState, FPSCR_UFC)), Imm32(1 << 3));

    code->SetJumpTarget(no_flush);
}
|
||||
|
||||
// Emits code that overwrites `xmm_value` with the operand returned by
// routines->MFloatNaN32() whenever the current value is a NaN.
//
// code:      emitter receiving the instructions.
// routines:  supplies the replacement NaN operand.
// xmm_value: XMM register holding the single-precision value to canonicalise.
static void DefaultNaN32(XEmitter* code, Routines* routines, X64Reg xmm_value) {
    // Comparing a value with itself is unordered only for NaN, which sets the
    // parity flag; JNP therefore skips the replacement for ordinary numbers.
    code->UCOMISS(xmm_value, R(xmm_value));
    const auto is_number = code->J_CC(CC_NP);

    // NOTE(review): MOVAPS is a full 128-bit aligned load; confirm the constant
    // behind MFloatNaN32() is 16-byte aligned and safe to read at that width.
    code->MOVAPS(xmm_value, routines->MFloatNaN32());

    code->SetJumpTarget(is_number);
}
|
||||
|
||||
// Emits code that overwrites `xmm_value` with the operand returned by
// routines->MFloatNaN64() whenever the current value is a NaN.
//
// code:      emitter receiving the instructions.
// routines:  supplies the replacement NaN operand.
// xmm_value: XMM register holding the double-precision value to canonicalise.
static void DefaultNaN64(XEmitter* code, Routines* routines, X64Reg xmm_value) {
    // A self-comparison is unordered only for NaN, which sets the parity flag;
    // JNP therefore skips the replacement for ordinary numbers.
    code->UCOMISD(xmm_value, R(xmm_value));
    const auto is_number = code->J_CC(CC_NP);

    // NOTE(review): MOVAPS is a full 128-bit aligned load; confirm the constant
    // behind MFloatNaN64() is 16-byte aligned and safe to read at that width.
    code->MOVAPS(xmm_value, routines->MFloatNaN64());

    code->SetJumpTarget(is_number);
}
|
||||
|
||||
// Emits a single-precision floating-point addition for `inst`.
//
// block: its location supplies the FPSCR_FTZ() and FPSCR_DN() mode queries
//        that decide which fix-up sequences are emitted around the add.
// inst:  arguments 0 and 1 are the two addends; the result is defined in the
//        register that initially holds argument 0.
void EmitX64::EmitFPAdd32(IR::Block& block, IR::Inst* inst) {
    IR::Value lhs = inst->GetArg(0);
    IR::Value rhs = inst->GetArg(1);

    const X64Reg sum = reg_alloc.UseDefRegister(lhs, inst, any_xmm);
    const X64Reg addend = reg_alloc.UseRegister(rhs, any_xmm);
    const X64Reg scratch = reg_alloc.ScratchRegister(any_gpr);

    // With flush-to-zero selected, denormal inputs are zeroed (and flagged)
    // before the addition takes place.
    // NOTE(review): the fix-up may overwrite `addend`, which was obtained with
    // UseRegister rather than as a scratch -- confirm the allocator allows a
    // used-only register to be modified.
    if (block.location.FPSCR_FTZ()) {
        DenormalsAreZero32(code, sum, scratch);
        DenormalsAreZero32(code, addend, scratch);
    }

    code->ADDSS(sum, R(addend));

    // A denormal result is likewise flushed (and flagged) afterwards.
    if (block.location.FPSCR_FTZ()) {
        FlushToZero32(code, sum, scratch);
    }

    // In default-NaN mode any NaN result is replaced by the default NaN operand.
    if (block.location.FPSCR_DN()) {
        DefaultNaN32(code, routines, sum);
    }
}
|
||||
|
||||
// Emits a double-precision floating-point addition for `inst`.
//
// block: its location supplies the FPSCR_FTZ() and FPSCR_DN() mode queries
//        that decide which fix-up sequences are emitted around the add.
// inst:  arguments 0 and 1 are the two addends; the result is defined in the
//        register that initially holds argument 0.
void EmitX64::EmitFPAdd64(IR::Block& block, IR::Inst* inst) {
    IR::Value lhs = inst->GetArg(0);
    IR::Value rhs = inst->GetArg(1);

    const X64Reg sum = reg_alloc.UseDefRegister(lhs, inst, any_xmm);
    const X64Reg addend = reg_alloc.UseRegister(rhs, any_xmm);
    const X64Reg scratch = reg_alloc.ScratchRegister(any_gpr);

    // With flush-to-zero selected, denormal inputs are zeroed (and flagged)
    // before the addition takes place.
    // NOTE(review): the fix-up may overwrite `addend`, which was obtained with
    // UseRegister rather than as a scratch -- confirm the allocator allows a
    // used-only register to be modified.
    if (block.location.FPSCR_FTZ()) {
        DenormalsAreZero64(code, routines, sum, scratch);
        DenormalsAreZero64(code, routines, addend, scratch);
    }

    code->ADDSD(sum, R(addend));

    // A denormal result is likewise flushed (and flagged) afterwards.
    if (block.location.FPSCR_FTZ()) {
        FlushToZero64(code, routines, sum, scratch);
    }

    // In default-NaN mode any NaN result is replaced by the default NaN operand.
    if (block.location.FPSCR_DN()) {
        DefaultNaN64(code, routines, sum);
    }
}
|
||||
|
||||
void EmitX64::EmitReadMemory8(IR::Block&, IR::Inst* inst) {
|
||||
reg_alloc.HostCall(inst, inst->GetArg(0));
|
||||
|
||||
|
||||
@@ -22,12 +22,12 @@ namespace BackendX64 {
|
||||
* UE bit 4 Underflow Flag
|
||||
* OE bit 3 Overflow Flag
|
||||
* ZE bit 2 Divide By Zero Flag
|
||||
* DE bit 1 Denormal Flag
|
||||
* DE bit 1 Denormal Flag // Appears to only be set when MXCSR.DAZ = 0
|
||||
* IE bit 0 Invalid Operation Flag
|
||||
*
|
||||
* VFP FPSCR cumulative exception bits
|
||||
* -----------------------------------
|
||||
* IDC bit 7 Input Denormal cumulative exception bit
|
||||
* IDC bit 7 Input Denormal cumulative exception bit // Only ever set when FPSCR.FTZ = 1
|
||||
* IXC bit 4 Inexact cumulative exception bit
|
||||
* UFC bit 3 Underflow cumulative exception bit
|
||||
* OFC bit 2 Overflow cumulative exception bit
|
||||
@@ -72,38 +72,44 @@ constexpr u32 FPSCR_MASK = 0b1111'00'111111'0'111'10011111'00000000;
|
||||
|
||||
// Reassembles the guest-visible FPSCR word from the pieces held in JitState:
// guest_FPSCR_flags, the exception-flag bits of guest_MXCSR, old_FPSCR and the
// separately tracked FPSCR_IDC / FPSCR_UFC words.
//
// Returns the combined 32-bit value; no member is modified.
//
// NOTE(review): this listing appears to come from a diff view without +/-
// markers, so statements from before and after the change may both be present
// below (e.g. IDC is derived from MXCSR.DE and also OR-ed in from FPSCR_IDC).
// Confirm the intended sequence against the repository.
u32 JitState::Fpscr() const {
|
||||
// Invariants: the stored flags stay inside FPSCR_MASK, and the two side words
// may only ever carry their own bit (bit 7 for IDC, bit 3 for UFC).
ASSERT((guest_FPSCR_flags & ~FPSCR_MASK) == 0);
|
||||
ASSERT((FPSCR_IDC & ~(1 << 7)) == 0);
|
||||
ASSERT((FPSCR_UFC & ~(1 << 3)) == 0);
|
||||
|
||||
u32 FPSCR = guest_FPSCR_flags;
|
||||
// MXCSR bit 0 maps straight onto FPSCR bit 0.
FPSCR |= (guest_MXCSR & 0b0000000000001); // IOC = IE
|
||||
// MXCSR bit 1 is moved up to FPSCR bit 7.
FPSCR |= (guest_MXCSR & 0b0000000000010) << 6; // IDC = DE
|
||||
// MXCSR bits 2..5 are moved down to FPSCR bits 1..4.
FPSCR |= (guest_MXCSR & 0b0000000111100) >> 1; // IXC, UFC, OFC, DZC = PE, UE, OE, ZE
|
||||
|
||||
// When bit 24 is clear, bit 7 is discarded and restored from the value last
// saved in old_FPSCR.
if (!Common::Bit<24>(FPSCR)) {
|
||||
// ARM only sets IDC if FTZ == 1.
|
||||
FPSCR &= ~(1 << 7);
|
||||
FPSCR |= old_FPSCR & (1 << 7);
|
||||
}
|
||||
// Merge the flags recorded in the side words (written by emitted code via
// offsetof(JitState, FPSCR_IDC / FPSCR_UFC)).
FPSCR |= FPSCR_IDC;
|
||||
FPSCR |= FPSCR_UFC;
|
||||
|
||||
return FPSCR;
|
||||
}
|
||||
|
||||
// Splits a guest FPSCR value into the fields JitState keeps: old_FPSCR,
// guest_FPSCR_flags, a freshly rebuilt guest_MXCSR, and the FPSCR_IDC /
// FPSCR_UFC side words.
//
// FPSCR: the full 32-bit guest FPSCR value to install.
//
// NOTE(review): this listing appears to come from a diff view without +/-
// markers, so statements from before and after the change may both be present
// below (e.g. an active and a commented-out `(1 << 15)` line sit side by side).
// Confirm the intended sequence against the repository.
void JitState::SetFpscr(u32 FPSCR) {
|
||||
// Keep the unmodified value for later reads.
old_FPSCR = FPSCR;
|
||||
|
||||
// Only the bits selected by FPSCR_MASK are stored as flags.
guest_FPSCR_flags = FPSCR & FPSCR_MASK;
|
||||
|
||||
// MXCSR is rebuilt from scratch on every call.
guest_MXCSR = 0;
|
||||
|
||||
// Exception masks / enables
|
||||
// Sets MXCSR bits 7..12 unconditionally.
guest_MXCSR |= 0b1111110000000; // mask all
|
||||
//guest_MXCSR |= (~FPSCR >> 1) & 0b0000010000000; // IM = ~IOE
|
||||
//guest_MXCSR |= (~FPSCR >> 7) & 0b0000100000000; // DM = ~IDE
|
||||
//guest_MXCSR |= (~FPSCR ) & 0b1111000000000; // PM, UM, OM, ZM = ~IXE, ~UFE, ~OFE, ~DZE
|
||||
|
||||
// RMode
|
||||
// FPSCR bits 23:22 index the table of MXCSR rounding-control encodings.
const std::array<u32, 4> MXCSR_RMode {0x0, 0x4000, 0x2000, 0x6000};
|
||||
guest_MXCSR |= MXCSR_RMode[(FPSCR >> 22) & 0x3];
|
||||
|
||||
// Cumulative flags IOC, IXC, UFC, OFC, DZC
|
||||
guest_MXCSR |= ( FPSCR ) & 0b0000000000001; // IE = IOC
|
||||
// FPSCR bit 7 is moved down to MXCSR bit 1.
guest_MXCSR |= ( FPSCR >> 6) & 0b0000000000010; // DE = IDC
|
||||
// FPSCR bits 1..4 are moved up to MXCSR bits 2..5.
guest_MXCSR |= ( FPSCR << 1) & 0b0000000111100; // PE, UE, OE, ZE = IXC, UFC, OFC, DZC
|
||||
// The next three statements can only set bits 7..12, all of which were
// already set by the "mask all" statement above, so they add nothing here.
guest_MXCSR |= (~FPSCR >> 1) & 0b0000010000000; // IM = ~IOE
|
||||
guest_MXCSR |= (~FPSCR >> 7) & 0b0000100000000; // DM = ~IDE
|
||||
guest_MXCSR |= (~FPSCR ) & 0b1111000000000; // PM, UM, OM, ZM = ~IXE, ~UFE, ~OFE, ~DZE
|
||||
|
||||
// Cumulative flag IDC, UFC
|
||||
// Each side word keeps only its own bit of the incoming value.
FPSCR_IDC = FPSCR & (1 << 7);
|
||||
FPSCR_UFC = FPSCR & (1 << 3);
|
||||
|
||||
// Bit 24 of the incoming value selects the extra MXCSR bits below.
if (Common::Bit<24>(FPSCR)) {
|
||||
// VFP Flush to Zero
|
||||
guest_MXCSR |= (1 << 15); // SSE Flush to Zero
|
||||
//guest_MXCSR |= (1 << 15); // SSE Flush to Zero
|
||||
guest_MXCSR |= (1 << 6); // SSE Denormals are Zero
|
||||
}
|
||||
}
|
||||
|
||||
@@ -30,6 +30,8 @@ struct JitState {
|
||||
u64 save_host_RSP = 0;
|
||||
s64 cycles_remaining = 0;
|
||||
|
||||
u32 FPSCR_IDC = 0;
|
||||
u32 FPSCR_UFC = 0;
|
||||
u32 guest_FPSCR_flags = 0;
|
||||
u32 old_FPSCR = 0;
|
||||
u32 Fpscr() const;
|
||||
|
||||
Reference in New Issue
Block a user