A64: Implement ADD_shifted

2026-03-10 20:56:28 +00:00 · 2018-01-07 00:11:57 +00:00
parent d1eb757f93
commit d1cef6ffb0
21 changed files with 451 additions and 75 deletions
--- a/src/backend_x64/emit_x64.cpp
+++ b/src/backend_x64/emit_x64.cpp
@@ -212,7 +212,7 @@ void EmitX64<JST>::EmitIsZero64(EmitContext& ctx, IR::Inst* inst) {
 }

 template <typename JST>
-void EmitX64<JST>::EmitLogicalShiftLeft(EmitContext& ctx, IR::Inst* inst) {
+void EmitX64<JST>::EmitLogicalShiftLeft32(EmitContext& ctx, IR::Inst* inst) {
    auto carry_inst = inst->GetAssociatedPseudoOperation(IR::Opcode::GetCarryFromOp);

    auto args = ctx.reg_alloc.GetArgumentInfo(inst);
@@ -313,7 +313,41 @@ void EmitX64<JST>::EmitLogicalShiftLeft(EmitContext& ctx, IR::Inst* inst) {
 }

 template <typename JST>
-void EmitX64<JST>::EmitLogicalShiftRight(EmitContext& ctx, IR::Inst* inst) {
+void EmitX64<JST>::EmitLogicalShiftLeft64(EmitContext& ctx, IR::Inst* inst) {
+    auto args = ctx.reg_alloc.GetArgumentInfo(inst);
+    auto& operand_arg = args[0];
+    auto& shift_arg = args[1];
+
+    if (shift_arg.IsImmediate()) {
+        Xbyak::Reg64 result = ctx.reg_alloc.UseScratchGpr(operand_arg);
+        u8 shift = shift_arg.GetImmediateU8();
+
+        if (shift < 64) {
+            code->shl(result, shift);
+        } else {
+            code->xor_(result.cvt32(), result.cvt32());
+        }
+
+        ctx.reg_alloc.DefineValue(inst, result);
+    } else {
+        ctx.reg_alloc.Use(shift_arg, HostLoc::RCX);
+        Xbyak::Reg64 result = ctx.reg_alloc.UseScratchGpr(operand_arg);
+        Xbyak::Reg64 zero = ctx.reg_alloc.ScratchGpr();
+
+        // The x64 SHL instruction masks the shift count by 0x1F before performing the shift.
+        // ARM differs from the behaviour: It does not mask the count, so shifts above 31 result in zeros.
+
+        code->shl(result, code->cl);
+        code->xor_(zero.cvt32(), zero.cvt32());
+        code->cmp(code->cl, 64);
+        code->cmovnb(result, zero);
+
+        ctx.reg_alloc.DefineValue(inst, result);
+    }
+}
+
+template <typename JST>
+void EmitX64<JST>::EmitLogicalShiftRight32(EmitContext& ctx, IR::Inst* inst) {
    auto carry_inst = inst->GetAssociatedPseudoOperation(IR::Opcode::GetCarryFromOp);

    auto args = ctx.reg_alloc.GetArgumentInfo(inst);
@@ -418,19 +452,36 @@ void EmitX64<JST>::EmitLogicalShiftRight64(EmitContext& ctx, IR::Inst* inst) {
    auto& operand_arg = args[0];
    auto& shift_arg = args[1];

-    ASSERT_MSG(shift_arg.IsImmediate(), "variable 64 bit shifts are not implemented");
-    ASSERT_MSG(shift_arg.GetImmediateU8() < 64, "shift width clamping is not implemented");
+    if (shift_arg.IsImmediate()) {
+        Xbyak::Reg64 result = ctx.reg_alloc.UseScratchGpr(operand_arg);
+        u8 shift = shift_arg.GetImmediateU8();

-    Xbyak::Reg64 result = ctx.reg_alloc.UseScratchGpr(operand_arg);
-    u8 shift = shift_arg.GetImmediateU8();
+        if (shift < 64) {
+            code->shr(result, shift);
+        } else {
+            code->xor_(result.cvt32(), result.cvt32());
+        }

-    code->shr(result.cvt64(), shift);
+        ctx.reg_alloc.DefineValue(inst, result);
+    } else {
+        ctx.reg_alloc.Use(shift_arg, HostLoc::RCX);
+        Xbyak::Reg64 result = ctx.reg_alloc.UseScratchGpr(operand_arg);
+        Xbyak::Reg64 zero = ctx.reg_alloc.ScratchGpr();

-    ctx.reg_alloc.DefineValue(inst, result);
+        // The x64 SHR instruction masks the shift count by 0x1F before performing the shift.
+        // ARM differs from the behaviour: It does not mask the count, so shifts above 31 result in zeros.
+
+        code->shr(result, code->cl);
+        code->xor_(zero.cvt32(), zero.cvt32());
+        code->cmp(code->cl, 64);
+        code->cmovnb(result, zero);
+
+        ctx.reg_alloc.DefineValue(inst, result);
+    }
 }

 template <typename JST>
-void EmitX64<JST>::EmitArithmeticShiftRight(EmitContext& ctx, IR::Inst* inst) {
+void EmitX64<JST>::EmitArithmeticShiftRight32(EmitContext& ctx, IR::Inst* inst) {
    auto carry_inst = inst->GetAssociatedPseudoOperation(IR::Opcode::GetCarryFromOp);

    auto args = ctx.reg_alloc.GetArgumentInfo(inst);
@@ -519,7 +570,39 @@ void EmitX64<JST>::EmitArithmeticShiftRight(EmitContext& ctx, IR::Inst* inst) {
 }

 template <typename JST>
-void EmitX64<JST>::EmitRotateRight(EmitContext& ctx, IR::Inst* inst) {
+void EmitX64<JST>::EmitArithmeticShiftRight64(EmitContext& ctx, IR::Inst* inst) {
+    auto args = ctx.reg_alloc.GetArgumentInfo(inst);
+    auto& operand_arg = args[0];
+    auto& shift_arg = args[1];
+
+    if (shift_arg.IsImmediate()) {
+        u8 shift = shift_arg.GetImmediateU8();
+        Xbyak::Reg64 result = ctx.reg_alloc.UseScratchGpr(operand_arg);
+
+        code->sar(result, u8(shift < 63 ? shift : 63));
+
+        ctx.reg_alloc.DefineValue(inst, result);
+    } else {
+        ctx.reg_alloc.UseScratch(shift_arg, HostLoc::RCX);
+        Xbyak::Reg64 result = ctx.reg_alloc.UseScratchGpr(operand_arg);
+        Xbyak::Reg64 const63 = ctx.reg_alloc.ScratchGpr();
+
+        // The 64-bit x64 SAR instruction masks the shift count by 0x3F before performing the shift.
+        // ARM differs from the behaviour: It does not mask the count.
+
+        // We note that all shift values above 63 have the same behaviour as 63 does, so we saturate `shift` to 63.
+        code->mov(const63, 63);
+        code->movzx(code->ecx, code->cl);
+        code->cmp(code->ecx, u32(63));
+        code->cmovg(code->ecx, const63);
+        code->sar(result, code->cl);
+
+        ctx.reg_alloc.DefineValue(inst, result);
+    }
+}
+
+template <typename JST>
+void EmitX64<JST>::EmitRotateRight32(EmitContext& ctx, IR::Inst* inst) {
    auto carry_inst = inst->GetAssociatedPseudoOperation(IR::Opcode::GetCarryFromOp);

    auto args = ctx.reg_alloc.GetArgumentInfo(inst);
@@ -598,6 +681,30 @@ void EmitX64<JST>::EmitRotateRight(EmitContext& ctx, IR::Inst* inst) {
    }
 }

+template <typename JST>
+void EmitX64<JST>::EmitRotateRight64(EmitContext& ctx, IR::Inst* inst) {
+    auto args = ctx.reg_alloc.GetArgumentInfo(inst);
+    auto& operand_arg = args[0];
+    auto& shift_arg = args[1];
+
+    if (shift_arg.IsImmediate()) {
+        u8 shift = shift_arg.GetImmediateU8();
+        Xbyak::Reg64 result = ctx.reg_alloc.UseScratchGpr(operand_arg);
+
+        code->ror(result, u8(shift & 0x3F));
+
+        ctx.reg_alloc.DefineValue(inst, result);
+    } else {
+        ctx.reg_alloc.Use(shift_arg, HostLoc::RCX);
+        Xbyak::Reg64 result = ctx.reg_alloc.UseScratchGpr(operand_arg);
+
+        // x64 ROR instruction does (shift & 0x3F) for us.
+        code->ror(result, code->cl);
+
+        ctx.reg_alloc.DefineValue(inst, result);
+    }
+}
+
 template <typename JST>
 void EmitX64<JST>::EmitRotateRightExtended(EmitContext& ctx, IR::Inst* inst) {
    auto carry_inst = inst->GetAssociatedPseudoOperation(IR::Opcode::GetCarryFromOp);