Implement CLZ

Includes tests
This commit is contained in:
MerryMage
2016-12-04 22:56:33 +00:00
parent 1a1646d962
commit 5c1aab1666
8 changed files with 64 additions and 2 deletions

View File

@@ -23,6 +23,7 @@ set(SRCS
frontend/translate/translate_arm/exception_generating.cpp
frontend/translate/translate_arm/extension.cpp
frontend/translate/translate_arm/load_store.cpp
frontend/translate/translate_arm/misc.cpp
frontend/translate/translate_arm/multiply.cpp
frontend/translate/translate_arm/packing.cpp
frontend/translate/translate_arm/parallel.cpp

View File

@@ -1540,6 +1540,27 @@ void EmitX64::EmitPackedSaturatedSubS16(IR::Block& block, IR::Inst* inst) {
EmitPackedOperation(code, reg_alloc, inst, &Xbyak::CodeGenerator::psubsw);
}
// Emits host code for the CountLeadingZeros IR instruction.
// Uses a single LZCNT when the host CPU reports support for it; otherwise
// falls back to a BSR-based sequence that produces the same value.
void EmitX64::EmitCountLeadingZeros(IR::Block& block, IR::Inst* inst) {
    IR::Value operand = inst->GetArg(0);

    if (!cpu_info.has(Xbyak::util::Cpu::tLZCNT)) {
        // The input register is overwritten below, so it is requested as scratch.
        Xbyak::Reg32 input = reg_alloc.UseScratchGpr(operand).cvt32();
        Xbyak::Reg32 output = reg_alloc.DefGpr(inst).cvt32();

        // bsr leaves its destination undefined for a zero input, but it does set zf.
        code->bsr(output, input);
        // Reuse the input register to hold -1; cmovz picks it only when the input was zero.
        code->mov(input, 0xFFFFFFFF);
        code->cmovz(output, input);
        // output = 31 - output: the bit index becomes a leading-zero count,
        // and the -1 substituted for a zero input becomes 32.
        code->neg(output);
        code->add(output, 31);
        return;
    }

    Xbyak::Reg32 input = reg_alloc.UseGpr(operand).cvt32();
    Xbyak::Reg32 output = reg_alloc.DefGpr(inst).cvt32();

    code->lzcnt(output, input);
}
static void DenormalsAreZero32(BlockOfCode* code, Xbyak::Xmm xmm_value, Xbyak::Reg32 gpr_scratch) {
using namespace Xbyak::util;
Xbyak::Label end;

View File

@@ -386,6 +386,10 @@ Value IREmitter::PackedSaturatedSubS16(const Value& a, const Value& b) {
return Inst(Opcode::PackedSaturatedSubS16, {a, b});
}
// Builds a CountLeadingZeros instruction through Inst() with `a` as its sole
// argument and returns the resulting Value.
Value IREmitter::CountLeadingZeros(const Value& a) {
    Value leading_zero_count = Inst(Opcode::CountLeadingZeros, {a});
    return leading_zero_count;
}
// Builds a TransferToFP32 instruction through Inst() with `a` as its sole
// argument and returns the resulting Value.
Value IREmitter::TransferToFP32(const Value& a) {
    Value transferred = Inst(Opcode::TransferToFP32, {a});
    return transferred;
}

View File

@@ -142,6 +142,7 @@ public:
Value PackedSaturatedAddS16(const Value& a, const Value& b);
Value PackedSaturatedSubU16(const Value& a, const Value& b);
Value PackedSaturatedSubS16(const Value& a, const Value& b);
// Emits the CountLeadingZeros opcode with `a` as its single operand.
Value CountLeadingZeros(const Value& a);
Value TransferToFP32(const Value& a);
Value TransferToFP64(const Value& a);

View File

@@ -87,6 +87,7 @@ OPCODE(PackedSaturatedAddU16, T::U32, T::U32, T::U32
OPCODE(PackedSaturatedAddS16, T::U32, T::U32, T::U32 )
OPCODE(PackedSaturatedSubU16, T::U32, T::U32, T::U32 )
OPCODE(PackedSaturatedSubS16, T::U32, T::U32, T::U32 )
// Count leading zeros; single operand.
// NOTE(review): presumably the first type is the result and the rest are operand types — confirm against the OPCODE macro definition.
OPCODE(CountLeadingZeros, T::U32, T::U32 )
// Floating-point operations
OPCODE(TransferToFP32, T::F32, T::U32 )

View File

@@ -0,0 +1,22 @@
/* This file is part of the dynarmic project.
* Copyright (c) 2016 MerryMage
* This software may be used and distributed according to the terms of the GNU
* General Public License version 2 or any later version.
*/
#include "translate_arm.h"
namespace Dynarmic {
namespace Arm {
// CLZ<c> <Rd>, <Rm>
// Writes the leading-zero count of register m into register d when the
// condition passes. Using PC for either register is treated as unpredictable.
bool ArmTranslatorVisitor::arm_CLZ(Cond cond, Reg d, Reg m) {
    const bool uses_pc = (d == Reg::PC) || (m == Reg::PC);
    if (uses_pc) {
        return UnpredictableInstruction();
    }

    if (!ConditionPassed(cond)) {
        return true;
    }

    const auto operand = ir.GetRegister(m);
    const auto leading_zeros = ir.CountLeadingZeros(operand);
    ir.SetRegister(d, leading_zeros);
    return true;
}
} // namespace Arm
} // namespace Dynarmic

View File

@@ -209,7 +209,7 @@ struct ArmTranslatorVisitor final {
bool arm_STM_usr();
// Miscellaneous instructions
bool arm_CLZ(Cond cond, Reg d, Reg m) { return InterpretThisInstruction(); }
bool arm_CLZ(Cond cond, Reg d, Reg m);
// NOP: nothing to translate; unconditionally returns true.
bool arm_NOP() { return true; }
bool arm_SEL(Cond cond, Reg n, Reg d, Reg m);