Merge pull request #478 from lioncash/stepfused

A64: Handle half-precision variants of FRECPE and FRECPS
This commit is contained in:
Merry
2019-04-14 12:40:18 +01:00
committed by MerryMage
14 changed files with 169 additions and 75 deletions

View File

@@ -88,7 +88,7 @@ struct FPInfo<u64> {
template<typename FPT, bool sign, int exponent, FPT value>
constexpr FPT FPValue() {
if constexpr (value == 0) {
return FPInfo<FPT>::Zero(sign);
return FPT(FPInfo<FPT>::Zero(sign));
}
constexpr int point_position = static_cast<int>(FPInfo<FPT>::explicit_mantissa_width);
@@ -100,7 +100,7 @@ constexpr FPT FPValue() {
constexpr FPT mantissa = (value << offset) & FPInfo<FPT>::mantissa_mask;
constexpr FPT biased_exponent = static_cast<FPT>(normalized_exponent + FPInfo<FPT>::exponent_bias);
return FPInfo<FPT>::Zero(sign) | mantissa | (biased_exponent << FPInfo<FPT>::explicit_mantissa_width);
return FPT(FPInfo<FPT>::Zero(sign) | mantissa | (biased_exponent << FPInfo<FPT>::explicit_mantissa_width));
}
} // namespace Dynarmic::FP

View File

@@ -31,12 +31,12 @@ FPT FPRecipEstimate(FPT op, FPCR fpcr, FPSR& fpsr) {
}
if (type == FPType::Infinity) {
return FPInfo<FPT>::Zero(sign);
return FPT(FPInfo<FPT>::Zero(sign));
}
if (type == FPType::Zero) {
FPProcessException(FPExc::DivideByZero, fpcr, fpsr);
return FPInfo<FPT>::Infinity(sign);
return FPT(FPInfo<FPT>::Infinity(sign));
}
if (value.exponent < FPInfo<FPT>::exponent_min - 2) {
@@ -58,13 +58,13 @@ FPT FPRecipEstimate(FPT op, FPCR fpcr, FPSR& fpsr) {
FPProcessException(FPExc::Overflow, fpcr, fpsr);
FPProcessException(FPExc::Inexact, fpcr, fpsr);
return overflow_to_inf ? FPInfo<FPT>::Infinity(sign) : FPInfo<FPT>::MaxNormal(sign);
return overflow_to_inf ? FPT(FPInfo<FPT>::Infinity(sign)) : FPT(FPInfo<FPT>::MaxNormal(sign));
}
if ((fpcr.FZ() && !std::is_same_v<FPT, u16>) || (fpcr.FZ16() && std::is_same_v<FPT, u16>)) {
if (value.exponent >= -FPInfo<FPT>::exponent_min) {
fpsr.UFC(true);
return FPInfo<FPT>::Zero(sign);
return FPT(FPInfo<FPT>::Zero(sign));
}
}
@@ -87,12 +87,13 @@ FPT FPRecipEstimate(FPT op, FPCR fpcr, FPSR& fpsr) {
}
}
const FPT bits_sign = FPInfo<FPT>::Zero(sign);
const FPT bits_sign = FPT(FPInfo<FPT>::Zero(sign));
const FPT bits_exponent = static_cast<FPT>(result_exponent + FPInfo<FPT>::exponent_bias);
const FPT bits_mantissa = static_cast<FPT>(estimate);
return (bits_exponent << FPInfo<FPT>::explicit_mantissa_width) | (bits_mantissa & FPInfo<FPT>::mantissa_mask) | bits_sign;
return FPT((bits_exponent << FPInfo<FPT>::explicit_mantissa_width) | (bits_mantissa & FPInfo<FPT>::mantissa_mask) | bits_sign);
}
template u16 FPRecipEstimate<u16>(u16 op, FPCR fpcr, FPSR& fpsr);
template u32 FPRecipEstimate<u32>(u32 op, FPCR fpcr, FPSR& fpsr);
template u64 FPRecipEstimate<u64>(u64 op, FPCR fpcr, FPSR& fpsr);

View File

@@ -21,7 +21,7 @@ FPT FPRecipStepFused(FPT op1, FPT op2, FPCR fpcr, FPSR& fpsr) {
const auto [type1, sign1, value1] = FPUnpack<FPT>(op1, fpcr, fpsr);
const auto [type2, sign2, value2] = FPUnpack<FPT>(op2, fpcr, fpsr);
if (const auto maybe_nan = FPProcessNaNs(type1, type2, op1, op2, fpcr, fpsr)) {
return *maybe_nan;
}
@@ -37,18 +37,19 @@ FPT FPRecipStepFused(FPT op1, FPT op2, FPCR fpcr, FPSR& fpsr) {
}
if (inf1 || inf2) {
return FPInfo<FPT>::Infinity(sign1 != sign2);
return FPT(FPInfo<FPT>::Infinity(sign1 != sign2));
}
// result_value = 2.0 + (value1 * value2)
FPUnpacked result_value = FusedMulAdd(ToNormalized(false, 0, 2), value1, value2);
const FPUnpacked result_value = FusedMulAdd(ToNormalized(false, 0, 2), value1, value2);
if (result_value.mantissa == 0) {
return FPInfo<FPT>::Zero(fpcr.RMode() == RoundingMode::TowardsMinusInfinity);
return FPT(FPInfo<FPT>::Zero(fpcr.RMode() == RoundingMode::TowardsMinusInfinity));
}
return FPRound<FPT>(result_value, fpcr, fpsr);
}
template u16 FPRecipStepFused<u16>(u16 op1, u16 op2, FPCR fpcr, FPSR& fpsr);
template u32 FPRecipStepFused<u32>(u32 op1, u32 op2, FPCR fpcr, FPSR& fpsr);
template u64 FPRecipStepFused<u64>(u64 op1, u64 op2, FPCR fpcr, FPSR& fpsr);