mirror of
https://git.suyu.dev/suyu/dynarmic.git
synced 2026-03-11 06:16:28 +00:00
emit_x64_vector: Changes to VectorSignedSaturatedDoublingMultiply
* Return both the upper and lower parts of the multiply if required
* SSE2 does not support the pmuldq instruction, so apply sign correction to an unsigned result instead
* Improve port utilisation where possible (punpck instructions were a bottleneck)
This commit is contained in:
@@ -432,7 +432,7 @@ bool TranslatorVisitor::SQDMULH_vec_2(bool Q, Imm<2> size, Vec Vm, Vec Vn, Vec V
|
||||
|
||||
const IR::U128 operand1 = V(datasize, Vn);
|
||||
const IR::U128 operand2 = V(datasize, Vm);
|
||||
const IR::U128 result = ir.VectorSignedSaturatedDoublingMultiplyReturnHigh(esize, operand1, operand2);
|
||||
const IR::U128 result = ir.VectorSignedSaturatedDoublingMultiply(esize, operand1, operand2).upper;
|
||||
|
||||
V(datasize, Vd, result);
|
||||
return true;
|
||||
|
||||
@@ -233,7 +233,7 @@ bool TranslatorVisitor::SQDMULH_elt_2(bool Q, Imm<2> size, Imm<1> L, Imm<1> M, I
|
||||
const IR::U128 operand1 = V(datasize, Vn);
|
||||
const IR::U128 operand2 = V(idxsize, concatenate(Vmhi, Vmlo).ZeroExtend<Vec>());
|
||||
const IR::U128 index_vector = ir.VectorBroadcast(esize, ir.VectorGetElement(esize, operand2, index));
|
||||
const IR::U128 result = ir.VectorSignedSaturatedDoublingMultiplyReturnHigh(esize, operand1, index_vector);
|
||||
const IR::U128 result = ir.VectorSignedSaturatedDoublingMultiply(esize, operand1, index_vector).upper;
|
||||
|
||||
V(datasize, Vd, result);
|
||||
return true;
|
||||
|
||||
@@ -1575,15 +1575,23 @@ U128 IREmitter::VectorSignedSaturatedAccumulateUnsigned(size_t esize, const U128
|
||||
return {};
|
||||
}
|
||||
|
||||
U128 IREmitter::VectorSignedSaturatedDoublingMultiplyReturnHigh(size_t esize, const U128& a, const U128& b) {
|
||||
switch (esize) {
|
||||
case 16:
|
||||
return Inst<U128>(Opcode::VectorSignedSaturatedDoublingMultiplyReturnHigh16, a, b);
|
||||
case 32:
|
||||
return Inst<U128>(Opcode::VectorSignedSaturatedDoublingMultiplyReturnHigh32, a, b);
|
||||
}
|
||||
UNREACHABLE();
|
||||
return {};
|
||||
// Emits a VectorSignedSaturatedDoublingMultiply instruction on `a` and `b` and
// returns both halves of its result.
//
// esize selects the opcode: 16 -> VectorSignedSaturatedDoublingMultiply16,
// 32 -> VectorSignedSaturatedDoublingMultiply32. Any other value reaches
// UNREACHABLE().
//
// The returned pair is built from two further instructions, GetUpperFromOp and
// GetLowerFromOp, each taking the multiply instruction as its argument.
// NOTE(review): the brace-initialiser order assumes UpperAndLower is declared
// as {upper, lower} -- confirm against the struct definition.
UpperAndLower IREmitter::VectorSignedSaturatedDoublingMultiply(size_t esize, const U128& a, const U128& b) {
|
||||
// Immediately-invoked lambda so `multiply` can be const while still being
// chosen by the switch below.
const Value multiply = [&] {
|
||||
switch (esize) {
|
||||
case 16:
|
||||
return Inst(Opcode::VectorSignedSaturatedDoublingMultiply16, a, b);
|
||||
case 32:
|
||||
return Inst(Opcode::VectorSignedSaturatedDoublingMultiply32, a, b);
|
||||
default:
|
||||
UNREACHABLE();
|
||||
// Placeholder return following UNREACHABLE(); presumably never executed
// and present only so this path also yields a Value -- TODO confirm.
return Value{};
|
||||
}
|
||||
}();
|
||||
|
||||
return {
|
||||
Inst<U128>(Opcode::GetUpperFromOp, multiply),
|
||||
Inst<U128>(Opcode::GetLowerFromOp, multiply),
|
||||
};
|
||||
}
|
||||
|
||||
U128 IREmitter::VectorSignedSaturatedNarrowToSigned(size_t original_esize, const U128& a) {
|
||||
|
||||
@@ -273,7 +273,7 @@ public:
|
||||
UpperAndLower VectorSignedMultiply(size_t esize, const U128& a, const U128& b);
|
||||
U128 VectorSignedSaturatedAbs(size_t esize, const U128& a);
|
||||
U128 VectorSignedSaturatedAccumulateUnsigned(size_t esize, const U128& a, const U128& b);
|
||||
U128 VectorSignedSaturatedDoublingMultiplyReturnHigh(size_t esize, const U128& a, const U128& b);
|
||||
UpperAndLower VectorSignedSaturatedDoublingMultiply(size_t esize, const U128& a, const U128& b);
|
||||
U128 VectorSignedSaturatedNarrowToSigned(size_t original_esize, const U128& a);
|
||||
U128 VectorSignedSaturatedNarrowToUnsigned(size_t original_esize, const U128& a);
|
||||
U128 VectorSignedSaturatedNeg(size_t esize, const U128& a);
|
||||
|
||||
@@ -361,8 +361,6 @@ bool Inst::WritesToFPSRCumulativeSaturationBit() const {
|
||||
case Opcode::VectorSignedSaturatedNarrowToUnsigned16:
|
||||
case Opcode::VectorSignedSaturatedNarrowToUnsigned32:
|
||||
case Opcode::VectorSignedSaturatedNarrowToUnsigned64:
|
||||
case Opcode::VectorSignedSaturatedDoublingMultiplyReturnHigh16:
|
||||
case Opcode::VectorSignedSaturatedDoublingMultiplyReturnHigh32:
|
||||
case Opcode::VectorSignedSaturatedNeg8:
|
||||
case Opcode::VectorSignedSaturatedNeg16:
|
||||
case Opcode::VectorSignedSaturatedNeg32:
|
||||
|
||||
@@ -408,8 +408,8 @@ OPCODE(VectorSignedSaturatedAccumulateUnsigned8, U128, U128,
|
||||
OPCODE(VectorSignedSaturatedAccumulateUnsigned16, U128, U128, U128 )
|
||||
OPCODE(VectorSignedSaturatedAccumulateUnsigned32, U128, U128, U128 )
|
||||
OPCODE(VectorSignedSaturatedAccumulateUnsigned64, U128, U128, U128 )
|
||||
OPCODE(VectorSignedSaturatedDoublingMultiplyReturnHigh16, U128, U128, U128 )
|
||||
OPCODE(VectorSignedSaturatedDoublingMultiplyReturnHigh32, U128, U128, U128 )
|
||||
OPCODE(VectorSignedSaturatedDoublingMultiply16, Void, U128, U128 )
|
||||
OPCODE(VectorSignedSaturatedDoublingMultiply32, Void, U128, U128 )
|
||||
OPCODE(VectorSignedSaturatedNarrowToSigned16, U128, U128 )
|
||||
OPCODE(VectorSignedSaturatedNarrowToSigned32, U128, U128 )
|
||||
OPCODE(VectorSignedSaturatedNarrowToSigned64, U128, U128 )
|
||||
|
||||
Reference in New Issue
Block a user