emit_x64_vector: Changes to VectorSignedSaturatedDoublingMultiply

* Return both the upper and lower parts of the multiply if required.
* SSE2 does not support the pmuldq instruction, so apply a sign correction to an unsigned result instead.
* Improve port utilisation where possible (punpck instructions were a bottleneck).
2026-03-11 06:16:28 +00:00 · 2018-09-15 09:04:19 +01:00
parent 08c0e017a5
commit 06b31448aa
8 changed files with 233 additions and 70 deletions
--- a/src/frontend/A64/translate/impl/simd_three_same.cpp
+++ b/src/frontend/A64/translate/impl/simd_three_same.cpp
@@ -432,7 +432,7 @@ bool TranslatorVisitor::SQDMULH_vec_2(bool Q, Imm<2> size, Vec Vm, Vec Vn, Vec V

    const IR::U128 operand1 = V(datasize, Vn);
    const IR::U128 operand2 = V(datasize, Vm);
-    const IR::U128 result = ir.VectorSignedSaturatedDoublingMultiplyReturnHigh(esize, operand1, operand2);
+    const IR::U128 result = ir.VectorSignedSaturatedDoublingMultiply(esize, operand1, operand2).upper;

    V(datasize, Vd, result);
    return true;
--- a/src/frontend/A64/translate/impl/simd_vector_x_indexed_element.cpp
+++ b/src/frontend/A64/translate/impl/simd_vector_x_indexed_element.cpp
@@ -233,7 +233,7 @@ bool TranslatorVisitor::SQDMULH_elt_2(bool Q, Imm<2> size, Imm<1> L, Imm<1> M, I
    const IR::U128 operand1 = V(datasize, Vn);
    const IR::U128 operand2 = V(idxsize, concatenate(Vmhi, Vmlo).ZeroExtend<Vec>());
    const IR::U128 index_vector = ir.VectorBroadcast(esize, ir.VectorGetElement(esize, operand2, index));
-    const IR::U128 result = ir.VectorSignedSaturatedDoublingMultiplyReturnHigh(esize, operand1, index_vector);
+    const IR::U128 result = ir.VectorSignedSaturatedDoublingMultiply(esize, operand1, index_vector).upper;

    V(datasize, Vd, result);
    return true;
--- a/src/frontend/ir/ir_emitter.cpp
+++ b/src/frontend/ir/ir_emitter.cpp
@@ -1575,15 +1575,23 @@ U128 IREmitter::VectorSignedSaturatedAccumulateUnsigned(size_t esize, const U128
    return {};
 }

-U128 IREmitter::VectorSignedSaturatedDoublingMultiplyReturnHigh(size_t esize, const U128& a, const U128& b) {
-    switch (esize) {
-    case 16:
-        return Inst<U128>(Opcode::VectorSignedSaturatedDoublingMultiplyReturnHigh16, a, b);
-    case 32:
-        return Inst<U128>(Opcode::VectorSignedSaturatedDoublingMultiplyReturnHigh32, a, b);
-    }
-    UNREACHABLE();
-    return {};
+UpperAndLower IREmitter::VectorSignedSaturatedDoublingMultiply(size_t esize, const U128& a, const U128& b) {
+    const Value multiply = [&] {
+        switch (esize) {
+        case 16:
+            return Inst(Opcode::VectorSignedSaturatedDoublingMultiply16, a, b);
+        case 32:
+            return Inst(Opcode::VectorSignedSaturatedDoublingMultiply32, a, b);
+        default:
+            UNREACHABLE();
+            return Value{};
+        }
+    }();
+
+    return {
+        Inst<U128>(Opcode::GetUpperFromOp, multiply),
+        Inst<U128>(Opcode::GetLowerFromOp, multiply),
+    };
 }

 U128 IREmitter::VectorSignedSaturatedNarrowToSigned(size_t original_esize, const U128& a) {
--- a/src/frontend/ir/ir_emitter.h
+++ b/src/frontend/ir/ir_emitter.h
@@ -273,7 +273,7 @@ public:
    UpperAndLower VectorSignedMultiply(size_t esize, const U128& a, const U128& b);
    U128 VectorSignedSaturatedAbs(size_t esize, const U128& a);
    U128 VectorSignedSaturatedAccumulateUnsigned(size_t esize, const U128& a, const U128& b);
-    U128 VectorSignedSaturatedDoublingMultiplyReturnHigh(size_t esize, const U128& a, const U128& b);
+    UpperAndLower VectorSignedSaturatedDoublingMultiply(size_t esize, const U128& a, const U128& b);
    U128 VectorSignedSaturatedNarrowToSigned(size_t original_esize, const U128& a);
    U128 VectorSignedSaturatedNarrowToUnsigned(size_t original_esize, const U128& a);
    U128 VectorSignedSaturatedNeg(size_t esize, const U128& a);
--- a/src/frontend/ir/microinstruction.cpp
+++ b/src/frontend/ir/microinstruction.cpp
@@ -361,8 +361,6 @@ bool Inst::WritesToFPSRCumulativeSaturationBit() const {
    case Opcode::VectorSignedSaturatedNarrowToUnsigned16:
    case Opcode::VectorSignedSaturatedNarrowToUnsigned32:
    case Opcode::VectorSignedSaturatedNarrowToUnsigned64:
-    case Opcode::VectorSignedSaturatedDoublingMultiplyReturnHigh16:
-    case Opcode::VectorSignedSaturatedDoublingMultiplyReturnHigh32:
    case Opcode::VectorSignedSaturatedNeg8:
    case Opcode::VectorSignedSaturatedNeg16:
    case Opcode::VectorSignedSaturatedNeg32:
--- a/src/frontend/ir/opcodes.inc
+++ b/src/frontend/ir/opcodes.inc
@@ -408,8 +408,8 @@ OPCODE(VectorSignedSaturatedAccumulateUnsigned8,           U128,           U128,
 OPCODE(VectorSignedSaturatedAccumulateUnsigned16,          U128,           U128,           U128                                            )
 OPCODE(VectorSignedSaturatedAccumulateUnsigned32,          U128,           U128,           U128                                            )
 OPCODE(VectorSignedSaturatedAccumulateUnsigned64,          U128,           U128,           U128                                            )
-OPCODE(VectorSignedSaturatedDoublingMultiplyReturnHigh16,  U128,           U128,           U128                                            )
-OPCODE(VectorSignedSaturatedDoublingMultiplyReturnHigh32,  U128,           U128,           U128                                            )
+OPCODE(VectorSignedSaturatedDoublingMultiply16,            Void,           U128,           U128                                            )
+OPCODE(VectorSignedSaturatedDoublingMultiply32,            Void,           U128,           U128                                            )
 OPCODE(VectorSignedSaturatedNarrowToSigned16,              U128,           U128                                                            )
 OPCODE(VectorSignedSaturatedNarrowToSigned32,              U128,           U128                                                            )
 OPCODE(VectorSignedSaturatedNarrowToSigned64,              U128,           U128                                                            )