Ticket #17635: fflas-ffpack-simd32bits.patch

File fflas-ffpack-simd32bits.patch, 2.1 KB (added by cpernet, 4 years ago)

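Patch for the 32-bit AVX bug: the emulated 64-bit SIMD `mulhi` function, which relies on 128-bit integers, is only compiled when `__X86_64__` is defined, and a compile-time warning is emitted otherwise.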

  • fflas-ffpack/fflas/fflas_simd/simd128_int64.inl

    diff --git a/fflas-ffpack/fflas/fflas_simd/simd128_int64.inl b/fflas-ffpack/fflas/fflas_simd/simd128_int64.inl
    index d154c3b..a519389 100644
    a b template <> struct Simd128_impl<true, true, true, 8> { 
    218218     */
    219219    static INLINE CONST vect_t mul(const vect_t a, const vect_t b) { return mullo(a, b); }
    220220
     221#ifdef __X86_64__
    221222    static INLINE CONST vect_t mulhi(const vect_t a, const vect_t b) {
    222223// #pragma warning "The simd mulhi function is emulate, it may impact the performances."
    223 #ifdef __X86_64__
    224224        Converter c0, c1;
    225225        c0.v = a;
    226226        c1.v = b;
    227227        return set((scalar_t)((int128_t(c0.t[0]) * c1.t[0]) >> 64), (scalar_t)((int128_t(c0.t[1]) * c1.t[1]) >> 64));
     228    }
    228229#else
    229         return zero();
     230        #warning "The SIMD mulhi function is disabled on 32 bit architectures"
    230231#endif
    231     }
    232232
    233233    static INLINE CONST vect_t fmadd(const vect_t c, const vect_t a, const vect_t b) { return add(c, mul(a, b)); }
    234234
  • fflas-ffpack/fflas/fflas_simd/simd256_int64.inl

    diff --git a/fflas-ffpack/fflas/fflas_simd/simd256_int64.inl b/fflas-ffpack/fflas/fflas_simd/simd256_int64.inl
    index 6f5b829..1d3d8ee 100644
    a b template <> struct Simd256_impl<true, true, true, 8> { 
    237237     [b0, b1, b2, b3]                                                                    int64_t
    238238     * Return :
    239239     */
     240#ifdef __X86_64__
    240241    static INLINE CONST vect_t mulhi(vect_t a, vect_t b) {
    241242        // ugly solution, but it works.
    242243        // tested with gcc, clang, icc
    template <> struct Simd256_impl<true, true, true, 8> { 
    246247        return set((int128_t(ca.t[0]) * cb.t[0]) >> 64, (int128_t(ca.t[1]) * cb.t[1]) >> 64,
    247248                   (int128_t(ca.t[2]) * cb.t[2]) >> 64, (int128_t(ca.t[3]) * cb.t[3]) >> 64);
    248249    }
     250#else
     251        #warning "The SIMD mulhi function is disabled on 32 bit architectures"
     252#endif
     253
    249254
    250255    /*
    251256     * Multiply packed 64-bit integers in a and b, producing intermediate 128-bit integers, and add the low 64-bits of