diff options
Diffstat (limited to 'astro/boinc-setiathome-v7/files/patch-client_vector_analyzeFuncs__sse.cpp')
-rw-r--r-- | astro/boinc-setiathome-v7/files/patch-client_vector_analyzeFuncs__sse.cpp | 35 |
1 files changed, 35 insertions, 0 deletions
diff --git a/astro/boinc-setiathome-v7/files/patch-client_vector_analyzeFuncs__sse.cpp b/astro/boinc-setiathome-v7/files/patch-client_vector_analyzeFuncs__sse.cpp new file mode 100644 index 000000000000..65ac60317069 --- /dev/null +++ b/astro/boinc-setiathome-v7/files/patch-client_vector_analyzeFuncs__sse.cpp @@ -0,0 +1,35 @@ +--- client/vector/analyzeFuncs_sse.cpp.orig 2013-06-26 16:56:49 UTC ++++ client/vector/analyzeFuncs_sse.cpp +@@ -191,7 +191,7 @@ inline void v_vsubTranspose4(float *in, + prefetcht0(out+3*yline); + // TODO: figure out why the intrinsic version crashes for MinGW build + // not critical, but shuffle-only _MM_TRANSPOSE4_PS is optimal on some +-#if defined(USE_INTRINSICS) && defined(_MM_TRANSPOSE4_PS) && !defined(__GNUC__) ++#if defined(USE_INTRINSICS) && (defined(_MM_TRANSPOSE4_PS) && !defined(__GNUC__) || defined(__clang__)) + register float4 row0=*(__m128 *)in; + register float4 row1=*(__m128 *)(in+xline); + register float4 row2=*(__m128 *)(in+2*xline); +@@ -273,7 +273,7 @@ inline void v_vsubTranspose4np(float *in + + // TODO: figure out why the intrinsic version crashes for MinGW build + // not critical, but the shuffle-only _MM_TRANSPOSE4_PS is optimal on some +-#if defined(USE_INTRINSICS) && defined(_MM_TRANSPOSE4_PS) && !defined(__GNUC__) ++#if defined(USE_INTRINSICS) && (defined(_MM_TRANSPOSE4_PS) && !defined(__GNUC__) || defined(__clang__)) + register float4 row0=*(__m128 *)in; + register float4 row1=*(__m128 *)(in+xline); + register float4 row2=*(__m128 *)(in+2*xline); +@@ -1592,8 +1592,14 @@ inline float s_maxp2f( __m128 max1 ) { + return ( tMax ); + } + ++#ifdef __clang__ ++#define s_getU( aaaa, ptr ) \ ++ aaaa = _mm_loadl_pi(aaaa, (__m64 *)ptr); \ ++ aaaa = _mm_loadh_pi(aaaa, ((__m64 *)(ptr))+1 ) ++#else + #define s_getU( aaaa, ptr ) \ + aaaa = _mm_loadh_pi( _mm_loadl_pi(aaaa, (__m64 *)ptr), ((__m64 *)(ptr))+1 ) ++#endif + + #define s_putU( ptr, aaaa ) \ + _mm_storel_pi((__m64 *)ptr, aaaa), _mm_storeh_pi( ((__m64 *)ptr)+1 , aaaa) |