diff options
Diffstat (limited to 'graphics/waifu2x-converter-cpp/files/patch-no-sse3')
-rw-r--r-- | graphics/waifu2x-converter-cpp/files/patch-no-sse3 | 70 |
1 files changed, 70 insertions, 0 deletions
diff --git a/graphics/waifu2x-converter-cpp/files/patch-no-sse3 b/graphics/waifu2x-converter-cpp/files/patch-no-sse3 new file mode 100644 index 000000000000..480665615779 --- /dev/null +++ b/graphics/waifu2x-converter-cpp/files/patch-no-sse3 @@ -0,0 +1,70 @@ +https://github.com/tanakamura/waifu2x-converter-cpp/pull/5 + +diff --git src/Env.cpp src/Env.cpp +index 76c1859..9ad0a3c 100644 +--- src/Env.cpp ++++ src/Env.cpp +@@ -28,6 +28,10 @@ ComputeEnv::ComputeEnv() + #endif + this->flags = 0; + ++ if (ecx & (1<<0)) { ++ this->flags |= ComputeEnv::HAVE_CPU_SSE3; ++ } ++ + if ((ecx & 0x18000000) == 0x18000000) { + this->flags |= ComputeEnv::HAVE_CPU_AVX; + } +diff --git src/Env.hpp src/Env.hpp +index 8087431..0c2e22a 100644 +--- src/Env.hpp ++++ src/Env.hpp +@@ -19,6 +19,7 @@ struct ComputeEnv { + + static const int HAVE_CPU_FMA = 1<<0; + static const int HAVE_CPU_AVX = 1<<1; ++ static const int HAVE_CPU_SSE3 = 1<<2; + + int flags; + +diff --git src/modelHandler.cpp src/modelHandler.cpp +index 2f72442..1e74ba0 100644 +--- src/modelHandler.cpp ++++ src/modelHandler.cpp +@@ -100,6 +100,7 @@ bool Model::filter_AVX_OpenCL(ComputeEnv *env, + + bool have_fma = env->flags & ComputeEnv::HAVE_CPU_FMA; + bool have_avx = env->flags & ComputeEnv::HAVE_CPU_AVX; ++ bool have_sse3 = env->flags & ComputeEnv::HAVE_CPU_SSE3; + + bool gpu = (rt == RUN_OPENCL) || (rt == RUN_CUDA); + +@@ -317,10 +318,12 @@ bool Model::filter_AVX_OpenCL(ComputeEnv *env, + filter_AVX_impl(env, packed_input, packed_output, + nInputPlanes, nOutputPlanes, fbiases_flat, weight_flat, + size.width, size.height, nJob); +- } else { ++ } else if (have_sse3) { + filter_SSE_impl(env, packed_input, packed_output, + nInputPlanes, nOutputPlanes, fbiases_flat, weight_flat, + size.width, size.height, nJob); ++ } else { ++ filter_CV(env, packed_input_buf, packed_output_buf, size); + } + } + +@@ -379,11 +382,12 @@ bool Model::filter_AVX_OpenCL(ComputeEnv *env, + const float *packed_input = (float*)packed_input_buf->get_read_ptr_host(env, in_size); + float *packed_output = (float*)packed_output_buf->get_write_ptr_host(env); + +- if (!have_avx) { ++ if (!have_sse3) { ++ filter_CV(env, packed_input_buf, packed_output_buf, size); ++ } else if (!have_avx) { + filter_SSE_impl(env, packed_input, packed_output, + nInputPlanes, nOutputPlanes, fbiases_flat, weight_flat, + size.width, size.height, nJob); +- //filter_CV(env, packed_input_buf, packed_output_buf, size); + } else { + if (have_fma) { + filter_FMA_impl(env, packed_input, packed_output, |