summaryrefslogtreecommitdiff
path: root/graphics/waifu2x-converter-cpp/files/patch-no-sse3
diff options
context:
space:
mode:
Diffstat (limited to 'graphics/waifu2x-converter-cpp/files/patch-no-sse3')
-rw-r--r--graphics/waifu2x-converter-cpp/files/patch-no-sse370
1 files changed, 70 insertions, 0 deletions
diff --git a/graphics/waifu2x-converter-cpp/files/patch-no-sse3 b/graphics/waifu2x-converter-cpp/files/patch-no-sse3
new file mode 100644
index 000000000000..480665615779
--- /dev/null
+++ b/graphics/waifu2x-converter-cpp/files/patch-no-sse3
@@ -0,0 +1,70 @@
+https://github.com/tanakamura/waifu2x-converter-cpp/pull/5
+
+diff --git src/Env.cpp src/Env.cpp
+index 76c1859..9ad0a3c 100644
+--- src/Env.cpp
++++ src/Env.cpp
+@@ -28,6 +28,10 @@ ComputeEnv::ComputeEnv()
+ #endif
+ this->flags = 0;
+
++ if (ecx & (1<<0)) {
++ this->flags |= ComputeEnv::HAVE_CPU_SSE3;
++ }
++
+ if ((ecx & 0x18000000) == 0x18000000) {
+ this->flags |= ComputeEnv::HAVE_CPU_AVX;
+ }
+diff --git src/Env.hpp src/Env.hpp
+index 8087431..0c2e22a 100644
+--- src/Env.hpp
++++ src/Env.hpp
+@@ -19,6 +19,7 @@ struct ComputeEnv {
+
+ static const int HAVE_CPU_FMA = 1<<0;
+ static const int HAVE_CPU_AVX = 1<<1;
++ static const int HAVE_CPU_SSE3 = 1<<2;
+
+ int flags;
+
+diff --git src/modelHandler.cpp src/modelHandler.cpp
+index 2f72442..1e74ba0 100644
+--- src/modelHandler.cpp
++++ src/modelHandler.cpp
+@@ -100,6 +100,7 @@ bool Model::filter_AVX_OpenCL(ComputeEnv *env,
+
+ bool have_fma = env->flags & ComputeEnv::HAVE_CPU_FMA;
+ bool have_avx = env->flags & ComputeEnv::HAVE_CPU_AVX;
++ bool have_sse3 = env->flags & ComputeEnv::HAVE_CPU_SSE3;
+
+ bool gpu = (rt == RUN_OPENCL) || (rt == RUN_CUDA);
+
+@@ -317,10 +318,12 @@ bool Model::filter_AVX_OpenCL(ComputeEnv *env,
+ filter_AVX_impl(env, packed_input, packed_output,
+ nInputPlanes, nOutputPlanes, fbiases_flat, weight_flat,
+ size.width, size.height, nJob);
+- } else {
++ } else if (have_sse3) {
+ filter_SSE_impl(env, packed_input, packed_output,
+ nInputPlanes, nOutputPlanes, fbiases_flat, weight_flat,
+ size.width, size.height, nJob);
++ } else {
++ filter_CV(env, packed_input_buf, packed_output_buf, size);
+ }
+ }
+
+@@ -379,11 +382,12 @@ bool Model::filter_AVX_OpenCL(ComputeEnv *env,
+ const float *packed_input = (float*)packed_input_buf->get_read_ptr_host(env, in_size);
+ float *packed_output = (float*)packed_output_buf->get_write_ptr_host(env);
+
+- if (!have_avx) {
++ if (!have_sse3) {
++ filter_CV(env, packed_input_buf, packed_output_buf, size);
++ } else if (!have_avx) {
+ filter_SSE_impl(env, packed_input, packed_output,
+ nInputPlanes, nOutputPlanes, fbiases_flat, weight_flat,
+ size.width, size.height, nJob);
+- //filter_CV(env, packed_input_buf, packed_output_buf, size);
+ } else {
+ if (have_fma) {
+ filter_FMA_impl(env, packed_input, packed_output,