1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
|
--- filter/yuvdenoise/motion.c.orig 2016-09-18 16:54:49 UTC
+++ filter/yuvdenoise/motion.c
@@ -350,32 +350,32 @@ calc_SAD_half_mmx (uint8_t * ref, uint8_
__asm__ __volatile__
(
- " pxor %%mm0 , %%mm0; /* clear mm0 */"
- " pcmpeqw %%mm6 , %%mm6; /* Build 7f7f7f7f7f7f7f in a register */"
- " psrlw $9 , %%mm6; /* */"
- " packuswb %%mm6 , %%mm6; /* */"
- " ; /* */"
- " .rept 8 ; /* */"
- " movq (%%esi), %%mm1; /* 8 Pixels from filtered frame to mm1 */"
- " movq (%%edi), %%mm2; /* 8 Pixels from filtered frame to mm2 (displaced) */"
- " movq (%%eax), %%mm3; /* reference to mm3 */"
- " psrlq $1 , %%mm1; /* average source pixels */"
- " psrlq $1 , %%mm2; /* shift right by one (divide by two) */"
- " pand %%mm6 , %%mm1; /* kill downshifted bits */"
- " pand %%mm6 , %%mm2; /* kill downshifted bits */"
- " paddusw %%mm2 , %%mm1; /* add up ... */"
-
- " movq %%mm3 , %%mm4; /* copy reference to mm4 */"
- " psubusb %%mm1 , %%mm3; /* positive differences between mm2 and mm1 */"
- " psubusb %%mm4 , %%mm1; /* positive differences between mm1 and mm3 */"
- " paddusb %%mm3 , %%mm1; /* mm1 now contains abs(mm1-mm2) */"
- " paddusw %%mm1 , %%mm0; /* add result to mm0 */"
- " addl %%ecx , %%esi; /* add framewidth to frameaddress */"
- " addl %%ecx , %%edi; /* add framewidth to frameaddress */"
- " addl %%ecx , %%ecx; /* add framewidth to frameaddress */"
- " .endr ; /* */"
- " /* */"
- " movq %%mm0 , %0 ; /* make mm0 available to gcc ... */"
+ " pxor %%mm0 , %%mm0; /* clear mm0 */\n"
+ " pcmpeqw %%mm6 , %%mm6; /* Build 7f7f7f7f7f7f7f in a register */\n"
+ " psrlw $9 , %%mm6; /* */\n"
+ " packuswb %%mm6 , %%mm6; /* */\n"
+ " ; /* */\n"
+ " .rept 8 ; /* */\n"
+ " movq (%%esi), %%mm1; /* 8 Pixels from filtered frame to mm1 */\n"
+ " movq (%%edi), %%mm2; /* 8 Pixels from filtered frame to mm2 (displaced) */\n"
+ " movq (%%eax), %%mm3; /* reference to mm3 */\n"
+ " psrlq $1 , %%mm1; /* average source pixels */\n"
+ " psrlq $1 , %%mm2; /* shift right by one (divide by two) */\n"
+ " pand %%mm6 , %%mm1; /* kill downshifted bits */\n"
+ " pand %%mm6 , %%mm2; /* kill downshifted bits */\n"
+ " paddusw %%mm2 , %%mm1; /* add up ... */\n"
+ " /* */\n"
+ " movq %%mm3 , %%mm4; /* copy reference to mm4 */\n"
+ " psubusb %%mm1 , %%mm3; /* positive differences between mm2 and mm1 */\n"
+ " psubusb %%mm4 , %%mm1; /* positive differences between mm1 and mm3 */\n"
+ " paddusb %%mm3 , %%mm1; /* mm1 now contains abs(mm1-mm2) */\n"
+ " paddusw %%mm1 , %%mm0; /* add result to mm0 */\n"
+ " addl %%ecx , %%esi; /* add framewidth to frameaddress */\n"
+ " addl %%ecx , %%edi; /* add framewidth to frameaddress */\n"
+ " addl %%ecx , %%ecx; /* add framewidth to frameaddress */\n"
+ " .endr ; /* */\n"
+ " /* */\n"
+ " movq %%mm0 , %0 ; /* make mm0 available to gcc ... */\n"
:"=g" (a)
:"S" (frm1),"D" (frm2), "a" (ref), "c" (denoiser.frame.w)
);
|