summaryrefslogtreecommitdiff
path: root/multimedia/x265/files/patch-arm-assembly
blob: e481269c43f0ae6caa58c08638d6711dbafe43f8 (plain) (blame)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
blockcopy8.S: spell the logical shift right of r0 by 16 as a standalone
lsr instruction with a #-prefixed immediate, instead of a mov with an
lsr-shifted operand and a bare shift count.  The operation performed on r0
is the same.
NOTE(review): presumably needed because the assembler used for this port
rejects the bare shift count; confirm against the build log.

--- source/common/arm/blockcopy8.S	2024-09-30 08:38:43.172350000 +0200
+++ source/common/arm/blockcopy8.S	2024-09-30 09:13:08.635457000 +0200
@@ -833,5 +833,5 @@
     vmov.u32        r0, d0[0]
     uasx            r0, r0, r0
-    mov             r0, r0, lsr 16
+    lsr             r0, r0, #16
     rsb             r0, #1024
     bx              lr
asm.S: add the MYADRLSUB macro and make movrel use it, instead of an ldr
literal-pool load, on the path where HAVE_ARMV6T2 && !defined(PIC) is false.

MYADRLSUB reg, label loads the address of label into reg with two sub
instructions relative to pc.  The first subtracts bits 8-15 of the distance
between the macro and the label; the second subtracts what is left plus the
pc read-ahead.  This assumes ARM state, where pc reads as the address of the
instruction plus 8.
NOTE(review): the distance must be an assembly-time constant, so the label
is expected to live in the same section as the code and to precede the use;
confirm for every movrel call site.

Both immediates are derived from the address of the first instruction: the
second line uses (. - 4) and a compensating constant of 8.  That keeps the
high and low parts consistent when the distance crosses a multiple of 256
between the two instructions; taking the low part from (.) of the second
instruction instead yields label + 0x100 whenever the distance seen by the
first instruction ends in 0xfc.
A distance of 64 KiB or more leaves bits above bit 15 in the second
immediate, which the assembler is expected to reject as unencodable rather
than truncate silently.

--- source/common/arm/asm.S	2024-04-04 11:39:50.000000000 +0200
+++ source/common/arm/asm.S	2024-09-30 19:45:33.295896000 +0200
@@ -85,4 +85,9 @@
 .endm
 
+.macro MYADRLSUB reg:req, label:req
+sub \reg, pc, #((. - \label) & 0xff00)
+sub \reg, \reg, #((. - 4 - \label) - ((. - 4 - \label) & 0xff00)) + 8
+.endm
+
 .macro movrel rd, val
 #if HAVE_ARMV6T2 && !defined(PIC)
@@ -90,5 +95,5 @@
         movt            \rd, #:upper16:\val
 #else
-        ldr             \rd, =\val
+        MYADRLSUB       \rd, \val
 #endif
 .endm
ipfilter8.S: switch to .text right after the include, where the file used
to open .section .rodata, and drop the .text directive that used to follow
the .word rows, so the data between those two points is now emitted into
.text together with the code after it.
NOTE(review): presumably required because movrel, as changed in the asm.S
part of this patch, computes addresses relative to pc and therefore needs
its label in the same section as the code; confirm.

--- source/common/arm/ipfilter8.S	2024-04-04 11:39:50.000000000 +0200
+++ source/common/arm/ipfilter8.S	2024-09-30 19:48:31.490019000 +0200
@@ -26,5 +26,5 @@
 #include "asm.S"
 
-.section .rodata
+.text
 .align 4
 
@@ -43,7 +43,4 @@
 .word -2, -2, 16, 16, 54, 54, -4 ,-4
 .word -2, -2, 10, 10, 58, 58, -2, -2
-
-
-.text
 
 // filterPixelToShort(const pixel* src, intptr_t srcStride, int16_t* dst, intptr_t dstStride)
sad-a.S: emit the sad12_mask byte table into .text instead of .rodata and
drop the .text directive that used to follow it, so the table and the code
after it share one section.
NOTE(review): presumably for the same reason as the other section moves in
this patch, namely that the patched movrel addresses labels relative to pc;
confirm.

--- source/common/arm/sad-a.S	2024-04-04 11:39:50.000000000 +0200
+++ source/common/arm/sad-a.S	2024-09-30 19:49:06.534263000 +0200
@@ -26,11 +26,9 @@
 #include "asm.S"
 
-.section .rodata
+.text
 
 .align 4
 sad12_mask:
 .byte 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 0, 0, 0, 0
-
-.text
 
 /* sad4x4(pixel* dst, intptr_t dstStride, const pixel* src, intptr_t srcStride)
checkasm-arm.S: switch to .text where the file used to open .section
.rodata, ahead of register_init, and drop the .text directive that used to
follow the error_message string, so everything from register_init through
error_message is now emitted into .text along with the code after it.
NOTE(review): presumably for the same reason as the other section moves in
this patch, namely that the patched movrel addresses labels relative to pc;
confirm.

--- source/test/checkasm-arm.S	2024-04-04 11:39:50.000000000 +0200
+++ source/test/checkasm-arm.S	2024-10-01 02:51:29.815273000 +0200
@@ -27,5 +27,5 @@
 #include "../common/arm/asm.S"
 
-.section .rodata
+.text
 .align 4
 register_init:
@@ -41,6 +41,4 @@
 error_message:
 .asciz "failed to preserve register"
-
-.text
 
 @ max number of args used by any x265 asm function.
cpu.cpp: declare and call x265_cpu_neon_test and x265_cpu_fast_neon_mrc_test
by their literal names instead of building the names with the PFX() macro.
NOTE(review): presumably PFX() can expand to a prefix other than x265_ in
some build configurations while the assembly symbols keep the plain prefix;
the definition of PFX is not visible here, so confirm.

--- source/common/cpu.cpp	2024-04-04 11:39:50.000000000 +0200
+++ source/common/cpu.cpp	2024-10-01 02:56:32.094316000 +0200
@@ -339,6 +339,6 @@
 
 extern "C" {
-void PFX(cpu_neon_test)(void);
-int PFX(cpu_fast_neon_mrc_test)(void);
+void x265_cpu_neon_test(void);
+int x265_cpu_fast_neon_mrc_test(void);
 }
 
@@ -361,5 +361,5 @@
 
     canjump = 1;
-    PFX(cpu_neon_test)();
+    x265_cpu_neon_test();
     canjump = 0;
     signal(SIGILL, oldsig);
@@ -377,5 +377,5 @@
     // right now Apple does not seem to support performance counters for this test
 #ifndef __MACH__
-    flags |= PFX(cpu_fast_neon_mrc_test)() ? X265_CPU_FAST_NEON_MRC : 0;
+    flags |= x265_cpu_fast_neon_mrc_test() ? X265_CPU_FAST_NEON_MRC : 0;
 #endif
     // TODO: write dual issue test? currently it's A8 (dual issue) vs. A9 (fast mrc)
testharness.h: in the X265_ARCH_ARM branch, return the gettimeofday() time
in microseconds (tv_sec * 1000000 + tv_usec) and comment out the former
a = clock() assignment.  Because the new return comes first, whatever
follows it in the enclosing function is no longer reached on ARM.
NOTE(review): this patch adds no include, so the declarations of
gettimeofday and struct timeval are assumed to be in scope already.  The
value is computed as int64_t, but the return type of the enclosing function
is not visible in this hunk; confirm that it is not truncated.

--- source/test/testharness.h	2024-04-04 11:39:50.000000000 +0200
+++ source/test/testharness.h	2024-10-01 03:05:54.786008000 +0200
@@ -83,9 +83,12 @@
     asm volatile("rdtsc" : "=a" (a) ::"edx");
 #elif X265_ARCH_ARM
+    struct timeval tv;
+    gettimeofday(&tv, nullptr);
+    return static_cast<int64_t>(tv.tv_sec) * 1000000 + tv.tv_usec;
     // TOD-DO: verify following inline asm to get cpu Timestamp Counter for ARM arch
     // asm volatile("mrc p15, 0, %0, c9, c13, 0" : "=r"(a));
 
     // TO-DO: replace clock() function with appropriate ARM cpu instructions
-    a = clock();
+    // a = clock();
 #elif  X265_ARCH_ARM64
     asm volatile("mrs %0, cntvct_el0" : "=r"(a));