1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
|
--- source/common/arm/blockcopy8.S 2024-09-30 08:38:43.172350000 +0200
+++ source/common/arm/blockcopy8.S 2024-09-30 09:13:08.635457000 +0200
@@ -833,5 +833,5 @@
vmov.u32 r0, d0[0]
uasx r0, r0, r0
- mov r0, r0, lsr 16
+ lsr r0, r0, #16
rsb r0, #1024
bx lr
--- source/common/arm/asm.S 2024-04-04 11:39:50.000000000 +0200
+++ source/common/arm/asm.S 2024-09-30 19:45:33.295896000 +0200
@@ -85,4 +85,9 @@
.endm
+.macro MYADRLSUB reg:req, label:req /* reg = address of label, built PC-relative in two subs; label must be defined earlier in the same section and less than 64 KiB back; assumes ARM state, where pc reads as . + 8 */
+sub \reg, pc, #((. + 8 - \label) & 0xff00) /* subtract bits 15:8 of the distance D = (value pc reads here) - label */
+sub \reg, \reg, #((. + 4 - \label) & 0x00ff) /* subtract bits 7:0 of the same D (. + 4 here equals . + 8 of the previous instruction), so no carry can be lost between the two halves */
+.endm
+
.macro movrel rd, val
#if HAVE_ARMV6T2 && !defined(PIC)
@@ -90,5 +95,5 @@
movt \rd, #:upper16:\val
#else
- ldr \rd, =\val
+ MYADRLSUB \rd, \val
#endif
.endm
--- source/common/arm/ipfilter8.S 2024-04-04 11:39:50.000000000 +0200
+++ source/common/arm/ipfilter8.S 2024-09-30 19:48:31.490019000 +0200
@@ -26,5 +26,5 @@
#include "asm.S"
-.section .rodata
+.text
.align 4
@@ -43,7 +43,4 @@
.word -2, -2, 16, 16, 54, 54, -4 ,-4
.word -2, -2, 10, 10, 58, 58, -2, -2
-
-
-.text
// filterPixelToShort(const pixel* src, intptr_t srcStride, int16_t* dst, intptr_t dstStride)
--- source/common/arm/sad-a.S 2024-04-04 11:39:50.000000000 +0200
+++ source/common/arm/sad-a.S 2024-09-30 19:49:06.534263000 +0200
@@ -26,11 +26,9 @@
#include "asm.S"
-.section .rodata
+.text
.align 4
sad12_mask:
.byte 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 0, 0, 0, 0
-
-.text
/* sad4x4(pixel* dst, intptr_t dstStride, const pixel* src, intptr_t srcStride)
--- source/test/checkasm-arm.S 2024-04-04 11:39:50.000000000 +0200
+++ source/test/checkasm-arm.S 2024-10-01 02:51:29.815273000 +0200
@@ -27,5 +27,5 @@
#include "../common/arm/asm.S"
-.section .rodata
+.text
.align 4
register_init:
@@ -41,6 +41,4 @@
error_message:
.asciz "failed to preserve register"
-
-.text
@ max number of args used by any x265 asm function.
--- source/common/cpu.cpp 2024-04-04 11:39:50.000000000 +0200
+++ source/common/cpu.cpp 2024-10-01 02:56:32.094316000 +0200
@@ -339,6 +339,6 @@
extern "C" {
-void PFX(cpu_neon_test)(void);
-int PFX(cpu_fast_neon_mrc_test)(void);
+void x265_cpu_neon_test(void);
+int x265_cpu_fast_neon_mrc_test(void);
}
@@ -361,5 +361,5 @@
canjump = 1;
- PFX(cpu_neon_test)();
+ x265_cpu_neon_test();
canjump = 0;
signal(SIGILL, oldsig);
@@ -377,5 +377,5 @@
// right now Apple does not seem to support performance counters for this test
#ifndef __MACH__
- flags |= PFX(cpu_fast_neon_mrc_test)() ? X265_CPU_FAST_NEON_MRC : 0;
+ flags |= x265_cpu_fast_neon_mrc_test() ? X265_CPU_FAST_NEON_MRC : 0;
#endif
// TODO: write dual issue test? currently it's A8 (dual issue) vs. A9 (fast mrc)
--- source/test/testharness.h 2024-04-04 11:39:50.000000000 +0200
+++ source/test/testharness.h 2024-10-01 03:05:54.786008000 +0200
@@ -83,9 +83,12 @@
asm volatile("rdtsc" : "=a" (a) ::"edx");
#elif X265_ARCH_ARM
+ struct timeval tv;
+ gettimeofday(&tv, nullptr);
+ return static_cast<int64_t>(tv.tv_sec) * 1000000 + tv.tv_usec;
// TOD-DO: verify following inline asm to get cpu Timestamp Counter for ARM arch
// asm volatile("mrc p15, 0, %0, c9, c13, 0" : "=r"(a));
// TO-DO: replace clock() function with appropriate ARM cpu instructions
- a = clock();
+ // a = clock();
#elif X265_ARCH_ARM64
asm volatile("mrs %0, cntvct_el0" : "=r"(a));
|