mips64: time64 for n32 ABI breaks a lot of tests, disable it for now
[uclibc-ng.git] / libc / string / arc / memcmp.S
blob 20122a2967ebefb65fc530b9a6de9df0900488f1
1 /*
2  * Copyright (C) 2013, 2022 Synopsys, Inc. (www.synopsys.com)
3  * Copyright (C) 2007 ARC International (UK) LTD
4  *
5  * Licensed under the LGPL v2.1 or later, see the file COPYING.LIB in this tarball.
6  */
8 #include <sysdep.h>
9 #include <features.h>
; The word-compare path below handles two data words per iteration and
; then a partial-word tail.  WORD2 holds the second data word and SHIFT
; the residual shift count for that tail; their register assignment
; (r2 vs r3) swaps with endianness so the endian-specific tail code can
; use whichever register already holds the value it needs.
11 #ifdef __LITTLE_ENDIAN__
12 #define WORD2 r2
13 #define SHIFT r3
14 #else /* BIG ENDIAN */
15 #define WORD2 r3
16 #define SHIFT r2
17 #endif
;-----------------------------------------------------------------------
; int memcmp(const void *s1, const void *s2, size_t n)
;
; In:    r0 = s1, r1 = s2, r2 = n (byte count)
; Out:   r0 = 0 if the buffers match; otherwise the (signed) difference
;        of the first pair of differing bytes
; Uses:  r3-r5, r12, lp_count (ARC700/HS path); r3-r13 (ARC64 path)
;
; NOTE(review): the leading decimal on each line below is line-number
; residue from the blob viewer this listing was scraped from, not part
; of the real source file.
;-----------------------------------------------------------------------
19 ENTRY(memcmp)
; --- ARC700 / ARC HS: word-at-a-time compare using a zero-overhead loop
21 #if defined(__ARC700__) || defined(__ARCHS__)
; r12 = (s1|s2) << 30: zero iff both pointers are 4-byte aligned,
; otherwise a huge unsigned value.
22         or      r12,r0,r1
23         asl_s   r12,r12,30
; r3 = n-1, reused below for both the loop count and the tail shift.
24         sub     r3,r2,1
; Unsigned n <= r12: taken when unaligned (r12 huge) or, with aligned
; pointers (r12 == 0), when n == 0.
25         brls    r2,r12,.Lbytewise
; Prime the pipeline with the first word from each buffer.
26         ld      r4,[r0,0]
27         ld      r5,[r1,0]
; lp_count = (n-1)/8: each loop iteration compares 8 bytes (two words
; per side).  The flags set here are still consumed by bhs_s after the
; loop — no flag-setting instruction may be added in between.
28         lsr.f   lp_count,r3,3
29 #ifdef __HS__
30         /* In ARCv2 a branch can't be the last instruction in a zero overhead
31          * loop.
32          * So we move the branch to the start of the loop, duplicate it
33          * after the end, and set up r12 so that the branch isn't taken
34          *  initially.
35          */
36         mov_s   r12,WORD2
37         lpne    .Loop_end
38         brne    WORD2,r12,.Lodd
39         ld      WORD2,[r0,4]
40 #else
41         lpne    .Loop_end
42         ld_s    WORD2,[r0,4]
43 #endif
44         ld_s    r12,[r1,4]
; Even word pair differs -> resolve the difference in .Leven.
45         brne    r4,r5,.Leven
; Advance both pointers by 8 and fetch the next even word pair.
46         ld.a    r4,[r0,8]
47         ld.a    r5,[r1,8]
48 #ifdef __HS__
49 .Loop_end:
50         brne    WORD2,r12,.Lodd
51 #else
52         brne    WORD2,r12,.Lodd
53 .Loop_end:
54 #endif
; Loop finished with no mismatch.  SHIFT = residual byte count scaled
; to bits, used to bound the compare of the final (partial) word.
55         asl_s   SHIFT,SHIFT,3
; Branches on the flags produced by lsr.f above (they survive the loop:
; the loads and compare-branches in between do not write flags) —
; selects whether one more full word pair precedes the tail compare.
56         bhs_s   .Last_cmp
57         brne    r4,r5,.Leven
58         ld      r4,[r0,4]
59         ld      r5,[r1,4]
60 #ifdef __LITTLE_ENDIAN__
61         nop_s
62         ; one more load latency cycle
; LE tail: plant a sentinel "difference" bit just past the last valid
; byte so the find-first-difference scan cannot pick up garbage bytes
; beyond the end of the buffers.
63 .Last_cmp:
64         xor     r0,r4,r5
65         bset    r0,r0,SHIFT
; Isolate the lowest set (first differing) bit via (x-1) & ~x, locate
; it with norm, and round down to a byte boundary (and with 24).
66         sub_s   r1,r0,1
67         bic_s   r1,r1,r0
68         norm    r1,r1
69         b.d     .Leven_cmp
70         and     r1,r1,24
; Even word pair differs inside the valid range: same first-diff scan.
71 .Leven:
72         xor     r0,r4,r5
73         sub_s   r1,r0,1
74         bic_s   r1,r1,r0
75         norm    r1,r1
76         ; slow track insn
77         and     r1,r1,24
; Shift both words so the differing byte governs the comparison; the
; final lsr by 1 keeps the subtraction inside the signed range.
78 .Leven_cmp:
79         asl     r2,r4,r1
80         asl     r12,r5,r1
81         lsr_s   r2,r2,1
82         lsr_s   r12,r12,1
; Return the difference from the branch delay slot.
83         j_s.d   [blink]
84         sub     r0,r2,r12
85         .balign 4
; Odd word pair (WORD2 vs r12) differs: identical scan-and-return.
86 .Lodd:
87         xor     r0,WORD2,r12
88         sub_s   r1,r0,1
89         bic_s   r1,r1,r0
90         norm    r1,r1
91         ; slow track insn
92         and     r1,r1,24
93         asl_s   r2,r2,r1
94         asl_s   r12,r12,r1
95         lsr_s   r2,r2,1
96         lsr_s   r12,r12,1
97         j_s.d   [blink]
98         sub     r0,r2,r12
99 #else /* BIG ENDIAN */
; BE tail: shift out the bytes past the end; a plain word compare then
; orders the buffers correctly (most significant byte comes first).
100 .Last_cmp:
101         neg_s   SHIFT,SHIFT
102         lsr     r4,r4,SHIFT
103         lsr     r5,r5,SHIFT
104         ; slow track insn
; Result is +1 on any difference, turned negative by setting bit 31
; when the unsigned compare carried (r4 < r5).
105 .Leven:
106         sub.f   r0,r4,r5
107         mov.ne  r0,1
108         j_s.d   [blink]
109         bset.cs r0,r0,31
110 .Lodd:
111         cmp_s   WORD2,r12
112         mov_s   r0,1
113         j_s.d   [blink]
114         bset.cs r0,r0,31
115 #endif /* ENDIAN */
116         .balign 4
; --- Byte-at-a-time path: unaligned pointers or n == 0.
117 .Lbytewise:
118         breq    r2,0,.Lnil
119         ldb     r4,[r0,0]
120         ldb     r5,[r1,0]
; lp_count = (n-1)/2: two bytes compared per iteration.  As above, the
; flags set here are consumed by bcc after the loop.
121         lsr.f   lp_count,r3
122 #ifdef __HS__
; Same ARCv2 ZOL restriction: branch duplicated at the loop head, r12
; pre-loaded so the branch is not taken on the first pass.
123         mov     r12,r3
124         lpne    .Lbyte_end
125         brne    r3,r12,.Lbyte_odd
126 #else
127         lpne    .Lbyte_end
128 #endif
129         ldb_s   r3,[r0,1]
130         ldb     r12,[r1,1]
131         brne    r4,r5,.Lbyte_even
132         ldb.a   r4,[r0,2]
133         ldb.a   r5,[r1,2]
134 #ifdef __HS__
135 .Lbyte_end:
136         brne    r3,r12,.Lbyte_odd
137 #else
138         brne    r3,r12,.Lbyte_odd
139 .Lbyte_end:
140 #endif
; Carry (bit 0 of n-1, from lsr.f above) selects which register pair
; holds the final byte: even-offset pair in r4/r5 vs odd in r3/r12.
141         bcc     .Lbyte_even
142         brne    r4,r5,.Lbyte_even
143         ldb_s   r3,[r0,1]
144         ldb_s   r12,[r1,1]
145 .Lbyte_odd:
146         j_s.d   [blink]
147         sub     r0,r3,r12
148 .Lbyte_even:
149         j_s.d   [blink]
150         sub     r0,r4,r5
; n == 0: empty ranges compare equal.
151 .Lnil:
152         j_s.d   [blink]
153         mov     r0,0
; --- ARC64 (32-bit regs) path.
; NOTE(review): `#elif defined(__ARC64_ARCH32__)` would be cleaner
; (-Wundef hygiene); result is the same when the macro is undefined/0.
155 #elif (__ARC64_ARCH32__)
156         ;; Based on Synopsys code from newlib's arc64/memcmp.S
; r3 becomes the s1 cursor so r0 stays free for the return value.
157         cmp             r2, 32
158         bls.d   @.L_compare_1_bytes
159         mov             r3, r0  ; "r0" will be used as return value
161         lsr             r12, r2, 4      ; counter for 16-byte chunks
162         xor             r13, r13, r13   ; the mask showing inequal registers
; 16 bytes per iteration, four word pairs; each mismatching pair sets
; one bit of the r13 mask so the first mismatch can be located later.
164 .L_compare_16_bytes:
165         ld.ab   r4, [r3, +4]
166         ld.ab   r5, [r1, +4]
167         ld.ab   r6, [r3, +4]
168         ld.ab   r7, [r1, +4]
169         ld.ab   r8, [r3, +4]
170         ld.ab   r9, [r1, +4]
171         ld.ab   r10, [r3, +4]
172         ld.ab   r11, [r1, +4]
173         xor.f   0, r4, r5
174         xor.ne  r13, r13, 0b0001
175         xor.f   0, r6, r7
176         xor.ne  r13, r13, 0b0010
177         xor.f   0, r8, r9
178         xor.ne  r13, r13, 0b0100
179         xor.f   0, r10, r11
180         xor.ne  r13, r13, 0b1000
181         brne    r13, 0, @.L_unequal_find
182         dbnz    r12, @.L_compare_16_bytes
184         ;; Adjusting the pointers because of the extra loads in the end
185         sub             r1, r1, 4
186         sub             r3, r3, 4
187         bmsk_s    r2, r2, 3     ; any remaining bytes to compare
; Byte tail: return 0 straight away when nothing remains (delay slot
; zeroes r0 either way).
189 .L_compare_1_bytes:
190         cmp             r2, 0
191         jeq.d   [blink]
192         xor_s   r0, r0, r0
; NOTE(review): `dbnz r2, @2b` below targets a numeric label `2:` that
; is not visible in this listing (original line 194 appears lost in
; extraction) — verify against the upstream file before assembling.
195         ldb.ab  r4, [r3, +1]
196         ldb.ab  r5, [r1, +1]
197         sub.f   r0, r4, r5
198         jne             [blink]
199         dbnz    r2, @2b
200         j_s             [blink]
202         ;; At this point, we want to find the _first_ comparison that marked the
203         ;; inequality of "lhs" and "rhs"
; ffs -> index of the lowest set mask bit; scaled by 4 it feeds the
; computed branch `bi` into the fixed-size stanzas below (the trailing
; nops keep every stanza exactly four instructions long).
204 .L_unequal_find:
205         ffs             r13, r13
206         asl             r13, r13, 2
207         bi              [r13]
208 .L_unequal_r4r5:
209         mov             r1, r4
210         b.d             @.L_diff_byte_in_regs
211         mov             r2, r5
212         nop
213 .L_unequal_r6r7:
214         mov             r1, r6
215         b.d             @.L_diff_byte_in_regs
216         mov             r2, r7
217         nop
218 .L_unequal_r8r9:
219         mov             r1, r8
220         b.d             @.L_diff_byte_in_regs
221         mov             r2, r9
222         nop
223 .L_unequal_r10r11:
224         mov             r1, r10
225         mov             r2, r11
227         ;; fall-through
228         ;; If we're here, that means the two operands are not equal.
; Find the lowest differing bit, round down to a byte boundary
; (and with 0x18), extract that byte from both words and return the
; difference from the delay slot.
229 .L_diff_byte_in_regs:
230         xor             r0, r1, r2
231         ffs             r0, r0
232         and             r0, r0, 0x18
233         lsr             r1, r1, r0
234         lsr             r2, r2, r0
235         bmsk_s  r1, r1, 7
236         bmsk_s  r2, r2, 7
237         j_s.d   [blink]
238         sub             r0, r1, r2
240 #else
241 #error "Unsupported ARC CPU type"
242 #endif
244 END(memcmp)
245 libc_hidden_def(memcmp)
247 #ifdef __UCLIBC_SUSV3_LEGACY__
; bcmp() is the legacy BSD alias for memcmp() (SUSv3 LEGACY).
248 strong_alias(memcmp,bcmp)
249 #endif