LoongArch: Update ulps
[glibc.git] / sysdeps / sparc / sparc64 / strncmp.S
blob2d17f2d120f06567fb0df1210abf59c29d6555f9
1 /* Compare no more than N characters of S1 and S2, returning less than,
2    equal to or greater than zero if S1 is lexicographically less than,
3    equal to or greater than S2.
4    For SPARC v9.
5    Copyright (C) 1997-2024 Free Software Foundation, Inc.
6    This file is part of the GNU C Library.
8    The GNU C Library is free software; you can redistribute it and/or
9    modify it under the terms of the GNU Lesser General Public
10    License as published by the Free Software Foundation; either
11    version 2.1 of the License, or (at your option) any later version.
13    The GNU C Library is distributed in the hope that it will be useful,
14    but WITHOUT ANY WARRANTY; without even the implied warranty of
15    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
16    Lesser General Public License for more details.
18    You should have received a copy of the GNU Lesser General Public
19    License along with the GNU C Library; if not, see
20    <https://www.gnu.org/licenses/>.  */
22 #include <sysdep.h>
23 #include <asm/asi.h>
24 #ifndef XCC
           /* XCC was not defined before this point, so supply the defaults:
              - XCC names the condition-code set used by the branches that
                test the length kept in %o2 (here the 64-bit %xcc);
              - USE_BPR selects the single brz instruction for the N == 0
                check at the entry of the routine instead of tst + be;
              - the .register directives declare %g2, %g3 and %g6 as scratch
                registers to the assembler.
              NOTE(review): presumably a wrapper for another SPARC variant
              defines XCC before including this file -- not visible here.  */
25 #define XCC xcc
26 #define USE_BPR
27         .register       %g2, #scratch
28         .register       %g3, #scratch
29         .register       %g6, #scratch
30 #endif
32         /* Normally, this uses the
33            ((xword - 0x0101010101010101) & 0x8080808080808080) test
34            to find out if any byte in xword could be zero. This is fast, but
35            also gives a false alarm for any byte in range 0x81-0xff. It does
36            not matter for correctness, as if this test tells us there could
37            be some zero byte, we check it byte by byte, but if bytes with
38            high bits set are common in the strings, then this will give poor
39            performance. You can #define EIGHTBIT_NOT_RARE and the algorithm
40            will use a one tick slower, but more precise, test
41            ((xword - 0x0101010101010101) & (~xword) & 0x8080808080808080),
42            which does not give any false alarms (but if some bits are set,
43            one cannot assume from it which bytes are zero and which are not).
44            It has yet to be measured which default is right for glibc
45            these days for an average user.
46          */
48         .text
49         .align          32
           /* strncmp: %o0 = S1, %o1 = S2, %o2 = N; the result is returned in %o0
              (zero, or a value whose sign tells which string sorts first).
              Leaf routine: it never executes save/restore and returns with retl.
              Temporaries written: %g1-%g6 and %o3-%o5.

              Layout of the code below:
                entry    N == 0 check and alignment dispatch
                1:/2:    both pointers 8-byte aligned, one xword per iteration
                3:       byte-wise zero search in the low half of an equal xword
                4:       return 0
                5:       fewer than 8 bytes of N left: length-limited byte compare
                6:/7:    xwords differ: locate the deciding byte
                8:       return the value prepared in %o4
                9:/10:   byte loop used until S1 is 8-byte aligned
                11:-14:  S1 aligned, S2 not: each S2 xword is assembled from two
                         aligned loads with shifts

              The instruction after each branch is its delay slot (usually
              marked by one extra space of indentation).  It executes before
              the branch target is reached; when the branch carries ",a" it
              executes only if the branch is taken.

              Read-ahead loads use ASI_PNF from <asm/asi.h>.
              NOTE(review): presumably the no-fault address space, so that
              loading beyond the end of a string cannot trap -- confirm
              against the header.  */
50 ENTRY(strncmp)
           /* N == 0: nothing to compare, return 0 through 4.  The delay slot
              starts building the 0x01 byte pattern in %g1.  */
51 #ifdef USE_BPR
52         brz,pn          %o2, 4f                         /* CTI+IEU1     Group           */
53 #else
54         tst             %o2                             /* IEU1         Group           */
55         be,pn           %XCC, 4f                        /* CTI                          */
56 #endif
57          sethi          %hi(0x1010101), %g1             /* IEU0                         */
           /* S1 not 8-byte aligned: use the byte loop at 9.  The delay slot
              completes %g1 = 0x01010101.  */
58         andcc           %o0, 7, %g0                     /* IEU1         Group           */
59         bne,pn          %icc, 9f                        /* CTI                          */
61          or             %g1, %lo(0x1010101), %g1        /* IEU0                         */
           /* S1 aligned but S2 not (%g3 = S2 & 7): use the shifting loop at 11.
              The delay slot prepares %g2 = %g1 << 32.  */
62         andcc           %o1, 7, %g3                     /* IEU1         Group           */
63         bne,pn          %icc, 11f                       /* CTI                          */
64          sllx           %g1, 32, %g2                    /* IEU0                         */
           /* Both pointers aligned.  %g4 = first S1 xword,
              %g1 = 0x0101010101010101.  */
66         ldx             [%o0], %g4                      /* Load         Group           */
67         or              %g1, %g2, %g1                   /* IEU0                         */
           /* 1: %o3 = S2 xword, %g2 = %g1 << 7 = 0x8080808080808080,
              %o0 advanced past the S1 xword already held in %g4.  */
68 1:      ldx             [%o1], %o3                      /* Load         Group           */
69         sllx            %g1, 7, %g2                     /* IEU0                         */
71         add             %o0, 8, %o0                     /* IEU1                         */
           /* 2: aligned main loop.  N -= 8; a borrow means fewer than 8 bytes
              of N remained, which is handled at 5.  */
72 2:      subcc           %o2, 8, %o2                     /* IEU1         Group           */
73         bcs,pn          %XCC, 5f                        /* CTI                          */
74          add            %o1, 8, %o1                     /* IEU0                         */
           /* %g3 = S1 xword - 0x01..01 (input of the zero-byte test),
              %o4 = S1 xword - S2 xword.  If the xwords differ go to 6; the
              delay slot reads the next S1 xword into %g4 on both paths.  */
76         sub             %g4, %g1, %g3                   /* IEU0         Group           */
77         subcc           %g4, %o3, %o4                   /* IEU1                         */
78 #ifdef EIGHTBIT_NOT_RARE
79         andn            %g3, %g4, %g6                   /* IEU0         Group           */
80 #endif
81         bne,pn          %xcc, 6f                        /* CTI                          */
82          ldxa           [%o0] ASI_PNF, %g4              /* Load         Group           */
           /* Xwords are equal.  If the masked test shows no candidate zero
              byte, loop to 2; the annulled delay slot then fetches the next
              S2 xword (taken path only).  */
84         add             %o0, 8, %o0                     /* IEU0                         */
85 #ifdef EIGHTBIT_NOT_RARE
86         andcc           %g6, %g2, %g0                   /* IEU1                         */
87 #else
88         andcc           %g3, %g2, %g0                   /* IEU1                         */
89 #endif
90         be,a,pt         %xcc, 2b                        /* CTI                          */
91          ldxa           [%o1] ASI_PNF, %o3              /* Load         Group           */
           /* A zero byte is possible.  %o4 = %g3 + %g1 rebuilds the xword just
              compared (%g4 already holds the next one).  Shifting the test
              value right by 32 checks the flags of the upper four bytes only;
              if none is set, skip their individual checks and go to 3.  */
93         addcc           %g3, %g1, %o4                   /* IEU1                         */
94 #ifdef EIGHTBIT_NOT_RARE
95         srlx            %g6, 32, %g6                    /* IEU0                         */
96         andcc           %g6, %g2, %g0                   /* IEU1         Group           */
97 #else
98         srlx            %g3, 32, %g3                    /* IEU0                         */
99         andcc           %g3, %g2, %g0                   /* IEU1         Group           */
100 #endif
101         be,pt           %xcc, 3f                        /* CTI                          */
           /* Examine the bytes of %o4 from the most significant one down.  The
              xwords were equal, so a zero byte means both strings end here:
              return 0 through 4.  */
103          srlx           %o4, 56, %o5                    /* IEU0                         */
104         andcc           %o5, 0xff, %g0                  /* IEU1         Group           */
105         be,pn           %icc, 4f                        /* CTI                          */
106          srlx           %o4, 48, %o5                    /* IEU0                         */
108         andcc           %o5, 0xff, %g0                  /* IEU1         Group           */
109         be,pn           %icc, 4f                        /* CTI                          */
110          srlx           %o4, 40, %o5                    /* IEU0                         */
111         andcc           %o5, 0xff, %g0                  /* IEU1         Group           */
113         be,pn           %icc, 4f                        /* CTI                          */
114          srlx           %o4, 32, %o5                    /* IEU0                         */
115         andcc           %o5, 0xff, %g0                  /* IEU1         Group           */
116         be,pn           %icc, 4f                        /* CTI                          */
           /* 3: same search for the lower four bytes.  This instruction is
              both the delay slot of the branch above and the target of the
              "be 3f" shortcut.  */
118 3:       srlx           %o4, 24, %o5                    /* IEU0                         */
119         andcc           %o5, 0xff, %g0                  /* IEU1         Group           */
120         be,pn           %icc, 4f                        /* CTI                          */
121          srlx           %o4, 16, %o5                    /* IEU0                         */
123         andcc           %o5, 0xff, %g0                  /* IEU1         Group           */
124         be,pn           %icc, 4f                        /* CTI                          */
125          srlx           %o4, 8, %o5                     /* IEU0                         */
126         andcc           %o5, 0xff, %g0                  /* IEU1         Group           */
           /* The andcc in the delay slot tests the last byte.  If it is
              non-zero the alarm was false: resume the loop at 2 with the next
              S2 xword loaded in the annulled delay slot.  Otherwise fall
              through to 4.  */
128         be,pn           %icc, 4f                        /* CTI                          */
129         andcc           %o4, 0xff, %g0                  /* IEU1         Group           */
130         bne,a,pn        %icc, 2b                        /* CTI                          */
131          ldxa           [%o1] ASI_PNF, %o3              /* Load                         */
           /* 4: return 0.  */
133 4:      retl                                            /* CTI+IEU1     Group           */
134          clr            %o0                             /* IEU0                         */
136         .align          16
           /* 5: tail.  The subcc that borrowed left %o2 = (remaining N) - 8,
              i.e. a value in -8..-1; %g4 / %o3 hold the current S1 / S2 xwords.
              For each byte position, most significant first:
                - if %o2 equals -8 plus the number of bytes already compared,
                  the limit N is reached: return 0 through 4;
                - otherwise leave through 8 when the S1 byte is zero or the
                  bytes differ.  The subcc sits in the delay slot of the
                  zero-byte branch and executes on both paths, so %o4 holds
                  the difference when 8 is reached.
              The shifted values still contain the earlier bytes, but those are
              known to be equal, so the subtraction yields the difference of
              the byte being examined.  */
137 5:      srlx            %g4, 56, %o4                    /* IEU0         Group           */
138         cmp             %o2, -8                         /* IEU1                         */
139         be,pn           %XCC, 4b                        /* CTI                          */
140          srlx           %o3, 56, %o5                    /* IEU0         Group           */
142         andcc           %o4, 0xff, %g0                  /* IEU1                         */
143         be,pn           %xcc, 8f                        /* CTI                          */
144          subcc          %o4, %o5, %o4                   /* IEU1         Group           */
145         bne,pn          %xcc, 8f                        /* CTI                          */
147          srlx           %o3, 48, %o5                    /* IEU0                         */
148         cmp             %o2, -7                         /* IEU1         Group           */
149         be,pn           %XCC, 4b                        /* CTI                          */
150          srlx           %g4, 48, %o4                    /* IEU0                         */
152         andcc           %o4, 0xff, %g0                  /* IEU1         Group           */
153         be,pn           %xcc, 8f                        /* CTI                          */
154          subcc          %o4, %o5, %o4                   /* IEU1         Group           */
155         bne,pn          %xcc, 8f                        /* CTI                          */
157          srlx           %o3, 40, %o5                    /* IEU0                         */
158         cmp             %o2, -6                         /* IEU1         Group           */
159         be,pn           %XCC, 4b                        /* CTI                          */
160          srlx           %g4, 40, %o4                    /* IEU0                         */
162         andcc           %o4, 0xff, %g0                  /* IEU1         Group           */
163         be,pn           %xcc, 8f                        /* CTI                          */
164          subcc          %o4, %o5, %o4                   /* IEU1         Group           */
165         bne,pn          %xcc, 8f                        /* CTI                          */
167          srlx           %o3, 32, %o5                    /* IEU0                         */
168         cmp             %o2, -5                         /* IEU1         Group           */
169         be,pn           %XCC, 4b                        /* CTI                          */
170          srlx           %g4, 32, %o4                    /* IEU0                         */
172         andcc           %o4, 0xff, %g0                  /* IEU1         Group           */
173         be,pn           %xcc, 8f                        /* CTI                          */
174          subcc          %o4, %o5, %o4                   /* IEU1         Group           */
175         bne,pn          %xcc, 8f                        /* CTI                          */
177          srlx           %o3, 24, %o5                    /* IEU0                         */
178         cmp             %o2, -4                         /* IEU1         Group           */
179         be,pn           %XCC, 4b                        /* CTI                          */
180          srlx           %g4, 24, %o4                    /* IEU0                         */
182         andcc           %o4, 0xff, %g0                  /* IEU1         Group           */
183         be,pn           %xcc, 8f                        /* CTI                          */
184          subcc          %o4, %o5, %o4                   /* IEU1         Group           */
185         bne,pn          %xcc, 8f                        /* CTI                          */
187          srlx           %o3, 16, %o5                    /* IEU0                         */
188         cmp             %o2, -3                         /* IEU1         Group           */
189         be,pn           %XCC, 4b                        /* CTI                          */
190          srlx           %g4, 16, %o4                    /* IEU0                         */
192         andcc           %o4, 0xff, %g0                  /* IEU1         Group           */
193         be,pn           %xcc, 8f                        /* CTI                          */
194          subcc          %o4, %o5, %o4                   /* IEU1         Group           */
195         bne,pn          %xcc, 8f                        /* CTI                          */
197          srlx           %o3, 8, %o5                     /* IEU0                         */
198         cmp             %o2, -2                         /* IEU1         Group           */
199         be,pn           %XCC, 4b                        /* CTI                          */
200          srlx           %g4, 8, %o4                     /* IEU0                         */
           /* Only %o2 == -1 gets here: the seventh byte is the last one N
              allows, so return its difference (zero when equal).  */
202         retl                                            /* CTI+IEU1     Group           */
203          sub            %o4, %o5, %o0                   /* IEU0                         */
           /* 6: the aligned loop found unequal xwords.  %g4 was overwritten by
              the read-ahead in the delay slot, so rebuild the S1 xword as
              %o3 + (S1 xword - %o3).  */
204 6:      addcc           %o3, %o4, %g4                   /* IEU1                         */
           /* 7: %g4 = S1 xword, %o3 = S2 xword, known to differ (also entered
              from the shifting loop).  All eight bytes are within N here.
              Walk the bytes from the most significant one and leave through 8
              with %o4 = difference at the first zero S1 byte or the first
              unequal byte.  */
205 7:      srlx            %o3, 56, %o5                    /* IEU0                         */
207         srlx            %g4, 56, %o4                    /* IEU0         Group           */
208         andcc           %o4, 0xff, %g0                  /* IEU1         Group           */
209         be,pn           %xcc, 8f                        /* CTI                          */
210          subcc          %o4, %o5, %o4                   /* IEU1         Group           */
212         bne,pn          %xcc, 8f                        /* CTI                          */
213          srlx           %o3, 48, %o5                    /* IEU0                         */
214         srlx            %g4, 48, %o4                    /* IEU0         Group           */
215         andcc           %o4, 0xff, %g0                  /* IEU1         Group           */
217         be,pn           %xcc, 8f                        /* CTI                          */
218          subcc          %o4, %o5, %o4                   /* IEU1         Group           */
219         bne,pn          %xcc, 8f                        /* CTI                          */
220          srlx           %o3, 40, %o5                    /* IEU0                         */
222         srlx            %g4, 40, %o4                    /* IEU0         Group           */
223         andcc           %o4, 0xff, %g0                  /* IEU1         Group           */
224         be,pn           %xcc, 8f                        /* CTI                          */
225          subcc          %o4, %o5, %o4                   /* IEU1         Group           */
227         bne,pn          %xcc, 8f                        /* CTI                          */
228          srlx           %o3, 32, %o5                    /* IEU0                         */
229         srlx            %g4, 32, %o4                    /* IEU0         Group           */
230         andcc           %o4, 0xff, %g0                  /* IEU1         Group           */
232         be,pn           %xcc, 8f                        /* CTI                          */
233          subcc          %o4, %o5, %o4                   /* IEU1         Group           */
234         bne,pn          %xcc, 8f                        /* CTI                          */
235          srlx           %o3, 24, %o5                    /* IEU0                         */
237         srlx            %g4, 24, %o4                    /* IEU0         Group           */
238         andcc           %o4, 0xff, %g0                  /* IEU1         Group           */
239         be,pn           %xcc, 8f                        /* CTI                          */
240          subcc          %o4, %o5, %o4                   /* IEU1         Group           */
242         bne,pn          %xcc, 8f                        /* CTI                          */
243          srlx           %o3, 16, %o5                    /* IEU0                         */
244         srlx            %g4, 16, %o4                    /* IEU0         Group           */
245         andcc           %o4, 0xff, %g0                  /* IEU1         Group           */
247         be,pn           %xcc, 8f                        /* CTI                          */
248          subcc          %o4, %o5, %o4                   /* IEU1         Group           */
249         bne,pn          %xcc, 8f                        /* CTI                          */
250          srlx           %o3, 8, %o5                     /* IEU0                         */
252         srlx            %g4, 8, %o4                     /* IEU0         Group           */
253         andcc           %o4, 0xff, %g0                  /* IEU1         Group           */
254         be,pn           %xcc, 8f                        /* CTI                          */
255          subcc          %o4, %o5, %o4                   /* IEU1         Group           */
           /* The seventh S1 byte is non-zero and the six bytes before it are
              equal, so the whole-xword difference is decided by the last two
              bytes: return it directly.  */
257         retl                                            /* CTI+IEU1     Group           */
258          sub            %g4, %o3, %o0                   /* IEU0                         */
           /* 8: return the value prepared in %o4.  */
259 8:      retl                                            /* CTI+IEU1     Group           */
260          mov            %o4, %o0                        /* IEU0                         */
           /* 9: S1 is not 8-byte aligned; compare byte by byte until it is.
              %g4 / %o3 = current S1 / S2 byte and %o0 already points at the
              next S1 byte.  %g2 = %g1 << 32 is kept for completing the 0x01
              pattern later (%g1 is 0x01010101 on this path).  */
262 9:      ldub            [%o0], %g4                      /* Load         Group           */
263         add             %o0, 1, %o0                     /* IEU0                         */
264         ldub            [%o1], %o3                      /* Load         Group           */
265         sllx            %g1, 32, %g2                    /* IEU0                         */
           /* 10: N -= 1.  If this was the last byte N allows, return
              %g4 - %o3 (computed in the delay slot) through 8.  */
267 10:     subcc           %o2, 1, %o2                     /* IEU1         Group           */
268         be,pn           %XCC, 8b                        /* CTI                          */
269          sub            %g4, %o3, %o4                   /* IEU0                         */
           /* Bytes differ: return that same difference through 8.  The delay
              slot reads the next S1 byte ahead.  */
270         add             %o1, 1, %o1                     /* IEU0         Group           */
272         cmp             %g4, %o3                        /* IEU1                         */
273         bne,pn          %xcc, 8b                        /* CTI                          */
274          lduba          [%o0] ASI_PNF, %g4              /* Load         Group           */
           /* Bytes are equal; if they are zero both strings end: return 0.
              The delay slot reads the next S2 byte ahead.  */
275         andcc           %o3, 0xff, %g0                  /* IEU1                         */
277         be,pn           %icc, 4b                        /* CTI                          */
278          lduba          [%o1] ASI_PNF, %o3              /* Load         Group           */
           /* S1 still unaligned: loop, stepping %o0 in the annulled delay slot
              (executed only when the branch is taken).  */
279         andcc           %o0, 7, %g0                     /* IEU1                         */
280         bne,a,pn        %icc, 10b                       /* CTI                          */
282          add            %o0, 1, %o0                     /* IEU0         Group           */
           /* S1 is aligned now.  Complete %g1 = 0x0101010101010101.  If S2 is
              aligned too (%g3 = S2 & 7 is zero), join the aligned code at 1;
              the delay slot loads the S1 xword into %g4 on both paths.
              Otherwise fall into 11.  */
283         or              %g1, %g2, %g1                   /* IEU1                         */
284         andcc           %o1, 7, %g3                     /* IEU1         Group           */
285         be,pn           %icc, 1b                        /* CTI                          */
287          ldxa           [%o0] ASI_PNF, %g4              /* Load                         */
           /* 11: S1 aligned, S2 not.  Round %o1 down to an 8-byte boundary,
              load the aligned xword containing the start of S2 into %o4 and
              prepare the shift counts used to assemble every S2 xword from
              two consecutive aligned xwords.  */
288 11:     sllx            %g3, 3, %g5                     /* IEU0         Group           */
289         mov             64, %g6                         /* IEU1                         */
290         or              %g1, %g2, %g1                   /* IEU0         Group           */
291         sub             %o1, %g3, %o1                   /* IEU1                         */
293         sub             %g6, %g5, %g6                   /* IEU0         Group           */
294         ldxa            [%o1] ASI_PNF, %o4              /* Load                         */
295         sllx            %g1, 7, %g2                     /* IEU1                         */
296         add             %o1, 8, %o1                     /* IEU0         Group           */
297                                                         /* %g1 = 0101010101010101
298                                                            %g2 = 8080808080808080
299                                                            %g3 = %o1 alignment
300                                                            %g5 = number of bits to shift left
301                                                            %g6 = number of bits to shift right */
           /* 12: %o3 = upper part of the S2 xword, taken from the aligned
              xword loaded previously; then load the following aligned xword
              into %o4.  */
303 12:     sllx            %o4, %g5, %o3                   /* IEU0         Group           */
304         ldxa            [%o1] ASI_PNF, %o4              /* Load                         */
305         add             %o1, 8, %o1                     /* IEU1                         */
           /* 13: %g4 = S1 xword, %o5 = lower part of the S2 xword.  N -= 8; on
              a borrow finish in the tail at 5.  The delay slot completes
              %o3 = assembled S2 xword on both paths.  */
306 13:     ldxa            [%o0] ASI_PNF, %g4              /* Load         Group           */
308         addcc           %o0, 8, %o0                     /* IEU1                         */
309         srlx            %o4, %g6, %o5                   /* IEU0                         */
310         subcc           %o2, 8, %o2                     /* IEU1         Group           */
311         bcs,pn          %XCC, 5b                        /* CTI                          */
313          or             %o3, %o5, %o3                   /* IEU0                         */
           /* Unequal xwords: find the deciding byte at 7.  The delay slot
              starts the zero-byte test on the S1 xword.  */
314         cmp             %g4, %o3                        /* IEU1         Group           */
315         bne,pn          %xcc, 7b                        /* CTI                          */
316          sub            %g4, %g1, %o5                   /* IEU0                         */
318 #ifdef EIGHTBIT_NOT_RARE
319         andn            %o5, %g4, %o5                   /* IEU0         Group           */
320 #endif
           /* No candidate zero byte: next iteration at 12.  The delay slot
              (always executed) moves the upper four flags down for the test
              that follows.  */
321         andcc           %o5, %g2, %g0                   /* IEU1         Group           */
322         be,pt           %xcc, 12b                       /* CTI                          */
323          srlx           %o5, 32, %o5                    /* IEU0                         */
           /* Upper four flags clear: skip to 14.  Otherwise check the upper
              four bytes of the (equal) xword one by one; a zero byte means
              both strings end here: return 0 through 4.  */
324         andcc           %o5, %g2, %g0                   /* IEU1         Group           */
326         be,pt           %xcc, 14f                       /* CTI                          */
327          srlx           %g4, 56, %o5                    /* IEU0                         */
328         andcc           %o5, 0xff, %g0                  /* IEU1         Group           */
329         be,pn           %icc, 4b                        /* CTI                          */
331          srlx           %g4, 48, %o5                    /* IEU0                         */
332         andcc           %o5, 0xff, %g0                  /* IEU1         Group           */
333         be,pn           %icc, 4b                        /* CTI                          */
334          srlx           %g4, 40, %o5                    /* IEU0                         */
336         andcc           %o5, 0xff, %g0                  /* IEU1         Group           */
337         be,pn           %icc, 4b                        /* CTI                          */
338          srlx           %g4, 32, %o5                    /* IEU0                         */
339         andcc           %o5, 0xff, %g0                  /* IEU1         Group           */
341         be,pn           %icc, 4b                        /* CTI                          */
           /* 14: same search for the lower four bytes.  This instruction is
              both the delay slot of the branch above and the target of the
              "be 14f" shortcut.  */
342 14:      srlx           %g4, 24, %o5                    /* IEU0                         */
343         andcc           %o5, 0xff, %g0                  /* IEU1         Group           */
344         be,pn           %icc, 4b                        /* CTI                          */
346          srlx           %g4, 16, %o5                    /* IEU0                         */
347         andcc           %o5, 0xff, %g0                  /* IEU1         Group           */
348         be,pn           %icc, 4b                        /* CTI                          */
349          srlx           %g4, 8, %o5                     /* IEU0                         */
351         andcc           %o5, 0xff, %g0                  /* IEU1         Group           */
352         be,pn           %icc, 4b                        /* CTI                          */
353          andcc          %g4, 0xff, %g0                  /* IEU1         Group           */
           /* Last byte zero: return 0.  Otherwise the alarm was false: repeat
              the work of 12 here (shift in the delay slot, then load the next
              aligned S2 xword and step %o1) and re-enter the loop at 13.  */
354         be,pn           %icc, 4b                        /* CTI                          */
356          sllx           %o4, %g5, %o3                   /* IEU0                         */
357         ldxa            [%o1] ASI_PNF, %o4              /* Load         Group           */
358         ba,pt           %xcc, 13b                       /* CTI                          */
359          add            %o1, 8, %o1                     /* IEU0                         */
360 END(strncmp)
           /* NOTE(review): libc_hidden_builtin_def is a macro defined
              elsewhere in the project; its expansion is not visible here.  */
361 libc_hidden_builtin_def (strncmp)