LoongArch: Update ulps
[glibc.git] / sysdeps / sparc / sparc64 / memchr.S
blob02a806ce5ea3578e833b3a8a3f5e276237ba63f7
1 /* memchr (str, ch, n) -- Return pointer to first occurrence of CH in STR less
2    than N.
3    For SPARC v9.
4    Copyright (C) 1998-2024 Free Software Foundation, Inc.
5    This file is part of the GNU C Library.
7    The GNU C Library is free software; you can redistribute it and/or
8    modify it under the terms of the GNU Lesser General Public
9    License as published by the Free Software Foundation; either
10    version 2.1 of the License, or (at your option) any later version.
12    The GNU C Library is distributed in the hope that it will be useful,
13    but WITHOUT ANY WARRANTY; without even the implied warranty of
14    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
15    Lesser General Public License for more details.
17    You should have received a copy of the GNU Lesser General Public
18    License along with the GNU C Library; if not, see
19    <https://www.gnu.org/licenses/>.  */
21 #include <sysdep.h>
22 #include <asm/asi.h>
/* Defaults that apply when XCC has not been defined before this point
   (presumably by a file that includes this one - confirm):
   - branches written with %XCC test the xcc condition codes,
   - USE_BPR selects the brz (branch on register) form of the n == 0
     test at the top of __memchr,
   - %g2 and %g3, both used as temporaries in __memchr, are declared to
     the assembler as scratch registers.  */
23 #ifndef XCC
24 #define XCC xcc
25 #define USE_BPR
26         .register       %g2, #scratch
27         .register       %g3, #scratch
28 #endif
30         /* Normally, this uses the
31            ((xword - 0x0101010101010101) & 0x8080808080808080) test
32            to find out if any byte in xword could be zero. This is fast, but
33            it also gives a false alarm for any byte in the range 0x81-0xff.
34            This does not matter for correctness, because whenever the test
35            says there could be a zero byte, we check the word byte by byte,
36            but if bytes with the high bit set are common in the strings,
37            this will give poor performance. You can #define EIGHTBIT_NOT_RARE
38            and the algorithm will use a one tick slower, but more precise test
39            ((xword - 0x0101010101010101) & (~xword) & 0x8080808080808080),
40            which does not give any false alarms (but if some bits are set,
41            one cannot assume from it which bytes are zero and which are not).
42            It has yet to be measured what the correct default for glibc is
43            these days for an average user.
44          */
46         .text
47         .align          32
/* memchr (str, ch, n): leaf routine, returns through retl.

   In:      %o0 = str, %o1 = ch (reduced to its low 8 bits), %o2 = n.
   Out:     %o0 = address of the first byte equal to ch among the first n
            bytes at str, or 0 when there is none (including n == 0).
   Scratch: %g1, %g2, %g3, %g5, %o3, %o4, %o5 and the condition codes.

   Outline: the first byte is tested on its own, then bytes are tested
   one at a time until the pointer is 8-byte aligned (labels 21 and 22),
   then whole aligned words are scanned (labels 1 and 2).  Every word is
   XORed with ch replicated into all eight bytes, so a byte equal to ch
   becomes zero and the zero-byte test is applied to the result.  A
   flagged word is examined from its most significant byte downwards,
   and the return offsets show that this byte is the one at the lowest
   address.  Label 11 handles the final word, of which fewer than eight
   bytes (possibly none) lie before the end pointer.  */
48 ENTRY(__memchr)
        /* Keep only the low byte of ch.  */
49         and             %o1, 0xff, %o1                  /* IEU0         Group           */
        /* n == 0: return 0 through label 12.  The sll that follows the
           conditional section sits in the branch delay slot and starts
           replicating ch into %g3.  */
50 #ifdef USE_BPR
51         brz,pn          %o2, 12f                        /* CTI+IEU1                     */
52 #else
53         tst             %o2                             /* IEU1                         */
54         be,pn           %XCC, 12f                       /* CTI                          */
55 #endif
56          sll            %o1, 8, %g3                     /* IEU0         Group           */
        /* %o2 = str + n, the end pointer.  It is forced to all ones when
           the addition sets the carry flag tested through %XCC.  */
57         addcc           %o0, %o2, %o2                   /* IEU1                         */
58         movcs           %XCC, -1, %o2                   /* IEU0         Group           */
        /* While the first byte of str is loaded, build the constant
           0x01010101 in %g1 (widened to 64 bits below) and widen ch in
           %g3 to 32 bits.  %g5 ends up holding that 32-bit pattern shifted
           into the upper half, it is merged into %g3 later on whichever
           path is taken.  */
60         sethi           %hi(0x01010101), %g1            /* IEU0         Group           */
61         or              %g3, %o1, %g3                   /* IEU1                         */
62         ldub            [%o0], %o3                      /* Load                         */
63         sllx            %g3, 16, %g5                    /* IEU0         Group           */
65         or              %g1, %lo(0x01010101), %g1       /* IEU1                         */
66         sllx            %g1, 32, %g2                    /* IEU0         Group           */
67         or              %g3, %g5, %g3                   /* IEU1                         */
68         sllx            %g3, 32, %g5                    /* IEU0         Group           */
        /* First byte equals ch: label 13 returns %o0 unchanged.  The or in
           the delay slot completes %g1 = 0x0101010101010101.  */
70         cmp             %o3, %o1                        /* IEU1                         */
71         be,pn           %xcc, 13f                       /* CTI                          */
72          or             %g1, %g2, %g1                   /* IEU0         Group           */
73         andcc           %o0, 7, %g0                     /* IEU1                         */
        /* str is not 8-byte aligned: continue at 21 with %o0 advanced past
           the byte just tested.  The delay slot is annulled, so the add
           runs only when the branch is taken.  */
75         bne,a,pn        %icc, 21f                       /* CTI                          */
76          add            %o0, 1, %o0                     /* IEU0         Group           */
        /* Aligned start: load the first word, set %g2 = %g1 << 7 =
           0x8080808080808080 and finish %g3 = ch in all eight bytes.  */
77         ldx             [%o0], %o3                      /* Load         Group           */
78         sllx            %g1, 7, %g2                     /* IEU0                         */
80         or              %g3, %g5, %g3                   /* IEU1                         */
        /* Word loop entry: %o3 holds the word at the old %o0.  From here
           on %o0 runs ahead of the word under test.  */
81 1:      add             %o0, 8, %o0                     /* IEU0         Group           */
82         xor             %o3, %g3, %o4                   /* IEU1                         */
83                                                         /* %g1 = 0101010101010101       *
84                                                          * %g2 = 8080808080808080       *
85                                                          * %g3 =  c c c c c c c c       *
86                                                          * %o3 =      value             *
87                                                          * %o4 =   value XOR c          */
        /* Loop top: when %o0 is already beyond the end pointer (unsigned
           compare), the word in %o4 is the last one and label 11 finishes
           it.  The ldxa in the delay slot is not annulled, so it always
           reads the next word ahead of time.  NOTE(review): ASI_PNF comes
           from asm/asi.h, presumably a non-faulting ASI so that this
           read-ahead cannot trap - confirm.  */
88 2:      cmp             %o0, %o2                        /* IEU1         Group           */
90         bgu,pn          %XCC, 11f                       /* CTI                          */
91          ldxa           [%o0] ASI_PNF, %o3              /* Load                         */
        /* %o5 = (value XOR c) - 0x0101010101010101.  %o0 moves on again, so
           it is now 16 bytes past the start of the word under test.  */
92         sub             %o4, %g1, %o5                   /* IEU0         Group           */
93         add             %o0, 8, %o0                     /* IEU1                         */
94 #ifdef EIGHTBIT_NOT_RARE
        /* Precise variant: drop flags for bytes whose high bit was already
           set before the subtraction.  */
95         andn            %o5, %o4, %o5                   /* IEU0         Group           */
96 #endif
        /* No byte flagged: loop.  The annulled delay slot XORs the word
           that was read ahead, only when the branch is taken.  */
98         andcc           %o5, %g2, %g0                   /* IEU1         Group           */
99         be,a,pt         %xcc, 2b                        /* CTI                          */
100          xor            %o3, %g3, %o4                   /* IEU0                         */
        /* A byte was flagged.  Test the bytes of %o4 from bits 63-56
           downwards, the srlx in each delay slot prepares the next test.
           Labels 3 to 9 return the address of the byte found to be zero.  */
101         srlx            %o4, 56, %g5                    /* IEU0                         */
103         andcc           %g5, 0xff, %g0                  /* IEU1         Group           */
104         be,pn           %icc, 3f                        /* CTI                          */
105          srlx           %o4, 48, %g5                    /* IEU0                         */
106         andcc           %g5, 0xff, %g0                  /* IEU1         Group           */
108         be,pn           %icc, 4f                        /* CTI                          */
109          srlx           %o4, 40, %g5                    /* IEU0                         */
110         andcc           %g5, 0xff, %g0                  /* IEU1         Group           */
111         be,pn           %icc, 5f                        /* CTI                          */
113          srlx           %o4, 32, %g5                    /* IEU0                         */
114         andcc           %g5, 0xff, %g0                  /* IEU1         Group           */
115         be,pn           %icc, 6f                        /* CTI                          */
116          srlx           %o4, 24, %g5                    /* IEU0                         */
118         andcc           %g5, 0xff, %g0                  /* IEU1         Group           */
119         be,pn           %icc, 7f                        /* CTI                          */
120          srlx           %o4, 16, %g5                    /* IEU0                         */
121         andcc           %g5, 0xff, %g0                  /* IEU1         Group           */
123         be,pn           %icc, 8f                        /* CTI                          */
124          srlx           %o4, 8, %g5                     /* IEU0                         */
125         andcc           %g5, 0xff, %g0                  /* IEU1         Group           */
126         be,pn           %icc, 9f                        /* CTI                          */
        /* Delay slot: test the lowest byte of %o4.  Nonzero means no byte
           of the word matched (false alarm), so the loop resumes with the
           word that was read ahead.  Otherwise the match is the last byte
           of the word, at %o0 - 9.  */
128          andcc          %o4, 0xff, %g0                  /* IEU1         Group           */
129         bne,pt          %icc, 2b                        /* CTI                          */
130          xor            %o3, %g3, %o4                   /* IEU0                         */
131         retl                                            /* CTI+IEU1     Group           */
133          add            %o0, -9, %o0                    /* IEU0                         */
        /* Match returns for the word loop.  %o0 is 16 bytes past the start
           of the word, so byte k of the word is at %o0 - 16 + k.  */
135         .align          16
136 3:      retl                                            /* CTI+IEU1     Group           */
137          add            %o0, -16, %o0                   /* IEU0                         */
138 4:      retl                                            /* CTI+IEU1     Group           */
139          add            %o0, -15, %o0                   /* IEU0                         */
141 5:      retl                                            /* CTI+IEU1     Group           */
142          add            %o0, -14, %o0                   /* IEU0                         */
143 6:      retl                                            /* CTI+IEU1     Group           */
144          add            %o0, -13, %o0                   /* IEU0                         */
146 7:      retl                                            /* CTI+IEU1     Group           */
147          add            %o0, -12, %o0                   /* IEU0                         */
148 8:      retl                                            /* CTI+IEU1     Group           */
149          add            %o0, -11, %o0                   /* IEU0                         */
151 9:      retl                                            /* CTI+IEU1     Group           */
152          add            %o0, -10, %o0                   /* IEU0                         */
        /* Final word.  %o0 is stepped back to the start of the word held
           in %o4, and the sub in the delay slot turns %o2 into the number
           of bytes of that word that lie before the end pointer.  With no
           byte flagged, or no bytes left, return 0 through label 12.  */
153 11:     sub             %o4, %g1, %o5                   /* IEU0         Group           */
154         sub             %o0, 8, %o0                     /* IEU1                         */
156         andcc           %o5, %g2, %g0                   /* IEU1         Group           */
157         be,pt           %xcc, 12f                       /* CTI                          */
158          sub            %o2, %o0, %o2                   /* IEU0                         */
159         tst             %o2                             /* IEU1         Group           */
        /* Test the remaining bytes in address order.  After each byte, the
           cmp in the delay slot checks whether the byte count is used up
           before the next byte is looked at.  Labels 13 to 19 return
           %o0 + k for byte k.  */
161         be,pn           %XCC, 12f                       /* CTI                          */
162          srlx           %o4, 56, %g5                    /* IEU0                         */
163         andcc           %g5, 0xff, %g0                  /* IEU1         Group           */
164         be,pn           %icc, 13f                       /* CTI                          */
166          cmp            %o2, 1                          /* IEU0                         */
167         be,pn           %XCC, 12f                       /* CTI          Group           */
168          srlx           %o4, 48, %g5                    /* IEU0                         */
169         andcc           %g5, 0xff, %g0                  /* IEU1         Group           */
171         be,pn           %icc, 14f                       /* CTI                          */
172          cmp            %o2, 2                          /* IEU1         Group           */
173         be,pn           %XCC, 12f                       /* CTI                          */
174          srlx           %o4, 40, %g5                    /* IEU0                         */
176         andcc           %g5, 0xff, %g0                  /* IEU1         Group           */
177         be,pn           %icc, 15f                       /* CTI                          */
178          cmp            %o2, 3                          /* IEU1         Group           */
179         be,pn           %XCC, 12f                       /* CTI                          */
181          srlx           %o4, 32, %g5                    /* IEU0                         */
182         andcc           %g5, 0xff, %g0                  /* IEU1         Group           */
183         be,pn           %icc, 16f                       /* CTI                          */
184          cmp            %o2, 4                          /* IEU1         Group           */
186         be,pn           %XCC, 12f                       /* CTI                          */
187          srlx           %o4, 24, %g5                    /* IEU0                         */
188         andcc           %g5, 0xff, %g0                  /* IEU1         Group           */
189         be,pn           %icc, 17f                       /* CTI                          */
191          cmp            %o2, 5                          /* IEU1         Group           */
192         be,pn           %XCC, 12f                       /* CTI                          */
193          srlx           %o4, 16, %g5                    /* IEU0                         */
194         andcc           %g5, 0xff, %g0                  /* IEU1         Group           */
196         be,pn           %icc, 18f                       /* CTI                          */
197          cmp            %o2, 6                          /* IEU1         Group           */
198         be,pn           %XCC, 12f                       /* CTI                          */
199          srlx           %o4, 8, %g5                     /* IEU0                         */
201         andcc           %g5, 0xff, %g0                  /* IEU1         Group           */
202         be,pn           %icc, 19f                       /* CTI                          */
203          nop                                            /* IEU0                         */
        /* Not found: return 0.  Reached by falling through from the test of
           byte 6 as well, because at most seven bytes of the final word
           are in range.  */
204 12:     retl                                            /* CTI+IEU1     Group           */
206          clr            %o0                             /* IEU0                         */
207         nop                                             /* Stub                         */
        /* Match at %o0 itself: the very first byte of str, or byte 0 of
           the final word.  */
208 13:     retl                                            /* CTI+IEU1     Group           */
209          nop                                            /* IEU0                         */
        /* Match returns for the final word, byte k is at %o0 + k.  */
211 14:     retl                                            /* CTI+IEU1     Group           */
212          add            %o0, 1, %o0                     /* IEU0                         */
213 15:     retl                                            /* CTI+IEU1     Group           */
214          add            %o0, 2, %o0                     /* IEU0                         */
216 16:     retl                                            /* CTI+IEU1     Group           */
217          add            %o0, 3, %o0                     /* IEU0                         */
218 17:     retl                                            /* CTI+IEU1     Group           */
219          add            %o0, 4, %o0                     /* IEU0                         */
221 18:     retl                                            /* CTI+IEU1     Group           */
222          add            %o0, 5, %o0                     /* IEU0                         */
223 19:     retl                                            /* CTI+IEU1     Group           */
224          add            %o0, 6, %o0                     /* IEU0                         */
        /* Unaligned start, entered with %o0 = str + 1.  If that is already
           the end pointer, return 0 through label 12.  Otherwise finish
           the constants (%g2 = 0x8080808080808080 in the delay slot, %g3 =
           ch in all eight bytes) and load the next byte.  */
226 21:     cmp             %o0, %o2                        /* IEU1                         */
227         be,pn           %XCC, 12b                       /* CTI                          */
228          sllx           %g1, 7, %g2                     /* IEU0         Group           */
229         ldub            [%o0], %o3                      /* Load                         */
231         or              %g3, %g5, %g3                   /* IEU1                         */
        /* Byte loop.  Once %o0 is 8-byte aligned, switch to the word loop
           at label 1, the annulled delay slot loads the whole word only
           when that branch is taken.  */
232 22:     andcc           %o0, 7, %g0                     /* IEU1         Group           */
233         be,a,pn         %icc, 1b                        /* CTI                          */
234          ldx            [%o0], %o3                      /* Load                         */
        /* Compare the current byte.  The add in the delay slot always
           runs, so label 23 has to undo it on a match.  */
236         cmp             %o3, %o1                        /* IEU1         Group           */
237         be,pn           %xcc, 23f                       /* CTI                          */
238          add            %o0, 1, %o0                     /* IEU0                         */
        /* More bytes before the end pointer: load the next one in the
           annulled delay slot and loop.  Otherwise return 0.  */
239         cmp             %o0, %o2                        /* IEU1         Group           */
241         bne,a,pt        %XCC, 22b                       /* CTI                          */
242          ldub           [%o0], %o3                      /* Load                         */
243         retl                                            /* CTI+IEU1     Group           */
244          clr            %o0                             /* IEU0                         */
        /* Match in the byte loop: step %o0 back to the matching byte.  */
246 23:     retl                                            /* CTI+IEU1     Group           */
247          add            %o0, -1, %o0                    /* IEU0                         */
248 END(__memchr)
/* Publish the routine under its public name.  weak_alias and
   libc_hidden_builtin_def are macros defined outside this file,
   presumably making memchr a weak alias of __memchr and providing the
   hidden definition used for calls from inside the library - confirm
   against the glibc headers.  */
250 weak_alias (__memchr, memchr)
251 libc_hidden_builtin_def (memchr)