Update copyright notices with scripts/update-copyrights
[glibc.git] / sysdeps / sparc / sparc64 / memchr.S
blob7c756228addf16c2673399736330ad29e7690aca
1 /* memchr (str, ch, n) -- Return pointer to first occurrence of CH in STR less
2    than N.
3    For SPARC v9.
4    Copyright (C) 1998-2014 Free Software Foundation, Inc.
5    This file is part of the GNU C Library.
6    Contributed by Jan Vondrak <jvon4518@ss1000.ms.mff.cuni.cz> and
7                   Jakub Jelinek <jj@ultra.linux.cz>.
8    This version is developed using the same algorithm as the fast C
9    version which carries the following introduction:
10    Based on strlen implementation by Torbjorn Granlund (tege@sics.se),
11    with help from Dan Sahlin (dan@sics.se) and
12    commentary by Jim Blandy (jimb@ai.mit.edu);
13    adaptation to memchr suggested by Dick Karpinski (dick@cca.ucsf.edu),
14    and implemented by Roland McGrath (roland@ai.mit.edu).
16    The GNU C Library is free software; you can redistribute it and/or
17    modify it under the terms of the GNU Lesser General Public
18    License as published by the Free Software Foundation; either
19    version 2.1 of the License, or (at your option) any later version.
21    The GNU C Library is distributed in the hope that it will be useful,
22    but WITHOUT ANY WARRANTY; without even the implied warranty of
23    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
24    Lesser General Public License for more details.
26    You should have received a copy of the GNU Lesser General Public
27    License along with the GNU C Library; if not, see
28    <http://www.gnu.org/licenses/>.  */
30 #include <sysdep.h>
31 #include <asm/asi.h>
/* Defaults used when XCC has not been defined before this point
   (presumably by a file that includes this one -- TODO confirm):
     XCC      names the condition-code set used by the pointer and
              length branches below, which are written as %XCC;
     USE_BPR  selects the brz form of the "n == 0" test at the entry
              of __memchr instead of the tst/be pair;
     %g2, %g3 are announced to the assembler as scratch registers; the
              code below uses both as temporaries.  */
32 #ifndef XCC
33 #define XCC xcc
34 #define USE_BPR
35         .register       %g2, #scratch
36         .register       %g3, #scratch
37 #endif
        /* Normally, this uses the
           ((xword - 0x0101010101010101) & 0x8080808080808080) test
           to find out whether any byte in xword could be zero.  For memchr
           the test is applied to the data XORed with the search byte
           replicated into all eight bytes, so a zero byte marks a match.
           The test is fast, but it also gives a false alarm for any byte in
           the range 0x81-0xff.  This does not matter for correctness,
           because whenever the test says there could be a zero byte the word
           is checked byte by byte, but if bytes with the high bit set are
           common in the data, performance will be poor.  You can #define
           EIGHTBIT_NOT_RARE and the algorithm will use a test that is one
           tick slower but more precise,
           ((xword - 0x0101010101010101) & (~xword) & 0x8080808080808080),
           which does not give any false alarms (but if some bits are set,
           one cannot tell from them which bytes are zero and which are not).
           It has yet to be measured which default is the right one for an
           average glibc user these days.
         */
/* void *memchr (const void *str, int ch, size_t n)

   In:    %o0 = str, %o1 = ch (only the low 8 bits are used), %o2 = n.
   Out:   %o0 = address of the first byte equal to ch among the first n
          bytes at str, or 0 if there is none.
   Uses:  %g1, %g2, %g3, %g5, %o1-%o5 and the integer condition codes.
          Leaf routine: it returns with retl and never touches %sp.

   Outline:
     - the first byte is compared on its own;
     - if str is not 8-byte aligned, bytes are compared one at a time
       (labels 21/22) until the pointer is aligned or the end is hit;
     - aligned data is read 8 bytes per iteration (label 2).  Each word
       is XORed with ch replicated into every byte, so a matching byte
       becomes zero; the quick zero-byte test picks out candidate words,
       which are then checked byte by byte, most significant byte
       (lowest address) first;
     - the last, partial word is handled at label 11, which only looks
       at the bytes that lie before the end pointer.

   The end pointer str + n is clamped to the highest address when the
   addition wraps around, so a very large n (for example (size_t) -1)
   searches up to the top of the address space instead of making the
   end tests fire immediately and returning 0 for a byte that is there.

   The trailing IEU0/IEU1/CTI/Load/Group notes are the original
   instruction-scheduling annotations; the two instructions added for
   the clamp carry none.  */

        .text
        .align          32
ENTRY(__memchr)
        and             %o1, 0xff, %o1                  /* IEU0         Group           */
#ifdef USE_BPR
        brz,pn          %o2, 12f                        /* CTI+IEU1                     */
#else
        tst             %o2                             /* IEU1                         */
        be,pn           %XCC, 12f                       /* CTI                          */
#endif
         sll            %o1, 8, %g3                     /* IEU0         Group           */
        add             %o0, %o2, %o2                   /* IEU1                         */

        /* %o2 = str + n, one past the last byte to examine.  n is not
           zero here, so the sum is below str only if it wrapped; in
           that case use the largest address as the end.  Nothing
           reads the condition codes before the next cmp.  */
        cmp             %o2, %o0                        /* carry set: end < str         */
        movcs           %XCC, -1, %o2                   /* clamp wrapped end pointer    */

        /* Build 0x0101010101010101 in %g1 and ch replicated into every
           byte in %g3, while the first byte is loaded and compared.  */
        sethi           %hi(0x01010101), %g1            /* IEU0         Group           */
        or              %g3, %o1, %g3                   /* IEU1                         */
        ldub            [%o0], %o3                      /* Load                         */
        sllx            %g3, 16, %g5                    /* IEU0         Group           */

        or              %g1, %lo(0x01010101), %g1       /* IEU1                         */
        sllx            %g1, 32, %g2                    /* IEU0         Group           */
        or              %g3, %g5, %g3                   /* IEU1                         */
        sllx            %g3, 32, %g5                    /* IEU0         Group           */

        /* First byte matches: label 13 returns %o0 unchanged.  */
        cmp             %o3, %o1                        /* IEU1                         */
        be,pn           %xcc, 13f                       /* CTI                          */
         or             %g1, %g2, %g1                   /* IEU0         Group           */
        andcc           %o0, 7, %g0                     /* IEU1                         */

        /* Unaligned start: continue at 21 with %o0 already advanced
           past the byte just checked (the annulled delay slot runs
           only when the branch is taken).  */
        bne,a,pn        %icc, 21f                       /* CTI                          */
         add            %o0, 1, %o0                     /* IEU0         Group           */
        ldx             [%o0], %o3                      /* Load         Group           */
        sllx            %g1, 7, %g2                     /* IEU0                         */

        or              %g3, %g5, %g3                   /* IEU1                         */

        /* 1: entry to the word loop.  %o3 holds the word at %o0; step
           %o0 past it and form the XOR with the replicated ch.  */
1:      add             %o0, 8, %o0                     /* IEU0         Group           */
        xor             %o3, %g3, %o4                   /* IEU1                         */
                                                        /* %g1 = 0101010101010101       *
                                                         * %g2 = 8080808080808080       *
                                                         * %g3 =  c c c c c c c c       *
                                                         * %o3 =      value             *
                                                         * %o4 =   value XOR c          */

        /* 2: word loop.  %o4 is the XORed word that ended at %o0.  If
           %o0 is beyond the end, that word is only partly valid and is
           finished at 11.  Otherwise the next word is fetched through
           ASI_PNF in the delay slot and %o0 is advanced again, so while
           %o4 is being examined %o0 is 16 bytes past its first byte.  */
2:      cmp             %o0, %o2                        /* IEU1         Group           */

        bgu,pn          %XCC, 11f                       /* CTI                          */
         ldxa           [%o0] ASI_PNF, %o3              /* Load                         */
        sub             %o4, %g1, %o5                   /* IEU0         Group           */
        add             %o0, 8, %o0                     /* IEU1                         */
#ifdef EIGHTBIT_NOT_RARE
        andn            %o5, %o4, %o5                   /* IEU0         Group           */
#endif

        /* No candidate zero byte: loop, forming the next XORed word in
           the annulled delay slot.  */
        andcc           %o5, %g2, %g0                   /* IEU1         Group           */
        be,a,pt         %xcc, 2b                        /* CTI                          */
         xor            %o3, %g3, %o4                   /* IEU0                         */

        /* Candidate word: test its bytes from the most significant one
           (lowest address) down; labels 3-9 return the address.  */
        srlx            %o4, 56, %g5                    /* IEU0                         */

        andcc           %g5, 0xff, %g0                  /* IEU1         Group           */
        be,pn           %icc, 3f                        /* CTI                          */
         srlx           %o4, 48, %g5                    /* IEU0                         */
        andcc           %g5, 0xff, %g0                  /* IEU1         Group           */

        be,pn           %icc, 4f                        /* CTI                          */
         srlx           %o4, 40, %g5                    /* IEU0                         */
        andcc           %g5, 0xff, %g0                  /* IEU1         Group           */
        be,pn           %icc, 5f                        /* CTI                          */

         srlx           %o4, 32, %g5                    /* IEU0                         */
        andcc           %g5, 0xff, %g0                  /* IEU1         Group           */
        be,pn           %icc, 6f                        /* CTI                          */
         srlx           %o4, 24, %g5                    /* IEU0                         */

        andcc           %g5, 0xff, %g0                  /* IEU1         Group           */
        be,pn           %icc, 7f                        /* CTI                          */
         srlx           %o4, 16, %g5                    /* IEU0                         */
        andcc           %g5, 0xff, %g0                  /* IEU1         Group           */

        be,pn           %icc, 8f                        /* CTI                          */
         srlx           %o4, 8, %g5                     /* IEU0                         */
        andcc           %g5, 0xff, %g0                  /* IEU1         Group           */
        be,pn           %icc, 9f                        /* CTI                          */

        /* Last byte of the word.  If it is not zero either, the quick
           test gave a false alarm: go round the loop again.  */
         andcc          %o4, 0xff, %g0                  /* IEU1         Group           */
        bne,pt          %icc, 2b                        /* CTI                          */
         xor            %o3, %g3, %o4                   /* IEU0                         */
        retl                                            /* CTI+IEU1     Group           */

         add            %o0, -9, %o0                    /* IEU0                         */

        /* 3-9: match in byte 0-6 of the word; %o0 is 16 past byte 0.  */
        .align          16
3:      retl                                            /* CTI+IEU1     Group           */
         add            %o0, -16, %o0                   /* IEU0                         */
4:      retl                                            /* CTI+IEU1     Group           */
         add            %o0, -15, %o0                   /* IEU0                         */

5:      retl                                            /* CTI+IEU1     Group           */
         add            %o0, -14, %o0                   /* IEU0                         */
6:      retl                                            /* CTI+IEU1     Group           */
         add            %o0, -13, %o0                   /* IEU0                         */

7:      retl                                            /* CTI+IEU1     Group           */
         add            %o0, -12, %o0                   /* IEU0                         */
8:      retl                                            /* CTI+IEU1     Group           */
         add            %o0, -11, %o0                   /* IEU0                         */

9:      retl                                            /* CTI+IEU1     Group           */
         add            %o0, -10, %o0                   /* IEU0                         */

        /* 11: final, partial word.  Rewind %o0 to the first byte of the
           word in %o4 and turn %o2 into the number of bytes of it that
           are still in range.  Each byte is tested only after checking
           that the count has not run out; labels 13-19 return the
           address and label 12 returns 0.  */
11:     sub             %o4, %g1, %o5                   /* IEU0         Group           */
        sub             %o0, 8, %o0                     /* IEU1                         */

        andcc           %o5, %g2, %g0                   /* IEU1         Group           */
        be,pt           %xcc, 12f                       /* CTI                          */
         sub            %o2, %o0, %o2                   /* IEU0                         */
        tst             %o2                             /* IEU1         Group           */

        be,pn           %XCC, 12f                       /* CTI                          */
         srlx           %o4, 56, %g5                    /* IEU0                         */
        andcc           %g5, 0xff, %g0                  /* IEU1         Group           */
        be,pn           %icc, 13f                       /* CTI                          */

         cmp            %o2, 1                          /* IEU0                         */
        be,pn           %XCC, 12f                       /* CTI          Group           */
         srlx           %o4, 48, %g5                    /* IEU0                         */
        andcc           %g5, 0xff, %g0                  /* IEU1         Group           */

        be,pn           %icc, 14f                       /* CTI                          */
         cmp            %o2, 2                          /* IEU1         Group           */
        be,pn           %XCC, 12f                       /* CTI                          */
         srlx           %o4, 40, %g5                    /* IEU0                         */

        andcc           %g5, 0xff, %g0                  /* IEU1         Group           */
        be,pn           %icc, 15f                       /* CTI                          */
         cmp            %o2, 3                          /* IEU1         Group           */
        be,pn           %XCC, 12f                       /* CTI                          */

         srlx           %o4, 32, %g5                    /* IEU0                         */
        andcc           %g5, 0xff, %g0                  /* IEU1         Group           */
        be,pn           %icc, 16f                       /* CTI                          */
         cmp            %o2, 4                          /* IEU1         Group           */

        be,pn           %XCC, 12f                       /* CTI                          */
         srlx           %o4, 24, %g5                    /* IEU0                         */
        andcc           %g5, 0xff, %g0                  /* IEU1         Group           */
        be,pn           %icc, 17f                       /* CTI                          */

         cmp            %o2, 5                          /* IEU1         Group           */
        be,pn           %XCC, 12f                       /* CTI                          */
         srlx           %o4, 16, %g5                    /* IEU0                         */
        andcc           %g5, 0xff, %g0                  /* IEU1         Group           */

        be,pn           %icc, 18f                       /* CTI                          */
         cmp            %o2, 6                          /* IEU1         Group           */
        be,pn           %XCC, 12f                       /* CTI                          */
         srlx           %o4, 8, %g5                     /* IEU0                         */

        andcc           %g5, 0xff, %g0                  /* IEU1         Group           */
        be,pn           %icc, 19f                       /* CTI                          */
         nop                                            /* IEU0                         */

        /* 12: not found, return 0.  */
12:     retl                                            /* CTI+IEU1     Group           */

         clr            %o0                             /* IEU0                         */
        nop                                             /* Stub                         */

        /* 13-19: match at offset 0-6 from %o0.  */
13:     retl                                            /* CTI+IEU1     Group           */
         nop                                            /* IEU0                         */

14:     retl                                            /* CTI+IEU1     Group           */
         add            %o0, 1, %o0                     /* IEU0                         */
15:     retl                                            /* CTI+IEU1     Group           */
         add            %o0, 2, %o0                     /* IEU0                         */

16:     retl                                            /* CTI+IEU1     Group           */
         add            %o0, 3, %o0                     /* IEU0                         */
17:     retl                                            /* CTI+IEU1     Group           */
         add            %o0, 4, %o0                     /* IEU0                         */

18:     retl                                            /* CTI+IEU1     Group           */
         add            %o0, 5, %o0                     /* IEU0                         */
19:     retl                                            /* CTI+IEU1     Group           */
         add            %o0, 6, %o0                     /* IEU0                         */

        /* 21: unaligned start; %o0 points at the second byte.  Stop if
           that is already the end, otherwise finish building the
           constants in %g2 and %g3 and load the byte.  */
21:     cmp             %o0, %o2                        /* IEU1                         */
        be,pn           %XCC, 12b                       /* CTI                          */
         sllx           %g1, 7, %g2                     /* IEU0         Group           */
        ldub            [%o0], %o3                      /* Load                         */

        or              %g3, %g5, %g3                   /* IEU1                         */

        /* 22: byte loop.  Once %o0 is 8-byte aligned, load the whole
           word in the annulled delay slot and enter the word loop at
           1; the byte in %o3 is covered by that word.  Otherwise
           compare the byte, step %o0, and stop at the end pointer.  */
22:     andcc           %o0, 7, %g0                     /* IEU1         Group           */
        be,a,pn         %icc, 1b                        /* CTI                          */
         ldx            [%o0], %o3                      /* Load                         */

        cmp             %o3, %o1                        /* IEU1         Group           */
        be,pn           %xcc, 23f                       /* CTI                          */
         add            %o0, 1, %o0                     /* IEU0                         */
        cmp             %o0, %o2                        /* IEU1         Group           */

        bne,a,pt        %XCC, 22b                       /* CTI                          */
         ldub           [%o0], %o3                      /* Load                         */
        retl                                            /* CTI+IEU1     Group           */
         clr            %o0                             /* IEU0                         */

        /* 23: match in the byte loop; %o0 was already stepped past it.  */
23:     retl                                            /* CTI+IEU1     Group           */
         add            %o0, -1, %o0                    /* IEU0                         */
END(__memchr)
/* Publish the routine under its standard name: memchr is declared as a
   weak alias of __memchr.  libc_hidden_builtin_def is a macro defined
   outside this file; presumably it emits the hidden definition used
   for calls to memchr from inside the library -- TODO confirm.  */
258 weak_alias (__memchr, memchr)
259 libc_hidden_builtin_def (memchr)