Update copyright notices with scripts/update-copyrights
[glibc.git] / sysdeps / sparc / sparc64 / stpcpy.S
blobbd90d8aeed5fd2a09bfeb99d327dd08cdf240fca
1 /* Copy SRC to DEST returning the address of the terminating '\0' in DEST.
2    For SPARC v9.
3    Copyright (C) 1998-2014 Free Software Foundation, Inc.
4    This file is part of the GNU C Library.
5    Contributed by Jan Vondrak <jvon4518@ss1000.ms.mff.cuni.cz> and
6                   Jakub Jelinek <jj@ultra.linux.cz>.
8    The GNU C Library is free software; you can redistribute it and/or
9    modify it under the terms of the GNU Lesser General Public
10    License as published by the Free Software Foundation; either
11    version 2.1 of the License, or (at your option) any later version.
13    The GNU C Library is distributed in the hope that it will be useful,
14    but WITHOUT ANY WARRANTY; without even the implied warranty of
15    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
16    Lesser General Public License for more details.
18    You should have received a copy of the GNU Lesser General Public
19    License along with the GNU C Library; if not, see
20    <http://www.gnu.org/licenses/>.  */
22 #include <sysdep.h>
23 #include <asm/asi.h>
/* Declare %g2, %g3 and %g6 to the assembler as scratch registers; all
   three are written by the __stpcpy code in this file.  The declarations
   are skipped when XCC is defined.  NOTE(review): XCC is not defined in
   this file -- confirm which header provides it and why it makes these
   declarations unnecessary.  */
24 #ifndef XCC
25         .register       %g2, #scratch
26         .register       %g3, #scratch
27         .register       %g6, #scratch
28 #endif
30         /* Normally, this uses
31            ((xword - 0x0101010101010101) & 0x8080808080808080) test
32            to find out if any byte in xword could be zero. This is fast, but
33            also gives a false alarm for any byte in the range 0x81-0xff. It does
34            not matter for correctness, as if this test tells us there could
35            be some zero byte, we check it byte by byte, but if bytes with
36            high bits set are common in the strings, then this will give poor
37            performance. You can #define EIGHTBIT_NOT_RARE and the algorithm
38            will use one tick slower, but more precise test
39            ((xword - 0x0101010101010101) & (~xword) & 0x8080808080808080),
40            which does not give any false alarms (but if some bits are set,
41            one cannot assume from it which bytes are zero and which are not).
42            It has yet to be measured which default is the right one for an
43            average glibc user these days.
44          */
46         .text
47         .align          32
        /* __stpcpy -- copy the NUL-terminated string at SRC to DEST and
           return the address of the terminating '\0' written to DEST.

           In:    %o0 = DEST, %o1 = SRC
           Out:   %o0 = address of the '\0' stored in DEST
           Uses:  %o2-%o5 and %g1-%g6 as scratch.  The routine returns with
                  retl and contains no save/restore and no %sp access.

           Constants held for the whole routine:
             %g1 = 0x0101010101010101
             %g2 = 0x8080808080808080   (%g1 << 7)

           Layout:
             entry     build the constants, dispatch on DEST/SRC alignment
             1, 2, 3   DEST and SRC both 8-byte aligned: xword copy loop
             4 - 11    tails of the aligned loop
             12, 13    DEST not 8-byte aligned: byte copy until it is
             14, 15    DEST aligned, SRC not: xword loop that merges two
                       source xwords with shifts
             16 - 30   tails of the shifted loop

           An instruction indented by one extra space sits in the delay
           slot of the branch above it.  For a conditional branch written
           with ",a" (annul) the delay slot executes only when the branch
           is taken; without ",a" it always executes.

           Within a loaded xword the most significant byte is the one at
           the lowest address, so the tails test bytes from bit 56 down.

           Both loops advance %o0 before testing the current xword, which
           is why every store of that xword is addressed relative to
           [%o0 - 8].

           The look-ahead loads use ASI_PNF from <asm/asi.h>.
           NOTE(review): presumably the no-fault ASI, so that reading one
           xword (or byte) past the terminator cannot trap -- confirm
           against asi.h.  */
48 ENTRY(__stpcpy)
        /* Entry: build 0x01010101 in %g1 and %g1 << 32 in %g2 while testing
           DEST alignment.  If DEST is misaligned go to 12f, which finishes
           the two constants itself.  The andcc in that branch's delay slot
           sets %g3 = SRC & 7 and the flags used by the second bne (the "or"
           in between does not touch the condition codes); a misaligned SRC
           goes to 14f.  The sllx in the second delay slot always executes
           and leaves %g2 = 0x8080808080808080.  */
49         sethi           %hi(0x01010101), %g1            /* IEU0         Group           */
50         or              %g1, %lo(0x01010101), %g1       /* IEU0         Group           */
51         andcc           %o0, 7, %g0                     /* IEU1                         */
52         sllx            %g1, 32, %g2                    /* IEU0         Group           */
54         bne,pn          %icc, 12f                       /* CTI                          */
55          andcc          %o1, 7, %g3                     /* IEU1                         */
56         or              %g1, %g2, %g1                   /* IEU0         Group           */
57         bne,pn          %icc, 14f                       /* CTI                          */
59          sllx           %g1, 7, %g2                     /* IEU0         Group           */
        /* Aligned loop.  %g3 = xword currently being tested/stored,
           %o3 = next xword, loaded ahead of time at 3:,
           %o2 = %g3 - %g1 (and additionally & ~%g3 with EIGHTBIT_NOT_RARE).
           If (%o2 & %g2) == 0 no byte of %g3 is zero: the annulled delay
           slot stores %g3 and the loop continues at 2b with the xword
           already in %o3.  */
60 1:      ldx             [%o1], %o3                      /* Load                         */
61         add             %o1, 8, %o1                     /* IEU1                         */
62 2:      mov             %o3, %g3                        /* IEU0         Group           */
64         sub             %o3, %g1, %o2                   /* IEU1                         */
65 3:      ldxa            [%o1] ASI_PNF, %o3              /* Load                         */
66 #ifdef EIGHTBIT_NOT_RARE
67         andn            %o2, %g3, %o2                   /* IEU0         Group           */
68 #endif
69         add             %o0, 8, %o0                     /* IEU0         Group           */
70         andcc           %o2, %g2, %g0                   /* IEU1                         */
72         add             %o1, 8, %o1                     /* IEU0         Group           */
73         be,a,pt         %xcc, 2b                        /* CTI                          */
74          stx            %g3, [%o0 - 8]                  /* Store                        */
        /* The test flagged %g3 (nothing has been stored for it yet).  Check
           its bytes one at a time, most significant (lowest address) first.
           %g5 and %g4 alternately receive %g3 shifted so that the byte under
           test is in bits 7..0; each shift for the next test is done in the
           delay slot of the previous branch, and the tail code reuses the
           shifted values for its stores.  Zero byte at [%o0 - 8 + k]:
           k=0 -> 11f, 1 -> 10f, 2 -> 9f, 3 -> 8f, 4 -> 7f, 5 -> 6f, 6 -> 5f.  */
75         srlx            %g3, 56, %g5                    /* IEU0         Group           */
77         andcc           %g5, 0xff, %g0                  /* IEU1         Group           */
78         be,pn           %icc, 11f                       /* CTI                          */
79          srlx           %g3, 48, %g4                    /* IEU0                         */
80         andcc           %g4, 0xff, %g0                  /* IEU1         Group           */
82         be,pn           %icc, 10f                       /* CTI                          */
83          srlx           %g3, 40, %g5                    /* IEU0                         */
84         andcc           %g5, 0xff, %g0                  /* IEU1         Group           */
85         be,pn           %icc, 9f                        /* CTI                          */
87          srlx           %g3, 32, %g4                    /* IEU0                         */
88         andcc           %g4, 0xff, %g0                  /* IEU1         Group           */
89         be,pn           %icc, 8f                        /* CTI                          */
90          srlx           %g3, 24, %g5                    /* IEU0                         */
92         andcc           %g5, 0xff, %g0                  /* IEU1         Group           */
93         be,pn           %icc, 7f                        /* CTI                          */
94          srlx           %g3, 16, %g4                    /* IEU0                         */
95         andcc           %g4, 0xff, %g0                  /* IEU1         Group           */
97         be,pn           %icc, 6f                        /* CTI                          */
98          srlx           %g3, 8, %g5                     /* IEU0                         */
99         andcc           %g5, 0xff, %g0                  /* IEU1         Group           */
100         be,pn           %icc, 5f                        /* CTI                          */
102          sub            %o3, %g1, %o2                   /* IEU0                         */
        /* The first seven bytes are non-zero, so the whole xword can be
           stored.  If its last byte is non-zero as well the alarm was false:
           re-enter the loop at 3b.  %o2 for the next xword was computed by
           the sub in the delay slot just above, and the mov in the delay
           slot below moves that xword into %g3.  */
103         stx             %g3, [%o0 - 8]                  /* Store        Group           */
104         andcc           %g3, 0xff, %g0                  /* IEU1                         */
105         bne,pt          %icc, 3b                        /* CTI                          */
107          mov            %o3, %g3                        /* IEU0         Group           */
        /* 4: the '\0' is the byte just below %o0 (last byte of the xword
           stored above, or the byte just copied by loop 13).  */
108 4:      retl                                            /* CTI+IEU1     Group           */
109          sub            %o0, 1, %o0                     /* IEU0                         */
        /* Tails of the aligned loop.  Each one stores the bytes of %g3 up
           to and including the '\0' using stw/sth/stb at naturally aligned
           offsets from %o0 - 8, and returns the address of the '\0' (kept
           in %g6 until the retl delay slot, except at 11).

           6: '\0' at [%o0 - 3]; %g4 = %g3 >> 16 from the byte test.
           5: '\0' at [%o0 - 2]; %g5 = %g3 >> 8 from the byte test.  */
111         .align          16
112 6:      ba,pt           %xcc, 23f                       /* CTI          Group           */
113          sub            %o0, 3, %g6                     /* IEU0                         */
114 5:      sub             %o0, 2, %g6                     /* IEU0         Group           */
115         stb             %g5, [%o0 - 2]                  /* Store                        */
117         srlx            %g3, 16, %g4                    /* IEU0         Group           */
118 23:     sth             %g4, [%o0 - 4]                  /* Store                        */
119         srlx            %g3, 32, %g4                    /* IEU0         Group           */
120         stw             %g4, [%o0 - 8]                  /* Store                        */
122         retl                                            /* CTI+IEU1     Group           */
123          mov            %g6, %o0                        /* IEU0                         */
        /* 8: '\0' at [%o0 - 5]; %g4 = %g3 >> 32 from the byte test.
           7: '\0' at [%o0 - 4]; %g5 = %g3 >> 24 from the byte test.  */
124 8:      ba,pt           %xcc, 24f                       /* CTI          Group           */
125          sub            %o0, 5, %g6                     /* IEU0                         */
127 7:      sub             %o0, 4, %g6                     /* IEU0         Group           */
128         stb             %g5, [%o0 - 4]                  /* Store                        */
129         srlx            %g3, 32, %g4                    /* IEU0         Group           */
130 24:     stw             %g4, [%o0 - 8]                  /* Store                        */
132         retl                                            /* CTI+IEU1     Group           */
133          mov            %g6, %o0                        /* IEU0                         */
        /* 10: '\0' at [%o0 - 7]; %g4 = %g3 >> 48 from the byte test.
           9:  '\0' at [%o0 - 6]; %g5 = %g3 >> 40 from the byte test.  */
134 10:     ba,pt           %xcc, 25f                       /* CTI          Group           */
135          sub            %o0, 7, %g6                     /* IEU0                         */
137 9:      sub             %o0, 6, %g6                     /* IEU0         Group           */
138         stb             %g5, [%o0 - 6]                  /* Store                        */
139         srlx            %g3, 48, %g4                    /* IEU0                         */
140 25:     sth             %g4, [%o0 - 8]                  /* Store        Group           */
142         retl                                            /* CTI+IEU1     Group           */
143          mov            %g6, %o0                        /* IEU0                         */
        /* 11: the very first byte is the '\0'; %g5 = %g3 >> 56.  */
144 11:     stb             %g5, [%o0 - 8]                  /* Store        Group           */
145         retl                                            /* CTI+IEU1     Group           */
147          sub            %o0, 8, %o0                     /* IEU0                         */
        /* 12: DEST is not 8-byte aligned.  Finish building %g1 and %g2
           (entry branched here before doing so), then copy single bytes.  */
149         .align          16
150 12:     or              %g1, %g2, %g1                   /* IEU0         Group           */
151         ldub            [%o1], %o3                      /* Load                         */
152         sllx            %g1, 7, %g2                     /* IEU0         Group           */
153         stb             %o3, [%o0]                      /* Store        Group           */
        /* 13: %o3 = byte just stored.  Advance both pointers; if that byte
           was the '\0' return through 4b.  The lduba in the (non-annulled)
           delay slot fetches the next source byte either way.  While DEST
           is still misaligned, the annulled delay slot of the bne stores
           that byte and the loop repeats.  */
155 13:     add             %o0, 1, %o0                     /* IEU0                         */
156         add             %o1, 1, %o1                     /* IEU1                         */
157         andcc           %o3, 0xff, %g0                  /* IEU1         Group           */
158         be,pn           %icc, 4b                        /* CTI                          */
160          lduba          [%o1] ASI_PNF, %o3              /* Load                         */
161         andcc           %o0, 7, %g0                     /* IEU1         Group           */
162         bne,a,pt        %icc, 13b                       /* CTI                          */
163          stb            %o3, [%o0]                      /* Store                        */
        /* DEST is aligned now; the byte in %o3 has not been stored and is
           re-read as part of the next xword.  If SRC is aligned too, join
           the aligned loop at 1b (the ldx in the annulled delay slot is
           repeated by the ldx at 1:).  Otherwise fall into 14 with
           %g3 = SRC & 7.  */
165         andcc           %o1, 7, %g3                     /* IEU1         Group           */
166         be,a,pt         %icc, 1b                        /* CTI                          */
167          ldx            [%o1], %o3                      /* Load                         */
        /* 14: DEST aligned, SRC misaligned by %g3 (1..7) bytes.  Round SRC
           down to an xword boundary and build every output xword from two
           consecutive source xwords:
               out = (prev << %g5) | (next >> %g4)
           with %g5 = 8 * %g3 and %g4 = 64 - %g5.  The first load therefore
           also reads the %g3 bytes in front of SRC; the left shift at 15:
           discards them.  The flags set by orcc/addcc here (and by the
           subcc instructions in the tails) are not tested by any branch in
           this routine.  */
168 14:     orcc            %g0, 64, %g4                    /* IEU1         Group           */
170         sllx            %g3, 3, %g5                     /* IEU0                         */
171         sub             %o1, %g3, %o1                   /* IEU0         Group           */
172         sub             %g4, %g5, %g4                   /* IEU1                         */
173                                                         /* %g1 = 0101010101010101       *
174                                                          * %g2 = 8080808080808080       *
175                                                          * %g3 = source alignment       *
176                                                          * %g5 = number of bits to shift left  *
177                                                          * %g4 = number of bits to shift right */
178         ldxa            [%o1] ASI_PNF, %o5              /* Load         Group           */
180         addcc           %o1, 8, %o1                     /* IEU1                         */
        /* 15: shifted loop.  %o5 = most recently loaded source xword,
           %o3 = merged output xword, %o4 = scratch for the zero test.
           When no byte of %o3 can be zero the annulled delay slot stores
           it and the loop repeats.  */
181 15:     sllx            %o5, %g5, %o3                   /* IEU0         Group           */
182         ldxa            [%o1] ASI_PNF, %o5              /* Load                         */
183         srlx            %o5, %g4, %o4                   /* IEU0         Group           */
185         add             %o0, 8, %o0                     /* IEU1                         */
186         or              %o3, %o4, %o3                   /* IEU0         Group           */
187         add             %o1, 8, %o1                     /* IEU1                         */
188         sub             %o3, %g1, %o4                   /* IEU0         Group           */
190 #ifdef EIGHTBIT_NOT_RARE
191         andn            %o4, %o3, %o4                   /* IEU0         Group           */
192 #endif
193         andcc           %o4, %g2, %g0                   /* IEU1         Group           */
194         be,a,pt         %xcc, 15b                       /* CTI                          */
195          stx            %o3, [%o0 - 8]                  /* Store                        */
        /* The test flagged %o3.  Check its bytes from the most significant
           down, using %o4 as the only temporary.  Zero byte at
           [%o0 - 8 + k]:
           k=0 -> 22f, 1 -> 21f, 2 -> 20f, 3 -> 19f, 4 -> 18f, 5 -> 17f,
           6 -> 16f.  */
196         srlx            %o3, 56, %o4                    /* IEU0         Group           */
198         andcc           %o4, 0xff, %g0                  /* IEU1         Group           */
199         be,pn           %icc, 22f                       /* CTI                          */
200          srlx           %o3, 48, %o4                    /* IEU0                         */
201         andcc           %o4, 0xff, %g0                  /* IEU1         Group           */
203         be,pn           %icc, 21f                       /* CTI                          */
204          srlx           %o3, 40, %o4                    /* IEU0                         */
205         andcc           %o4, 0xff, %g0                  /* IEU1         Group           */
206         be,pn           %icc, 20f                       /* CTI                          */
208          srlx           %o3, 32, %o4                    /* IEU0                         */
209         andcc           %o4, 0xff, %g0                  /* IEU1         Group           */
210         be,pn           %icc, 19f                       /* CTI                          */
211          srlx           %o3, 24, %o4                    /* IEU0                         */
213         andcc           %o4, 0xff, %g0                  /* IEU1         Group           */
214         be,pn           %icc, 18f                       /* CTI                          */
215          srlx           %o3, 16, %o4                    /* IEU0                         */
216         andcc           %o4, 0xff, %g0                  /* IEU1         Group           */
218         be,pn           %icc, 17f                       /* CTI                          */
219          srlx           %o3, 8, %o4                     /* IEU0                         */
220         andcc           %o4, 0xff, %g0                  /* IEU1         Group           */
221         be,pn           %icc, 16f                       /* CTI                          */
        /* The andcc in the delay slot below tests the last byte.  The stx
           in the delay slot of the bne always executes: it stores the whole
           xword whether the loop continues (false alarm) or the last byte
           is the '\0', in which case the result is %o0 - 1.  */
223          andcc          %o3, 0xff, %g0                  /* IEU1         Group           */
224         bne,pn          %icc, 15b                       /* CTI                          */
225          stx            %o3, [%o0 - 8]                  /* Store                        */
226         retl                                            /* CTI+IEU1     Group           */
228          sub            %o0, 1, %o0                     /* IEU0                         */
        /* Tails of the shifted loop, '\0' in the second half of the xword.
           %g6 = address of the '\0' (return value).  Entry points 17, 18
           and 19 jump into the fall-through chain 16 -> 26 -> 27 -> 28,
           which stores bytes at [%o0 - 2], [%o0 - 3], [%o0 - 4] with stb
           and the first four bytes with a single stw.  %o4 is recomputed
           from %o3 before every store.  */
230         .align          16
231 17:     ba,pt           %xcc, 26f                       /* CTI          Group           */
232          subcc          %o0, 3, %g6                     /* IEU1                         */
233 18:     ba,pt           %xcc, 27f                       /* CTI          Group           */
234          subcc          %o0, 4, %g6                     /* IEU1                         */
236 19:     ba,pt           %xcc, 28f                       /* CTI          Group           */
237          subcc          %o0, 5, %g6                     /* IEU1                         */
238 16:     subcc           %o0, 2, %g6                     /* IEU1         Group           */
239         srlx            %o3, 8, %o4                     /* IEU0                         */
241         stb             %o4, [%o0 - 2]                  /* Store                        */
242 26:     srlx            %o3, 16, %o4                    /* IEU0         Group           */
243         stb             %o4, [%o0 - 3]                  /* Store                        */
244 27:     srlx            %o3, 24, %o4                    /* IEU0         Group           */
246         stb             %o4, [%o0 - 4]                  /* Store                        */
247 28:     srlx            %o3, 32, %o4                    /* IEU0         Group           */
248         stw             %o4, [%o0 - 8]                  /* Store                        */
249         retl                                            /* CTI+IEU1     Group           */
251          mov            %g6, %o0                        /* IEU0                         */
        /* Tails of the shifted loop, '\0' among the first three bytes.
           Entry points 21 and 22 jump into the fall-through chain
           20 -> 29 -> 30, which stores bytes at [%o0 - 6], [%o0 - 7] and
           [%o0 - 8] with stb.  */
253         .align          16
254 21:     ba,pt           %xcc, 29f                       /* CTI          Group           */
255          subcc          %o0, 7, %g6                     /* IEU1                         */
256 22:     ba,pt           %xcc, 30f                       /* CTI          Group           */
257          subcc          %o0, 8, %g6                     /* IEU1                         */
259 20:     subcc           %o0, 6, %g6                     /* IEU1         Group           */
260         srlx            %o3, 40, %o4                    /* IEU0                         */
261         stb             %o4, [%o0 - 6]                  /* Store                        */
262 29:     srlx            %o3, 48, %o4                    /* IEU0         Group           */
264         stb             %o4, [%o0 - 7]                  /* Store                        */
265 30:     srlx            %o3, 56, %o4                    /* IEU0         Group           */
266         stb             %o4, [%o0 - 8]                  /* Store                        */
267         retl                                            /* CTI+IEU1     Group           */
269          mov            %g6, %o0                        /* IEU0                         */
270 END(__stpcpy)
/* Symbol bookkeeping for the routine defined above.  weak_alias,
   libc_hidden_def and libc_hidden_builtin_def are macros defined outside
   this file.  NOTE(review): presumably the first makes `stpcpy' a weak
   alias of `__stpcpy' and the other two set up the library-internal
   hidden aliases for the two names -- confirm against the macro
   definitions.  */
272 weak_alias (__stpcpy, stpcpy)
273 libc_hidden_def (__stpcpy)
274 libc_hidden_builtin_def (stpcpy)