Update copyright notices with scripts/update-copyrights
[glibc.git] / sysdeps / sparc / sparc64 / strcat.S
blobd1098a261fac435e89f6cdd0619ed7f731a7488c
1 /* strcat (dest, src) -- Append SRC on the end of DEST.
2    For SPARC v9.
3    Copyright (C) 1998-2014 Free Software Foundation, Inc.
4    This file is part of the GNU C Library.
5    Contributed by Jakub Jelinek <jj@ultra.linux.cz> and
6                   Jan Vondrak <jvon4518@ss1000.ms.mff.cuni.cz>.
8    The GNU C Library is free software; you can redistribute it and/or
9    modify it under the terms of the GNU Lesser General Public
10    License as published by the Free Software Foundation; either
11    version 2.1 of the License, or (at your option) any later version.
13    The GNU C Library is distributed in the hope that it will be useful,
14    but WITHOUT ANY WARRANTY; without even the implied warranty of
15    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
16    Lesser General Public License for more details.
18    You should have received a copy of the GNU Lesser General Public
19    License along with the GNU C Library; if not, see
20    <http://www.gnu.org/licenses/>.  */
22 #include <sysdep.h>
23 #include <asm/asi.h>
/* Default configuration, applied only when XCC has not been defined before
   this point (presumably by another file that includes this one -- TODO
   confirm).  XCC is set to xcc and USE_BPR is defined; neither name is
   referenced by the strcat code visible in this file, which spells %xcc
   and the brz/brnz register branches out literally.  The .register
   directives declare %g2, %g3 and %g6 as scratch registers; strcat below
   uses all three (%g2 = 0x80.. mask, %g3 = temporary, %g6 = saved dest).  */
24 #ifndef XCC
25 #define XCC xcc
26 #define USE_BPR
27         .register       %g2, #scratch
28         .register       %g3, #scratch
29         .register       %g6, #scratch
30 #endif
32         /* Normally, this uses
33            ((xword - 0x0101010101010101) & 0x8080808080808080) test
34            to find out if any byte in xword could be zero. This is fast, but
35            also gives a false alarm for any byte in the range 0x81-0xff. It does
36            not matter for correctness, as if this test tells us there could
37            be some zero byte, we check it byte by byte, but if bytes with
38            high bits set are common in the strings, then this will give poor
39            performance. You can #define EIGHTBIT_NOT_RARE and the algorithm
40            will use a test that is one tick slower but more precise,
41            ((xword - 0x0101010101010101) & (~xword) & 0x8080808080808080),
42            which does not give any false alarms (but if some bits are set,
43            one cannot assume from it which bytes are zero and which are not).
44            It has yet to be measured which default is the right one for glibc
45            for an average user these days.
46          */
/* strcat (dest, src)

   In:   %o0 = dest, %o1 = src.
   Out:  %o0 = the original dest; it is saved in %g6 on entry and moved
         back in the delay slot of every retl.
   Regs: %g1 = 0x0101010101010101 and %g2 = 0x8080808080808080 are the
         constants of the zero-byte test; %g3-%g5 and %o2-%o5 are
         temporaries.  The routine never touches %sp and issues no
         save/restore.

   Phases:
     1. Find the NUL that terminates dest: byte by byte until %o0 is
        8-byte aligned (label 32), then one xword at a time (48-51); the
        landing pads 52 and 23-29 rewind %o0 onto the NUL.
     2. Append src: byte by byte until dest is 8-byte aligned (12-13),
        then one xword at a time, either with src aligned as well (1-3,
        tails 5-11) or with src misaligned, merging two neighbouring
        source xwords by shifting (14-15, tails 16-22).

   Within an xword the byte at the lowest address is the most significant
   one: the code maps (xword >> 56) to the lowest store/rewind offset.

   An instruction indented by one extra space sits in the delay slot of
   the branch above it.  On a conditional branch carrying ",a" the delay
   slot executes only when the branch is taken; without ",a" it always
   executes.

   NOTE(review): ASI_PNF comes from <asm/asi.h>, which is not shown here.
   It is presumably the primary no-fault ASI, which is what would make
   the read-ahead loads past the terminator safe -- confirm.  */
48         .text
49         .align          32
50 ENTRY(strcat)
        /* Build %g1 = 0x0101010101010101, fetch the first dest byte into
           %o3 and keep the original dest in %g6 for the return value.  */
51         sethi           %hi(0x01010101), %g1            /* IEU0         Group           */
52         ldub            [%o0], %o3                      /* Load                         */
53         or              %g1, %lo(0x01010101), %g1       /* IEU0         Group           */
54         mov             %o0, %g6                        /* IEU1                         */
56         sllx            %g1, 32, %g2                    /* IEU0         Group           */
57         andcc           %o0, 7, %g0                     /* IEU1                         */
58         or              %g1, %g2, %g1                   /* IEU0         Group           */
        /* dest not 8-byte aligned: scan it byte-wise at 32.  The delay
           slot (always executed) finishes %g2 = %g1 << 7 = 0x8080...80.  */
59         bne,pn          %icc, 32f                       /* CTI                          */
61          sllx           %g1, 7, %g2                     /* IEU0         Group           */
        /* dest is aligned.  If its first byte is already NUL, dest is
           empty: go straight to the copy phase at 30.  */
62         brz,pn          %o3, 30f                        /* CTI+IEU1                     */
63          ldx            [%o0], %o3                      /* Load                         */
        /* Phase 1, xword scan.  Invariant at 49: %o3 = xword at [%o0 - 8].  */
64 48:     add             %o0, 8, %o0                     /* IEU0         Group           */
66 49:     sub             %o3, %g1, %o2                   /* IEU0         Group           */
67 #ifdef EIGHTBIT_NOT_RARE
        /* Precise variant: (xword - 0x01..) & ~xword & 0x80..  */
68         andn            %o2, %o3, %g5                   /* IEU0         Group           */
69         ldxa            [%o0] ASI_PNF, %o3              /* Load                         */
70         andcc           %g5, %g2, %g0                   /* IEU1         Group           */
71 #else
        /* Fast variant: (xword - 0x01..) & 0x80.. while the next xword
           is being loaded.  */
72         ldxa            [%o0] ASI_PNF, %o3              /* Load                         */
73         andcc           %o2, %g2, %g0                   /* IEU1         Group           */
74 #endif
75         be,pt           %xcc, 49b                       /* CTI                          */
77          add            %o0, 8, %o0                     /* IEU0                         */
        /* A zero byte is possible.  %o3 now holds the following xword, so
           rebuild the suspect one as %g3 = %o2 + %g1; it lives at
           [%o0 - 16].  %o2 keeps the upper 32 bits of the test value.  */
78         addcc           %o2, %g1, %g3                   /* IEU1         Group           */
79         srlx            %o2, 32, %o2                    /* IEU0                         */
        /* No flag bit in the upper half: skip to the low four bytes (51).  */
80 50:     andcc           %o2, %g2, %g0                   /* IEU1         Group           */
82         be,pn           %xcc, 51f                       /* CTI                          */
83          srlx           %g3, 56, %o2                    /* IEU0                         */
        /* Test bytes 0..7 of %g3 from the most significant down; byte k
           being zero branches to the pad that adds (k - 16) to %o0.  */
84         andcc           %o2, 0xff, %g0                  /* IEU1         Group           */
85         be,pn           %icc, 29f                       /* CTI                          */
87          srlx           %g3, 48, %o2                    /* IEU0                         */
88         andcc           %o2, 0xff, %g0                  /* IEU1         Group           */
89         be,pn           %icc, 28f                       /* CTI                          */
90          srlx           %g3, 40, %o2                    /* IEU0                         */
92         andcc           %o2, 0xff, %g0                  /* IEU1         Group           */
93         be,pn           %icc, 27f                       /* CTI                          */
94          srlx           %g3, 32, %o2                    /* IEU0                         */
95         andcc           %o2, 0xff, %g0                  /* IEU1         Group           */
97         be,pn           %icc, 26f                       /* CTI                          */
98 51:      srlx           %g3, 24, %o2                    /* IEU0                         */
99         andcc           %o2, 0xff, %g0                  /* IEU1         Group           */
100         be,pn           %icc, 25f                       /* CTI                          */
102          srlx           %g3, 16, %o2                    /* IEU0                         */
103         andcc           %o2, 0xff, %g0                  /* IEU1         Group           */
104         be,pn           %icc, 24f                       /* CTI                          */
105          srlx           %g3, 8, %o2                     /* IEU0                         */
107         andcc           %o2, 0xff, %g0                  /* IEU1         Group           */
108         be,pn           %icc, 23f                       /* CTI                          */
        /* Delay slot: start the test of the following xword (%o3) so the
           scan can resume if this one turns out to be a false alarm.  */
109          sub            %o3, %g1, %o2                   /* IEU0                         */
110         andcc           %g3, 0xff, %g0                  /* IEU1         Group           */
112         be,pn           %icc, 52f                       /* CTI                          */
113          ldxa           [%o0] ASI_PNF, %o3              /* Load                         */
        /* False alarm: no byte of %g3 was zero.  Re-enter the scan loop
           at 49 if the following xword is clean, otherwise examine it
           at 50.  Either way %o0 advances by 8.  */
114         andcc           %o2, %g2, %g0                   /* IEU1         Group           */
115         be,pt           %xcc, 49b                       /* CTI                          */
117          add            %o0, 8, %o0                     /* IEU0                         */
118         addcc           %o2, %g1, %g3                   /* IEU1         Group           */
119         ba,pt           %xcc, 50b                       /* CTI                          */
120          srlx           %o2, 32, %o2                    /* IEU0                         */
        /* Landing pads: rewind %o0 from 16 past the start of the suspect
           xword onto the NUL byte.  52 = byte 7, 23 = byte 6, ...,
           28 = byte 1; these leave %o0 unaligned, so they continue with
           the byte copy at 12.  29 = byte 0 leaves %o0 8-byte aligned and
           falls through to 30.  */
122         .align          16
123 52:     ba,pt           %xcc, 12f                       /* CTI          Group           */
124          add            %o0, -9, %o0                    /* IEU0                         */
125 23:     ba,pt           %xcc, 12f                       /* CTI          Group           */
126          add            %o0, -10, %o0                   /* IEU0                         */
128 24:     ba,pt           %xcc, 12f                       /* CTI          Group           */
129          add            %o0, -11, %o0                   /* IEU0                         */
130 25:     ba,pt           %xcc, 12f                       /* CTI          Group           */
131          add            %o0, -12, %o0                   /* IEU0                         */
133 26:     ba,pt           %xcc, 12f                       /* CTI          Group           */
134          add            %o0, -13, %o0                   /* IEU0                         */
135 27:     ba,pt           %xcc, 12f                       /* CTI          Group           */
136          add            %o0, -14, %o0                   /* IEU0                         */
138 28:     ba,pt           %xcc, 12f                       /* CTI          Group           */
139          add            %o0, -15, %o0                   /* IEU0                         */
140 29:     add             %o0, -16, %o0                   /* IEU0         Group           */
        /* Phase 2 with dest aligned: %o0 points at dest's NUL.
           %g3 = src & 7; a misaligned src goes to 14 with %g4 = 64 set in
           the delay slot.  */
141 30:     andcc           %o1, 7, %g3                     /* IEU1                         */
143 31:     bne,pn          %icc, 14f                       /* CTI                          */
144          orcc           %g0, 64, %g4                    /* IEU1         Group           */
        /* Both pointers aligned.  In the loop %g3 is the xword about to
           be stored and %o3 the one read ahead from src.  */
145 1:      ldx             [%o1], %o3                      /* Load                         */
146         add             %o1, 8, %o1                     /* IEU1                         */
148 2:      mov             %o3, %g3                        /* IEU0         Group           */
149 3:      sub             %o3, %g1, %o2                   /* IEU1                         */
150         ldxa            [%o1] ASI_PNF, %o3              /* Load                         */
151 #ifdef EIGHTBIT_NOT_RARE
152         andn            %o2, %g3, %o2                   /* IEU0         Group           */
153 #endif
154         add             %o0, 8, %o0                     /* IEU0         Group           */
156         andcc           %o2, %g2, %g0                   /* IEU1                         */
157         add             %o1, 8, %o1                     /* IEU0         Group           */
        /* No zero byte possible: store the whole xword (annulled delay
           slot, runs only when the branch is taken) and continue.  */
158         be,a,pt         %xcc, 2b                        /* CTI                          */
159          stx            %g3, [%o0 - 8]                  /* Store                        */
        /* Possible terminator in %g3: test bytes 0..6 in address order.
           Byte k being zero branches to tail label (11 - k), which stores
           bytes 0..k at [%o0 - 8] and returns.  */
161         srlx            %g3, 56, %g5                    /* IEU0         Group           */
162         andcc           %g5, 0xff, %g0                  /* IEU1         Group           */
163         be,pn           %icc, 11f                       /* CTI                          */
164          srlx           %g3, 48, %g4                    /* IEU0                         */
166         andcc           %g4, 0xff, %g0                  /* IEU1         Group           */
167         be,pn           %icc, 10f                       /* CTI                          */
168          srlx           %g3, 40, %g5                    /* IEU0                         */
169         andcc           %g5, 0xff, %g0                  /* IEU1         Group           */
171         be,pn           %icc, 9f                        /* CTI                          */
172          srlx           %g3, 32, %g4                    /* IEU0                         */
173         andcc           %g4, 0xff, %g0                  /* IEU1         Group           */
174         be,pn           %icc, 8f                        /* CTI                          */
176          srlx           %g3, 24, %g5                    /* IEU0                         */
177         andcc           %g5, 0xff, %g0                  /* IEU1         Group           */
178         be,pn           %icc, 7f                        /* CTI                          */
179          srlx           %g3, 16, %g4                    /* IEU0                         */
181         andcc           %g4, 0xff, %g0                  /* IEU1         Group           */
182         be,pn           %icc, 6f                        /* CTI                          */
183          srlx           %g3, 8, %g5                     /* IEU0                         */
184         andcc           %g5, 0xff, %g0                  /* IEU1         Group           */
186         be,pn           %icc, 5f                        /* CTI                          */
187          sub            %o3, %g1, %o2                   /* IEU0                         */
        /* Bytes 0..6 are non-zero: store the full xword.  If byte 7 is
           non-zero as well it was a false alarm and the loop resumes at 3
           with the read-ahead xword; otherwise byte 7 was the terminator
           and the copy is complete.  */
188         stx             %g3, [%o0 - 8]                  /* Store        Group           */
189         andcc           %g3, 0xff, %g0                  /* IEU1                         */
191         bne,pt          %icc, 3b                        /* CTI                          */
192          mov            %o3, %g3                        /* IEU0         Group           */
        /* Common exit: return the original dest.  */
193 4:      retl                                            /* CTI+IEU1     Group           */
194          mov            %g6, %o0                        /* IEU0                         */
        /* Aligned-copy tails.  Each entry stores the piece that holds the
           terminator, then falls through to store the bytes before it
           with the widest naturally aligned stores (stb/sth/stw).
           5: NUL in byte 6   6: byte 5   7: byte 4   8: byte 3
           9: byte 2   10: byte 1   11: byte 0.  */
196         .align          16
197 5:      stb             %g5, [%o0 - 2]                  /* Store        Group           */
198         srlx            %g3, 16, %g4                    /* IEU0                         */
199 6:      sth             %g4, [%o0 - 4]                  /* Store        Group           */
200         srlx            %g3, 32, %g4                    /* IEU0                         */
202         stw             %g4, [%o0 - 8]                  /* Store        Group           */
203         retl                                            /* CTI+IEU1     Group           */
204          mov            %g6, %o0                        /* IEU0                         */
205 7:      stb             %g5, [%o0 - 4]                  /* Store        Group           */
207         srlx            %g3, 32, %g4                    /* IEU0                         */
208 8:      stw             %g4, [%o0 - 8]                  /* Store        Group           */
209         retl                                            /* CTI+IEU1     Group           */
210          mov            %g6, %o0                        /* IEU0                         */
212 9:      stb             %g5, [%o0 - 6]                  /* Store        Group           */
213         srlx            %g3, 48, %g4                    /* IEU0                         */
214 10:     sth             %g4, [%o0 - 8]                  /* Store        Group           */
215         retl                                            /* CTI+IEU1     Group           */
217          mov            %g6, %o0                        /* IEU0                         */
218 11:     stb             %g5, [%o0 - 8]                  /* Store        Group           */
219         retl                                            /* CTI+IEU1     Group           */
220          mov            %g6, %o0                        /* IEU0                         */
        /* Phase 1, byte scan for a misaligned dest.  On entry %o3 is the
           byte at [%o0].  Once %o0 becomes 8-byte aligned, load the xword
           there and switch to the xword scan at 48.  */
222         .align          16
223 32:     andcc           %o0, 7, %g0                     /* IEU1         Group           */
224         be,a,pn         %icc, 48b                       /* CTI                          */
225          ldx            [%o0], %o3                      /* Load                         */
226         add             %o0, 1, %o0                     /* IEU0         Group           */
228         brnz,a,pt       %o3, 32b                        /* CTI+IEU1                     */
229          lduba          [%o0] ASI_PNF, %o3              /* Load                         */
        /* The byte just passed was the NUL: step %o0 back onto it.  If
           that address is 8-byte aligned, join the aligned path at 31
           with the src alignment test done in the delay slot.  */
230         add             %o0, -1, %o0                    /* IEU0         Group           */
231         andcc           %o0, 7, %g0                     /* IEU1         Group           */
233         be,a,pn         %icc, 31b                       /* CTI                          */
234          andcc          %o1, 7, %g3                     /* IEU1         Group           */
        /* Phase 2, byte copy while dest is misaligned.  %o0 points at
           dest's NUL on entry.  Each pass stores the byte in %o3 and
           reads the next src byte ahead; copying the terminator exits
           through 4.  */
235 12:     ldub            [%o1], %o3                      /* Load                         */
236         stb             %o3, [%o0]                      /* Store        Group           */
238 13:     add             %o0, 1, %o0                     /* IEU0                         */
239         add             %o1, 1, %o1                     /* IEU1                         */
240         andcc           %o3, 0xff, %g0                  /* IEU1         Group           */
241         be,pn           %icc, 4b                        /* CTI                          */
243          lduba          [%o1] ASI_PNF, %o3              /* Load                         */
244         andcc           %o0, 7, %g0                     /* IEU1         Group           */
245         bne,a,pt        %icc, 13b                       /* CTI                          */
246          stb            %o3, [%o0]                      /* Store                        */
        /* dest is now 8-byte aligned.  If src is aligned too, use the
           aligned xword loop at 1; otherwise set %g4 = 64 and fall into
           the shifting copy at 14 with %g3 = src & 7.  */
248         andcc           %o1, 7, %g3                     /* IEU1         Group           */
249         be,a,pt         %icc, 1b                        /* CTI                          */
250          ldx            [%o1], %o3                      /* Load                         */
251         orcc            %g0, 64, %g4                    /* IEU1         Group           */
        /* dest aligned, src misaligned by %g3 bytes: round %o1 down to an
           xword boundary and assemble every output xword from two
           neighbouring source xwords.  */
253 14:     sllx            %g3, 3, %g5                     /* IEU0                         */
254         sub             %o1, %g3, %o1                   /* IEU0         Group           */
255         sub             %g4, %g5, %g4                   /* IEU1                         */
256                                                         /* %g1 = 0101010101010101       *
257                                                          * %g2 = 8080808080808080       *
258                                                          * %g3 = source alignment       *
259                                                          * %g5 = number of bits to shift left  *
260                                                          * %g4 = number of bits to shift right */
261         ldxa            [%o1] ASI_PNF, %o5              /* Load         Group           */
263         addcc           %o1, 8, %o1                     /* IEU1                         */
        /* Loop: %o3 = (previous xword << %g5) | (next xword >> %g4).  */
264 15:     sllx            %o5, %g5, %o3                   /* IEU0         Group           */
265         ldxa            [%o1] ASI_PNF, %o5              /* Load                         */
266         srlx            %o5, %g4, %o4                   /* IEU0         Group           */
268         add             %o0, 8, %o0                     /* IEU1                         */
269         or              %o3, %o4, %o3                   /* IEU0         Group           */
270         add             %o1, 8, %o1                     /* IEU1                         */
271         sub             %o3, %g1, %o4                   /* IEU0         Group           */
273 #ifdef EIGHTBIT_NOT_RARE
274         andn            %o4, %o3, %o4                   /* IEU0         Group           */
275 #endif
276         andcc           %o4, %g2, %g0                   /* IEU1         Group           */
        /* No zero byte possible: store the merged xword (annulled delay
           slot, runs only when the branch is taken) and continue.  */
277         be,a,pt         %xcc, 15b                       /* CTI                          */
278          stx            %o3, [%o0 - 8]                  /* Store                        */
        /* Possible terminator in %o3: test bytes 0..6 in address order.
           Byte k being zero branches to tail label (22 - k).  */
279         srlx            %o3, 56, %o4                    /* IEU0         Group           */
281         andcc           %o4, 0xff, %g0                  /* IEU1         Group           */
282         be,pn           %icc, 22f                       /* CTI                          */
283          srlx           %o3, 48, %o4                    /* IEU0                         */
284         andcc           %o4, 0xff, %g0                  /* IEU1         Group           */
286         be,pn           %icc, 21f                       /* CTI                          */
287          srlx           %o3, 40, %o4                    /* IEU0                         */
288         andcc           %o4, 0xff, %g0                  /* IEU1         Group           */
289         be,pn           %icc, 20f                       /* CTI                          */
291          srlx           %o3, 32, %o4                    /* IEU0                         */
292         andcc           %o4, 0xff, %g0                  /* IEU1         Group           */
293         be,pn           %icc, 19f                       /* CTI                          */
294          srlx           %o3, 24, %o4                    /* IEU0                         */
296         andcc           %o4, 0xff, %g0                  /* IEU1         Group           */
297         be,pn           %icc, 18f                       /* CTI                          */
298          srlx           %o3, 16, %o4                    /* IEU0                         */
299         andcc           %o4, 0xff, %g0                  /* IEU1         Group           */
301         be,pn           %icc, 17f                       /* CTI                          */
302          srlx           %o3, 8, %o4                     /* IEU0                         */
303         andcc           %o4, 0xff, %g0                  /* IEU1         Group           */
        /* The delay slot of the byte-6 branch already tests byte 7.  The
           full xword is then stored in either case (delay slot without
           annul); a non-zero byte 7 means a false alarm and the loop
           continues, otherwise the copy is complete.  */
304         be,pn           %icc, 16f                       /* CTI                          */
306          andcc          %o3, 0xff, %g0                  /* IEU1         Group           */
307         bne,pn          %icc, 15b                       /* CTI                          */
308          stx            %o3, [%o0 - 8]                  /* Store                        */
309         retl                                            /* CTI+IEU1     Group           */
311          mov            %g6, %o0                        /* IEU0                         */
        /* Misaligned-src tails.  Each entry stores the byte holding the
           terminator and falls through to store the bytes before it.
           16: NUL in byte 6   17: byte 5   18: byte 4
           19: byte 3 (bytes 0..3 go out as one word, then return).  */
313         .align          16
314 16:     srlx            %o3, 8, %o4                     /* IEU0         Group           */
315         stb             %o4, [%o0 - 2]                  /* Store                        */
316 17:     srlx            %o3, 16, %o4                    /* IEU0         Group           */
317         stb             %o4, [%o0 - 3]                  /* Store                        */
319 18:     srlx            %o3, 24, %o4                    /* IEU0         Group           */
320         stb             %o4, [%o0 - 4]                  /* Store                        */
321 19:     srlx            %o3, 32, %o4                    /* IEU0         Group           */
322         stw             %o4, [%o0 - 8]                  /* Store                        */
324         retl                                            /* CTI+IEU1     Group           */
325          mov            %g6, %o0                        /* IEU0                         */
326         nop
327         nop
        /* 20: NUL in byte 2   21: byte 1   22: byte 0; stored byte-wise.  */
329 20:     srlx            %o3, 40, %o4                    /* IEU0         Group           */
330         stb             %o4, [%o0 - 6]                  /* Store                        */
331 21:     srlx            %o3, 48, %o4                    /* IEU0         Group           */
332         stb             %o4, [%o0 - 7]                  /* Store                        */
334 22:     srlx            %o3, 56, %o4                    /* IEU0         Group           */
335         stb             %o4, [%o0 - 8]                  /* Store                        */
336         retl                                            /* CTI+IEU1     Group           */
337          mov            %g6, %o0                        /* IEU0                         */
338 END(strcat)
339 libc_hidden_builtin_def (strcat)