2.9
[glibc/nacl-glibc.git] / sysdeps / sparc / sparc64 / strcat.S
blob43b3d6c176fed2e12f32102f1ae043de1e99bac8
1 /* strcat (dest, src) -- Append SRC on the end of DEST.
2    For SPARC v9.
3    Copyright (C) 1998, 1999, 2003 Free Software Foundation, Inc.
4    This file is part of the GNU C Library.
5    Contributed by Jakub Jelinek <jj@ultra.linux.cz> and
6                   Jan Vondrak <jvon4518@ss1000.ms.mff.cuni.cz>.
8    The GNU C Library is free software; you can redistribute it and/or
9    modify it under the terms of the GNU Lesser General Public
10    License as published by the Free Software Foundation; either
11    version 2.1 of the License, or (at your option) any later version.
13    The GNU C Library is distributed in the hope that it will be useful,
14    but WITHOUT ANY WARRANTY; without even the implied warranty of
15    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
16    Lesser General Public License for more details.
18    You should have received a copy of the GNU Lesser General Public
19    License along with the GNU C Library; if not, write to the Free
20    Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
21    02111-1307 USA.  */
23 #include <sysdep.h>
24 #include <asm/asi.h>
/* Defaults used when XCC has not been defined before this point: XCC names
   the 64-bit condition codes and USE_BPR is defined (neither macro is
   referenced by the code visible in this file -- presumably they matter to
   other sources that share this preamble; confirm).  The .register
   directives declare %g2, %g3 and %g6, all of which strcat writes, as
   scratch registers for the assembler.  */
25 #ifndef XCC
26 #define XCC xcc
27 #define USE_BPR
28         .register       %g2, #scratch
29         .register       %g3, #scratch
30         .register       %g6, #scratch
31 #endif
33         /* Normally, this uses the
34            ((xword - 0x0101010101010101) & 0x8080808080808080) test
35            to find out whether any byte in xword could be zero. This is fast,
36            but it also gives a false alarm for any byte in the range 0x81-0xff.
37            That does not matter for correctness, because whenever the test
38            says there could be a zero byte we check the word byte by byte;
39            but if bytes with the high bit set are common in the strings,
40            performance will be poor. You can #define EIGHTBIT_NOT_RARE and
41            the algorithm will use a test that is one tick slower but more precise,
42            ((xword - 0x0101010101010101) & (~xword) & 0x8080808080808080),
43            which does not give any false alarms (although when some bits are
44            set, one cannot tell from them which bytes are zero and which are not).
45            It has yet to be measured which default is right for the average
46            glibc user these days.
47          */
49         .text
50         .align          32
/* char *strcat (char *dest, const char *src) for SPARC v9.
   In:   %o0 = dest, %o1 = src.
   Out:  %o0 = the original dest pointer (kept in %g6 for the whole routine).
   Leaf routine: no register window is saved and every exit is a retl.
   Writes %g1-%g6, %o0-%o5 and the integer condition codes.
   Outline:
     1. Find the terminating zero byte of dest, one byte at a time until
        dest is 8-byte aligned (32:) and then one xword at a time (49:).
     2. If that terminator is not 8-byte aligned, copy src bytes until the
        destination pointer is (12:/13:).
     3. Copy xwords: directly when src is then 8-byte aligned as well
        (1:-3:), otherwise by merging two shifted aligned loads (14:/15:).
   Bits 63..56 of a loaded xword are treated as the byte at the lowest
   address throughout.  Look-ahead loads use ASI_PNF, presumably so that
   reading past the terminator cannot fault -- confirm against the
   definition in <asm/asi.h>.
   The instruction after each branch sits in its delay slot; with ",a" it
   is annulled when the conditional branch is not taken.  */
51 ENTRY(strcat)
        /* Build %g1 = 0x0101010101010101 and %g2 = 0x8080808080808080, fetch
           the first dest byte into %o3 and remember dest in %g6.
           Dest not 8-byte aligned -> 32f.  An aligned dest whose first byte
           is already zero -> 30f; the ldx in that delay slot still executes,
           but %o3 is overwritten before it is used on that path.  */
52         sethi           %hi(0x01010101), %g1            /* IEU0         Group           */
53         ldub            [%o0], %o3                      /* Load                         */
54         or              %g1, %lo(0x01010101), %g1       /* IEU0         Group           */
55         mov             %o0, %g6                        /* IEU1                         */
57         sllx            %g1, 32, %g2                    /* IEU0         Group           */
58         andcc           %o0, 7, %g0                     /* IEU1                         */
59         or              %g1, %g2, %g1                   /* IEU0         Group           */
60         bne,pn          %icc, 32f                       /* CTI                          */
62          sllx           %g1, 7, %g2                     /* IEU0         Group           */
63         brz,pn          %o3, 30f                        /* CTI+IEU1                     */
64          ldx            [%o0], %o3                      /* Load                         */
        /* 48: %o3 = current dest xword; step %o0 past it.
           49: scan loop.  On entry %o3 holds the xword to test and %o0 points
           at the following one, which is preloaded while the test runs.  The
           delay-slot add executes on both outcomes, so when the loop exits
           %o0 is 16 bytes past the start of the xword that raised the alarm.  */
65 48:     add             %o0, 8, %o0                     /* IEU0         Group           */
67 49:     sub             %o3, %g1, %o2                   /* IEU0         Group           */
68 #ifdef EIGHTBIT_NOT_RARE
69         andn            %o2, %o3, %g5                   /* IEU0         Group           */
70         ldxa            [%o0] ASI_PNF, %o3              /* Load                         */
71         andcc           %g5, %g2, %g0                   /* IEU1         Group           */
72 #else
73         ldxa            [%o0] ASI_PNF, %o3              /* Load                         */
74         andcc           %o2, %g2, %g0                   /* IEU1         Group           */
75 #endif
76         be,pt           %xcc, 49b                       /* CTI                          */
78          add            %o0, 8, %o0                     /* IEU0                         */
        /* Possible zero byte.  %g3 = the xword again (the subtraction is
           undone); %o2 = high half of the test value.
           50: if the high half raised no alarm, skip to the low four bytes
           at 51:.  Otherwise test the bytes from bits 63..56 downwards; each
           hit branches to the fixup that rewinds %o0 onto that byte.  */
79         addcc           %o2, %g1, %g3                   /* IEU1         Group           */
80         srlx            %o2, 32, %o2                    /* IEU0                         */
81 50:     andcc           %o2, %g2, %g0                   /* IEU1         Group           */
83         be,pn           %xcc, 51f                       /* CTI                          */
84          srlx           %g3, 56, %o2                    /* IEU0                         */
85         andcc           %o2, 0xff, %g0                  /* IEU1         Group           */
86         be,pn           %icc, 29f                       /* CTI                          */
88          srlx           %g3, 48, %o2                    /* IEU0                         */
89         andcc           %o2, 0xff, %g0                  /* IEU1         Group           */
90         be,pn           %icc, 28f                       /* CTI                          */
91          srlx           %g3, 40, %o2                    /* IEU0                         */
93         andcc           %o2, 0xff, %g0                  /* IEU1         Group           */
94         be,pn           %icc, 27f                       /* CTI                          */
95          srlx           %g3, 32, %o2                    /* IEU0                         */
96         andcc           %o2, 0xff, %g0                  /* IEU1         Group           */
98         be,pn           %icc, 26f                       /* CTI                          */
99 51:      srlx           %g3, 24, %o2                    /* IEU0                         */
100         andcc           %o2, 0xff, %g0                  /* IEU1         Group           */
101         be,pn           %icc, 25f                       /* CTI                          */
103          srlx           %g3, 16, %o2                    /* IEU0                         */
104         andcc           %o2, 0xff, %g0                  /* IEU1         Group           */
105         be,pn           %icc, 24f                       /* CTI                          */
106          srlx           %g3, 8, %o2                     /* IEU0                         */
        /* Last byte tests.  The delay slot below already starts the test of
           the next xword.  A zero in the lowest byte goes to 52f; otherwise it
           was a false alarm and scanning resumes, either in the fast loop
           (49b) or straight in the byte tests (50b) if the next xword raises
           an alarm as well.  */
108         andcc           %o2, 0xff, %g0                  /* IEU1         Group           */
109         be,pn           %icc, 23f                       /* CTI                          */
110          sub            %o3, %g1, %o2                   /* IEU0                         */
111         andcc           %g3, 0xff, %g0                  /* IEU1         Group           */
113         be,pn           %icc, 52f                       /* CTI                          */
114          ldxa           [%o0] ASI_PNF, %o3              /* Load                         */
115         andcc           %o2, %g2, %g0                   /* IEU1         Group           */
116         be,pt           %xcc, 49b                       /* CTI                          */
118          add            %o0, 8, %o0                     /* IEU0                         */
119         addcc           %o2, %g1, %g3                   /* IEU1         Group           */
120         ba,pt           %xcc, 50b                       /* CTI                          */
121          srlx           %o2, 32, %o2                    /* IEU0                         */
        /* Fixups.  %o0 is 16 past the examined xword, so adding -(16 - k)
           points it at zero byte k (52: k = 7 ... 29: k = 0).  Only 29:
           leaves %o0 8-byte aligned and falls through to 30:; the others
           continue in the byte copier at 12:.  */
123         .align          16
124 52:     ba,pt           %xcc, 12f                       /* CTI          Group           */
125          add            %o0, -9, %o0                    /* IEU0                         */
126 23:     ba,pt           %xcc, 12f                       /* CTI          Group           */
127          add            %o0, -10, %o0                   /* IEU0                         */
129 24:     ba,pt           %xcc, 12f                       /* CTI          Group           */
130          add            %o0, -11, %o0                   /* IEU0                         */
131 25:     ba,pt           %xcc, 12f                       /* CTI          Group           */
132          add            %o0, -12, %o0                   /* IEU0                         */
134 26:     ba,pt           %xcc, 12f                       /* CTI          Group           */
135          add            %o0, -13, %o0                   /* IEU0                         */
136 27:     ba,pt           %xcc, 12f                       /* CTI          Group           */
137          add            %o0, -14, %o0                   /* IEU0                         */
139 28:     ba,pt           %xcc, 12f                       /* CTI          Group           */
140          add            %o0, -15, %o0                   /* IEU0                         */
141 29:     add             %o0, -16, %o0                   /* IEU0         Group           */
        /* 30: the dest terminator is at %o0 and %o0 is 8-byte aligned.
           %g3 = src & 7.  31: is also entered from the byte-wise dest scan,
           which performs the same andcc in a delay slot.  Unaligned src ->
           14f, with %g4 = 64 set up in the delay slot.  */
142 30:     andcc           %o1, 7, %g3                     /* IEU1                         */
144 31:     bne,pn          %icc, 14f                       /* CTI                          */
145          orcc           %g0, 64, %g4                    /* IEU1         Group           */
        /* Aligned copy: both pointers are 8-byte aligned.  In the loop %g3 is
           the xword to store and %o3 the preloaded next one.  %o0 and %o1 are
           advanced before the branch, hence the store to [%o0 - 8]; it is
           annulled unless the xword raised no alarm.  */
146 1:      ldx             [%o1], %o3                      /* Load                         */
147         add             %o1, 8, %o1                     /* IEU1                         */
149 2:      mov             %o3, %g3                        /* IEU0         Group           */
150 3:      sub             %o3, %g1, %o2                   /* IEU1                         */
151         ldxa            [%o1] ASI_PNF, %o3              /* Load                         */
152 #ifdef EIGHTBIT_NOT_RARE
153         andn            %o2, %g3, %o2                   /* IEU0         Group           */
154 #endif
155         add             %o0, 8, %o0                     /* IEU0         Group           */
157         andcc           %o2, %g2, %g0                   /* IEU1                         */
158         add             %o1, 8, %o1                     /* IEU0         Group           */
159         be,a,pt         %xcc, 2b                        /* CTI                          */
160          stx            %g3, [%o0 - 8]                  /* Store                        */
        /* Alarm: look for the zero byte from bits 63..56 downwards.  %g5 and
           %g4 alternately receive the shifted xword so that the tail stores
           at 5:-11: can reuse them.  */
162         srlx            %g3, 56, %g5                    /* IEU0         Group           */
163         andcc           %g5, 0xff, %g0                  /* IEU1         Group           */
164         be,pn           %icc, 11f                       /* CTI                          */
165          srlx           %g3, 48, %g4                    /* IEU0                         */
167         andcc           %g4, 0xff, %g0                  /* IEU1         Group           */
168         be,pn           %icc, 10f                       /* CTI                          */
169          srlx           %g3, 40, %g5                    /* IEU0                         */
170         andcc           %g5, 0xff, %g0                  /* IEU1         Group           */
172         be,pn           %icc, 9f                        /* CTI                          */
173          srlx           %g3, 32, %g4                    /* IEU0                         */
174         andcc           %g4, 0xff, %g0                  /* IEU1         Group           */
175         be,pn           %icc, 8f                        /* CTI                          */
177          srlx           %g3, 24, %g5                    /* IEU0                         */
178         andcc           %g5, 0xff, %g0                  /* IEU1         Group           */
179         be,pn           %icc, 7f                        /* CTI                          */
180          srlx           %g3, 16, %g4                    /* IEU0                         */
182         andcc           %g4, 0xff, %g0                  /* IEU1         Group           */
183         be,pn           %icc, 6f                        /* CTI                          */
184          srlx           %g3, 8, %g5                     /* IEU0                         */
185         andcc           %g5, 0xff, %g0                  /* IEU1         Group           */
187         be,pn           %icc, 5f                        /* CTI                          */
188          sub            %o3, %g1, %o2                   /* IEU0                         */
        /* The seven higher bytes are non-zero: store the whole xword.  If the
           lowest byte is non-zero too it was a false alarm and the loop
           continues at 3b with the next xword moved into %g3; otherwise the
           terminator has just been stored and control falls into the exit.  */
189         stx             %g3, [%o0 - 8]                  /* Store        Group           */
190         andcc           %g3, 0xff, %g0                  /* IEU1                         */
192         bne,pt          %icc, 3b                        /* CTI                          */
193          mov            %o3, %g3                        /* IEU0         Group           */
        /* 4: common exit, returns the original dest.  */
194 4:      retl                                            /* CTI+IEU1     Group           */
195          mov            %g6, %o0                        /* IEU0                         */
        /* Tail stores for a terminator in byte 6 (5:) down to byte 0 (11:) of
           the xword belonging at [%o0 - 8].  Each entry stores the bytes up
           to and including the zero byte as naturally aligned byte, halfword
           and word pieces, then returns the original dest.  */
197         .align          16
198 5:      stb             %g5, [%o0 - 2]                  /* Store        Group           */
199         srlx            %g3, 16, %g4                    /* IEU0                         */
200 6:      sth             %g4, [%o0 - 4]                  /* Store        Group           */
201         srlx            %g3, 32, %g4                    /* IEU0                         */
203         stw             %g4, [%o0 - 8]                  /* Store        Group           */
204         retl                                            /* CTI+IEU1     Group           */
205          mov            %g6, %o0                        /* IEU0                         */
206 7:      stb             %g5, [%o0 - 4]                  /* Store        Group           */
208         srlx            %g3, 32, %g4                    /* IEU0                         */
209 8:      stw             %g4, [%o0 - 8]                  /* Store        Group           */
210         retl                                            /* CTI+IEU1     Group           */
211          mov            %g6, %o0                        /* IEU0                         */
213 9:      stb             %g5, [%o0 - 6]                  /* Store        Group           */
214         srlx            %g3, 48, %g4                    /* IEU0                         */
215 10:     sth             %g4, [%o0 - 8]                  /* Store        Group           */
216         retl                                            /* CTI+IEU1     Group           */
218          mov            %g6, %o0                        /* IEU0                         */
219 11:     stb             %g5, [%o0 - 8]                  /* Store        Group           */
220         retl                                            /* CTI+IEU1     Group           */
221          mov            %g6, %o0                        /* IEU0                         */
        /* 32: dest is not 8-byte aligned, scan it one byte at a time.  %o3
           holds the byte at the address %o0 has on entry to each iteration.
           Once %o0 is aligned without a zero having been seen, load the xword
           there and join the xword scan at 48b.  */
223         .align          16
224 32:     andcc           %o0, 7, %g0                     /* IEU1         Group           */
225         be,a,pn         %icc, 48b                       /* CTI                          */
226          ldx            [%o0], %o3                      /* Load                         */
227         add             %o0, 1, %o0                     /* IEU0         Group           */
229         brnz,a,pt       %o3, 32b                        /* CTI+IEU1                     */
230          lduba          [%o0] ASI_PNF, %o3              /* Load                         */
        /* Zero byte found: step %o0 back onto it.  If that address is 8-byte
           aligned go to the xword copy set-up at 31b, with src & 7 computed
           in the delay slot.  */
231         add             %o0, -1, %o0                    /* IEU0         Group           */
232         andcc           %o0, 7, %g0                     /* IEU1         Group           */
234         be,a,pn         %icc, 31b                       /* CTI                          */
235          andcc          %o1, 7, %g3                     /* IEU1         Group           */
        /* 12:/13: copy src bytes until the destination pointer is 8-byte
           aligned.  Having stored the zero byte ends the routine via 4b.
           The next src byte is preloaded in one delay slot and stored in the
           other only if the loop continues.  */
236 12:     ldub            [%o1], %o3                      /* Load                         */
237         stb             %o3, [%o0]                      /* Store        Group           */
239 13:     add             %o0, 1, %o0                     /* IEU0                         */
240         add             %o1, 1, %o1                     /* IEU1                         */
241         andcc           %o3, 0xff, %g0                  /* IEU1         Group           */
242         be,pn           %icc, 4b                        /* CTI                          */
244          lduba          [%o1] ASI_PNF, %o3              /* Load                         */
245         andcc           %o0, 7, %g0                     /* IEU1         Group           */
246         bne,a,pt        %icc, 13b                       /* CTI                          */
247          stb            %o3, [%o0]                      /* Store                        */
        /* Dest is aligned now.  If src is aligned as well use the aligned
           loop at 1b (the delay-slot ldx duplicates the load found there);
           otherwise fall into the shifting copy with %g4 = 64.  */
249         andcc           %o1, 7, %g3                     /* IEU1         Group           */
250         be,a,pt         %icc, 1b                        /* CTI                          */
251          ldx            [%o1], %o3                      /* Load                         */
252         orcc            %g0, 64, %g4                    /* IEU1         Group           */
        /* 14: src is not 8-byte aligned.  Round %o1 down to an 8-byte
           boundary, derive the two shift counts and load the first aligned
           xword into %o5.  */
254 14:     sllx            %g3, 3, %g5                     /* IEU0                         */
255         sub             %o1, %g3, %o1                   /* IEU0         Group           */
256         sub             %g4, %g5, %g4                   /* IEU1                         */
257                                                         /* %g1 = 0101010101010101       *
258                                                          * %g2 = 8080808080808080       *
259                                                          * %g3 = source alignment       *
260                                                          * %g5 = number of bits to shift left  *
261                                                          * %g4 = number of bits to shift right */
262         ldxa            [%o1] ASI_PNF, %o5              /* Load         Group           */
264         addcc           %o1, 8, %o1                     /* IEU1                         */
        /* 15: each output xword is
           (previous aligned xword << %g5) | (next aligned xword >> %g4).
           The store in the delay slot is annulled unless the merged xword
           raised no alarm.  */
265 15:     sllx            %o5, %g5, %o3                   /* IEU0         Group           */
266         ldxa            [%o1] ASI_PNF, %o5              /* Load                         */
267         srlx            %o5, %g4, %o4                   /* IEU0         Group           */
269         add             %o0, 8, %o0                     /* IEU1                         */
270         or              %o3, %o4, %o3                   /* IEU0         Group           */
271         add             %o1, 8, %o1                     /* IEU1                         */
272         sub             %o3, %g1, %o4                   /* IEU0         Group           */
274 #ifdef EIGHTBIT_NOT_RARE
275         andn            %o4, %o3, %o4                   /* IEU0         Group           */
276 #endif
277         andcc           %o4, %g2, %g0                   /* IEU1         Group           */
278         be,a,pt         %xcc, 15b                       /* CTI                          */
279          stx            %o3, [%o0 - 8]                  /* Store                        */
        /* Alarm: find the zero byte in the merged xword, bits 63..56 first.  */
280         srlx            %o3, 56, %o4                    /* IEU0         Group           */
282         andcc           %o4, 0xff, %g0                  /* IEU1         Group           */
283         be,pn           %icc, 22f                       /* CTI                          */
284          srlx           %o3, 48, %o4                    /* IEU0                         */
285         andcc           %o4, 0xff, %g0                  /* IEU1         Group           */
287         be,pn           %icc, 21f                       /* CTI                          */
288          srlx           %o3, 40, %o4                    /* IEU0                         */
289         andcc           %o4, 0xff, %g0                  /* IEU1         Group           */
290         be,pn           %icc, 20f                       /* CTI                          */
292          srlx           %o3, 32, %o4                    /* IEU0                         */
293         andcc           %o4, 0xff, %g0                  /* IEU1         Group           */
294         be,pn           %icc, 19f                       /* CTI                          */
295          srlx           %o3, 24, %o4                    /* IEU0                         */
297         andcc           %o4, 0xff, %g0                  /* IEU1         Group           */
298         be,pn           %icc, 18f                       /* CTI                          */
299          srlx           %o3, 16, %o4                    /* IEU0                         */
300         andcc           %o4, 0xff, %g0                  /* IEU1         Group           */
302         be,pn           %icc, 17f                       /* CTI                          */
303          srlx           %o3, 8, %o4                     /* IEU0                         */
304         andcc           %o4, 0xff, %g0                  /* IEU1         Group           */
305         be,pn           %icc, 16f                       /* CTI                          */
307          andcc          %o3, 0xff, %g0                  /* IEU1         Group           */
        /* The andcc in the preceding delay slot tested the lowest byte.  The
           xword is stored on both outcomes (this delay slot is not annulled):
           loop again after a false alarm, return if the lowest byte was the
           terminator.  */
308         bne,pn          %icc, 15b                       /* CTI                          */
309          stx            %o3, [%o0 - 8]                  /* Store                        */
310         retl                                            /* CTI+IEU1     Group           */
312          mov            %g6, %o0                        /* IEU0                         */
        /* Tail stores for a terminator in byte 6 (16:) down to byte 0 (22:)
           of the xword belonging at [%o0 - 8].  Bytes are stored one at a
           time from the zero byte back towards the start of the xword; once
           only the first four bytes remain they are written with a single
           stw at 19:.  */
314         .align          16
315 16:     srlx            %o3, 8, %o4                     /* IEU0         Group           */
316         stb             %o4, [%o0 - 2]                  /* Store                        */
317 17:     srlx            %o3, 16, %o4                    /* IEU0         Group           */
318         stb             %o4, [%o0 - 3]                  /* Store                        */
320 18:     srlx            %o3, 24, %o4                    /* IEU0         Group           */
321         stb             %o4, [%o0 - 4]                  /* Store                        */
322 19:     srlx            %o3, 32, %o4                    /* IEU0         Group           */
323         stw             %o4, [%o0 - 8]                  /* Store                        */
325         retl                                            /* CTI+IEU1     Group           */
326          mov            %g6, %o0                        /* IEU0                         */
327         nop
328         nop
330 20:     srlx            %o3, 40, %o4                    /* IEU0         Group           */
331         stb             %o4, [%o0 - 6]                  /* Store                        */
332 21:     srlx            %o3, 48, %o4                    /* IEU0         Group           */
333         stb             %o4, [%o0 - 7]                  /* Store                        */
335 22:     srlx            %o3, 56, %o4                    /* IEU0         Group           */
336         stb             %o4, [%o0 - 8]                  /* Store                        */
337         retl                                            /* CTI+IEU1     Group           */
338          mov            %g6, %o0                        /* IEU0                         */
339 END(strcat)
340 libc_hidden_builtin_def (strcat)