/* strcat (dest, src) -- Append SRC on the end of DEST.
   Copyright (C) 1998, 1999, 2003 Free Software Foundation, Inc.
   This file is part of the GNU C Library.
   Contributed by Jakub Jelinek <jj@ultra.linux.cz> and
		  Jan Vondrak <jvon4518@ss1000.ms.mff.cuni.cz>.

   The GNU C Library is free software; you can redistribute it and/or
   modify it under the terms of the GNU Lesser General Public
   License as published by the Free Software Foundation; either
   version 2.1 of the License, or (at your option) any later version.

   The GNU C Library is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   Lesser General Public License for more details.

   You should have received a copy of the GNU Lesser General Public
   License along with the GNU C Library; if not, write to the Free
   Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
   02111-1307 USA.  */
/* The routine below uses %g2, %g3 and %g6 as temporaries.  The GNU
   assembler's .register directive declares each of them as a scratch
   register (no symbol is bound to it).  */
	.register	%g2, #scratch
	.register	%g3, #scratch
	.register	%g6, #scratch
/* Normally, this uses the
   ((xword - 0x0101010101010101) & 0x8080808080808080) test
   to find out if any byte in xword could be zero.  This is fast, but
   it also gives a false alarm for any byte in the range 0x81-0xff.
   That does not matter for correctness, because whenever this test
   says there could be a zero byte we check the word byte by byte; but
   if bytes with the high bit set are common in the strings, it will
   give poor performance.  You can #define EIGHTBIT_NOT_RARE and the
   algorithm will use a one tick slower, but more precise test
   ((xword - 0x0101010101010101) & (~xword) & 0x8080808080808080),
   which does not give any false alarms (but if some bits are set,
   one cannot assume from it which bytes are zero and which are not).
   It is yet to be measured what the correct default for glibc is
   these days for an average user.
 */
/* char *strcat (char *dest, const char *src)

   In:    %o0 = dest, %o1 = src.
   Out:   %o0 = dest (the entry value, preserved in %g6).
   Uses:  %o2-%o5, %g1-%g6 and the integer condition codes.  Leaf
	  routine: returns with retl, no register window is opened.

   Constants kept live for the whole routine:
	%g1 = 0x0101010101010101
	%g2 = 0x8080808080808080  (%g1 << 7)
   A word W "may contain a zero byte" when (W - %g1) & %g2 is non-zero
   (with EIGHTBIT_NOT_RARE: (W - %g1) & ~W & %g2).  A hit is always
   confirmed byte by byte, most significant byte first, because the
   most significant byte is the one at the lowest address.

   Phase 1 locates the terminating NUL of DEST (labels 32, 48-52, 23-29).
   Phase 2 copies SRC there, including its NUL:
	label 12/13  byte copy until DEST is 8-byte aligned,
	label 1-11   both pointers aligned: word copy,
	label 14-22  SRC misaligned: word copy with shift/merge.

   The loops load one unit ahead of the one being tested; those
   read-ahead loads use ASI_PNF.

   The instruction following every branch/retl sits in its delay slot
   and is indented by one extra space.  The trailing comments are the
   original pipeline annotations (execution unit and dispatch group).

   NOTE(review): ENTRY/END and libc_hidden_builtin_def are assumed to be
   the usual glibc macros (normally from <sysdep.h>) and ASI_PNF to come
   from <asm/asi.h>; confirm both headers are included by this file.  */

	.text
ENTRY(strcat)
	sethi		%hi(0x01010101), %g1		/* IEU0		Group		*/
	ldub		[%o0], %o3			/* Load				*/
	or		%g1, %lo(0x01010101), %g1	/* IEU0		Group		*/
	mov		%o0, %g6			/* IEU1				*/

	sllx		%g1, 32, %g2			/* IEU0		Group		*/
	andcc		%o0, 7, %g0			/* IEU1				*/
	or		%g1, %g2, %g1			/* IEU0		Group		*/
	bne,pn		%icc, 32f			/* CTI				*/

	 sllx		%g1, 7, %g2			/* IEU0		Group		*/
	/* DEST is 8-byte aligned.  An empty DEST goes straight to the copy.  */
	brz,pn		%o3, 30f			/* CTI+IEU1			*/
	 ldx		[%o0], %o3			/* Load				*/
48:	add		%o0, 8, %o0			/* IEU0		Group		*/

	/* Word scan of DEST: test the word in %o3 while loading the next.  */
49:	sub		%o3, %g1, %o2			/* IEU0		Group		*/
#ifdef EIGHTBIT_NOT_RARE
	andn		%o2, %o3, %g5			/* IEU0		Group		*/
	ldxa		[%o0] ASI_PNF, %o3		/* Load				*/
	andcc		%g5, %g2, %g0			/* IEU1		Group		*/
#else
	ldxa		[%o0] ASI_PNF, %o3		/* Load				*/
	andcc		%o2, %g2, %g0			/* IEU1		Group		*/
#endif
	be,pt		%xcc, 49b			/* CTI				*/

	 add		%o0, 8, %o0			/* IEU0				*/
	/* Possible zero byte.  %g3 = the tested word again ((W - %g1) + %g1);
	   %o0 is now 16 past the address that word was loaded from.  */
	addcc		%o2, %g1, %g3			/* IEU1		Group		*/
	srlx		%o2, 32, %o2			/* IEU0				*/
50:	andcc		%o2, %g2, %g0			/* IEU1		Group		*/

	/* Nothing flagged in the upper four bytes: skip to the lower four.  */
	be,pn		%xcc, 51f			/* CTI				*/
	 srlx		%g3, 56, %o2			/* IEU0				*/
	andcc		%o2, 0xff, %g0			/* IEU1		Group		*/
	be,pn		%icc, 29f			/* CTI				*/

	 srlx		%g3, 48, %o2			/* IEU0				*/
	andcc		%o2, 0xff, %g0			/* IEU1		Group		*/
	be,pn		%icc, 28f			/* CTI				*/
	 srlx		%g3, 40, %o2			/* IEU0				*/

	andcc		%o2, 0xff, %g0			/* IEU1		Group		*/
	be,pn		%icc, 27f			/* CTI				*/
	 srlx		%g3, 32, %o2			/* IEU0				*/
	andcc		%o2, 0xff, %g0			/* IEU1		Group		*/

	be,pn		%icc, 26f			/* CTI				*/
	/* Label 51 is also the delay slot of the branch just above.  */
51:	 srlx		%g3, 24, %o2			/* IEU0				*/
	andcc		%o2, 0xff, %g0			/* IEU1		Group		*/
	be,pn		%icc, 25f			/* CTI				*/

	 srlx		%g3, 16, %o2			/* IEU0				*/
	andcc		%o2, 0xff, %g0			/* IEU1		Group		*/
	be,pn		%icc, 24f			/* CTI				*/
	 srlx		%g3, 8, %o2			/* IEU0				*/

	andcc		%o2, 0xff, %g0			/* IEU1		Group		*/
	be,pn		%icc, 23f			/* CTI				*/
	 sub		%o3, %g1, %o2			/* IEU0				*/
	andcc		%g3, 0xff, %g0			/* IEU1		Group		*/

	be,pn		%icc, 52f			/* CTI				*/
	/* False alarm: continue with the word that was already loaded.  */
	 ldxa		[%o0] ASI_PNF, %o3		/* Load				*/
	andcc		%o2, %g2, %g0			/* IEU1		Group		*/
	be,pt		%xcc, 49b			/* CTI				*/

	 add		%o0, 8, %o0			/* IEU0				*/
	addcc		%o2, %g1, %g3			/* IEU1		Group		*/
	ba,pt		%xcc, 50b			/* CTI				*/
	 srlx		%o2, 32, %o2			/* IEU0				*/

	/* NUL found in DEST: move %o0 back so that it points at the NUL.
	   52 = last byte of the word ... 29 = first byte of the word.  */
52:	ba,pt		%xcc, 12f			/* CTI		Group		*/
	 add		%o0, -9, %o0			/* IEU0				*/
23:	ba,pt		%xcc, 12f			/* CTI		Group		*/
	 add		%o0, -10, %o0			/* IEU0				*/

24:	ba,pt		%xcc, 12f			/* CTI		Group		*/
	 add		%o0, -11, %o0			/* IEU0				*/
25:	ba,pt		%xcc, 12f			/* CTI		Group		*/
	 add		%o0, -12, %o0			/* IEU0				*/

26:	ba,pt		%xcc, 12f			/* CTI		Group		*/
	 add		%o0, -13, %o0			/* IEU0				*/
27:	ba,pt		%xcc, 12f			/* CTI		Group		*/
	 add		%o0, -14, %o0			/* IEU0				*/

28:	ba,pt		%xcc, 12f			/* CTI		Group		*/
	 add		%o0, -15, %o0			/* IEU0				*/
	/* NUL is the first byte of the word, so %o0 ends up 8-byte aligned.  */
29:	add		%o0, -16, %o0			/* IEU0		Group		*/
30:	andcc		%o1, 7, %g3			/* IEU1				*/

	/* %o0 is aligned and points at DEST's NUL; %g3 = SRC & 7.  */
31:	bne,pn		%icc, 14f			/* CTI				*/
	 orcc		%g0, 64, %g4			/* IEU1		Group		*/

	/* Both pointers aligned: copy a word at a time.  */
1:	ldx		[%o1], %o3			/* Load				*/
	add		%o1, 8, %o1			/* IEU1				*/

2:	mov		%o3, %g3			/* IEU0		Group		*/
3:	sub		%o3, %g1, %o2			/* IEU1				*/
	ldxa		[%o1] ASI_PNF, %o3		/* Load				*/
#ifdef EIGHTBIT_NOT_RARE
	andn		%o2, %g3, %o2			/* IEU0		Group		*/
#endif
	add		%o0, 8, %o0			/* IEU0		Group		*/

	andcc		%o2, %g2, %g0			/* IEU1				*/
	add		%o1, 8, %o1			/* IEU0		Group		*/
	/* Annulled branch: the store only happens when the loop continues.  */
	be,a,pt		%xcc, 2b			/* CTI				*/
	 stx		%g3, [%o0 - 8]			/* Store			*/

	/* Word in %g3 may hold the NUL: look at each byte in turn.  */
	srlx		%g3, 56, %g5			/* IEU0		Group		*/
	andcc		%g5, 0xff, %g0			/* IEU1		Group		*/
	be,pn		%icc, 11f			/* CTI				*/
	 srlx		%g3, 48, %g4			/* IEU0				*/

	andcc		%g4, 0xff, %g0			/* IEU1		Group		*/
	be,pn		%icc, 10f			/* CTI				*/
	 srlx		%g3, 40, %g5			/* IEU0				*/
	andcc		%g5, 0xff, %g0			/* IEU1		Group		*/

	be,pn		%icc, 9f			/* CTI				*/
	 srlx		%g3, 32, %g4			/* IEU0				*/
	andcc		%g4, 0xff, %g0			/* IEU1		Group		*/
	be,pn		%icc, 8f			/* CTI				*/

	 srlx		%g3, 24, %g5			/* IEU0				*/
	andcc		%g5, 0xff, %g0			/* IEU1		Group		*/
	be,pn		%icc, 7f			/* CTI				*/
	 srlx		%g3, 16, %g4			/* IEU0				*/

	andcc		%g4, 0xff, %g0			/* IEU1		Group		*/
	be,pn		%icc, 6f			/* CTI				*/
	 srlx		%g3, 8, %g5			/* IEU0				*/
	andcc		%g5, 0xff, %g0			/* IEU1		Group		*/

	be,pn		%icc, 5f			/* CTI				*/
	 sub		%o3, %g1, %o2			/* IEU0				*/
	/* First seven bytes are non-zero: the whole word can be stored.
	   Only a non-zero last byte sends us back into the loop.  */
	stx		%g3, [%o0 - 8]			/* Store	Group		*/
	andcc		%g3, 0xff, %g0			/* IEU1				*/

	bne,pt		%icc, 3b			/* CTI				*/
	 mov		%o3, %g3			/* IEU0		Group		*/
4:	retl						/* CTI+IEU1	Group		*/
	 mov		%g6, %o0			/* IEU0				*/

	/* Partial final word, aligned case.  Entry N handles a NUL in byte
	   11 - N of the word (byte 0 is at [%o0 - 8]); every path stores
	   the bytes up to and including that NUL.  */
5:	stb		%g5, [%o0 - 2]			/* Store	Group		*/
	srlx		%g3, 16, %g4			/* IEU0				*/
6:	sth		%g4, [%o0 - 4]			/* Store	Group		*/
	srlx		%g3, 32, %g4			/* IEU0				*/

	stw		%g4, [%o0 - 8]			/* Store	Group		*/
	retl						/* CTI+IEU1	Group		*/
	 mov		%g6, %o0			/* IEU0				*/
7:	stb		%g5, [%o0 - 4]			/* Store	Group		*/

	srlx		%g3, 32, %g4			/* IEU0				*/
8:	stw		%g4, [%o0 - 8]			/* Store	Group		*/
	retl						/* CTI+IEU1	Group		*/
	 mov		%g6, %o0			/* IEU0				*/

9:	stb		%g5, [%o0 - 6]			/* Store	Group		*/
	srlx		%g3, 48, %g4			/* IEU0				*/
10:	sth		%g4, [%o0 - 8]			/* Store	Group		*/
	retl						/* CTI+IEU1	Group		*/

	 mov		%g6, %o0			/* IEU0				*/
11:	stb		%g5, [%o0 - 8]			/* Store	Group		*/
	retl						/* CTI+IEU1	Group		*/
	 mov		%g6, %o0			/* IEU0				*/

	/* DEST not 8-byte aligned on entry: scan it byte by byte until it
	   becomes aligned (continue at 48) or the NUL is found.  */
32:	andcc		%o0, 7, %g0			/* IEU1		Group		*/
	be,a,pn		%icc, 48b			/* CTI				*/
	 ldx		[%o0], %o3			/* Load				*/
	add		%o0, 1, %o0			/* IEU0		Group		*/

	brnz,a,pt	%o3, 32b			/* CTI+IEU1			*/
	 lduba		[%o0] ASI_PNF, %o3		/* Load				*/
	/* The byte was NUL: step back onto it.  */
	add		%o0, -1, %o0			/* IEU0		Group		*/
	andcc		%o0, 7, %g0			/* IEU1		Group		*/

	be,a,pn		%icc, 31b			/* CTI				*/
	 andcc		%o1, 7, %g3			/* IEU1		Group		*/

	/* Copy single bytes until DEST is 8-byte aligned or SRC ends.  */
12:	ldub		[%o1], %o3			/* Load				*/
	stb		%o3, [%o0]			/* Store	Group		*/

13:	add		%o0, 1, %o0			/* IEU0				*/
	add		%o1, 1, %o1			/* IEU1				*/
	andcc		%o3, 0xff, %g0			/* IEU1		Group		*/
	be,pn		%icc, 4b			/* CTI				*/

	 lduba		[%o1] ASI_PNF, %o3		/* Load				*/
	andcc		%o0, 7, %g0			/* IEU1		Group		*/
	bne,a,pt	%icc, 13b			/* CTI				*/
	 stb		%o3, [%o0]			/* Store			*/

	/* DEST is aligned now; pick the word loop matching SRC's alignment.  */
	andcc		%o1, 7, %g3			/* IEU1		Group		*/
	be,a,pt		%icc, 1b			/* CTI				*/
	 ldx		[%o1], %o3			/* Load				*/
	orcc		%g0, 64, %g4			/* IEU1		Group		*/

	/* SRC misaligned: round %o1 down to a word boundary and build each
	   output word from two consecutive source words.  */
14:	sllx		%g3, 3, %g5			/* IEU0				*/
	sub		%o1, %g3, %o1			/* IEU0		Group		*/
	sub		%g4, %g5, %g4			/* IEU1				*/
							/* %g1 = 0101010101010101	*
							 * %g2 = 8080808080808080	*
							 * %g3 = source alignment	*
							 * %g5 = number of bits to shift left  *
							 * %g4 = number of bits to shift right */
	ldxa		[%o1] ASI_PNF, %o5		/* Load		Group		*/

	addcc		%o1, 8, %o1			/* IEU1				*/
15:	sllx		%o5, %g5, %o3			/* IEU0		Group		*/
	ldxa		[%o1] ASI_PNF, %o5		/* Load				*/
	srlx		%o5, %g4, %o4			/* IEU0		Group		*/

	add		%o0, 8, %o0			/* IEU1				*/
	or		%o3, %o4, %o3			/* IEU0		Group		*/
	add		%o1, 8, %o1			/* IEU1				*/
	sub		%o3, %g1, %o4			/* IEU0		Group		*/

#ifdef EIGHTBIT_NOT_RARE
	andn		%o4, %o3, %o4			/* IEU0		Group		*/
#endif
	andcc		%o4, %g2, %g0			/* IEU1		Group		*/
	/* Annulled branch: the store only happens when the loop continues.  */
	be,a,pt		%xcc, 15b			/* CTI				*/
	 stx		%o3, [%o0 - 8]			/* Store			*/
	srlx		%o3, 56, %o4			/* IEU0		Group		*/

	/* Merged word in %o3 may hold the NUL: look at each byte in turn.  */
	andcc		%o4, 0xff, %g0			/* IEU1		Group		*/
	be,pn		%icc, 22f			/* CTI				*/
	 srlx		%o3, 48, %o4			/* IEU0				*/
	andcc		%o4, 0xff, %g0			/* IEU1		Group		*/

	be,pn		%icc, 21f			/* CTI				*/
	 srlx		%o3, 40, %o4			/* IEU0				*/
	andcc		%o4, 0xff, %g0			/* IEU1		Group		*/
	be,pn		%icc, 20f			/* CTI				*/

	 srlx		%o3, 32, %o4			/* IEU0				*/
	andcc		%o4, 0xff, %g0			/* IEU1		Group		*/
	be,pn		%icc, 19f			/* CTI				*/
	 srlx		%o3, 24, %o4			/* IEU0				*/

	andcc		%o4, 0xff, %g0			/* IEU1		Group		*/
	be,pn		%icc, 18f			/* CTI				*/
	 srlx		%o3, 16, %o4			/* IEU0				*/
	andcc		%o4, 0xff, %g0			/* IEU1		Group		*/

	be,pn		%icc, 17f			/* CTI				*/
	 srlx		%o3, 8, %o4			/* IEU0				*/
	andcc		%o4, 0xff, %g0			/* IEU1		Group		*/
	be,pn		%icc, 16f			/* CTI				*/

	 andcc		%o3, 0xff, %g0			/* IEU1		Group		*/
	/* Not annulled: the whole word is stored whether or not we loop.  */
	bne,pn		%icc, 15b			/* CTI				*/
	 stx		%o3, [%o0 - 8]			/* Store			*/
	retl						/* CTI+IEU1	Group		*/

	 mov		%g6, %o0			/* IEU0				*/

	/* Partial final word, misaligned-SRC case.  Entry N handles a NUL in
	   byte 22 - N of the word (byte 0 is at [%o0 - 8]); the entries fall
	   through so the bytes before the NUL get stored as well.  */
16:	srlx		%o3, 8, %o4			/* IEU0		Group		*/
	stb		%o4, [%o0 - 2]			/* Store			*/
17:	srlx		%o3, 16, %o4			/* IEU0		Group		*/
	stb		%o4, [%o0 - 3]			/* Store			*/

18:	srlx		%o3, 24, %o4			/* IEU0		Group		*/
	stb		%o4, [%o0 - 4]			/* Store			*/
19:	srlx		%o3, 32, %o4			/* IEU0		Group		*/
	stw		%o4, [%o0 - 8]			/* Store			*/

	retl						/* CTI+IEU1	Group		*/
	 mov		%g6, %o0			/* IEU0				*/

20:	srlx		%o3, 40, %o4			/* IEU0		Group		*/
	stb		%o4, [%o0 - 6]			/* Store			*/
21:	srlx		%o3, 48, %o4			/* IEU0		Group		*/
	stb		%o4, [%o0 - 7]			/* Store			*/

22:	srlx		%o3, 56, %o4			/* IEU0		Group		*/
	stb		%o4, [%o0 - 8]			/* Store			*/
	retl						/* CTI+IEU1	Group		*/
	 mov		%g6, %o0			/* IEU0				*/
END(strcat)
libc_hidden_builtin_def (strcat)