1 /* strcat (dest, src) -- Append SRC on the end of DEST.
3 Copyright (C) 1998, 1999, 2003 Free Software Foundation, Inc.
4 This file is part of the GNU C Library.
5 Contributed by Jakub Jelinek <jj@ultra.linux.cz> and
6 Jan Vondrak <jvon4518@ss1000.ms.mff.cuni.cz>.
8 The GNU C Library is free software; you can redistribute it and/or
9 modify it under the terms of the GNU Lesser General Public
10 License as published by the Free Software Foundation; either
11 version 2.1 of the License, or (at your option) any later version.
13 The GNU C Library is distributed in the hope that it will be useful,
14 but WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16 Lesser General Public License for more details.
18 You should have received a copy of the GNU Lesser General Public
19 License along with the GNU C Library; if not, see
20 <http://www.gnu.org/licenses/>. */
/* Declare to the assembler that the global registers %g2, %g3 and %g6
   are used as scratch registers; all three are written by the code
   in this file.  */
26 .register %g2, #scratch
27 .register %g3, #scratch
28 .register %g6, #scratch
31 /* Normally, this uses
32 ((xword - 0x0101010101010101) & 0x8080808080808080) test
33 to find out if any byte in xword could be zero. This is fast, but
34 also gives a false alarm for any byte in range 0x81-0xff. It does
35 not matter for correctness, as if this test tells us there could
36 be some zero byte, we check it byte by byte, but if bytes with
37 high bits set are common in the strings, then this will give poor
38 performance. You can #define EIGHTBIT_NOT_RARE and the algorithm
39 will use a one tick slower, but more precise test
40 ((xword - 0x0101010101010101) & (~xword) & 0x8080808080808080),
41 which does not give any false alarms (but if some bits are set,
42 one cannot assume from it which bytes are zero and which are not).
43 It is yet to be measured what the correct default for glibc is
44 these days for an average user. */
/* strcat: %o0 = dest, %o1 = src.  The value %o0 had on entry is saved
   in %g6 and moved back into %o0 before every retl.

   Register roles set up by the prologue:
     %g1 = 0x0101010101010101  (a one in every byte)
     %g2 = %g1 << 7            (the high bit of every byte)
     %g6 = copy of dest
     %o3 = byte or word most recently loaded from dest

   On SPARC the instruction that follows a branch sits in its delay
   slot and is executed before control reaches the branch target.  With
   the ",a" (annul) suffix on a conditional branch the delay slot is
   executed only when the branch is taken.  Much of the code below
   depends on this, so instruction order must not be changed.

   NOTE(review): no entry label and no section/alignment directives are
   visible in this view -- presumably they precede this point; confirm
   against the full file.  ASI_PNF is defined outside this view; it is
   presumably the SPARC V9 "primary no-fault" address space, which
   would let the read-ahead loads run past the end of a string without
   trapping -- confirm.  */
50 sethi %hi(0x01010101), %g1 /* IEU0 Group */
/* Load the first byte of dest early; it is tested by the brz below.  */
51 ldub [%o0], %o3 /* Load */
52 or %g1, %lo(0x01010101), %g1 /* IEU0 Group */
/* Keep the original dest pointer for the return value.  */
53 mov %o0, %g6 /* IEU1 */
/* Replicate 0x01010101 into the upper 32 bits of %g1.  */
55 sllx %g1, 32, %g2 /* IEU0 Group */
/* Is dest 8-byte aligned?  */
56 andcc %o0, 7, %g0 /* IEU1 */
57 or %g1, %g2, %g1 /* IEU0 Group */
/* Unaligned dest: scan it bytewise at 32.  The sllx that follows is in
   the (non-annulled) delay slot, so %g2 is set up on both paths.  */
58 bne,pn %icc, 32f /* CTI */
60 sllx %g1, 7, %g2 /* IEU0 Group */
/* First byte of dest is NUL: dest is empty, start copying at 30.  The
   ldx in the delay slot fetches the first full word for the scan.  */
61 brz,pn %o3, 30f /* CTI+IEU1 */
62 ldx [%o0], %o3 /* Load */
/* 48/49: scan dest one 8-byte word at a time.  %o3 holds the word under
   test while the following word is already being loaded, and %o0 is
   advanced past both, so when the loop exits the tested word lies at
   %o0 - 16.  %o2 = word - %g1 is the value tested against %g2.  */
63 48: add %o0, 8, %o0 /* IEU0 Group */
65 49: sub %o3, %g1, %o2 /* IEU0 Group */
/* NOTE(review): no #else/#endif for this conditional is visible here.
   Judging by the registers involved, the first ldxa/andcc pair (which
   also masks with ~word via andn and tests %g5) looks like the
   EIGHTBIT_NOT_RARE variant and the second pair (testing %o2) like the
   default variant -- confirm against the full file.  */
66 #ifdef EIGHTBIT_NOT_RARE
67 andn %o2, %o3, %g5 /* IEU0 Group */
68 ldxa [%o0] ASI_PNF, %o3 /* Load */
69 andcc %g5, %g2, %g0 /* IEU1 Group */
71 ldxa [%o0] ASI_PNF, %o3 /* Load */
72 andcc %o2, %g2, %g0 /* IEU1 Group */
/* No high bit set in the test value: no byte can be zero, keep
   scanning.  The add in the delay slot runs on both paths.  */
74 be,pt %xcc, 49b /* CTI */
76 add %o0, 8, %o0 /* IEU0 */
/* A byte of the tested word may be zero.  Rebuild that word in %g3 as
   %o2 + %g1 (%o3 already holds the next word) and examine its bytes
   starting with the most significant one, which is the one at the
   lowest address.  %o2 >> 32 is the upper half of the test value.  */
77 addcc %o2, %g1, %g3 /* IEU1 Group */
78 srlx %o2, 32, %o2 /* IEU0 */
/* 50: if the upper four bytes of the test value show no candidate,
   skip directly to the lower four bytes at 51.  */
79 50: andcc %o2, %g2, %g0 /* IEU1 Group */
81 be,pn %xcc, 51f /* CTI */
/* Byte 0 (bits 63..56) zero -> 29.  */
82 srlx %g3, 56, %o2 /* IEU0 */
83 andcc %o2, 0xff, %g0 /* IEU1 Group */
84 be,pn %icc, 29f /* CTI */
/* Byte 1 zero -> 28.  */
86 srlx %g3, 48, %o2 /* IEU0 */
87 andcc %o2, 0xff, %g0 /* IEU1 Group */
88 be,pn %icc, 28f /* CTI */
/* Byte 2 zero -> 27.  */
89 srlx %g3, 40, %o2 /* IEU0 */
91 andcc %o2, 0xff, %g0 /* IEU1 Group */
92 be,pn %icc, 27f /* CTI */
/* Byte 3 zero -> 26.  */
93 srlx %g3, 32, %o2 /* IEU0 */
94 andcc %o2, 0xff, %g0 /* IEU1 Group */
96 be,pn %icc, 26f /* CTI */
/* 51: lower half.  This srlx is also the delay slot of the branch
   above.  Byte 4 zero -> 25.  */
97 51: srlx %g3, 24, %o2 /* IEU0 */
98 andcc %o2, 0xff, %g0 /* IEU1 Group */
99 be,pn %icc, 25f /* CTI */
/* Byte 5 zero -> 24.  */
101 srlx %g3, 16, %o2 /* IEU0 */
102 andcc %o2, 0xff, %g0 /* IEU1 Group */
103 be,pn %icc, 24f /* CTI */
/* Byte 6 zero -> 23.  */
104 srlx %g3, 8, %o2 /* IEU0 */
106 andcc %o2, 0xff, %g0 /* IEU1 Group */
107 be,pn %icc, 23f /* CTI */
/* Delay slot: start the test value for the following word (%o3).  */
108 sub %o3, %g1, %o2 /* IEU0 */
/* Byte 7 (lowest) zero -> 52.  The delay slot reads ahead once more.  */
109 andcc %g3, 0xff, %g0 /* IEU1 Group */
111 be,pn %icc, 52f /* CTI */
112 ldxa [%o0] ASI_PNF, %o3 /* Load */
/* False alarm: the word had no zero byte.  If the following word is
   clean as well, drop back into the fast loop at 49; otherwise set up
   %g3/%o2 for it and redo the byte checks at 50.  */
113 andcc %o2, %g2, %g0 /* IEU1 Group */
114 be,pt %xcc, 49b /* CTI */
116 add %o0, 8, %o0 /* IEU0 */
117 addcc %o2, %g1, %g3 /* IEU1 Group */
118 ba,pt %xcc, 50b /* CTI */
119 srlx %o2, 32, %o2 /* IEU0 */
/* 52, 23..29: move %o0 back so that it points at the NUL byte found in
   dest (the tested word starts at %o0 - 16; 29 is its first byte, 52
   its last).  For 52 and 23..28 the NUL is not on an 8-byte boundary,
   so copying starts bytewise at 12 (the add is in the delay slot of
   the ba).  For 29 the NUL is 8-byte aligned and execution falls
   through into the code that follows label 29.  */
122 52: ba,pt %xcc, 12f /* CTI Group */
123 add %o0, -9, %o0 /* IEU0 */
124 23: ba,pt %xcc, 12f /* CTI Group */
125 add %o0, -10, %o0 /* IEU0 */
127 24: ba,pt %xcc, 12f /* CTI Group */
128 add %o0, -11, %o0 /* IEU0 */
129 25: ba,pt %xcc, 12f /* CTI Group */
130 add %o0, -12, %o0 /* IEU0 */
132 26: ba,pt %xcc, 12f /* CTI Group */
133 add %o0, -13, %o0 /* IEU0 */
134 27: ba,pt %xcc, 12f /* CTI Group */
135 add %o0, -14, %o0 /* IEU0 */
137 28: ba,pt %xcc, 12f /* CTI Group */
138 add %o0, -15, %o0 /* IEU0 */
139 29: add %o0, -16, %o0 /* IEU0 Group */
/* 30/31: %o0 points at the terminating NUL of dest and is 8-byte
   aligned.  %g3 = src & 7.  If src is not 8-byte aligned, use the
   shift-and-merge copy at 14; the orcc in the delay slot sets
   %g4 = 64 for it.  Otherwise fall into the aligned word copy.  */
140 30: andcc %o1, 7, %g3 /* IEU1 */
142 31: bne,pn %icc, 14f /* CTI */
143 orcc %g0, 64, %g4 /* IEU1 Group */
/* 1: load the first source word and advance src.  */
144 1: ldx [%o1], %o3 /* Load */
145 add %o1, 8, %o1 /* IEU1 */
/* 2/3: aligned copy loop.  %g3 = word to be stored, %o3 = next source
   word (read ahead), %o2 = %g3 - %g1 is the zero-byte test value.
   %o0 and %o1 are advanced before the store, hence the [%o0 - 8]
   addressing used everywhere below.  */
147 2: mov %o3, %g3 /* IEU0 Group */
148 3: sub %o3, %g1, %o2 /* IEU1 */
149 ldxa [%o1] ASI_PNF, %o3 /* Load */
/* NOTE(review): no #endif for this conditional is visible here;
   presumably only the andn (masking the test value with ~word) is
   conditional -- confirm against the full file.  */
150 #ifdef EIGHTBIT_NOT_RARE
151 andn %o2, %g3, %o2 /* IEU0 Group */
153 add %o0, 8, %o0 /* IEU0 Group */
155 andcc %o2, %g2, %g0 /* IEU1 */
156 add %o1, 8, %o1 /* IEU0 Group */
/* No byte can be zero: store the whole word (annulled delay slot, so
   the store happens only when the branch is taken) and continue.  */
157 be,a,pt %xcc, 2b /* CTI */
158 stx %g3, [%o0 - 8] /* Store */
/* A byte of %g3 may be zero.  Test the bytes from the most significant
   (lowest address) down.  Each srlx in a delay slot prepares the
   shifted value for the next test and, on a taken branch, leaves the
   other of %g4/%g5 holding the value the target label stores.
   Byte 0 zero -> 11.  */
160 srlx %g3, 56, %g5 /* IEU0 Group */
161 andcc %g5, 0xff, %g0 /* IEU1 Group */
162 be,pn %icc, 11f /* CTI */
/* Byte 1 zero -> 10.  */
163 srlx %g3, 48, %g4 /* IEU0 */
165 andcc %g4, 0xff, %g0 /* IEU1 Group */
166 be,pn %icc, 10f /* CTI */
/* Byte 2 zero -> 9.  */
167 srlx %g3, 40, %g5 /* IEU0 */
168 andcc %g5, 0xff, %g0 /* IEU1 Group */
170 be,pn %icc, 9f /* CTI */
/* Byte 3 zero -> 8.  */
171 srlx %g3, 32, %g4 /* IEU0 */
172 andcc %g4, 0xff, %g0 /* IEU1 Group */
173 be,pn %icc, 8f /* CTI */
/* Byte 4 zero -> 7.  */
175 srlx %g3, 24, %g5 /* IEU0 */
176 andcc %g5, 0xff, %g0 /* IEU1 Group */
177 be,pn %icc, 7f /* CTI */
/* Byte 5 zero -> 6.  */
178 srlx %g3, 16, %g4 /* IEU0 */
180 andcc %g4, 0xff, %g0 /* IEU1 Group */
181 be,pn %icc, 6f /* CTI */
/* Byte 6 zero -> 5.  */
182 srlx %g3, 8, %g5 /* IEU0 */
183 andcc %g5, 0xff, %g0 /* IEU1 Group */
185 be,pn %icc, 5f /* CTI */
/* Delay slot: test value for the next word, used if the loop resumes.  */
186 sub %o3, %g1, %o2 /* IEU0 */
/* The upper seven bytes are non-zero: store the whole word.  If its
   lowest byte is non-zero too this was a false alarm, so continue at 3
   with the next word moved into %g3; otherwise the NUL has just been
   stored and the copy is complete.  */
187 stx %g3, [%o0 - 8] /* Store Group */
188 andcc %g3, 0xff, %g0 /* IEU1 */
190 bne,pt %icc, 3b /* CTI */
191 mov %o3, %g3 /* IEU0 Group */
/* 4: return the original dest (restored in the delay slot of retl).  */
192 4: retl /* CTI+IEU1 Group */
193 mov %g6, %o0 /* IEU0 */
/* 5..11: store only the bytes of %g3 up to and including the NUL, using
   the widest stores that fit, then return the original dest.
   5: NUL is byte 6 -- store byte 6, then fall into 6.  */
196 5: stb %g5, [%o0 - 2] /* Store Group */
197 srlx %g3, 16, %g4 /* IEU0 */
/* 6: store bytes 4-5 as a halfword, then bytes 0-3 as a word.  */
198 6: sth %g4, [%o0 - 4] /* Store Group */
199 srlx %g3, 32, %g4 /* IEU0 */
201 stw %g4, [%o0 - 8] /* Store Group */
202 retl /* CTI+IEU1 Group */
203 mov %g6, %o0 /* IEU0 */
/* 7: NUL is byte 4 -- store it, then fall into 8.  */
204 7: stb %g5, [%o0 - 4] /* Store Group */
206 srlx %g3, 32, %g4 /* IEU0 */
/* 8: store bytes 0-3 as a word.  */
207 8: stw %g4, [%o0 - 8] /* Store Group */
208 retl /* CTI+IEU1 Group */
209 mov %g6, %o0 /* IEU0 */
/* 9: NUL is byte 2 -- store it, then fall into 10.  */
211 9: stb %g5, [%o0 - 6] /* Store Group */
212 srlx %g3, 48, %g4 /* IEU0 */
/* 10: store bytes 0-1 as a halfword.  */
213 10: sth %g4, [%o0 - 8] /* Store Group */
214 retl /* CTI+IEU1 Group */
216 mov %g6, %o0 /* IEU0 */
/* 11: NUL is byte 0 -- store just that byte.  */
217 11: stb %g5, [%o0 - 8] /* Store Group */
218 retl /* CTI+IEU1 Group */
219 mov %g6, %o0 /* IEU0 */
/* 32: dest is not 8-byte aligned.  Step through it one byte at a time
   (%o3 = byte at %o0) until either %o0 becomes 8-byte aligned, in
   which case the word scan at 48 takes over, or a NUL byte is found.  */
222 32: andcc %o0, 7, %g0 /* IEU1 Group */
/* Aligned now: continue at 48; the annulled delay slot loads the first
   full word only when the branch is taken.  */
223 be,a,pn %icc, 48b /* CTI */
224 ldx [%o0], %o3 /* Load */
225 add %o0, 1, %o0 /* IEU0 Group */
/* Current byte non-zero: load the next byte (annulled delay slot) and
   loop.  */
227 brnz,a,pt %o3, 32b /* CTI+IEU1 */
228 lduba [%o0] ASI_PNF, %o3 /* Load */
/* NUL found: undo the increment so that %o0 points at it.  If that
   address is 8-byte aligned, join the word-copy setup at 31; the
   annulled delay slot performs the source alignment test that 31
   branches on.  Otherwise fall into the byte copy at 12.  */
229 add %o0, -1, %o0 /* IEU0 Group */
230 andcc %o0, 7, %g0 /* IEU1 Group */
232 be,a,pn %icc, 31b /* CTI */
233 andcc %o1, 7, %g3 /* IEU1 Group */
/* 12/13: copy bytes from src to dest until either the NUL has been
   copied (return via 4) or dest becomes 8-byte aligned.  %o3 = byte
   most recently loaded from src.  */
234 12: ldub [%o1], %o3 /* Load */
235 stb %o3, [%o0] /* Store Group */
237 13: add %o0, 1, %o0 /* IEU0 */
238 add %o1, 1, %o1 /* IEU1 */
/* The byte just stored was the NUL: done.  The delay slot reads the
   next source byte ahead on both paths.  */
239 andcc %o3, 0xff, %g0 /* IEU1 Group */
240 be,pn %icc, 4b /* CTI */
242 lduba [%o1] ASI_PNF, %o3 /* Load */
/* dest still unaligned: store the byte (annulled delay slot) and loop.  */
243 andcc %o0, 7, %g0 /* IEU1 Group */
244 bne,a,pt %icc, 13b /* CTI */
245 stb %o3, [%o0] /* Store */
/* dest is now 8-byte aligned; the byte read ahead into %o3 has not been
   stored and is read again as part of the word copy.  %g3 = src & 7.
   If src is aligned too, use the aligned loop at 1 (the annulled delay
   slot loads the first word); otherwise set %g4 = 64 and fall through
   into the shift-and-merge copy that follows.  */
247 andcc %o1, 7, %g3 /* IEU1 Group */
248 be,a,pt %icc, 1b /* CTI */
249 ldx [%o1], %o3 /* Load */
250 orcc %g0, 64, %g4 /* IEU1 Group */
/* 14: dest is 8-byte aligned but src is not.  On entry %g3 = src & 7 and
   %g4 = 64.  Compute the shift counts (%g5 = 8 * %g3, %g4 = 64 - %g5)
   and round %o1 down to an 8-byte boundary so that every source access
   is an aligned 8-byte load.  */
252 14: sllx %g3, 3, %g5 /* IEU0 */
253 sub %o1, %g3, %o1 /* IEU0 Group */
254 sub %g4, %g5, %g4 /* IEU1 */
255 /* %g1 = 0101010101010101 *
256 * %g2 = 8080808080808080 *
257 * %g3 = source alignment *
258 * %g5 = number of bits to shift left *
259 * %g4 = number of bits to shift right */
/* First aligned source word; the bytes that precede the real start of
   src are shifted out by the sllx at 15.  */
260 ldxa [%o1] ASI_PNF, %o5 /* Load Group */
262 addcc %o1, 8, %o1 /* IEU1 */
/* 15: shift-and-merge loop.  %o3 = (previous word << %g5) |
   (next word >> %g4), i.e. the next eight source bytes; %o5 keeps the
   newly loaded word for the following iteration.  %o0 and %o1 are
   advanced before the store, hence the [%o0 - 8] addressing.  */
263 15: sllx %o5, %g5, %o3 /* IEU0 Group */
264 ldxa [%o1] ASI_PNF, %o5 /* Load */
265 srlx %o5, %g4, %o4 /* IEU0 Group */
267 add %o0, 8, %o0 /* IEU1 */
268 or %o3, %o4, %o3 /* IEU0 Group */
269 add %o1, 8, %o1 /* IEU1 */
/* %o4 = zero-byte test value for the merged word.  */
270 sub %o3, %g1, %o4 /* IEU0 Group */
/* NOTE(review): no #endif for this conditional is visible here;
   presumably only the andn (masking the test value with ~word) is
   conditional -- confirm against the full file.  */
272 #ifdef EIGHTBIT_NOT_RARE
273 andn %o4, %o3, %o4 /* IEU0 Group */
275 andcc %o4, %g2, %g0 /* IEU1 Group */
/* No byte can be zero: store the merged word (annulled delay slot, so
   the store happens only when the branch is taken) and continue.  */
276 be,a,pt %xcc, 15b /* CTI */
277 stx %o3, [%o0 - 8] /* Store */
/* A byte of %o3 may be zero.  Test the bytes from the most significant
   (lowest address) down; the srlx in each delay slot prepares the next
   test.  Byte 0 zero -> 22.  */
278 srlx %o3, 56, %o4 /* IEU0 Group */
280 andcc %o4, 0xff, %g0 /* IEU1 Group */
281 be,pn %icc, 22f /* CTI */
/* Byte 1 zero -> 21.  */
282 srlx %o3, 48, %o4 /* IEU0 */
283 andcc %o4, 0xff, %g0 /* IEU1 Group */
285 be,pn %icc, 21f /* CTI */
/* Byte 2 zero -> 20.  */
286 srlx %o3, 40, %o4 /* IEU0 */
287 andcc %o4, 0xff, %g0 /* IEU1 Group */
288 be,pn %icc, 20f /* CTI */
/* Byte 3 zero -> 19.  */
290 srlx %o3, 32, %o4 /* IEU0 */
291 andcc %o4, 0xff, %g0 /* IEU1 Group */
292 be,pn %icc, 19f /* CTI */
/* Byte 4 zero -> 18.  */
293 srlx %o3, 24, %o4 /* IEU0 */
295 andcc %o4, 0xff, %g0 /* IEU1 Group */
296 be,pn %icc, 18f /* CTI */
/* Byte 5 zero -> 17.  */
297 srlx %o3, 16, %o4 /* IEU0 */
298 andcc %o4, 0xff, %g0 /* IEU1 Group */
300 be,pn %icc, 17f /* CTI */
/* Byte 6 zero -> 16.  */
301 srlx %o3, 8, %o4 /* IEU0 */
302 andcc %o4, 0xff, %g0 /* IEU1 Group */
303 be,pn %icc, 16f /* CTI */
/* The andcc below sits in the delay slot of the branch above and tests
   the lowest byte.  The stx is in a non-annulled delay slot, so the
   whole word is stored either way: loop if the lowest byte is non-zero
   (false alarm), otherwise the NUL has been stored, so return.  */
305 andcc %o3, 0xff, %g0 /* IEU1 Group */
306 bne,pn %icc, 15b /* CTI */
307 stx %o3, [%o0 - 8] /* Store */
308 retl /* CTI+IEU1 Group */
310 mov %g6, %o0 /* IEU0 */
/* 16..19: NUL is in the lower half of the word.  Store bytes 6, 5 and 4
   individually as far as needed (each label falls through into the
   next), then bytes 0-3 as one word, and return the original dest.  */
313 16: srlx %o3, 8, %o4 /* IEU0 Group */
314 stb %o4, [%o0 - 2] /* Store */
315 17: srlx %o3, 16, %o4 /* IEU0 Group */
316 stb %o4, [%o0 - 3] /* Store */
318 18: srlx %o3, 24, %o4 /* IEU0 Group */
319 stb %o4, [%o0 - 4] /* Store */
320 19: srlx %o3, 32, %o4 /* IEU0 Group */
321 stw %o4, [%o0 - 8] /* Store */
323 retl /* CTI+IEU1 Group */
324 mov %g6, %o0 /* IEU0 */
/* 20..22: NUL is among bytes 0-2.  Store bytes 2, 1 and 0 individually
   as far as needed (falling through), then return the original dest.  */
328 20: srlx %o3, 40, %o4 /* IEU0 Group */
329 stb %o4, [%o0 - 6] /* Store */
330 21: srlx %o3, 48, %o4 /* IEU0 Group */
331 stb %o4, [%o0 - 7] /* Store */
333 22: srlx %o3, 56, %o4 /* IEU0 Group */
334 stb %o4, [%o0 - 8] /* Store */
335 retl /* CTI+IEU1 Group */
336 mov %g6, %o0 /* IEU0 */
/* NOTE(review): libc_hidden_def is a macro defined outside this view;
   presumably it emits the hidden internal alias for strcat -- confirm.  */
338 libc_hidden_def(strcat)