/* strcat (dest, src) -- Append SRC on the end of DEST.
   Copyright (C) 1998-2024 Free Software Foundation, Inc.
   This file is part of the GNU C Library.

   The GNU C Library is free software; you can redistribute it and/or
   modify it under the terms of the GNU Lesser General Public
   License as published by the Free Software Foundation; either
   version 2.1 of the License, or (at your option) any later version.

   The GNU C Library is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   Lesser General Public License for more details.

   You should have received a copy of the GNU Lesser General Public
   License along with the GNU C Library; if not, see
   <https://www.gnu.org/licenses/>.  */
/* Declare %g2, %g3 and %g6 as scratch so the assembler accepts their use
   as temporaries in the routine below.  */
	.register	%g2, #scratch
	.register	%g3, #scratch
	.register	%g6, #scratch

	/* Normally, this uses
	   ((xword - 0x0101010101010101) & 0x8080808080808080) test
	   to find out if any byte in xword could be zero.  This is fast, but
	   also gives false alarms for any byte in range 0x81-0xff.  It does
	   not matter for correctness, as if this test tells us there could
	   be some zero byte, we check it byte by byte, but if bytes with
	   high bits set are common in the strings, then this will give poor
	   performance.  You can #define EIGHTBIT_NOT_RARE and the algorithm
	   will use a one tick slower, but more precise test
	   ((xword - 0x0101010101010101) & (~xword) & 0x8080808080808080),
	   which does not give any false alarms (but if some bits are set,
	   one cannot assume from it which bytes are zero and which are not).
	   It is yet to be measured, what is the correct default for glibc
	   in these days for an average user.  */

/* char *strcat (char *dest, const char *src)

   In:   %o0 = dest, %o1 = src.
   Out:  %o0 = the original dest (saved in %g6 on entry and restored in
	 the delay slot of every retl).
   Uses: %o2-%o5, %g1-%g5 and the condition codes as scratch.

   Constants kept for the whole routine:
	 %g1 = 0x0101010101010101
	 %g2 = 0x8080808080808080

   Phases:
     1. Find the NUL that ends DEST: byte by byte while DEST is not
	8-byte aligned (label 32), then one xword per iteration (49).
     2. Copy SRC byte by byte until the write pointer is 8-byte aligned
	(labels 12/13).
     3. Copy one xword per iteration, either directly when SRC is
	8-byte aligned as well (labels 1/2/3) or by merging two shifted
	SRC xwords when it is not (labels 14/15).

   Within a loaded xword the byte at the lowest address is held in bits
   63:56, which is why every byte-by-byte check starts with a shift by
   56 and works downwards.

   Loads that may run past the terminating NUL use ASI_PNF (the primary
   no-fault address space) so that reading ahead cannot trap.

   An instruction indented by one extra space sits in the delay slot of
   the branch above it; branches carrying ",a" annul that slot when the
   branch is not taken.  The IEU0/IEU1/Load/Store/CTI/Group notes are
   the original instruction scheduling annotations.  */

	.text
	.align		32
	.globl		strcat
	.type		strcat, @function
strcat:
	.cfi_startproc
	/* Build the two masks, remember DEST for the return value and look
	   at the first DEST byte.  */
	sethi		%hi(0x01010101), %g1		/* IEU0 Group */
	ldub		[%o0], %o3			/* Load */
	or		%g1, %lo(0x01010101), %g1	/* IEU0 Group */
	mov		%o0, %g6			/* IEU1 */

	sllx		%g1, 32, %g2			/* IEU0 Group */
	andcc		%o0, 7, %g0			/* IEU1 */
	or		%g1, %g2, %g1			/* IEU0 Group */
	bne,pn		%icc, 32f			/* CTI */

	 sllx		%g1, 7, %g2			/* IEU0 Group */
	/* DEST is 8-byte aligned here.  An empty DEST goes straight to
	   the copy setup at 30.  */
	brz,pn		%o3, 30f			/* CTI+IEU1 */
	 ldx		[%o0], %o3			/* Load */

	/* Word scan of DEST.  Invariant at 49: %o3 is the xword stored at
	   [%o0 - 8].  The next xword is fetched while the current one is
	   being tested.  */
48:	add		%o0, 8, %o0			/* IEU0 Group */

49:	sub		%o3, %g1, %o2			/* IEU0 Group */
#ifdef EIGHTBIT_NOT_RARE
	andn		%o2, %o3, %g5			/* IEU0 Group */
	ldxa		[%o0] ASI_PNF, %o3		/* Load */
	andcc		%g5, %g2, %g0			/* IEU1 Group */
#else
	ldxa		[%o0] ASI_PNF, %o3		/* Load */
	andcc		%o2, %g2, %g0			/* IEU1 Group */
#endif
	be,pt		%xcc, 49b			/* CTI */

	 add		%o0, 8, %o0			/* IEU0 */
	/* Possible NUL.  Rebuild the tested xword in %g3; %o0 is now 16
	   bytes past its address.  First decide whether the upper four
	   bytes need checking at all.  */
	addcc		%o2, %g1, %g3			/* IEU1 Group */
	srlx		%o2, 32, %o2			/* IEU0 */
50:	andcc		%o2, %g2, %g0			/* IEU1 Group */

	be,pn		%xcc, 51f			/* CTI */
	 srlx		%g3, 56, %o2			/* IEU0 */
	andcc		%o2, 0xff, %g0			/* IEU1 Group */
	be,pn		%icc, 29f			/* CTI */

	 srlx		%g3, 48, %o2			/* IEU0 */
	andcc		%o2, 0xff, %g0			/* IEU1 Group */
	be,pn		%icc, 28f			/* CTI */
	 srlx		%g3, 40, %o2			/* IEU0 */

	andcc		%o2, 0xff, %g0			/* IEU1 Group */
	be,pn		%icc, 27f			/* CTI */
	 srlx		%g3, 32, %o2			/* IEU0 */
	andcc		%o2, 0xff, %g0			/* IEU1 Group */

	be,pn		%icc, 26f			/* CTI */
51:	 srlx		%g3, 24, %o2			/* IEU0 */
	andcc		%o2, 0xff, %g0			/* IEU1 Group */
	be,pn		%icc, 25f			/* CTI */

	 srlx		%g3, 16, %o2			/* IEU0 */
	andcc		%o2, 0xff, %g0			/* IEU1 Group */
	be,pn		%icc, 24f			/* CTI */
	 srlx		%g3, 8, %o2			/* IEU0 */

	andcc		%o2, 0xff, %g0			/* IEU1 Group */
	be,pn		%icc, 23f			/* CTI */
	 sub		%o3, %g1, %o2			/* IEU0 */
	andcc		%g3, 0xff, %g0			/* IEU1 Group */

	be,pn		%icc, 52f			/* CTI */
	 ldxa		[%o0] ASI_PNF, %o3		/* Load */
	/* False alarm: no byte of %g3 was zero.  %o2 already holds the
	   test value for the following xword, so test it and either
	   rejoin the fast loop or examine that xword byte by byte.  */
	andcc		%o2, %g2, %g0			/* IEU1 Group */
	be,pt		%xcc, 49b			/* CTI */

	 add		%o0, 8, %o0			/* IEU0 */
	addcc		%o2, %g1, %g3			/* IEU1 Group */
	ba,pt		%xcc, 50b			/* CTI */
	 srlx		%o2, 32, %o2			/* IEU0 */

	/* NUL found in DEST.  Move %o0 back from "16 past the xword" to the
	   address of the NUL itself: 52 is the last byte of the xword
	   (-9) ... 29 is the first byte (-16).  Only the first byte is
	   8-byte aligned, so 29 falls into the word copy setup while the
	   others start with the byte copy at 12.  */
52:	ba,pt		%xcc, 12f			/* CTI Group */
	 add		%o0, -9, %o0			/* IEU0 */
23:	ba,pt		%xcc, 12f			/* CTI Group */
	 add		%o0, -10, %o0			/* IEU0 */

24:	ba,pt		%xcc, 12f			/* CTI Group */
	 add		%o0, -11, %o0			/* IEU0 */
25:	ba,pt		%xcc, 12f			/* CTI Group */
	 add		%o0, -12, %o0			/* IEU0 */

26:	ba,pt		%xcc, 12f			/* CTI Group */
	 add		%o0, -13, %o0			/* IEU0 */
27:	ba,pt		%xcc, 12f			/* CTI Group */
	 add		%o0, -14, %o0			/* IEU0 */

28:	ba,pt		%xcc, 12f			/* CTI Group */
	 add		%o0, -15, %o0			/* IEU0 */
29:	add		%o0, -16, %o0			/* IEU0 Group */

	/* %o0 is 8-byte aligned and points at the NUL of DEST.
	   %g3 = SRC & 7 selects the copy loop; %g4 = 64 is only needed by
	   the misaligned variant at 14.  */
30:	andcc		%o1, 7, %g3			/* IEU1 */

31:	bne,pn		%icc, 14f			/* CTI */
	 orcc		%g0, 64, %g4			/* IEU1 Group */

	/* Both pointers 8-byte aligned.  %g3 = xword being examined and
	   stored, %o3 = next SRC xword (read ahead).  A word that passes
	   the zero test is stored from the annulled delay slot of the
	   loop branch.  */
1:	ldx		[%o1], %o3			/* Load */
	add		%o1, 8, %o1			/* IEU1 */

2:	mov		%o3, %g3			/* IEU0 Group */
3:	sub		%o3, %g1, %o2			/* IEU1 */
	ldxa		[%o1] ASI_PNF, %o3		/* Load */
#ifdef EIGHTBIT_NOT_RARE
	andn		%o2, %g3, %o2			/* IEU0 Group */
#endif
	add		%o0, 8, %o0			/* IEU0 Group */

	andcc		%o2, %g2, %g0			/* IEU1 */
	add		%o1, 8, %o1			/* IEU0 Group */
	be,a,pt		%xcc, 2b			/* CTI */
	 stx		%g3, [%o0 - 8]			/* Store */

	/* Possible NUL in %g3: find the first zero byte, lowest address
	   first.  The shifted value left in %g4/%g5 is reused by the
	   tail stores at 5-11.  */
	srlx		%g3, 56, %g5			/* IEU0 Group */
	andcc		%g5, 0xff, %g0			/* IEU1 Group */
	be,pn		%icc, 11f			/* CTI */
	 srlx		%g3, 48, %g4			/* IEU0 */

	andcc		%g4, 0xff, %g0			/* IEU1 Group */
	be,pn		%icc, 10f			/* CTI */
	 srlx		%g3, 40, %g5			/* IEU0 */
	andcc		%g5, 0xff, %g0			/* IEU1 Group */

	be,pn		%icc, 9f			/* CTI */
	 srlx		%g3, 32, %g4			/* IEU0 */
	andcc		%g4, 0xff, %g0			/* IEU1 Group */
	be,pn		%icc, 8f			/* CTI */

	 srlx		%g3, 24, %g5			/* IEU0 */
	andcc		%g5, 0xff, %g0			/* IEU1 Group */
	be,pn		%icc, 7f			/* CTI */
	 srlx		%g3, 16, %g4			/* IEU0 */

	andcc		%g4, 0xff, %g0			/* IEU1 Group */
	be,pn		%icc, 6f			/* CTI */
	 srlx		%g3, 8, %g5			/* IEU0 */
	andcc		%g5, 0xff, %g0			/* IEU1 Group */

	be,pn		%icc, 5f			/* CTI */
	 sub		%o3, %g1, %o2			/* IEU0 */
	/* The first seven bytes are non-zero, so the whole xword can be
	   stored; if its last byte is non-zero too this was a false
	   alarm and the loop resumes at 3.  */
	stx		%g3, [%o0 - 8]			/* Store Group */
	andcc		%g3, 0xff, %g0			/* IEU1 */

	bne,pt		%icc, 3b			/* CTI */
	 mov		%o3, %g3			/* IEU0 Group */
4:	retl						/* CTI+IEU1 Group */
	 mov		%g6, %o0			/* IEU0 */

	/* Tail stores for the aligned loop.  Entry N means the NUL is:
	   5 = 7th byte, 6 = 6th, 7 = 5th, 8 = 4th, 9 = 3rd, 10 = 2nd,
	   11 = 1st byte of the xword.  Only bytes up to and including
	   the NUL are written, using the widest stores that fit.  */
5:	stb		%g5, [%o0 - 2]			/* Store Group */
	srlx		%g3, 16, %g4			/* IEU0 */
6:	sth		%g4, [%o0 - 4]			/* Store Group */
	srlx		%g3, 32, %g4			/* IEU0 */

	stw		%g4, [%o0 - 8]			/* Store Group */
	retl						/* CTI+IEU1 Group */
	 mov		%g6, %o0			/* IEU0 */
7:	stb		%g5, [%o0 - 4]			/* Store Group */

	srlx		%g3, 32, %g4			/* IEU0 */
8:	stw		%g4, [%o0 - 8]			/* Store Group */
	retl						/* CTI+IEU1 Group */
	 mov		%g6, %o0			/* IEU0 */

9:	stb		%g5, [%o0 - 6]			/* Store Group */
	srlx		%g3, 48, %g4			/* IEU0 */
10:	sth		%g4, [%o0 - 8]			/* Store Group */
	retl						/* CTI+IEU1 Group */

	 mov		%g6, %o0			/* IEU0 */
11:	stb		%g5, [%o0 - 8]			/* Store Group */
	retl						/* CTI+IEU1 Group */
	 mov		%g6, %o0			/* IEU0 */

	/* DEST not 8-byte aligned: scan it one byte at a time.  %o3 is the
	   byte at [%o0].  Once %o0 becomes aligned, reload a full xword
	   and switch to the word scan at 48.  */
32:	andcc		%o0, 7, %g0			/* IEU1 Group */
	be,a,pn		%icc, 48b			/* CTI */
	 ldx		[%o0], %o3			/* Load */
	add		%o0, 1, %o0			/* IEU0 Group */

	brnz,a,pt	%o3, 32b			/* CTI+IEU1 */
	 lduba		[%o0] ASI_PNF, %o3		/* Load */
	/* The byte just passed was the NUL: step back onto it.  If that
	   address is 8-byte aligned go to the word copy setup at 31 (the
	   delay slot supplies the SRC alignment test that 31 expects).  */
	add		%o0, -1, %o0			/* IEU0 Group */
	andcc		%o0, 7, %g0			/* IEU1 Group */

	be,a,pn		%icc, 31b			/* CTI */
	 andcc		%o1, 7, %g3			/* IEU1 Group */

	/* Byte copy until the write pointer is 8-byte aligned.  Returns
	   through 4 as soon as the NUL has been stored.  */
12:	ldub		[%o1], %o3			/* Load */
	stb		%o3, [%o0]			/* Store Group */

13:	add		%o0, 1, %o0			/* IEU0 */
	add		%o1, 1, %o1			/* IEU1 */
	andcc		%o3, 0xff, %g0			/* IEU1 Group */
	be,pn		%icc, 4b			/* CTI */

	 lduba		[%o1] ASI_PNF, %o3		/* Load */
	andcc		%o0, 7, %g0			/* IEU1 Group */
	bne,a,pt	%icc, 13b			/* CTI */
	 stb		%o3, [%o0]			/* Store */

	/* Write pointer aligned.  Use the plain word loop if SRC is
	   aligned as well, otherwise fall into the shift-and-merge loop
	   with %g3 = SRC & 7 and %g4 = 64.  */
	andcc		%o1, 7, %g3			/* IEU1 Group */
	be,a,pt		%icc, 1b			/* CTI */
	 ldx		[%o1], %o3			/* Load */
	orcc		%g0, 64, %g4			/* IEU1 Group */

	/* SRC misaligned, DEST aligned.  Round SRC down to an 8-byte
	   boundary and build every output xword from two neighbouring
	   SRC xwords.  */
14:	sllx		%g3, 3, %g5			/* IEU0 */
	sub		%o1, %g3, %o1			/* IEU0 Group */
	sub		%g4, %g5, %g4			/* IEU1 */
							/* %g1 = 0101010101010101 *
							 * %g2 = 8080808080808080 *
							 * %g3 = source alignment *
							 * %g5 = number of bits to shift left *
							 * %g4 = number of bits to shift right */
	ldxa		[%o1] ASI_PNF, %o5		/* Load Group */

	addcc		%o1, 8, %o1			/* IEU1 */
	/* %o5 = most recently loaded SRC xword; %o3 = merged xword that
	   is tested and then stored at [%o0 - 8].  */
15:	sllx		%o5, %g5, %o3			/* IEU0 Group */
	ldxa		[%o1] ASI_PNF, %o5		/* Load */
	srlx		%o5, %g4, %o4			/* IEU0 Group */

	add		%o0, 8, %o0			/* IEU1 */
	or		%o3, %o4, %o3			/* IEU0 Group */
	add		%o1, 8, %o1			/* IEU1 */
	sub		%o3, %g1, %o4			/* IEU0 Group */

#ifdef EIGHTBIT_NOT_RARE
	andn		%o4, %o3, %o4			/* IEU0 Group */
#endif
	andcc		%o4, %g2, %g0			/* IEU1 Group */
	be,a,pt		%xcc, 15b			/* CTI */
	 stx		%o3, [%o0 - 8]			/* Store */
	/* Possible NUL in the merged xword: locate the first zero byte,
	   lowest address first.  */
	srlx		%o3, 56, %o4			/* IEU0 Group */

	andcc		%o4, 0xff, %g0			/* IEU1 Group */
	be,pn		%icc, 22f			/* CTI */
	 srlx		%o3, 48, %o4			/* IEU0 */
	andcc		%o4, 0xff, %g0			/* IEU1 Group */

	be,pn		%icc, 21f			/* CTI */
	 srlx		%o3, 40, %o4			/* IEU0 */
	andcc		%o4, 0xff, %g0			/* IEU1 Group */
	be,pn		%icc, 20f			/* CTI */

	 srlx		%o3, 32, %o4			/* IEU0 */
	andcc		%o4, 0xff, %g0			/* IEU1 Group */
	be,pn		%icc, 19f			/* CTI */
	 srlx		%o3, 24, %o4			/* IEU0 */

	andcc		%o4, 0xff, %g0			/* IEU1 Group */
	be,pn		%icc, 18f			/* CTI */
	 srlx		%o3, 16, %o4			/* IEU0 */
	andcc		%o4, 0xff, %g0			/* IEU1 Group */

	be,pn		%icc, 17f			/* CTI */
	 srlx		%o3, 8, %o4			/* IEU0 */
	andcc		%o4, 0xff, %g0			/* IEU1 Group */
	be,pn		%icc, 16f			/* CTI */

	 andcc		%o3, 0xff, %g0			/* IEU1 Group */
	/* First seven bytes are non-zero.  The delay-slot store is not
	   annulled, so the whole xword is written whether the last byte
	   is the NUL (return) or not (false alarm, keep looping).  */
	bne,pn		%icc, 15b			/* CTI */
	 stx		%o3, [%o0 - 8]			/* Store */
	retl						/* CTI+IEU1 Group */

	 mov		%g6, %o0			/* IEU0 */

	/* Tail stores for the merge loop.  Entry N means the NUL is:
	   16 = 7th byte, 17 = 6th, 18 = 5th, 19 = 4th, 20 = 3rd,
	   21 = 2nd, 22 = 1st byte of the xword.  Each entry stores its
	   own byte and falls through to store the bytes before it.  */
16:	srlx		%o3, 8, %o4			/* IEU0 Group */
	stb		%o4, [%o0 - 2]			/* Store */
17:	srlx		%o3, 16, %o4			/* IEU0 Group */
	stb		%o4, [%o0 - 3]			/* Store */

18:	srlx		%o3, 24, %o4			/* IEU0 Group */
	stb		%o4, [%o0 - 4]			/* Store */
19:	srlx		%o3, 32, %o4			/* IEU0 Group */
	stw		%o4, [%o0 - 8]			/* Store */

	retl						/* CTI+IEU1 Group */
	 mov		%g6, %o0			/* IEU0 */

20:	srlx		%o3, 40, %o4			/* IEU0 Group */
	stb		%o4, [%o0 - 6]			/* Store */
21:	srlx		%o3, 48, %o4			/* IEU0 Group */
	stb		%o4, [%o0 - 7]			/* Store */

22:	srlx		%o3, 56, %o4			/* IEU0 Group */
	stb		%o4, [%o0 - 8]			/* Store */
	retl						/* CTI+IEU1 Group */
	 mov		%g6, %o0			/* IEU0 */
	.cfi_endproc
	.size		strcat, .-strcat

libc_hidden_builtin_def (strcat)