1 /* Copy SRC to DEST returning the address of the terminating '\0' in DEST.
3 Copyright (C) 1998, 1999, 2002, 2003, 2004 Free Software Foundation, Inc.
4 This file is part of the GNU C Library.
5 Contributed by Jan Vondrak <jvon4518@ss1000.ms.mff.cuni.cz> and
6 Jakub Jelinek <jj@ultra.linux.cz>.
8 The GNU C Library is free software; you can redistribute it and/or
9 modify it under the terms of the GNU Lesser General Public
10 License as published by the Free Software Foundation; either
11 version 2.1 of the License, or (at your option) any later version.
13 The GNU C Library is distributed in the hope that it will be useful,
14 but WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16 Lesser General Public License for more details.
18 You should have received a copy of the GNU Lesser General Public
19 License along with the GNU C Library; if not, see
20 <http://www.gnu.org/licenses/>. */
/* Declare to the assembler that the global registers %g2, %g3 and %g6
   are used by this file as scratch registers.  */
25 .register %g2, #scratch
26 .register %g3, #scratch
27 .register %g6, #scratch
30 /* Normally, this uses the
31 ((xword - 0x0101010101010101) & 0x8080808080808080) test
32 to find out if any byte in xword could be zero. This is fast, but
33 it also gives false alarms for any byte in the range 0x81-0xff. That does
34 not matter for correctness, because whenever this test says there could
35 be a zero byte, we check byte by byte; but if bytes with
36 high bits set are common in the strings, then this will give poor
37 performance. You can #define EIGHTBIT_NOT_RARE and the algorithm
38 will use a one tick slower, but more precise test
39 ((xword - 0x0101010101010101) & (~xword) & 0x8080808080808080),
40 which does not give any false alarms (but if some bits are set,
41 one cannot tell from it which bytes are zero and which are not).
42 It has yet to be measured which default is the right one for glibc
43 these days for an average user. */
/* Routine body (presumably __stpcpy: the alias lines at the end of the
   file refer to that name; the entry label is not visible in this listing).

   In:   %o0 = DEST (every store goes through %o0)
         %o1 = SRC  (every load goes through %o1)
   Out:  %o0 = address of the terminating NUL byte written to DEST
   Uses: %g1-%g6 and %o2-%o5 as temporaries.  Leaf routine: it returns
         with retl and never touches the register window or the stack.

   Three phases:
     12/13  byte-at-a-time copy until DEST is 8-byte aligned;
     1/2/3  word loop when SRC is 8-byte aligned as well;
     14/15  word loop that merges two aligned loads when SRC is not.
   Both word loops run the "could this word contain a zero byte" test
   with %g1 = 0x0101010101010101 and %g2 = 0x8080808080808080, then
   confirm byte by byte, starting with the byte that is stored at the
   lowest address of the word (shift by 56).

   Every branch has a delay slot.  With ",a" (annul) the delay-slot
   instruction is executed only when the branch is taken.

   The trailing IEU0/IEU1/CTI/Load/Store/Group remarks are the original
   author's notes (they look like instruction-scheduling annotations).

   NOTE(review): ASI_PNF is defined outside this file; presumably it is
   the SPARC V9 "primary, no-fault" ASI, so the look-ahead loads past the
   terminator cannot trap -- confirm.  */

/* Build the constants while testing the alignment of both pointers.  */
49 sethi %hi(0x01010101), %g1 /* IEU0 Group */
50 or %g1, %lo(0x01010101), %g1 /* IEU0 Group */
51 andcc %o0, 7, %g0 /* IEU1 */
52 sllx %g1, 32, %g2 /* IEU0 Group */
/* DEST misaligned -> 12.  The delay slot already computes %g3 = SRC & 7
   and replaces the condition codes for the branch that follows.  */
54 bne,pn %icc, 12f /* CTI */
55 andcc %o1, 7, %g3 /* IEU1 */
56 or %g1, %g2, %g1 /* IEU0 Group */
/* SRC misaligned (DEST aligned) -> 14.  Delay slot: %g2 = %g1 << 7.  */
57 bne,pn %icc, 14f /* CTI */
59 sllx %g1, 7, %g2 /* IEU0 Group */

/* Both pointers aligned.  %g3 = word being examined, %o3 = next word,
   loaded one iteration ahead.  */
60 1: ldx [%o1], %o3 /* Load */
61 add %o1, 8, %o1 /* IEU1 */
62 2: mov %o3, %g3 /* IEU0 Group */
64 sub %o3, %g1, %o2 /* IEU1 */
65 3: ldxa [%o1] ASI_PNF, %o3 /* Load */
66 #ifdef EIGHTBIT_NOT_RARE
67 andn %o2, %g3, %o2 /* IEU0 Group */
/* The conditional covers only the extra "& ~xword" step of the precise
   test; it has to be closed here or the rest of the routine would be
   skipped whenever EIGHTBIT_NOT_RARE is not defined.  */
#endif
69 add %o0, 8, %o0 /* IEU0 Group */
70 andcc %o2, %g2, %g0 /* IEU1 */
72 add %o1, 8, %o1 /* IEU0 Group */
/* No candidate zero byte: store the word (annulled slot, so only when
   the branch is taken) and continue with the next one.  */
73 be,a,pt %xcc, 2b /* CTI */
74 stx %g3, [%o0 - 8] /* Store */
/* Possible zero byte: test the bytes of %g3 one at a time.  %o0 has
   already been advanced, so the word belongs at [%o0 - 8].  Each delay
   slot prepares the shifted value for the next test; %g4 and %g5
   alternate so that the handlers can reuse the previous shifts.  */
75 srlx %g3, 56, %g5 /* IEU0 Group */
77 andcc %g5, 0xff, %g0 /* IEU1 Group */
78 be,pn %icc, 11f /* CTI */
79 srlx %g3, 48, %g4 /* IEU0 */
80 andcc %g4, 0xff, %g0 /* IEU1 Group */
82 be,pn %icc, 10f /* CTI */
83 srlx %g3, 40, %g5 /* IEU0 */
84 andcc %g5, 0xff, %g0 /* IEU1 Group */
85 be,pn %icc, 9f /* CTI */
87 srlx %g3, 32, %g4 /* IEU0 */
88 andcc %g4, 0xff, %g0 /* IEU1 Group */
89 be,pn %icc, 8f /* CTI */
90 srlx %g3, 24, %g5 /* IEU0 */
92 andcc %g5, 0xff, %g0 /* IEU1 Group */
93 be,pn %icc, 7f /* CTI */
94 srlx %g3, 16, %g4 /* IEU0 */
95 andcc %g4, 0xff, %g0 /* IEU1 Group */
97 be,pn %icc, 6f /* CTI */
98 srlx %g3, 8, %g5 /* IEU0 */
99 andcc %g5, 0xff, %g0 /* IEU1 Group */
/* Delay slot: start the zero test for the word already waiting in %o3.  */
100 be,pn %icc, 5f /* CTI */
102 sub %o3, %g1, %o2 /* IEU0 */
/* The first seven bytes are non-zero: store the whole word and look at
   the last byte.  Non-zero means false alarm, so resume the loop at 3.  */
103 stx %g3, [%o0 - 8] /* Store Group */
104 andcc %g3, 0xff, %g0 /* IEU1 */
105 bne,pt %icc, 3b /* CTI */
107 mov %o3, %g3 /* IEU0 Group */
/* The terminator is the byte just stored at %o0 - 1.  Also the exit of
   the byte loop at 13.  */
108 4: retl /* CTI+IEU1 Group */
109 sub %o0, 1, %o0 /* IEU0 */

/* Tails of the aligned loop.  Each one stores the bytes up to and
   including the terminator with the widest stores that fit and returns
   the terminator's address, kept in %g6 meanwhile.  */
/* 6: terminator at %o0 - 3; %g4 = %g3 >> 16 from the test sequence.  */
112 6: ba,pt %xcc, 23f /* CTI Group */
113 sub %o0, 3, %g6 /* IEU0 */
/* 5: terminator at %o0 - 2; %g5 = %g3 >> 8 from the test sequence.  */
114 5: sub %o0, 2, %g6 /* IEU0 Group */
115 stb %g5, [%o0 - 2] /* Store */
117 srlx %g3, 16, %g4 /* IEU0 Group */
118 23: sth %g4, [%o0 - 4] /* Store */
119 srlx %g3, 32, %g4 /* IEU0 Group */
120 stw %g4, [%o0 - 8] /* Store */
122 retl /* CTI+IEU1 Group */
123 mov %g6, %o0 /* IEU0 */
/* 8: terminator at %o0 - 5; %g4 = %g3 >> 32 from the test sequence.  */
124 8: ba,pt %xcc, 24f /* CTI Group */
125 sub %o0, 5, %g6 /* IEU0 */
/* 7: terminator at %o0 - 4; %g5 = %g3 >> 24 from the test sequence.  */
127 7: sub %o0, 4, %g6 /* IEU0 Group */
128 stb %g5, [%o0 - 4] /* Store */
129 srlx %g3, 32, %g4 /* IEU0 Group */
130 24: stw %g4, [%o0 - 8] /* Store */
132 retl /* CTI+IEU1 Group */
133 mov %g6, %o0 /* IEU0 */
/* 10: terminator at %o0 - 7; %g4 = %g3 >> 48 from the test sequence.  */
134 10: ba,pt %xcc, 25f /* CTI Group */
135 sub %o0, 7, %g6 /* IEU0 */
/* 9: terminator at %o0 - 6; %g5 = %g3 >> 40 from the test sequence.  */
137 9: sub %o0, 6, %g6 /* IEU0 Group */
138 stb %g5, [%o0 - 6] /* Store */
139 srlx %g3, 48, %g4 /* IEU0 */
140 25: sth %g4, [%o0 - 8] /* Store Group */
142 retl /* CTI+IEU1 Group */
143 mov %g6, %o0 /* IEU0 */
/* 11: the first byte of the word is the terminator (%g5 = %g3 >> 56).  */
144 11: stb %g5, [%o0 - 8] /* Store Group */
145 retl /* CTI+IEU1 Group */
147 sub %o0, 8, %o0 /* IEU0 */

/* DEST is not 8-byte aligned.  Finish building %g1/%g2 (the entry
   sequence branched here before doing so), then copy single bytes
   until DEST becomes aligned or the terminator has been copied.  */
150 12: or %g1, %g2, %g1 /* IEU0 Group */
151 ldub [%o1], %o3 /* Load */
152 sllx %g1, 7, %g2 /* IEU0 Group */
153 stb %o3, [%o0] /* Store Group */
155 13: add %o0, 1, %o0 /* IEU0 */
156 add %o1, 1, %o1 /* IEU1 */
157 andcc %o3, 0xff, %g0 /* IEU1 Group */
/* The byte just stored was the NUL: return %o0 - 1 through label 4.
   The delay slot fetches the next source byte either way.  */
158 be,pn %icc, 4b /* CTI */
160 lduba [%o1] ASI_PNF, %o3 /* Load */
161 andcc %o0, 7, %g0 /* IEU1 Group */
/* DEST still misaligned: store the fetched byte (annulled slot) and
   go round again.  */
162 bne,a,pt %icc, 13b /* CTI */
163 stb %o3, [%o0] /* Store */
/* DEST is aligned now.  If SRC is aligned too, enter the fast loop (the
   annulled slot and label 1 both load the same word); otherwise fall
   through with %g3 = SRC & 7.  */
165 andcc %o1, 7, %g3 /* IEU1 Group */
166 be,a,pt %icc, 1b /* CTI */
167 ldx [%o1], %o3 /* Load */

/* DEST aligned, SRC not.  Round SRC down to an 8-byte boundary and
   assemble each destination word from two consecutive aligned loads.  */
168 14: orcc %g0, 64, %g4 /* IEU1 Group */
170 sllx %g3, 3, %g5 /* IEU0 */
171 sub %o1, %g3, %o1 /* IEU0 Group */
172 sub %g4, %g5, %g4 /* IEU1 */
173 /* %g1 = 0101010101010101 *
174 * %g2 = 8080808080808080 *
175 * %g3 = source alignment *
176 * %g5 = number of bits to shift left *
177 * %g4 = number of bits to shift right */
178 ldxa [%o1] ASI_PNF, %o5 /* Load Group */
180 addcc %o1, 8, %o1 /* IEU1 */
/* %o3 = (previous load << %g5) | (next load >> %g4) is the next eight
   string bytes; %o5 carries the newest load into the next iteration.  */
181 15: sllx %o5, %g5, %o3 /* IEU0 Group */
182 ldxa [%o1] ASI_PNF, %o5 /* Load */
183 srlx %o5, %g4, %o4 /* IEU0 Group */
185 add %o0, 8, %o0 /* IEU1 */
186 or %o3, %o4, %o3 /* IEU0 Group */
187 add %o1, 8, %o1 /* IEU1 */
188 sub %o3, %g1, %o4 /* IEU0 Group */
190 #ifdef EIGHTBIT_NOT_RARE
191 andn %o4, %o3, %o4 /* IEU0 Group */
/* As in the aligned loop, the conditional covers only the andn.  */
#endif
193 andcc %o4, %g2, %g0 /* IEU1 Group */
/* No candidate zero byte: store the merged word (annulled slot) and
   loop.  */
194 be,a,pt %xcc, 15b /* CTI */
195 stx %o3, [%o0 - 8] /* Store */
/* Possible zero byte: test the bytes of %o3 in string order, using %o4
   as the only temporary (the handlers recompute their shifts).  */
196 srlx %o3, 56, %o4 /* IEU0 Group */
198 andcc %o4, 0xff, %g0 /* IEU1 Group */
199 be,pn %icc, 22f /* CTI */
200 srlx %o3, 48, %o4 /* IEU0 */
201 andcc %o4, 0xff, %g0 /* IEU1 Group */
203 be,pn %icc, 21f /* CTI */
204 srlx %o3, 40, %o4 /* IEU0 */
205 andcc %o4, 0xff, %g0 /* IEU1 Group */
206 be,pn %icc, 20f /* CTI */
208 srlx %o3, 32, %o4 /* IEU0 */
209 andcc %o4, 0xff, %g0 /* IEU1 Group */
210 be,pn %icc, 19f /* CTI */
211 srlx %o3, 24, %o4 /* IEU0 */
213 andcc %o4, 0xff, %g0 /* IEU1 Group */
214 be,pn %icc, 18f /* CTI */
215 srlx %o3, 16, %o4 /* IEU0 */
216 andcc %o4, 0xff, %g0 /* IEU1 Group */
218 be,pn %icc, 17f /* CTI */
219 srlx %o3, 8, %o4 /* IEU0 */
220 andcc %o4, 0xff, %g0 /* IEU1 Group */
/* Delay slot: test the last byte for the branch that follows.  */
221 be,pn %icc, 16f /* CTI */
223 andcc %o3, 0xff, %g0 /* IEU1 Group */
/* The delay slot is not annulled, so the word is stored on both paths:
   last byte non-zero -> false alarm, loop; otherwise the terminator is
   the byte at %o0 - 1.  */
224 bne,pn %icc, 15b /* CTI */
225 stx %o3, [%o0 - 8] /* Store */
226 retl /* CTI+IEU1 Group */
228 sub %o0, 1, %o0 /* IEU0 */

/* Tails of the unaligned loop.  %g6 = address of the terminator.  The
   entry points fall through a chain of byte stores, so each one writes
   the terminator and every byte of the word before it.  */
/* 17: terminator at %o0 - 3.  */
231 17: ba,pt %xcc, 26f /* CTI Group */
232 subcc %o0, 3, %g6 /* IEU1 */
/* 18: terminator at %o0 - 4.  */
233 18: ba,pt %xcc, 27f /* CTI Group */
234 subcc %o0, 4, %g6 /* IEU1 */
/* 19: terminator at %o0 - 5.  */
236 19: ba,pt %xcc, 28f /* CTI Group */
237 subcc %o0, 5, %g6 /* IEU1 */
/* 16: terminator at %o0 - 2.  */
238 16: subcc %o0, 2, %g6 /* IEU1 Group */
239 srlx %o3, 8, %o4 /* IEU0 */
241 stb %o4, [%o0 - 2] /* Store */
242 26: srlx %o3, 16, %o4 /* IEU0 Group */
243 stb %o4, [%o0 - 3] /* Store */
244 27: srlx %o3, 24, %o4 /* IEU0 Group */
246 stb %o4, [%o0 - 4] /* Store */
247 28: srlx %o3, 32, %o4 /* IEU0 Group */
248 stw %o4, [%o0 - 8] /* Store */
249 retl /* CTI+IEU1 Group */
251 mov %g6, %o0 /* IEU0 */
/* 21: terminator at %o0 - 7.  */
254 21: ba,pt %xcc, 29f /* CTI Group */
255 subcc %o0, 7, %g6 /* IEU1 */
/* 22: terminator at %o0 - 8, the first byte of the word.  */
256 22: ba,pt %xcc, 30f /* CTI Group */
257 subcc %o0, 8, %g6 /* IEU1 */
/* 20: terminator at %o0 - 6.  */
259 20: subcc %o0, 6, %g6 /* IEU1 Group */
260 srlx %o3, 40, %o4 /* IEU0 */
261 stb %o4, [%o0 - 6] /* Store */
262 29: srlx %o3, 48, %o4 /* IEU0 Group */
264 stb %o4, [%o0 - 7] /* Store */
265 30: srlx %o3, 56, %o4 /* IEU0 Group */
266 stb %o4, [%o0 - 8] /* Store */
267 retl /* CTI+IEU1 Group */
269 mov %g6, %o0 /* IEU0 */
/* Symbol aliases for the routine above.  The three macros are defined
   outside this file.
   NOTE(review): presumably the usual glibc conventions apply -- weak_alias
   makes stpcpy a weak alias of __stpcpy, and the *_hidden_*def macros
   provide the hidden-visibility definitions for calls from inside libc --
   confirm against libc-symbols.h.  */
272 weak_alias (__stpcpy, stpcpy)
273 libc_hidden_def (__stpcpy)
274 libc_hidden_builtin_def (stpcpy)