1 /* Copy SRC to DEST returning DEST.
3 Copyright (C) 1998, 1999, 2003 Free Software Foundation, Inc.
4 This file is part of the GNU C Library.
5 Contributed by Jan Vondrak <jvon4518@ss1000.ms.mff.cuni.cz> and
6 Jakub Jelinek <jj@ultra.linux.cz>.
8 The GNU C Library is free software; you can redistribute it and/or
9 modify it under the terms of the GNU Lesser General Public
10 License as published by the Free Software Foundation; either
11 version 2.1 of the License, or (at your option) any later version.
13 The GNU C Library is distributed in the hope that it will be useful,
14 but WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16 Lesser General Public License for more details.
18 You should have received a copy of the GNU Lesser General Public
19 License along with the GNU C Library; if not, write to the Free
20 Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
02111-1307 USA. */
/* Declare to the assembler that the global registers %g2, %g3 and %g6
   are used as scratch registers by the code in this file.  */
26 .register %g2, #scratch
27 .register %g3, #scratch
28 .register %g6, #scratch
31 /* Normally, this uses the test
32 ((xword - 0x0101010101010101) & 0x8080808080808080)
33 to find out if any byte in xword could be zero. This is fast, but
34 also gives a false alarm for any byte in the range 0x81-0xff. This does
35 not matter for correctness, because whenever the test says there could
36 be a zero byte, we check the word byte by byte; but if bytes with
37 high bits set are common in the strings, then this will give poor
38 performance. You can #define EIGHTBIT_NOT_RARE and the algorithm
39 will use a one tick slower, but more precise, test
40 ((xword - 0x0101010101010101) & (~xword) & 0x8080808080808080),
41 which does not give any false alarms (but if some bits are set,
42 one cannot assume from it which bytes are zero and which are not).
43 It has yet to be measured what the correct default for glibc is
44 these days for an average user. */
/* Body of strcpy.  The code reads the source string through %o1, writes
   the destination through %o0, and on every exit path returns the
   original %o0 value (kept in %g6).

   Register roles established by the code below:
     %g6   original destination pointer, moved back to %o0 before return
     %g1   0x0101010101010101
     %g2   0x8080808080808080
     %g3   source alignment (%o1 & 7) at entry and in the 14: path;
           the word currently being examined in the 2:/3: loop
     %o3   data most recently loaded (or merged) from the source

   Paths:
     1:  - 11:  destination and source both 8-byte aligned
     12: - 13:  destination not 8-byte aligned: copy bytes until it is
     14: - 22:  destination aligned, source not: shift-and-merge copy

   The instruction that follows each branch or retl is its delay slot.
   On the conditional branches written with ",a" the delay slot only
   takes effect when the branch is taken.  The IEU0/IEU1/CTI/Load/Store/
   Group notes at the end of each line are the original author's
   annotations (presumably pipeline issue slots and groups; this cannot
   be verified from this file alone).

   NOTE(review): neither "#ifdef EIGHTBIT_NOT_RARE" below has a matching
   "#endif" in this view, and no function entry/end markers are visible
   around this code -- confirm against the complete file.
   NOTE(review): ASI_PNF is defined outside this view; presumably it is
   the no-fault address space, which is what would make the read-ahead
   loads past the terminator safe -- confirm.  */

/* Start building the 0x01 byte pattern, save DEST for the return value
   and test whether DEST is 8-byte aligned.  */
50 sethi %hi(0x01010101), %g1 /* IEU0 Group */
51 mov %o0, %g6 /* IEU1 */
52 or %g1, %lo(0x01010101), %g1 /* IEU0 Group */
53 andcc %o0, 7, %g0 /* IEU1 */
/* DEST not aligned -> 12.  The delay slot puts SRC & 7 into %g3 and
   sets the condition codes used by the second branch below.  Label 12
   repeats the two mask-building steps that are skipped when the first
   branch is taken.  */
55 sllx %g1, 32, %g2 /* IEU0 Group */
56 bne,pn %icc, 12f /* CTI */
57 andcc %o1, 7, %g3 /* IEU1 */
58 or %g1, %g2, %g1 /* IEU0 Group */
/* SRC not aligned -> 14.  Delay slot: %g2 = %g1 << 7.  */
60 bne,pn %icc, 14f /* CTI */
61 sllx %g1, 7, %g2 /* IEU0 Group */
/* Aligned loop.  %g3 holds the word being examined while %o3 is already
   being reloaded with the following source word.  %o2 = word - %g1 is
   the value tested against %g2.  */
62 1: ldx [%o1], %o3 /* Load */
63 add %o1, 8, %o1 /* IEU1 */
65 2: mov %o3, %g3 /* IEU0 Group */
66 3: sub %o3, %g1, %o2 /* IEU1 */
67 ldxa [%o1] ASI_PNF, %o3 /* Load */
/* Optional precise variant of the test: also mask with the complement
   of the word itself.  */
68 #ifdef EIGHTBIT_NOT_RARE
69 andn %o2, %g3, %o2 /* IEU0 Group */
71 add %o0, 8, %o0 /* IEU0 Group */
/* No byte flagged: store the word just examined (delay slot, executed
   only when the branch is taken) and continue at 2.  Both pointers
   have already been advanced, hence the -8 offset.  */
73 andcc %o2, %g2, %g0 /* IEU1 */
74 add %o1, 8, %o1 /* IEU0 Group */
75 be,a,pt %xcc, 2b /* CTI */
76 stx %g3, [%o0 - 8] /* Store */
/* Some byte was flagged.  Examine %g3 one byte at a time, starting with
   bits 63..56 (the byte that the tail code stores at the lowest
   address).  Each branch's delay slot shifts the next byte into place;
   %g5 and %g4 are used alternately so the tail code finds the shifted
   value it needs already in a register.  */
78 srlx %g3, 56, %g5 /* IEU0 Group */
79 andcc %g5, 0xff, %g0 /* IEU1 Group */
80 be,pn %icc, 11f /* CTI */
81 srlx %g3, 48, %g4 /* IEU0 */
83 andcc %g4, 0xff, %g0 /* IEU1 Group */
84 be,pn %icc, 10f /* CTI */
85 srlx %g3, 40, %g5 /* IEU0 */
86 andcc %g5, 0xff, %g0 /* IEU1 Group */
88 be,pn %icc, 9f /* CTI */
89 srlx %g3, 32, %g4 /* IEU0 */
90 andcc %g4, 0xff, %g0 /* IEU1 Group */
91 be,pn %icc, 8f /* CTI */
93 srlx %g3, 24, %g5 /* IEU0 */
94 andcc %g5, 0xff, %g0 /* IEU1 Group */
95 be,pn %icc, 7f /* CTI */
96 srlx %g3, 16, %g4 /* IEU0 */
98 andcc %g4, 0xff, %g0 /* IEU1 Group */
99 be,pn %icc, 6f /* CTI */
100 srlx %g3, 8, %g5 /* IEU0 */
101 andcc %g5, 0xff, %g0 /* IEU1 Group */
/* The upper seven bytes are all non-zero once the branch to 5 is not
   taken.  Store the whole word, then test the lowest byte: non-zero
   means the flag was a false alarm, so resume at 3 with %g3 = next
   word (delay slot); zero means the terminator has just been stored
   and we fall through to the return at 4.  The sub in the delay slot
   of the branch to 5 computes the same value that label 3 computes.  */
103 be,pn %icc, 5f /* CTI */
104 sub %o3, %g1, %o2 /* IEU0 */
105 stx %g3, [%o0 - 8] /* Store Group */
106 andcc %g3, 0xff, %g0 /* IEU1 */
108 bne,pt %icc, 3b /* CTI */
109 mov %o3, %g3 /* IEU0 Group */
/* Common exit: return the saved DEST (delay slot of retl).  */
110 4: retl /* CTI+IEU1 Group */
111 mov %g6, %o0 /* IEU0 */
/* Tail stores for the aligned path.  The label entered identifies the
   zero byte; the code stores the bytes of %g3 up to and including it,
   using byte, halfword and word stores at offsets relative to the
   already advanced %o0, then returns DEST.
     5:  zero byte in bits 15..8     6:  zero byte in bits 23..16
     7:  zero byte in bits 31..24    8:  zero byte in bits 39..32
     9:  zero byte in bits 47..40   10:  zero byte in bits 55..48
    11:  zero byte in bits 63..56  */
114 5: stb %g5, [%o0 - 2] /* Store Group */
115 srlx %g3, 16, %g4 /* IEU0 */
116 6: sth %g4, [%o0 - 4] /* Store Group */
117 srlx %g3, 32, %g4 /* IEU0 */
119 stw %g4, [%o0 - 8] /* Store Group */
120 retl /* CTI+IEU1 Group */
121 mov %g6, %o0 /* IEU0 */
122 7: stb %g5, [%o0 - 4] /* Store Group */
124 srlx %g3, 32, %g4 /* IEU0 */
125 8: stw %g4, [%o0 - 8] /* Store Group */
126 retl /* CTI+IEU1 Group */
127 mov %g6, %o0 /* IEU0 */
129 9: stb %g5, [%o0 - 6] /* Store Group */
130 srlx %g3, 48, %g4 /* IEU0 */
131 10: sth %g4, [%o0 - 8] /* Store Group */
132 retl /* CTI+IEU1 Group */
134 mov %g6, %o0 /* IEU0 */
135 11: stb %g5, [%o0 - 8] /* Store Group */
136 retl /* CTI+IEU1 Group */
137 mov %g6, %o0 /* IEU0 */
/* DEST is not 8-byte aligned.  Finish building %g1 and %g2 (skipped by
   the early branch) and copy the first byte.  */
139 12: or %g1, %g2, %g1 /* IEU0 Group */
140 ldub [%o1], %o3 /* Load */
141 sllx %g1, 7, %g2 /* IEU0 Group */
142 stb %o3, [%o0] /* Store Group */
/* Byte loop.  Advance both pointers and test the byte just stored; if it
   was the terminator, return through 4.  The delay slot of that branch
   already fetches the next source byte.  */
144 13: add %o0, 1, %o0 /* IEU0 */
145 add %o1, 1, %o1 /* IEU1 */
146 andcc %o3, 0xff, %g0 /* IEU1 Group */
147 be,pn %icc, 4b /* CTI */
149 lduba [%o1] ASI_PNF, %o3 /* Load */
/* While DEST is still unaligned, store the fetched byte (delay slot,
   only when the branch is taken) and repeat.  */
150 andcc %o0, 7, %g0 /* IEU1 Group */
151 bne,a,pt %icc, 13b /* CTI */
152 stb %o3, [%o0] /* Store */
/* DEST is now aligned.  If SRC is aligned too, join the aligned loop at
   1 (the delay-slot load is repeated by the instruction at 1);
   otherwise fall through to 14 with %g3 = SRC & 7.  */
154 andcc %o1, 7, %g3 /* IEU1 Group */
155 be,a,pt %icc, 1b /* CTI */
156 ldx [%o1], %o3 /* Load */
/* DEST aligned, SRC not.  %g4 = 64, %g5 = %g3 * 8, then %o1 is rounded
   down to an 8-byte boundary and %g4 becomes 64 - %g5.  */
157 14: orcc %g0, 64, %g4 /* IEU1 Group */
159 sllx %g3, 3, %g5 /* IEU0 */
160 sub %o1, %g3, %o1 /* IEU0 Group */
161 sub %g4, %g5, %g4 /* IEU1 */
162 /* %g1 = 0101010101010101 *
163 * %g2 = 8080808080808080 *
164 * %g3 = source alignment *
165 * %g5 = number of bits to shift left *
166 * %g4 = number of bits to shift right */
/* Load the first aligned source word into %o5.  */
167 ldxa [%o1] ASI_PNF, %o5 /* Load Group */
169 addcc %o1, 8, %o1 /* IEU1 */
/* Merge loop.  %o3 = (previous word << %g5) | (next word >> %g4) is the
   next 8 bytes of the string; %o4 = %o3 - %g1 is the value tested
   against %g2.  */
170 15: sllx %o5, %g5, %o3 /* IEU0 Group */
171 ldxa [%o1] ASI_PNF, %o5 /* Load */
172 srlx %o5, %g4, %o4 /* IEU0 Group */
174 add %o0, 8, %o0 /* IEU1 */
175 or %o3, %o4, %o3 /* IEU0 Group */
176 add %o1, 8, %o1 /* IEU1 */
177 sub %o3, %g1, %o4 /* IEU0 Group */
/* Optional precise variant of the test, as in the aligned loop.  */
179 #ifdef EIGHTBIT_NOT_RARE
180 andn %o4, %o3, %o4 /* IEU0 Group */
/* No byte flagged: store the merged word (delay slot, only when the
   branch is taken) and continue at 15.  */
182 andcc %o4, %g2, %g0 /* IEU1 Group */
183 be,a,pt %xcc, 15b /* CTI */
184 stx %o3, [%o0 - 8] /* Store */
/* Some byte was flagged.  Examine %o3 byte by byte from bits 63..56
   down to bits 15..8, branching to the matching tail entry (22 down to
   16) on the first zero byte.  */
185 srlx %o3, 56, %o4 /* IEU0 Group */
187 andcc %o4, 0xff, %g0 /* IEU1 Group */
188 be,pn %icc, 22f /* CTI */
189 srlx %o3, 48, %o4 /* IEU0 */
190 andcc %o4, 0xff, %g0 /* IEU1 Group */
192 be,pn %icc, 21f /* CTI */
193 srlx %o3, 40, %o4 /* IEU0 */
194 andcc %o4, 0xff, %g0 /* IEU1 Group */
195 be,pn %icc, 20f /* CTI */
197 srlx %o3, 32, %o4 /* IEU0 */
198 andcc %o4, 0xff, %g0 /* IEU1 Group */
199 be,pn %icc, 19f /* CTI */
200 srlx %o3, 24, %o4 /* IEU0 */
202 andcc %o4, 0xff, %g0 /* IEU1 Group */
203 be,pn %icc, 18f /* CTI */
204 srlx %o3, 16, %o4 /* IEU0 */
205 andcc %o4, 0xff, %g0 /* IEU1 Group */
207 be,pn %icc, 17f /* CTI */
208 srlx %o3, 8, %o4 /* IEU0 */
209 andcc %o4, 0xff, %g0 /* IEU1 Group */
/* The delay slot of the branch to 16 tests the lowest byte.  The stx
   below sits in a delay slot without annul, so the whole word is stored
   whether or not the branch back to 15 is taken: lowest byte non-zero
   means a false alarm and the loop continues; lowest byte zero means
   the terminator went out with that store and we return DEST.  */
210 be,pn %icc, 16f /* CTI */
212 andcc %o3, 0xff, %g0 /* IEU1 Group */
213 bne,pn %icc, 15b /* CTI */
214 stx %o3, [%o0 - 8] /* Store */
215 retl /* CTI+IEU1 Group */
217 mov %g6, %o0 /* IEU0 */
/* Tail stores for the merge path.  Entries fall through into one
   another: 16, 17 and 18 each store one byte (at offsets -2, -3, -4)
   and 19 stores the upper four bytes of %o3 with one word store.  */
220 16: srlx %o3, 8, %o4 /* IEU0 Group */
221 stb %o4, [%o0 - 2] /* Store */
222 17: srlx %o3, 16, %o4 /* IEU0 Group */
223 stb %o4, [%o0 - 3] /* Store */
225 18: srlx %o3, 24, %o4 /* IEU0 Group */
226 stb %o4, [%o0 - 4] /* Store */
227 19: srlx %o3, 32, %o4 /* IEU0 Group */
228 stw %o4, [%o0 - 8] /* Store */
230 retl /* CTI+IEU1 Group */
231 mov %g6, %o0 /* IEU0 */
/* Zero byte within the upper three bytes: 20, 21 and 22 fall through,
   storing single bytes at offsets -6, -7 and -8.  */
235 20: srlx %o3, 40, %o4 /* IEU0 Group */
236 stb %o4, [%o0 - 6] /* Store */
237 21: srlx %o3, 48, %o4 /* IEU0 Group */
238 stb %o4, [%o0 - 7] /* Store */
240 22: srlx %o3, 56, %o4 /* IEU0 Group */
241 stb %o4, [%o0 - 8] /* Store */
242 retl /* CTI+IEU1 Group */
243 mov %g6, %o0 /* IEU0 */
/* Invoke the libc_hidden_builtin_def macro for strcpy.  The macro is
   defined outside this file; presumably it sets up glibc's hidden
   internal alias for the symbol -- confirm against its definition.  */
245 libc_hidden_builtin_def (strcpy)