1 /* Copy SRC to DEST returning DEST.
3 Copyright (C) 1998-2015 Free Software Foundation, Inc.
4 This file is part of the GNU C Library.
5 Contributed by Jan Vondrak <jvon4518@ss1000.ms.mff.cuni.cz> and
6 Jakub Jelinek <jj@ultra.linux.cz>.
8 The GNU C Library is free software; you can redistribute it and/or
9 modify it under the terms of the GNU Lesser General Public
10 License as published by the Free Software Foundation; either
11 version 2.1 of the License, or (at your option) any later version.
13 The GNU C Library is distributed in the hope that it will be useful,
14 but WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16 Lesser General Public License for more details.
18 You should have received a copy of the GNU Lesser General Public
19 License along with the GNU C Library; if not, see
20 <http://www.gnu.org/licenses/>. */
/* Declare to the assembler that this file uses the global registers
   %g2, %g3 and %g6 as plain scratch registers (the code below writes
   all three).  */
25 .register %g2, #scratch
26 .register %g3, #scratch
27 .register %g6, #scratch
30 /* Normally, this uses the test
31 ((xword - 0x0101010101010101) & 0x8080808080808080)
32 to find out whether any byte in xword could be zero. This is fast, but
33 it also gives a false alarm for any byte in the range 0x81-0xff. That does
34 not matter for correctness, because whenever the test reports a possible
35 zero byte we check the word byte by byte; but if bytes with the
36 high bit set are common in the strings, performance will be
37 poor. You can #define EIGHTBIT_NOT_RARE and the algorithm
38 will use a test that is one tick slower but more precise,
39 ((xword - 0x0101010101010101) & (~xword) & 0x8080808080808080),
40 which does not give any false alarms (but if some bits are set,
41 one cannot tell from it which bytes are zero and which are not).
42 It has yet to be measured which default is the right one for glibc
43 these days for an average user. */
/* strcpy body: copy the NUL-terminated byte string read through %o1 to
   the buffer written through %o0, and return the original %o0.

   Register roles, as established by the code below:
     %o0  destination cursor (advanced as data is stored)
     %o1  source cursor (advanced as data is loaded)
     %g6  copy of the incoming %o0; moved back to %o0 before each retl
     %g1  0x0101010101010101
     %g2  0x8080808080808080 (briefly %g1 << 32 while %g1 is being built)
     %g3  aligned path: the 8-byte word currently being examined;
          on entry to 14: the source misalignment (%o1 & 7)
     %o3  aligned path: the next word, loaded one iteration ahead;
          misaligned-source path: the reassembled destination word
     %o2, %o4, %g4, %g5  temporaries

   Three paths:
     1:/2:/3:  source and destination both 8-byte aligned, word loop
     12:/13:   destination not 8-byte aligned, byte loop until it is
     14:/15:   destination aligned, source not; words are rebuilt from
               two aligned source words with shifts

   Reading notes: the instruction following every branch and retl is
   its delay slot.  For branches written with ",a" the delay slot is
   executed only when the branch is taken; otherwise it always runs.
   The first byte in memory is the most significant byte of a loaded
   word (the tail code stores word >> 56 at the lowest address).

   NOTE(review): ASI_PNF is defined outside this view; presumably it is
   the "primary, no-fault" ASI, so that the look-ahead loads past the
   terminator cannot trap -- confirm.
   NOTE(review): no entry label/macro for strcpy and no #endif for the
   two "#ifdef EIGHTBIT_NOT_RARE" lines are visible in this view --
   confirm they exist in the real file.  */

/* Entry: build the constants, save dest, and pick a path by alignment.  */
49 sethi %hi(0x01010101), %g1 /* IEU0 Group */
/* Keep the original destination for the return value.  */
50 mov %o0, %g6 /* IEU1 */
51 or %g1, %lo(0x01010101), %g1 /* IEU0 Group */
/* Condition codes = (dest & 7); consumed by the bne to 12f.  */
52 andcc %o0, 7, %g0 /* IEU1 */
/* %g2 = 0x01010101 << 32, to be OR-ed into %g1.  */
54 sllx %g1, 32, %g2 /* IEU0 Group */
/* Destination not 8-byte aligned -> byte loop at 12.  */
55 bne,pn %icc, 12f /* CTI */
/* Delay slot (always runs): %g3 = src & 7, and sets the condition
   codes tested by the bne to 14f.  */
56 andcc %o1, 7, %g3 /* IEU1 */
/* %g1 = 0x0101010101010101 (does not touch the condition codes).  */
57 or %g1, %g2, %g1 /* IEU0 Group */
/* Source not 8-byte aligned -> shifting loop at 14.  */
59 bne,pn %icc, 14f /* CTI */
/* Delay slot (always runs): %g2 = 0x8080808080808080.  */
60 sllx %g1, 7, %g2 /* IEU0 Group */

/* Aligned word loop.  1: loads the first word; 2:/3: process the word
   in %g3 while the following word is already being fetched into %o3.  */
61 1: ldx [%o1], %o3 /* Load */
62 add %o1, 8, %o1 /* IEU1 */
/* %g3 = word to examine and store in this iteration.  */
64 2: mov %o3, %g3 /* IEU0 Group */
/* %o2 = word - 0x0101010101010101 (first half of the zero-byte test).
   Here %o3 still equals %g3.  */
65 3: sub %o3, %g1, %o2 /* IEU1 */
/* Fetch the next word ahead of time.  */
66 ldxa [%o1] ASI_PNF, %o3 /* Load */
67 #ifdef EIGHTBIT_NOT_RARE
/* Precise variant: additionally mask with ~word so that bytes
   0x81-0xff do not raise a false alarm.  */
68 andn %o2, %g3, %o2 /* IEU0 Group */
/* Destination cursor now points just past the current word, so the
   word lives at [%o0 - 8].  */
70 add %o0, 8, %o0 /* IEU0 Group */
/* Any bit of 0x8080808080808080 set -> the word may contain a zero.  */
72 andcc %o2, %g2, %g0 /* IEU1 */
73 add %o1, 8, %o1 /* IEU0 Group */
/* No candidate zero byte: store the word (annulled delay slot, runs
   only when the branch is taken) and continue with the next word.  */
74 be,a,pt %xcc, 2b /* CTI */
75 stx %g3, [%o0 - 8] /* Store */

/* Possible zero byte: test the bytes of %g3 one by one, from the most
   significant (first in memory) downwards.  Each delay slot already
   computes the shift needed for the following test; %g4 and %g5
   alternate so that the branch target finds the shifted value it
   needs still intact.  */
77 srlx %g3, 56, %g5 /* IEU0 Group */
78 andcc %g5, 0xff, %g0 /* IEU1 Group */
/* Byte 0 is zero -> 11.  */
79 be,pn %icc, 11f /* CTI */
80 srlx %g3, 48, %g4 /* IEU0 */
82 andcc %g4, 0xff, %g0 /* IEU1 Group */
/* Byte 1 is zero -> 10.  */
83 be,pn %icc, 10f /* CTI */
84 srlx %g3, 40, %g5 /* IEU0 */
85 andcc %g5, 0xff, %g0 /* IEU1 Group */
/* Byte 2 is zero -> 9.  */
87 be,pn %icc, 9f /* CTI */
88 srlx %g3, 32, %g4 /* IEU0 */
89 andcc %g4, 0xff, %g0 /* IEU1 Group */
/* Byte 3 is zero -> 8.  */
90 be,pn %icc, 8f /* CTI */
92 srlx %g3, 24, %g5 /* IEU0 */
93 andcc %g5, 0xff, %g0 /* IEU1 Group */
/* Byte 4 is zero -> 7.  */
94 be,pn %icc, 7f /* CTI */
95 srlx %g3, 16, %g4 /* IEU0 */
97 andcc %g4, 0xff, %g0 /* IEU1 Group */
/* Byte 5 is zero -> 6.  */
98 be,pn %icc, 6f /* CTI */
99 srlx %g3, 8, %g5 /* IEU0 */
100 andcc %g5, 0xff, %g0 /* IEU1 Group */
/* Byte 6 is zero -> 5.  */
102 be,pn %icc, 5f /* CTI */
/* Delay slot: %o2 = next word - %g1 (label 3 computes the same value
   again when the loop continues).  */
103 sub %o3, %g1, %o2 /* IEU0 */
/* Bytes 0-6 are all non-zero: the whole word can be stored, whether
   or not its last byte is the terminator.  */
104 stx %g3, [%o0 - 8] /* Store Group */
/* Test the last byte of the word.  */
105 andcc %g3, 0xff, %g0 /* IEU1 */
/* Last byte non-zero: false alarm, continue with the pre-loaded word.  */
107 bne,pt %icc, 3b /* CTI */
108 mov %o3, %g3 /* IEU0 Group */
/* Terminator copied: return the original destination.  */
109 4: retl /* CTI+IEU1 Group */
110 mov %g6, %o0 /* IEU0 */

/* Aligned-path tails.  Each label is entered with the terminator at a
   known byte position; it stores the bytes up to and including the
   terminator, using the widest stores that fit, and returns.  */
/* 5: terminator is byte 6; %g5 = %g3 >> 8.  Store byte 6, then fall
   through to store bytes 4-5 and 0-3.  */
113 5: stb %g5, [%o0 - 2] /* Store Group */
114 srlx %g3, 16, %g4 /* IEU0 */
/* 6: terminator is byte 5 (or fall-through from 5); %g4 = %g3 >> 16.
   Store bytes 4-5 as a halfword, then bytes 0-3 as a word.  */
115 6: sth %g4, [%o0 - 4] /* Store Group */
116 srlx %g3, 32, %g4 /* IEU0 */
118 stw %g4, [%o0 - 8] /* Store Group */
119 retl /* CTI+IEU1 Group */
120 mov %g6, %o0 /* IEU0 */
/* 7: terminator is byte 4; %g5 = %g3 >> 24.  Store byte 4, then fall
   through to store bytes 0-3.  */
121 7: stb %g5, [%o0 - 4] /* Store Group */
123 srlx %g3, 32, %g4 /* IEU0 */
/* 8: terminator is byte 3 (or fall-through from 7); %g4 = %g3 >> 32.
   Store bytes 0-3 as a word.  */
124 8: stw %g4, [%o0 - 8] /* Store Group */
125 retl /* CTI+IEU1 Group */
126 mov %g6, %o0 /* IEU0 */
/* 9: terminator is byte 2; %g5 = %g3 >> 40.  Store byte 2, then fall
   through to store bytes 0-1.  */
128 9: stb %g5, [%o0 - 6] /* Store Group */
129 srlx %g3, 48, %g4 /* IEU0 */
/* 10: terminator is byte 1 (or fall-through from 9); %g4 = %g3 >> 48.
   Store bytes 0-1 as a halfword.  */
130 10: sth %g4, [%o0 - 8] /* Store Group */
131 retl /* CTI+IEU1 Group */
133 mov %g6, %o0 /* IEU0 */
/* 11: terminator is byte 0; %g5 = %g3 >> 56.  Store just that byte.  */
134 11: stb %g5, [%o0 - 8] /* Store Group */
135 retl /* CTI+IEU1 Group */
136 mov %g6, %o0 /* IEU0 */

/* 12: destination is not 8-byte aligned.  Finish building %g1/%g2
   (the instructions doing so on the aligned path were skipped), then
   copy single bytes until the terminator is copied or the destination
   becomes 8-byte aligned.  */
138 12: or %g1, %g2, %g1 /* IEU0 Group */
139 ldub [%o1], %o3 /* Load */
140 sllx %g1, 7, %g2 /* IEU0 Group */
141 stb %o3, [%o0] /* Store Group */
/* 13: one byte has just been stored; advance both cursors.  */
143 13: add %o0, 1, %o0 /* IEU0 */
144 add %o1, 1, %o1 /* IEU1 */
/* Was the byte just stored the terminator?  */
145 andcc %o3, 0xff, %g0 /* IEU1 Group */
/* Yes -> return through 4.  */
146 be,pn %icc, 4b /* CTI */
/* Delay slot (always runs): load the next source byte.  */
148 lduba [%o1] ASI_PNF, %o3 /* Load */
/* Destination still misaligned?  */
149 andcc %o0, 7, %g0 /* IEU1 Group */
/* Yes: store the byte just loaded (annulled delay slot, runs only
   when the branch is taken) and loop.  */
150 bne,a,pt %icc, 13b /* CTI */
151 stb %o3, [%o0] /* Store */
/* Destination is aligned now; %g3 = source misalignment.  */
153 andcc %o1, 7, %g3 /* IEU1 Group */
/* Source aligned as well -> join the aligned word loop at 1.  The
   annulled delay slot runs only when the branch is taken.  */
154 be,a,pt %icc, 1b /* CTI */
155 ldx [%o1], %o3 /* Load */

/* 14: destination aligned, source misaligned by %g3 bytes.  Read
   aligned source words and assemble each destination word from the
   tail of one source word and the head of the next.  */
/* %g4 = 64.  */
156 14: orcc %g0, 64, %g4 /* IEU1 Group */
/* %g5 = misalignment in bits.  */
158 sllx %g3, 3, %g5 /* IEU0 */
/* Round the source cursor down to an 8-byte boundary.  */
159 sub %o1, %g3, %o1 /* IEU0 Group */
/* %g4 = 64 - %g5.  */
160 sub %g4, %g5, %g4 /* IEU1 */
161 /* %g1 = 0101010101010101 *
162 * %g2 = 8080808080808080 *
163 * %g3 = source alignment *
164 * %g5 = number of bits to shift left *
165 * %g4 = number of bits to shift right */
/* First aligned source word; its leading %g3 bytes lie before the
   start of the string and are shifted out below.  */
166 ldxa [%o1] ASI_PNF, %o5 /* Load Group */
168 addcc %o1, 8, %o1 /* IEU1 */
/* 15: %o3 = (previous word << %g5) | (next word >> %g4).  */
169 15: sllx %o5, %g5, %o3 /* IEU0 Group */
170 ldxa [%o1] ASI_PNF, %o5 /* Load */
171 srlx %o5, %g4, %o4 /* IEU0 Group */
/* Destination cursor now points just past the word being assembled.  */
173 add %o0, 8, %o0 /* IEU1 */
174 or %o3, %o4, %o3 /* IEU0 Group */
175 add %o1, 8, %o1 /* IEU1 */
/* Zero-byte test on the assembled word, as in the aligned loop.  */
176 sub %o3, %g1, %o4 /* IEU0 Group */
178 #ifdef EIGHTBIT_NOT_RARE
/* Precise variant: additionally mask with ~word.  */
179 andn %o4, %o3, %o4 /* IEU0 Group */
181 andcc %o4, %g2, %g0 /* IEU1 Group */
/* No candidate zero byte: store the word (annulled delay slot, runs
   only when the branch is taken) and continue.  */
182 be,a,pt %xcc, 15b /* CTI */
183 stx %o3, [%o0 - 8] /* Store */
/* Possible zero byte: test the bytes of %o3 from the most significant
   downwards, using %o4 as the only temporary.  */
184 srlx %o3, 56, %o4 /* IEU0 Group */
186 andcc %o4, 0xff, %g0 /* IEU1 Group */
/* Byte 0 is zero -> 22.  */
187 be,pn %icc, 22f /* CTI */
188 srlx %o3, 48, %o4 /* IEU0 */
189 andcc %o4, 0xff, %g0 /* IEU1 Group */
/* Byte 1 is zero -> 21.  */
191 be,pn %icc, 21f /* CTI */
192 srlx %o3, 40, %o4 /* IEU0 */
193 andcc %o4, 0xff, %g0 /* IEU1 Group */
/* Byte 2 is zero -> 20.  */
194 be,pn %icc, 20f /* CTI */
196 srlx %o3, 32, %o4 /* IEU0 */
197 andcc %o4, 0xff, %g0 /* IEU1 Group */
/* Byte 3 is zero -> 19.  */
198 be,pn %icc, 19f /* CTI */
199 srlx %o3, 24, %o4 /* IEU0 */
201 andcc %o4, 0xff, %g0 /* IEU1 Group */
/* Byte 4 is zero -> 18.  */
202 be,pn %icc, 18f /* CTI */
203 srlx %o3, 16, %o4 /* IEU0 */
204 andcc %o4, 0xff, %g0 /* IEU1 Group */
/* Byte 5 is zero -> 17.  */
206 be,pn %icc, 17f /* CTI */
207 srlx %o3, 8, %o4 /* IEU0 */
208 andcc %o4, 0xff, %g0 /* IEU1 Group */
/* Byte 6 is zero -> 16.  */
209 be,pn %icc, 16f /* CTI */
/* Delay slot of the branch to 16 (always runs): test the last byte.  */
211 andcc %o3, 0xff, %g0 /* IEU1 Group */
/* Last byte non-zero: false alarm, keep looping.  The delay-slot
   store is not annulled, so the full word is written whether or not
   the branch is taken.  */
212 bne,pn %icc, 15b /* CTI */
213 stx %o3, [%o0 - 8] /* Store */
/* Last byte was the terminator: return the original destination.  */
214 retl /* CTI+IEU1 Group */
216 mov %g6, %o0 /* IEU0 */

/* Misaligned-source tails.  Each label is entered with the terminator
   at a known byte position, re-derives the byte from %o3, stores it,
   and falls through to store the bytes that precede it.  */
/* 16: terminator is byte 6.  */
219 16: srlx %o3, 8, %o4 /* IEU0 Group */
220 stb %o4, [%o0 - 2] /* Store */
/* 17: terminator is byte 5 (or fall-through): store byte 5.  */
221 17: srlx %o3, 16, %o4 /* IEU0 Group */
222 stb %o4, [%o0 - 3] /* Store */
/* 18: terminator is byte 4 (or fall-through): store byte 4.  */
224 18: srlx %o3, 24, %o4 /* IEU0 Group */
225 stb %o4, [%o0 - 4] /* Store */
/* 19: terminator is byte 3 (or fall-through): store bytes 0-3 as a
   word and return.  */
226 19: srlx %o3, 32, %o4 /* IEU0 Group */
227 stw %o4, [%o0 - 8] /* Store */
229 retl /* CTI+IEU1 Group */
230 mov %g6, %o0 /* IEU0 */
/* 20: terminator is byte 2.  */
234 20: srlx %o3, 40, %o4 /* IEU0 Group */
235 stb %o4, [%o0 - 6] /* Store */
/* 21: terminator is byte 1 (or fall-through): store byte 1.  */
236 21: srlx %o3, 48, %o4 /* IEU0 Group */
237 stb %o4, [%o0 - 7] /* Store */
/* 22: terminator is byte 0 (or fall-through): store byte 0 and
   return.  */
239 22: srlx %o3, 56, %o4 /* IEU0 Group */
240 stb %o4, [%o0 - 8] /* Store */
241 retl /* CTI+IEU1 Group */
242 mov %g6, %o0 /* IEU0 */
/* NOTE(review): libc_hidden_builtin_def is a macro defined outside
   this view; presumably it emits glibc's hidden internal alias for
   strcpy -- confirm against the libc symbol headers.  */
244 libc_hidden_builtin_def (strcpy)