* malloc/malloc.c: Add branch prediction for use of the hooks.
[glibc.git] / string / wordcopy.c
blob0c9a4be4f67c439030e68f1f44f1ab6ff984c3f8
/* _memcopy.c -- subroutines for memory copy functions.
   Copyright (C) 1991, 1996 Free Software Foundation, Inc.
   This file is part of the GNU C Library.
   Contributed by Torbjorn Granlund (tege@sics.se).

   The GNU C Library is free software; you can redistribute it and/or
   modify it under the terms of the GNU Lesser General Public
   License as published by the Free Software Foundation; either
   version 2.1 of the License, or (at your option) any later version.

   The GNU C Library is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   Lesser General Public License for more details.

   You should have received a copy of the GNU Lesser General Public
   License along with the GNU C Library; if not, write to the Free
   Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
   02111-1307 USA.  */
/* BE VERY CAREFUL IF YOU CHANGE THIS CODE...!  */
23 #include <stddef.h>
24 #include <memcopy.h>
26 /* _wordcopy_fwd_aligned -- Copy block beginning at SRCP to
27 block beginning at DSTP with LEN `op_t' words (not LEN bytes!).
28 Both SRCP and DSTP should be aligned for memory operations on `op_t's. */
30 void
31 _wordcopy_fwd_aligned (dstp, srcp, len)
32 long int dstp;
33 long int srcp;
34 size_t len;
36 op_t a0, a1;
38 switch (len % 8)
40 case 2:
41 a0 = ((op_t *) srcp)[0];
42 srcp -= 6 * OPSIZ;
43 dstp -= 7 * OPSIZ;
44 len += 6;
45 goto do1;
46 case 3:
47 a1 = ((op_t *) srcp)[0];
48 srcp -= 5 * OPSIZ;
49 dstp -= 6 * OPSIZ;
50 len += 5;
51 goto do2;
52 case 4:
53 a0 = ((op_t *) srcp)[0];
54 srcp -= 4 * OPSIZ;
55 dstp -= 5 * OPSIZ;
56 len += 4;
57 goto do3;
58 case 5:
59 a1 = ((op_t *) srcp)[0];
60 srcp -= 3 * OPSIZ;
61 dstp -= 4 * OPSIZ;
62 len += 3;
63 goto do4;
64 case 6:
65 a0 = ((op_t *) srcp)[0];
66 srcp -= 2 * OPSIZ;
67 dstp -= 3 * OPSIZ;
68 len += 2;
69 goto do5;
70 case 7:
71 a1 = ((op_t *) srcp)[0];
72 srcp -= 1 * OPSIZ;
73 dstp -= 2 * OPSIZ;
74 len += 1;
75 goto do6;
77 case 0:
78 if (OP_T_THRES <= 3 * OPSIZ && len == 0)
79 return;
80 a0 = ((op_t *) srcp)[0];
81 srcp -= 0 * OPSIZ;
82 dstp -= 1 * OPSIZ;
83 goto do7;
84 case 1:
85 a1 = ((op_t *) srcp)[0];
86 srcp -=-1 * OPSIZ;
87 dstp -= 0 * OPSIZ;
88 len -= 1;
89 if (OP_T_THRES <= 3 * OPSIZ && len == 0)
90 goto do0;
91 goto do8; /* No-op. */
96 do8:
97 a0 = ((op_t *) srcp)[0];
98 ((op_t *) dstp)[0] = a1;
99 do7:
100 a1 = ((op_t *) srcp)[1];
101 ((op_t *) dstp)[1] = a0;
102 do6:
103 a0 = ((op_t *) srcp)[2];
104 ((op_t *) dstp)[2] = a1;
105 do5:
106 a1 = ((op_t *) srcp)[3];
107 ((op_t *) dstp)[3] = a0;
108 do4:
109 a0 = ((op_t *) srcp)[4];
110 ((op_t *) dstp)[4] = a1;
111 do3:
112 a1 = ((op_t *) srcp)[5];
113 ((op_t *) dstp)[5] = a0;
114 do2:
115 a0 = ((op_t *) srcp)[6];
116 ((op_t *) dstp)[6] = a1;
117 do1:
118 a1 = ((op_t *) srcp)[7];
119 ((op_t *) dstp)[7] = a0;
121 srcp += 8 * OPSIZ;
122 dstp += 8 * OPSIZ;
123 len -= 8;
125 while (len != 0);
127 /* This is the right position for do0. Please don't move
128 it into the loop. */
129 do0:
130 ((op_t *) dstp)[0] = a1;
133 /* _wordcopy_fwd_dest_aligned -- Copy block beginning at SRCP to
134 block beginning at DSTP with LEN `op_t' words (not LEN bytes!).
135 DSTP should be aligned for memory operations on `op_t's, but SRCP must
136 *not* be aligned. */
138 void
139 _wordcopy_fwd_dest_aligned (dstp, srcp, len)
140 long int dstp;
141 long int srcp;
142 size_t len;
144 op_t a0, a1, a2, a3;
145 int sh_1, sh_2;
147 /* Calculate how to shift a word read at the memory operation
148 aligned srcp to make it aligned for copy. */
150 sh_1 = 8 * (srcp % OPSIZ);
151 sh_2 = 8 * OPSIZ - sh_1;
153 /* Make SRCP aligned by rounding it down to the beginning of the `op_t'
154 it points in the middle of. */
155 srcp &= -OPSIZ;
157 switch (len % 4)
159 case 2:
160 a1 = ((op_t *) srcp)[0];
161 a2 = ((op_t *) srcp)[1];
162 srcp -= 1 * OPSIZ;
163 dstp -= 3 * OPSIZ;
164 len += 2;
165 goto do1;
166 case 3:
167 a0 = ((op_t *) srcp)[0];
168 a1 = ((op_t *) srcp)[1];
169 srcp -= 0 * OPSIZ;
170 dstp -= 2 * OPSIZ;
171 len += 1;
172 goto do2;
173 case 0:
174 if (OP_T_THRES <= 3 * OPSIZ && len == 0)
175 return;
176 a3 = ((op_t *) srcp)[0];
177 a0 = ((op_t *) srcp)[1];
178 srcp -=-1 * OPSIZ;
179 dstp -= 1 * OPSIZ;
180 len += 0;
181 goto do3;
182 case 1:
183 a2 = ((op_t *) srcp)[0];
184 a3 = ((op_t *) srcp)[1];
185 srcp -=-2 * OPSIZ;
186 dstp -= 0 * OPSIZ;
187 len -= 1;
188 if (OP_T_THRES <= 3 * OPSIZ && len == 0)
189 goto do0;
190 goto do4; /* No-op. */
195 do4:
196 a0 = ((op_t *) srcp)[0];
197 ((op_t *) dstp)[0] = MERGE (a2, sh_1, a3, sh_2);
198 do3:
199 a1 = ((op_t *) srcp)[1];
200 ((op_t *) dstp)[1] = MERGE (a3, sh_1, a0, sh_2);
201 do2:
202 a2 = ((op_t *) srcp)[2];
203 ((op_t *) dstp)[2] = MERGE (a0, sh_1, a1, sh_2);
204 do1:
205 a3 = ((op_t *) srcp)[3];
206 ((op_t *) dstp)[3] = MERGE (a1, sh_1, a2, sh_2);
208 srcp += 4 * OPSIZ;
209 dstp += 4 * OPSIZ;
210 len -= 4;
212 while (len != 0);
214 /* This is the right position for do0. Please don't move
215 it into the loop. */
216 do0:
217 ((op_t *) dstp)[0] = MERGE (a2, sh_1, a3, sh_2);
220 /* _wordcopy_bwd_aligned -- Copy block finishing right before
221 SRCP to block finishing right before DSTP with LEN `op_t' words
222 (not LEN bytes!). Both SRCP and DSTP should be aligned for memory
223 operations on `op_t's. */
225 void
226 _wordcopy_bwd_aligned (dstp, srcp, len)
227 long int dstp;
228 long int srcp;
229 size_t len;
231 op_t a0, a1;
233 switch (len % 8)
235 case 2:
236 srcp -= 2 * OPSIZ;
237 dstp -= 1 * OPSIZ;
238 a0 = ((op_t *) srcp)[1];
239 len += 6;
240 goto do1;
241 case 3:
242 srcp -= 3 * OPSIZ;
243 dstp -= 2 * OPSIZ;
244 a1 = ((op_t *) srcp)[2];
245 len += 5;
246 goto do2;
247 case 4:
248 srcp -= 4 * OPSIZ;
249 dstp -= 3 * OPSIZ;
250 a0 = ((op_t *) srcp)[3];
251 len += 4;
252 goto do3;
253 case 5:
254 srcp -= 5 * OPSIZ;
255 dstp -= 4 * OPSIZ;
256 a1 = ((op_t *) srcp)[4];
257 len += 3;
258 goto do4;
259 case 6:
260 srcp -= 6 * OPSIZ;
261 dstp -= 5 * OPSIZ;
262 a0 = ((op_t *) srcp)[5];
263 len += 2;
264 goto do5;
265 case 7:
266 srcp -= 7 * OPSIZ;
267 dstp -= 6 * OPSIZ;
268 a1 = ((op_t *) srcp)[6];
269 len += 1;
270 goto do6;
272 case 0:
273 if (OP_T_THRES <= 3 * OPSIZ && len == 0)
274 return;
275 srcp -= 8 * OPSIZ;
276 dstp -= 7 * OPSIZ;
277 a0 = ((op_t *) srcp)[7];
278 goto do7;
279 case 1:
280 srcp -= 9 * OPSIZ;
281 dstp -= 8 * OPSIZ;
282 a1 = ((op_t *) srcp)[8];
283 len -= 1;
284 if (OP_T_THRES <= 3 * OPSIZ && len == 0)
285 goto do0;
286 goto do8; /* No-op. */
291 do8:
292 a0 = ((op_t *) srcp)[7];
293 ((op_t *) dstp)[7] = a1;
294 do7:
295 a1 = ((op_t *) srcp)[6];
296 ((op_t *) dstp)[6] = a0;
297 do6:
298 a0 = ((op_t *) srcp)[5];
299 ((op_t *) dstp)[5] = a1;
300 do5:
301 a1 = ((op_t *) srcp)[4];
302 ((op_t *) dstp)[4] = a0;
303 do4:
304 a0 = ((op_t *) srcp)[3];
305 ((op_t *) dstp)[3] = a1;
306 do3:
307 a1 = ((op_t *) srcp)[2];
308 ((op_t *) dstp)[2] = a0;
309 do2:
310 a0 = ((op_t *) srcp)[1];
311 ((op_t *) dstp)[1] = a1;
312 do1:
313 a1 = ((op_t *) srcp)[0];
314 ((op_t *) dstp)[0] = a0;
316 srcp -= 8 * OPSIZ;
317 dstp -= 8 * OPSIZ;
318 len -= 8;
320 while (len != 0);
322 /* This is the right position for do0. Please don't move
323 it into the loop. */
324 do0:
325 ((op_t *) dstp)[7] = a1;
328 /* _wordcopy_bwd_dest_aligned -- Copy block finishing right
329 before SRCP to block finishing right before DSTP with LEN `op_t'
330 words (not LEN bytes!). DSTP should be aligned for memory
331 operations on `op_t', but SRCP must *not* be aligned. */
333 void
334 _wordcopy_bwd_dest_aligned (dstp, srcp, len)
335 long int dstp;
336 long int srcp;
337 size_t len;
339 op_t a0, a1, a2, a3;
340 int sh_1, sh_2;
342 /* Calculate how to shift a word read at the memory operation
343 aligned srcp to make it aligned for copy. */
345 sh_1 = 8 * (srcp % OPSIZ);
346 sh_2 = 8 * OPSIZ - sh_1;
348 /* Make srcp aligned by rounding it down to the beginning of the op_t
349 it points in the middle of. */
350 srcp &= -OPSIZ;
351 srcp += OPSIZ;
353 switch (len % 4)
355 case 2:
356 srcp -= 3 * OPSIZ;
357 dstp -= 1 * OPSIZ;
358 a2 = ((op_t *) srcp)[2];
359 a1 = ((op_t *) srcp)[1];
360 len += 2;
361 goto do1;
362 case 3:
363 srcp -= 4 * OPSIZ;
364 dstp -= 2 * OPSIZ;
365 a3 = ((op_t *) srcp)[3];
366 a2 = ((op_t *) srcp)[2];
367 len += 1;
368 goto do2;
369 case 0:
370 if (OP_T_THRES <= 3 * OPSIZ && len == 0)
371 return;
372 srcp -= 5 * OPSIZ;
373 dstp -= 3 * OPSIZ;
374 a0 = ((op_t *) srcp)[4];
375 a3 = ((op_t *) srcp)[3];
376 goto do3;
377 case 1:
378 srcp -= 6 * OPSIZ;
379 dstp -= 4 * OPSIZ;
380 a1 = ((op_t *) srcp)[5];
381 a0 = ((op_t *) srcp)[4];
382 len -= 1;
383 if (OP_T_THRES <= 3 * OPSIZ && len == 0)
384 goto do0;
385 goto do4; /* No-op. */
390 do4:
391 a3 = ((op_t *) srcp)[3];
392 ((op_t *) dstp)[3] = MERGE (a0, sh_1, a1, sh_2);
393 do3:
394 a2 = ((op_t *) srcp)[2];
395 ((op_t *) dstp)[2] = MERGE (a3, sh_1, a0, sh_2);
396 do2:
397 a1 = ((op_t *) srcp)[1];
398 ((op_t *) dstp)[1] = MERGE (a2, sh_1, a3, sh_2);
399 do1:
400 a0 = ((op_t *) srcp)[0];
401 ((op_t *) dstp)[0] = MERGE (a1, sh_1, a2, sh_2);
403 srcp -= 4 * OPSIZ;
404 dstp -= 4 * OPSIZ;
405 len -= 4;
407 while (len != 0);
409 /* This is the right position for do0. Please don't move
410 it into the loop. */
411 do0:
412 ((op_t *) dstp)[3] = MERGE (a0, sh_1, a1, sh_2);