aarch64: Fix f{max,min}{f} build for GCC 4.9 and 5
[glibc.git] / string / wordcopy.c
blob65961cd03a3fff776c0d8e871becbb8cf70579e7
1 /* _memcopy.c -- subroutines for memory copy functions.
2 Copyright (C) 1991-2017 Free Software Foundation, Inc.
3 This file is part of the GNU C Library.
4 Contributed by Torbjorn Granlund (tege@sics.se).
6 The GNU C Library is free software; you can redistribute it and/or
7 modify it under the terms of the GNU Lesser General Public
8 License as published by the Free Software Foundation; either
9 version 2.1 of the License, or (at your option) any later version.
11 The GNU C Library is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 Lesser General Public License for more details.
16 You should have received a copy of the GNU Lesser General Public
17 License along with the GNU C Library; if not, see
18 <http://www.gnu.org/licenses/>. */
20 /* BE VERY CAREFUL IF YOU CHANGE THIS CODE...! */
22 #include <stddef.h>
23 #include <memcopy.h>
25 /* _wordcopy_fwd_aligned -- Copy block beginning at SRCP to
26 block beginning at DSTP with LEN `op_t' words (not LEN bytes!).
27 Both SRCP and DSTP should be aligned for memory operations on `op_t's. */
29 #ifndef WORDCOPY_FWD_ALIGNED
30 # define WORDCOPY_FWD_ALIGNED _wordcopy_fwd_aligned
31 #endif
/* The function name is a macro: the default _wordcopy_fwd_aligned is used
   only when WORDCOPY_FWD_ALIGNED has not already been defined.

   Copy LEN words of type op_t from address SRCP to address DSTP, working
   from low to high addresses.  DSTP and SRCP are addresses carried as
   long int; they are cast to op_t * at every access.

   The copy loop handles eight words per pass and is software-pipelined:
   each step loads the next source word while storing the word loaded by
   the previous step, with a0 and a1 alternating as the holding variable.
   The switch on LEN % 8 selects the step at which the loop is entered.  */
33 void
34 WORDCOPY_FWD_ALIGNED (long int dstp, long int srcp, size_t len)
36 op_t a0, a1;
/* Every case loads the first source word, biases SRCP and DSTP so that the
   fixed indexes used by its entry step address the first source and
   destination words, and adjusts LEN to a multiple of 8 so that the loop
   ends exactly when LEN reaches zero.  */
38 switch (len % 8)
40 case 2:
41 a0 = ((op_t *) srcp)[0];
42 srcp -= 6 * OPSIZ;
43 dstp -= 7 * OPSIZ;
44 len += 6;
45 goto do1;
46 case 3:
47 a1 = ((op_t *) srcp)[0];
48 srcp -= 5 * OPSIZ;
49 dstp -= 6 * OPSIZ;
50 len += 5;
51 goto do2;
52 case 4:
53 a0 = ((op_t *) srcp)[0];
54 srcp -= 4 * OPSIZ;
55 dstp -= 5 * OPSIZ;
56 len += 4;
57 goto do3;
58 case 5:
59 a1 = ((op_t *) srcp)[0];
60 srcp -= 3 * OPSIZ;
61 dstp -= 4 * OPSIZ;
62 len += 3;
63 goto do4;
64 case 6:
65 a0 = ((op_t *) srcp)[0];
66 srcp -= 2 * OPSIZ;
67 dstp -= 3 * OPSIZ;
68 len += 2;
69 goto do5;
70 case 7:
71 a1 = ((op_t *) srcp)[0];
72 srcp -= 1 * OPSIZ;
73 dstp -= 2 * OPSIZ;
74 len += 1;
75 goto do6;
/* LEN is already a multiple of 8 here.  The LEN == 0 test is made only
   when OP_T_THRES <= 3 * OPSIZ; both are defined outside this file.
   NOTE(review): presumably callers never pass zero words when that
   condition is false -- confirm against memcopy.h.  */
77 case 0:
78 if (OP_T_THRES <= 3 * OPSIZ && len == 0)
79 return;
80 a0 = ((op_t *) srcp)[0];
81 srcp -= 0 * OPSIZ;
82 dstp -= 1 * OPSIZ;
83 goto do7;
84 case 1:
85 a1 = ((op_t *) srcp)[0];
86 srcp -=-1 * OPSIZ;
87 dstp -= 0 * OPSIZ;
88 len -= 1;
/* If that was the only word, skip the loop and go straight to the final
   store of a1 at do0.  */
89 if (OP_T_THRES <= 3 * OPSIZ && len == 0)
90 goto do0;
91 goto do8; /* No-op. */
/* Pipelined loop body: each step loads one word ahead of the word it
   stores.  */
96 do8:
97 a0 = ((op_t *) srcp)[0];
98 ((op_t *) dstp)[0] = a1;
99 do7:
100 a1 = ((op_t *) srcp)[1];
101 ((op_t *) dstp)[1] = a0;
102 do6:
103 a0 = ((op_t *) srcp)[2];
104 ((op_t *) dstp)[2] = a1;
105 do5:
106 a1 = ((op_t *) srcp)[3];
107 ((op_t *) dstp)[3] = a0;
108 do4:
109 a0 = ((op_t *) srcp)[4];
110 ((op_t *) dstp)[4] = a1;
111 do3:
112 a1 = ((op_t *) srcp)[5];
113 ((op_t *) dstp)[5] = a0;
114 do2:
115 a0 = ((op_t *) srcp)[6];
116 ((op_t *) dstp)[6] = a1;
117 do1:
118 a1 = ((op_t *) srcp)[7];
119 ((op_t *) dstp)[7] = a0;
/* Step both addresses past the eight words handled in this pass.  */
121 srcp += 8 * OPSIZ;
122 dstp += 8 * OPSIZ;
123 len -= 8;
125 while (len != 0);
/* The last step of the loop (do1) left the final source word in a1; it
   has been loaded but not yet stored.  */
127 /* This is the right position for do0. Please don't move
128 it into the loop. */
129 do0:
130 ((op_t *) dstp)[0] = a1;
133 /* _wordcopy_fwd_dest_aligned -- Copy block beginning at SRCP to
134 block beginning at DSTP with LEN `op_t' words (not LEN bytes!).
135 DSTP should be aligned for memory operations on `op_t's, but SRCP must
136 *not* be aligned. */
138 #ifndef WORDCOPY_FWD_DEST_ALIGNED
139 # define WORDCOPY_FWD_DEST_ALIGNED _wordcopy_fwd_dest_aligned
140 #endif
/* The function name is a macro: the default _wordcopy_fwd_dest_aligned is
   used only when WORDCOPY_FWD_DEST_ALIGNED has not already been defined.

   Write LEN op_t words to address DSTP, working from low to high
   addresses, taking the data from the misaligned address SRCP.  SRCP is
   first rounded down to a multiple of OPSIZ; every destination word is
   then built by MERGE from two neighbouring aligned source words, the
   lower-addressed one always being MERGE's first operand.  LEN + 1
   aligned source words are read in total.

   MERGE, OPSIZ and OP_T_THRES are defined outside this file.
   NOTE(review): MERGE presumably shifts its two word operands by sh_1 and
   sh_2 bits and combines them -- confirm in memcopy.h.  */
142 void
143 WORDCOPY_FWD_DEST_ALIGNED (long int dstp, long int srcp, size_t len)
145 op_t a0, a1, a2, a3;
146 int sh_1, sh_2;
148 /* Calculate how to shift a word read at the memory operation
149 aligned srcp to make it aligned for copy. */
/* sh_1 is the misalignment of SRCP expressed in bits (8 per byte); sh_2 is
   its complement, so that sh_1 + sh_2 == 8 * OPSIZ.  */
151 sh_1 = 8 * (srcp % OPSIZ);
152 sh_2 = 8 * OPSIZ - sh_1;
154 /* Make SRCP aligned by rounding it down to the beginning of the `op_t'
155 it points in the middle of. */
156 srcp &= -OPSIZ;
/* The loop below handles four words per pass.  Every case preloads two
   consecutive aligned source words, biases SRCP and DSTP to suit the
   fixed indexes of its entry step, and adjusts LEN to a multiple of 4.  */
158 switch (len % 4)
160 case 2:
161 a1 = ((op_t *) srcp)[0];
162 a2 = ((op_t *) srcp)[1];
163 srcp -= 1 * OPSIZ;
164 dstp -= 3 * OPSIZ;
165 len += 2;
166 goto do1;
167 case 3:
168 a0 = ((op_t *) srcp)[0];
169 a1 = ((op_t *) srcp)[1];
170 srcp -= 0 * OPSIZ;
171 dstp -= 2 * OPSIZ;
172 len += 1;
173 goto do2;
/* LEN is already a multiple of 4 here.  The LEN == 0 test is made only
   when OP_T_THRES <= 3 * OPSIZ.  NOTE(review): presumably callers never
   pass zero words when that condition is false -- confirm against
   memcopy.h.  */
174 case 0:
175 if (OP_T_THRES <= 3 * OPSIZ && len == 0)
176 return;
177 a3 = ((op_t *) srcp)[0];
178 a0 = ((op_t *) srcp)[1];
179 srcp -=-1 * OPSIZ;
180 dstp -= 1 * OPSIZ;
181 len += 0;
182 goto do3;
183 case 1:
184 a2 = ((op_t *) srcp)[0];
185 a3 = ((op_t *) srcp)[1];
186 srcp -=-2 * OPSIZ;
187 dstp -= 0 * OPSIZ;
188 len -= 1;
/* If only one destination word was requested, the two words already
   loaded are all that is needed: go straight to the final store.  */
189 if (OP_T_THRES <= 3 * OPSIZ && len == 0)
190 goto do0;
191 goto do4; /* No-op. */
/* Pipelined loop body: a0..a3 rotate; each step loads one new source word
   and stores the merge of the two words loaded before it.  */
196 do4:
197 a0 = ((op_t *) srcp)[0];
198 ((op_t *) dstp)[0] = MERGE (a2, sh_1, a3, sh_2);
199 do3:
200 a1 = ((op_t *) srcp)[1];
201 ((op_t *) dstp)[1] = MERGE (a3, sh_1, a0, sh_2);
202 do2:
203 a2 = ((op_t *) srcp)[2];
204 ((op_t *) dstp)[2] = MERGE (a0, sh_1, a1, sh_2);
205 do1:
206 a3 = ((op_t *) srcp)[3];
207 ((op_t *) dstp)[3] = MERGE (a1, sh_1, a2, sh_2);
/* Step both addresses past the four words handled in this pass.  */
209 srcp += 4 * OPSIZ;
210 dstp += 4 * OPSIZ;
211 len -= 4;
213 while (len != 0);
/* The last two loop steps left the final pair of source words in a2 and
   a3; their merge has not been stored yet.  */
215 /* This is the right position for do0. Please don't move
216 it into the loop. */
217 do0:
218 ((op_t *) dstp)[0] = MERGE (a2, sh_1, a3, sh_2);
221 /* _wordcopy_bwd_aligned -- Copy block finishing right before
222 SRCP to block finishing right before DSTP with LEN `op_t' words
223 (not LEN bytes!). Both SRCP and DSTP should be aligned for memory
224 operations on `op_t's. */
226 #ifndef WORDCOPY_BWD_ALIGNED
227 # define WORDCOPY_BWD_ALIGNED _wordcopy_bwd_aligned
228 #endif
/* The function name is a macro: the default _wordcopy_bwd_aligned is used
   only when WORDCOPY_BWD_ALIGNED has not already been defined.

   Copy LEN words of type op_t, working from high to low addresses.  SRCP
   and DSTP are the addresses just past the end of the source and
   destination blocks, carried as long int and cast to op_t * at every
   access.

   The copy loop handles eight words per pass and is software-pipelined:
   each step loads the next lower source word while storing the word
   loaded by the previous step, with a0 and a1 alternating as the holding
   variable.  The switch on LEN % 8 selects the step at which the loop is
   entered.  */
230 void
231 WORDCOPY_BWD_ALIGNED (long int dstp, long int srcp, size_t len)
233 op_t a0, a1;
/* Every case first moves SRCP and DSTP down so that the fixed indexes used
   by its entry step address the last source and destination words, then
   loads the last source word and adjusts LEN to a multiple of 8.  */
235 switch (len % 8)
237 case 2:
238 srcp -= 2 * OPSIZ;
239 dstp -= 1 * OPSIZ;
240 a0 = ((op_t *) srcp)[1];
241 len += 6;
242 goto do1;
243 case 3:
244 srcp -= 3 * OPSIZ;
245 dstp -= 2 * OPSIZ;
246 a1 = ((op_t *) srcp)[2];
247 len += 5;
248 goto do2;
249 case 4:
250 srcp -= 4 * OPSIZ;
251 dstp -= 3 * OPSIZ;
252 a0 = ((op_t *) srcp)[3];
253 len += 4;
254 goto do3;
255 case 5:
256 srcp -= 5 * OPSIZ;
257 dstp -= 4 * OPSIZ;
258 a1 = ((op_t *) srcp)[4];
259 len += 3;
260 goto do4;
261 case 6:
262 srcp -= 6 * OPSIZ;
263 dstp -= 5 * OPSIZ;
264 a0 = ((op_t *) srcp)[5];
265 len += 2;
266 goto do5;
267 case 7:
268 srcp -= 7 * OPSIZ;
269 dstp -= 6 * OPSIZ;
270 a1 = ((op_t *) srcp)[6];
271 len += 1;
272 goto do6;
/* LEN is already a multiple of 8 here.  The LEN == 0 test is made only
   when OP_T_THRES <= 3 * OPSIZ; both are defined outside this file.
   NOTE(review): presumably callers never pass zero words when that
   condition is false -- confirm against memcopy.h.  */
274 case 0:
275 if (OP_T_THRES <= 3 * OPSIZ && len == 0)
276 return;
277 srcp -= 8 * OPSIZ;
278 dstp -= 7 * OPSIZ;
279 a0 = ((op_t *) srcp)[7];
280 goto do7;
281 case 1:
282 srcp -= 9 * OPSIZ;
283 dstp -= 8 * OPSIZ;
284 a1 = ((op_t *) srcp)[8];
285 len -= 1;
/* If that was the only word, skip the loop and go straight to the final
   store of a1 at do0.  */
286 if (OP_T_THRES <= 3 * OPSIZ && len == 0)
287 goto do0;
288 goto do8; /* No-op. */
/* Pipelined loop body, indexes descending from 7 to 0: each step loads
   one word ahead of the word it stores.  */
293 do8:
294 a0 = ((op_t *) srcp)[7];
295 ((op_t *) dstp)[7] = a1;
296 do7:
297 a1 = ((op_t *) srcp)[6];
298 ((op_t *) dstp)[6] = a0;
299 do6:
300 a0 = ((op_t *) srcp)[5];
301 ((op_t *) dstp)[5] = a1;
302 do5:
303 a1 = ((op_t *) srcp)[4];
304 ((op_t *) dstp)[4] = a0;
305 do4:
306 a0 = ((op_t *) srcp)[3];
307 ((op_t *) dstp)[3] = a1;
308 do3:
309 a1 = ((op_t *) srcp)[2];
310 ((op_t *) dstp)[2] = a0;
311 do2:
312 a0 = ((op_t *) srcp)[1];
313 ((op_t *) dstp)[1] = a1;
314 do1:
315 a1 = ((op_t *) srcp)[0];
316 ((op_t *) dstp)[0] = a0;
/* Step both addresses down past the eight words handled in this pass.  */
318 srcp -= 8 * OPSIZ;
319 dstp -= 8 * OPSIZ;
320 len -= 8;
322 while (len != 0);
/* The last step of the loop (do1) left the lowest source word in a1; it
   has been loaded but not yet stored.  Index 7 is used because DSTP has
   just been moved down by eight words.  */
324 /* This is the right position for do0. Please don't move
325 it into the loop. */
326 do0:
327 ((op_t *) dstp)[7] = a1;
330 /* _wordcopy_bwd_dest_aligned -- Copy block finishing right
331 before SRCP to block finishing right before DSTP with LEN `op_t'
332 words (not LEN bytes!). DSTP should be aligned for memory
333 operations on `op_t', but SRCP must *not* be aligned. */
335 #ifndef WORDCOPY_BWD_DEST_ALIGNED
336 # define WORDCOPY_BWD_DEST_ALIGNED _wordcopy_bwd_dest_aligned
337 #endif
/* The function name is a macro: the default _wordcopy_bwd_dest_aligned is
   used only when WORDCOPY_BWD_DEST_ALIGNED has not already been defined.

   Write LEN op_t words ending just before address DSTP, working from high
   to low addresses, taking the data that ends just before the misaligned
   address SRCP.  SRCP is first moved to an OPSIZ boundary; every
   destination word is then built by MERGE from two neighbouring aligned
   source words, the lower-addressed one always being MERGE's first
   operand.  LEN + 1 aligned source words are read in total.

   MERGE, OPSIZ and OP_T_THRES are defined outside this file.
   NOTE(review): MERGE presumably shifts its two word operands by sh_1 and
   sh_2 bits and combines them -- confirm in memcopy.h.  */
339 void
340 WORDCOPY_BWD_DEST_ALIGNED (long int dstp, long int srcp, size_t len)
342 op_t a0, a1, a2, a3;
343 int sh_1, sh_2;
345 /* Calculate how to shift a word read at the memory operation
346 aligned srcp to make it aligned for copy. */
/* sh_1 is the misalignment of SRCP expressed in bits (8 per byte); sh_2 is
   its complement, so that sh_1 + sh_2 == 8 * OPSIZ.  */
348 sh_1 = 8 * (srcp % OPSIZ);
349 sh_2 = 8 * OPSIZ - sh_1;
351 /* Make srcp aligned by rounding it down to the beginning of the op_t
352 it points in the middle of. */
353 srcp &= -OPSIZ;
/* Then step up by one word, so that SRCP is the OPSIZ boundary just above
   the original address and the cases below can index downward from it.  */
354 srcp += OPSIZ;
/* The loop below handles four words per pass.  Every case first moves SRCP
   and DSTP down to suit the fixed indexes of its entry step, then
   preloads the two highest aligned source words and adjusts LEN to a
   multiple of 4.  */
356 switch (len % 4)
358 case 2:
359 srcp -= 3 * OPSIZ;
360 dstp -= 1 * OPSIZ;
361 a2 = ((op_t *) srcp)[2];
362 a1 = ((op_t *) srcp)[1];
363 len += 2;
364 goto do1;
365 case 3:
366 srcp -= 4 * OPSIZ;
367 dstp -= 2 * OPSIZ;
368 a3 = ((op_t *) srcp)[3];
369 a2 = ((op_t *) srcp)[2];
370 len += 1;
371 goto do2;
/* LEN is already a multiple of 4 here.  The LEN == 0 test is made only
   when OP_T_THRES <= 3 * OPSIZ.  NOTE(review): presumably callers never
   pass zero words when that condition is false -- confirm against
   memcopy.h.  */
372 case 0:
373 if (OP_T_THRES <= 3 * OPSIZ && len == 0)
374 return;
375 srcp -= 5 * OPSIZ;
376 dstp -= 3 * OPSIZ;
377 a0 = ((op_t *) srcp)[4];
378 a3 = ((op_t *) srcp)[3];
379 goto do3;
380 case 1:
381 srcp -= 6 * OPSIZ;
382 dstp -= 4 * OPSIZ;
383 a1 = ((op_t *) srcp)[5];
384 a0 = ((op_t *) srcp)[4];
385 len -= 1;
/* If only one destination word was requested, the two words already
   loaded are all that is needed: go straight to the final store.  */
386 if (OP_T_THRES <= 3 * OPSIZ && len == 0)
387 goto do0;
388 goto do4; /* No-op. */
/* Pipelined loop body, indexes descending from 3 to 0: a0..a3 rotate;
   each step loads one new source word and stores the merge of the two
   words loaded before it.  */
393 do4:
394 a3 = ((op_t *) srcp)[3];
395 ((op_t *) dstp)[3] = MERGE (a0, sh_1, a1, sh_2);
396 do3:
397 a2 = ((op_t *) srcp)[2];
398 ((op_t *) dstp)[2] = MERGE (a3, sh_1, a0, sh_2);
399 do2:
400 a1 = ((op_t *) srcp)[1];
401 ((op_t *) dstp)[1] = MERGE (a2, sh_1, a3, sh_2);
402 do1:
403 a0 = ((op_t *) srcp)[0];
404 ((op_t *) dstp)[0] = MERGE (a1, sh_1, a2, sh_2);
/* Step both addresses down past the four words handled in this pass.  */
406 srcp -= 4 * OPSIZ;
407 dstp -= 4 * OPSIZ;
408 len -= 4;
410 while (len != 0);
/* The last two loop steps left the lowest pair of source words in a0 and
   a1; their merge has not been stored yet.  Index 3 is used because DSTP
   has just been moved down by four words.  */
412 /* This is the right position for do0. Please don't move
413 it into the loop. */
414 do0:
415 ((op_t *) dstp)[3] = MERGE (a0, sh_1, a1, sh_2);