1 /* wordcopy.c -- subroutines for memory copy functions. Tile version.
2 Copyright (C) 1991-2016 Free Software Foundation, Inc.
3 This file is part of the GNU C Library.
5 The GNU C Library is free software; you can redistribute it and/or
6 modify it under the terms of the GNU Lesser General Public
7 License as published by the Free Software Foundation; either
8 version 2.1 of the License, or (at your option) any later version.
10 The GNU C Library is distributed in the hope that it will be useful,
11 but WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 Lesser General Public License for more details.
15 You should have received a copy of the GNU Lesser General Public
16 License along with the GNU C Library; if not, see
17 <http://www.gnu.org/licenses/>. */
19 /* To optimize for tile, we make the following changes from the
20 default glibc version:
21 - Use the double align instruction instead of the MERGE macro.
22 - Since we don't have offset addressing mode, make sure the loads /
23 stores in the inner loop always have indices of 0.
24 - Use post-increment addresses in the inner loops, which yields better scheduling. */
27 /* BE VERY CAREFUL IF YOU CHANGE THIS CODE...! */
32 /* Provide the appropriate dblalign builtin to shift two registers
33 based on the alignment of a pointer held in a third register. */
/* NOTE(review): both definitions of DBLALIGN appear unconditionally in this
   copy.  The preprocessor conditional that presumably selects exactly one of
   the two builtins is not visible here -- confirm against the upstream file. */
35 #define DBLALIGN __insn_dblalign
37 #define DBLALIGN __insn_dword_align
40 /* _wordcopy_fwd_aligned -- Copy block beginning at SRCP to
41 block beginning at DSTP with LEN `op_t' words (not LEN bytes!).
42 Both SRCP and DSTP should be aligned for memory operations on `op_t's. */
/* Visible remains of the forward copy routine for two aligned pointers.
   The statements below read op_t values through SRCP and write them
   through DSTP, alternating between the temporaries a0 and a1.
   NOTE(review): this copy is incomplete -- the return type, braces,
   declarations, labels (e.g. the do8 target) and any updates to SRCP,
   DSTP or LEN are not visible here; confirm against the upstream file
   before relying on these notes.  The "do0" comment near the end is also
   cut short in this copy and is left untouched.  */
45 _wordcopy_fwd_aligned (long int dstp
, long int srcp
, size_t len
)
/* Entry loads: each reads element 0 of SRCP (viewed as op_t *) into a0 or
   a1.  */
52 a0
= ((op_t
*) srcp
)[0];
57 a1
= ((op_t
*) srcp
)[0];
62 a0
= ((op_t
*) srcp
)[0];
67 a1
= ((op_t
*) srcp
)[0];
72 a0
= ((op_t
*) srcp
)[0];
77 a1
= ((op_t
*) srcp
)[0];
/* LEN == 0 check; OP_T_THRES and OPSIZ are defined outside this view, and
   the statement this condition guards is not visible here.  */
83 if (OP_T_THRES
<= 3 * OPSIZ
&& len
== 0)
85 a0
= ((op_t
*) srcp
)[0];
89 a1
= ((op_t
*) srcp
)[0];
92 if (OP_T_THRES
<= 3 * OPSIZ
&& len
== 0)
94 goto do8
; /* No-op. */
/* Eight load/store pairs: each loads element 0 of SRCP into one temporary
   and stores the other temporary to element 0 of DSTP.  */
100 a0
= ((op_t
*) srcp
)[0];
101 ((op_t
*) dstp
)[0] = a1
;
105 a1
= ((op_t
*) srcp
)[0];
106 ((op_t
*) dstp
)[0] = a0
;
110 a0
= ((op_t
*) srcp
)[0];
111 ((op_t
*) dstp
)[0] = a1
;
115 a1
= ((op_t
*) srcp
)[0];
116 ((op_t
*) dstp
)[0] = a0
;
120 a0
= ((op_t
*) srcp
)[0];
121 ((op_t
*) dstp
)[0] = a1
;
125 a1
= ((op_t
*) srcp
)[0];
126 ((op_t
*) dstp
)[0] = a0
;
130 a0
= ((op_t
*) srcp
)[0];
131 ((op_t
*) dstp
)[0] = a1
;
135 a1
= ((op_t
*) srcp
)[0];
136 ((op_t
*) dstp
)[0] = a0
;
144 /* This is the right position for do0. Please don't move
147 ((op_t
*) dstp
)[0] = a1
;
150 /* _wordcopy_fwd_dest_aligned -- Copy block beginning at SRCP to
151 block beginning at DSTP with LEN `op_t' words (not LEN bytes!).
152 DSTP should be aligned for memory operations on `op_t's, but SRCP must *not* be aligned. */
/* Visible remains of the forward copy routine for an aligned DSTP and an
   unaligned SRCP.  The incoming SRCP is kept in srci, which is passed as
   the third argument of every DBLALIGN call below; each DBLALIGN result is
   stored to element 0 of DSTP.
   NOTE(review): this copy is incomplete -- the return type, braces,
   declarations, labels (e.g. the do4 target), the statement that rounds
   SRCP down, and any updates to SRCP, DSTP or LEN are not visible here;
   confirm against the upstream file before relying on these notes.  The
   "do0" comment near the end is also cut short in this copy and is left
   untouched.  */
156 _wordcopy_fwd_dest_aligned (long int dstp
, long int srcp
, size_t len
)
161 /* Save the initial source pointer so we know the number of bytes to
162 shift for merging two unaligned results. */
163 srci
= (void *) srcp
;
165 /* Make SRCP aligned by rounding it down to the beginning of the `op_t'
166 it points in the middle of. */
/* Entry loads: each pair of statements fills two temporaries from elements
   0 and 1 of SRCP (viewed as op_t *).  */
172 a1
= ((op_t
*) srcp
)[0];
173 a2
= ((op_t
*) srcp
)[1];
178 a0
= ((op_t
*) srcp
)[0];
179 a1
= ((op_t
*) srcp
)[1];
/* LEN == 0 check; OP_T_THRES and OPSIZ are defined outside this view, and
   the statement this condition guards is not visible here.  */
184 if (OP_T_THRES
<= 3 * OPSIZ
&& len
== 0)
186 a3
= ((op_t
*) srcp
)[0];
187 a0
= ((op_t
*) srcp
)[1];
192 a2
= ((op_t
*) srcp
)[0];
193 a3
= ((op_t
*) srcp
)[1];
196 if (OP_T_THRES
<= 3 * OPSIZ
&& len
== 0)
198 goto do4
; /* No-op. */
/* Four steps of the same shape: load element 0 of SRCP into one temporary,
   combine two other temporaries with DBLALIGN (using srci), and store the
   combined value to element 0 of DSTP.  */
204 a0
= ((op_t
*) srcp
)[0];
205 a2
= DBLALIGN (a2
, a3
, srci
);
206 ((op_t
*) dstp
)[0] = a2
;
210 a1
= ((op_t
*) srcp
)[0];
211 a3
= DBLALIGN (a3
, a0
, srci
);
212 ((op_t
*) dstp
)[0] = a3
;
216 a2
= ((op_t
*) srcp
)[0];
217 a0
= DBLALIGN (a0
, a1
, srci
);
218 ((op_t
*) dstp
)[0] = a0
;
222 a3
= ((op_t
*) srcp
)[0];
223 a1
= DBLALIGN (a1
, a2
, srci
);
224 ((op_t
*) dstp
)[0] = a1
;
231 /* This is the right position for do0. Please don't move
234 ((op_t
*) dstp
)[0] = DBLALIGN (a2
, a3
, srci
);
237 /* _wordcopy_bwd_aligned -- Copy block finishing right before
238 SRCP to block finishing right before DSTP with LEN `op_t' words
239 (not LEN bytes!). Both SRCP and DSTP should be aligned for memory
240 operations on `op_t's. */
/* Visible remains of the backward copy routine for two aligned pointers.
   The entry loads read op_t values through srcp1 (SRCP lowered by
   1 * OPSIZ); the later load/store pairs read through SRCP and write
   through DSTP, alternating between the temporaries a0 and a1.
   NOTE(review): this copy is incomplete -- the return type, braces,
   declarations, labels (e.g. the do8 target) and any updates to SRCP,
   srcp1, DSTP or LEN are not visible here; confirm against the upstream
   file before relying on these notes.  The "do0" comment near the end is
   also cut short in this copy and is left untouched.  */
243 _wordcopy_bwd_aligned (long int dstp
, long int srcp
, size_t len
)
/* srcp1 is SRCP lowered by 1 * OPSIZ; every entry load below reads element
   0 through it.  */
248 srcp1
= srcp
- 1 * OPSIZ
;
255 a0
= ((op_t
*) srcp1
)[0];
259 a1
= ((op_t
*) srcp1
)[0];
263 a0
= ((op_t
*) srcp1
)[0];
267 a1
= ((op_t
*) srcp1
)[0];
271 a0
= ((op_t
*) srcp1
)[0];
275 a1
= ((op_t
*) srcp1
)[0];
/* LEN == 0 check; OP_T_THRES and OPSIZ are defined outside this view, and
   the statement this condition guards is not visible here.  */
280 if (OP_T_THRES
<= 3 * OPSIZ
&& len
== 0)
282 a0
= ((op_t
*) srcp1
)[0];
285 a1
= ((op_t
*) srcp1
)[0];
287 if (OP_T_THRES
<= 3 * OPSIZ
&& len
== 0)
289 goto do8
; /* No-op. */
/* Eight load/store pairs: each loads element 0 of SRCP (not srcp1) into
   one temporary and stores the other temporary to element 0 of DSTP.  */
295 a0
= ((op_t
*) srcp
)[0];
296 ((op_t
*) dstp
)[0] = a1
;
300 a1
= ((op_t
*) srcp
)[0];
301 ((op_t
*) dstp
)[0] = a0
;
305 a0
= ((op_t
*) srcp
)[0];
306 ((op_t
*) dstp
)[0] = a1
;
310 a1
= ((op_t
*) srcp
)[0];
311 ((op_t
*) dstp
)[0] = a0
;
315 a0
= ((op_t
*) srcp
)[0];
316 ((op_t
*) dstp
)[0] = a1
;
320 a1
= ((op_t
*) srcp
)[0];
321 ((op_t
*) dstp
)[0] = a0
;
325 a0
= ((op_t
*) srcp
)[0];
326 ((op_t
*) dstp
)[0] = a1
;
330 a1
= ((op_t
*) srcp
)[0];
331 ((op_t
*) dstp
)[0] = a0
;
339 /* This is the right position for do0. Please don't move
342 ((op_t
*) dstp
)[0] = a1
;
345 /* _wordcopy_bwd_dest_aligned -- Copy block finishing right
346 before SRCP to block finishing right before DSTP with LEN `op_t'
347 words (not LEN bytes!). DSTP should be aligned for memory
348 operations on `op_t', but SRCP must *not* be aligned. */
/* Visible remains of the backward copy routine for an aligned DSTP and an
   unaligned SRCP.  The incoming SRCP is kept in srci, which is passed as
   the third argument of every DBLALIGN call below; each DBLALIGN result is
   stored to element 0 of DSTP.  Loaded words are held in paired
   temporaries (aN and bN).
   NOTE(review): this copy is incomplete -- the return type, braces,
   declarations, labels (e.g. the do4 target), the statement that rounds
   SRCP down, and any updates to SRCP, DSTP or LEN are not visible here;
   confirm against the upstream file before relying on these notes.  The
   "do0" comment near the end is also cut short in this copy and is left
   untouched.  */
351 _wordcopy_bwd_dest_aligned (long int dstp
, long int srcp
, size_t len
)
357 /* Save the initial source pointer so we know the number of bytes to
358 shift for merging two unaligned results. */
359 srci
= (void *) srcp
;
361 /* Make SRCP aligned by rounding it down to the beginning of the op_t
362 it points in the middle of. */
/* Entry loads: one b-temporary takes element 2 of SRCP, then a b/a pair
   both take element 1.  */
371 b2
= ((op_t
*) srcp
)[2];
372 b1
= a1
= ((op_t
*) srcp
)[1];
378 b3
= ((op_t
*) srcp
)[2];
379 b2
= a2
= ((op_t
*) srcp
)[1];
/* LEN == 0 check; OP_T_THRES and OPSIZ are defined outside this view, and
   the statement this condition guards is not visible here.  */
383 if (OP_T_THRES
<= 3 * OPSIZ
&& len
== 0)
387 b0
= ((op_t
*) srcp
)[2];
388 b3
= a3
= ((op_t
*) srcp
)[1];
393 b1
= ((op_t
*) srcp
)[2];
394 b0
= a0
= ((op_t
*) srcp
)[1];
396 if (OP_T_THRES
<= 3 * OPSIZ
&& len
== 0)
398 goto do4
; /* No-op. */
/* Four steps of the same shape: load element 0 of SRCP into a b/a pair,
   combine an a-temporary with a b-temporary via DBLALIGN (using srci), and
   store the combined value to element 0 of DSTP.  */
404 b3
= a3
= ((op_t
*) srcp
)[0];
405 a0
= DBLALIGN (a0
, b1
, srci
);
406 ((op_t
*) dstp
)[0] = a0
;
410 b2
= a2
= ((op_t
*) srcp
)[0];
411 a3
= DBLALIGN (a3
, b0
, srci
);
412 ((op_t
*) dstp
)[0] = a3
;
416 b1
= a1
= ((op_t
*) srcp
)[0];
417 a2
= DBLALIGN (a2
, b3
, srci
);
418 ((op_t
*) dstp
)[0] = a2
;
422 b0
= a0
= ((op_t
*) srcp
)[0];
423 a1
= DBLALIGN (a1
, b2
, srci
);
424 ((op_t
*) dstp
)[0] = a1
;
432 /* This is the right position for do0. Please don't move
435 a0
= DBLALIGN (a0
, b1
, srci
);
436 ((op_t
*) dstp
)[0] = a0
;