1 /* wordcopy.c -- subroutines for memory copy functions. Tile version.
2 Copyright (C) 1991-2018 Free Software Foundation, Inc.
3 This file is part of the GNU C Library.
5 The GNU C Library is free software; you can redistribute it and/or
6 modify it under the terms of the GNU Lesser General Public
7 License as published by the Free Software Foundation; either
8 version 2.1 of the License, or (at your option) any later version.
10 The GNU C Library is distributed in the hope that it will be useful,
11 but WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 Lesser General Public License for more details.
15 You should have received a copy of the GNU Lesser General Public
16 License along with the GNU C Library; if not, see
17 <http://www.gnu.org/licenses/>. */
19 /* To optimize for tile, we make the following changes from the
20 default glibc version:
21 - Use the double align instruction instead of the MERGE macro.
22 - Since we don't have offset addressing mode, make sure the loads /
23 stores in the inner loop always have indices of 0.
24 - Use post-increment addresses in the inner loops, which yields better scheduling. */
27 /* BE VERY CAREFUL IF YOU CHANGE THIS CODE...! */
32 /* Provide the appropriate dblalign builtin to shift two registers
33 based on the alignment of a pointer held in a third register. */
34 #define DBLALIGN __insn_dblalign
36 /* _wordcopy_fwd_aligned -- Copy block beginning at SRCP to
37 block beginning at DSTP with LEN `op_t' words (not LEN bytes!).
38 Both SRCP and DSTP should be aligned for memory operations on `op_t's. */
41 _wordcopy_fwd_aligned (long int dstp
, long int srcp
, size_t len
)
48 a0
= ((op_t
*) srcp
)[0];
53 a1
= ((op_t
*) srcp
)[0];
58 a0
= ((op_t
*) srcp
)[0];
63 a1
= ((op_t
*) srcp
)[0];
68 a0
= ((op_t
*) srcp
)[0];
73 a1
= ((op_t
*) srcp
)[0];
79 if (OP_T_THRES
<= 3 * OPSIZ
&& len
== 0)
81 a0
= ((op_t
*) srcp
)[0];
85 a1
= ((op_t
*) srcp
)[0];
88 if (OP_T_THRES
<= 3 * OPSIZ
&& len
== 0)
90 goto do8
; /* No-op. */
96 a0
= ((op_t
*) srcp
)[0];
97 ((op_t
*) dstp
)[0] = a1
;
101 a1
= ((op_t
*) srcp
)[0];
102 ((op_t
*) dstp
)[0] = a0
;
106 a0
= ((op_t
*) srcp
)[0];
107 ((op_t
*) dstp
)[0] = a1
;
111 a1
= ((op_t
*) srcp
)[0];
112 ((op_t
*) dstp
)[0] = a0
;
116 a0
= ((op_t
*) srcp
)[0];
117 ((op_t
*) dstp
)[0] = a1
;
121 a1
= ((op_t
*) srcp
)[0];
122 ((op_t
*) dstp
)[0] = a0
;
126 a0
= ((op_t
*) srcp
)[0];
127 ((op_t
*) dstp
)[0] = a1
;
131 a1
= ((op_t
*) srcp
)[0];
132 ((op_t
*) dstp
)[0] = a0
;
140 /* This is the right position for do0. Please don't move it into the loop. */
143 ((op_t
*) dstp
)[0] = a1
;
146 /* _wordcopy_fwd_dest_aligned -- Copy block beginning at SRCP to
147 block beginning at DSTP with LEN `op_t' words (not LEN bytes!).
148 DSTP should be aligned for memory operations on `op_t's, but SRCP must *not* be aligned. */
152 _wordcopy_fwd_dest_aligned (long int dstp
, long int srcp
, size_t len
)
157 /* Save the initial source pointer so we know the number of bytes to
158 shift for merging two unaligned results. */
159 srci
= (void *) srcp
;
161 /* Make SRCP aligned by rounding it down to the beginning of the `op_t'
162 it points in the middle of. */
168 a1
= ((op_t
*) srcp
)[0];
169 a2
= ((op_t
*) srcp
)[1];
174 a0
= ((op_t
*) srcp
)[0];
175 a1
= ((op_t
*) srcp
)[1];
180 if (OP_T_THRES
<= 3 * OPSIZ
&& len
== 0)
182 a3
= ((op_t
*) srcp
)[0];
183 a0
= ((op_t
*) srcp
)[1];
188 a2
= ((op_t
*) srcp
)[0];
189 a3
= ((op_t
*) srcp
)[1];
192 if (OP_T_THRES
<= 3 * OPSIZ
&& len
== 0)
194 goto do4
; /* No-op. */
200 a0
= ((op_t
*) srcp
)[0];
201 a2
= DBLALIGN (a2
, a3
, srci
);
202 ((op_t
*) dstp
)[0] = a2
;
206 a1
= ((op_t
*) srcp
)[0];
207 a3
= DBLALIGN (a3
, a0
, srci
);
208 ((op_t
*) dstp
)[0] = a3
;
212 a2
= ((op_t
*) srcp
)[0];
213 a0
= DBLALIGN (a0
, a1
, srci
);
214 ((op_t
*) dstp
)[0] = a0
;
218 a3
= ((op_t
*) srcp
)[0];
219 a1
= DBLALIGN (a1
, a2
, srci
);
220 ((op_t
*) dstp
)[0] = a1
;
227 /* This is the right position for do0. Please don't move it into the loop. */
230 ((op_t
*) dstp
)[0] = DBLALIGN (a2
, a3
, srci
);
233 /* _wordcopy_bwd_aligned -- Copy block finishing right before
234 SRCP to block finishing right before DSTP with LEN `op_t' words
235 (not LEN bytes!). Both SRCP and DSTP should be aligned for memory
236 operations on `op_t's. */
239 _wordcopy_bwd_aligned (long int dstp
, long int srcp
, size_t len
)
244 srcp1
= srcp
- 1 * OPSIZ
;
251 a0
= ((op_t
*) srcp1
)[0];
255 a1
= ((op_t
*) srcp1
)[0];
259 a0
= ((op_t
*) srcp1
)[0];
263 a1
= ((op_t
*) srcp1
)[0];
267 a0
= ((op_t
*) srcp1
)[0];
271 a1
= ((op_t
*) srcp1
)[0];
276 if (OP_T_THRES
<= 3 * OPSIZ
&& len
== 0)
278 a0
= ((op_t
*) srcp1
)[0];
281 a1
= ((op_t
*) srcp1
)[0];
283 if (OP_T_THRES
<= 3 * OPSIZ
&& len
== 0)
285 goto do8
; /* No-op. */
291 a0
= ((op_t
*) srcp
)[0];
292 ((op_t
*) dstp
)[0] = a1
;
296 a1
= ((op_t
*) srcp
)[0];
297 ((op_t
*) dstp
)[0] = a0
;
301 a0
= ((op_t
*) srcp
)[0];
302 ((op_t
*) dstp
)[0] = a1
;
306 a1
= ((op_t
*) srcp
)[0];
307 ((op_t
*) dstp
)[0] = a0
;
311 a0
= ((op_t
*) srcp
)[0];
312 ((op_t
*) dstp
)[0] = a1
;
316 a1
= ((op_t
*) srcp
)[0];
317 ((op_t
*) dstp
)[0] = a0
;
321 a0
= ((op_t
*) srcp
)[0];
322 ((op_t
*) dstp
)[0] = a1
;
326 a1
= ((op_t
*) srcp
)[0];
327 ((op_t
*) dstp
)[0] = a0
;
335 /* This is the right position for do0. Please don't move it into the loop. */
338 ((op_t
*) dstp
)[0] = a1
;
341 /* _wordcopy_bwd_dest_aligned -- Copy block finishing right
342 before SRCP to block finishing right before DSTP with LEN `op_t'
343 words (not LEN bytes!). DSTP should be aligned for memory
344 operations on `op_t', but SRCP must *not* be aligned. */
347 _wordcopy_bwd_dest_aligned (long int dstp
, long int srcp
, size_t len
)
353 /* Save the initial source pointer so we know the number of bytes to
354 shift for merging two unaligned results. */
355 srci
= (void *) srcp
;
357 /* Make SRCP aligned by rounding it down to the beginning of the op_t
358 it points in the middle of. */
367 b2
= ((op_t
*) srcp
)[2];
368 b1
= a1
= ((op_t
*) srcp
)[1];
374 b3
= ((op_t
*) srcp
)[2];
375 b2
= a2
= ((op_t
*) srcp
)[1];
379 if (OP_T_THRES
<= 3 * OPSIZ
&& len
== 0)
383 b0
= ((op_t
*) srcp
)[2];
384 b3
= a3
= ((op_t
*) srcp
)[1];
389 b1
= ((op_t
*) srcp
)[2];
390 b0
= a0
= ((op_t
*) srcp
)[1];
392 if (OP_T_THRES
<= 3 * OPSIZ
&& len
== 0)
394 goto do4
; /* No-op. */
400 b3
= a3
= ((op_t
*) srcp
)[0];
401 a0
= DBLALIGN (a0
, b1
, srci
);
402 ((op_t
*) dstp
)[0] = a0
;
406 b2
= a2
= ((op_t
*) srcp
)[0];
407 a3
= DBLALIGN (a3
, b0
, srci
);
408 ((op_t
*) dstp
)[0] = a3
;
412 b1
= a1
= ((op_t
*) srcp
)[0];
413 a2
= DBLALIGN (a2
, b3
, srci
);
414 ((op_t
*) dstp
)[0] = a2
;
418 b0
= a0
= ((op_t
*) srcp
)[0];
419 a1
= DBLALIGN (a1
, b2
, srci
);
420 ((op_t
*) dstp
)[0] = a1
;
428 /* This is the right position for do0. Please don't move it into the loop. */
431 a0
= DBLALIGN (a0
, b1
, srci
);
432 ((op_t
*) dstp
)[0] = a0
;