1 /* Optimized memcpy implementation for PowerPC64.
2 Copyright (C) 2003 Free Software Foundation, Inc.
3 This file is part of the GNU C Library.
5 The GNU C Library is free software; you can redistribute it and/or
6 modify it under the terms of the GNU Lesser General Public
7 License as published by the Free Software Foundation; either
8 version 2.1 of the License, or (at your option) any later version.
10 The GNU C Library is distributed in the hope that it will be useful,
11 but WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 Lesser General Public License for more details.
15 You should have received a copy of the GNU Lesser General Public
16 License along with the GNU C Library; if not, write to the Free
17 Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
24 /* __ptr_t [r3] memcpy (__ptr_t dst [r3], __ptr_t src [r4], size_t len [r5]);
27 Memcpy handles short copies (< 32-bytes) using an unaligned
28 word lwz/stw loop. The tail (remaining 1-3 bytes) is handled with the
29 appropriate combination of byte and halfword load/stores. There is no
30 attempt to optimize the alignment of short moves. The 64-bit
31 implementations of POWER3 and POWER4 do a reasonable job of handling
32 unaligned load/stores that do not cross 32-byte boundaries.
34 Longer moves (>= 32-bytes) justify the effort to get at least the
35 destination doubleword (8-byte) aligned. Further optimization is
36 possible when both source and destination are doubleword aligned.
37 Each case has an optimized unrolled loop. */
39 EALIGN (BP_SYM (memcpy), 5, 0)
51 /* Move 0-7 bytes as needed to get the destination doubleword aligned. */
70 /* Copy doublewords from source to destination, assuming the
71 destination is aligned on a doubleword boundary.
73 First verify that there are > 7 bytes to copy and check if the source
74 is also doubleword aligned. If there are < 8 bytes to copy fall
75 through to the tail byte copy code. Otherwise if the source and
76 destination are both doubleword aligned use an optimized doubleword
77 copy loop. Otherwise the source has a different alignment and we use
78 a load, shift, store strategy. */
81 ble- cr6,.L2 /* less than 8 bytes left. */
82 bne- 0,.L6 /* Source is not DW aligned. */
87 /* Move doublewords where destination and source are aligned.
88 Use an unrolled loop to copy 4 doublewords (32-bytes) per iteration.
89 If the remainder is >0 and < 32 bytes copy 1-3 doublewords. */
133 /* Copy the tail for up to 31 bytes. If this is the tail of a longer
134 copy then the destination will be aligned and the length will be
135 less than 8. So it is normally not worth the set-up overhead to
136 get doubleword aligned and do doubleword load/store. */
161 /* Return original dst pointer. */
172 /* Copy doublewords where the destination is aligned but the source is
173 not. Use aligned doubleword loads from the source, shifted to realign
174 the data, to allow aligned destination stores. */
210 END_GEN_TB (BP_SYM (memcpy),TB_TOCLESS)
211 libc_hidden_builtin_def (memcpy)