1 /* Optimized strcpy/stpcpy implementation for PowerPC64/POWER7.
2 Copyright (C) 2013-2015 Free Software Foundation, Inc.
3 This file is part of the GNU C Library.
5 The GNU C Library is free software; you can redistribute it and/or
6 modify it under the terms of the GNU Lesser General Public
7 License as published by the Free Software Foundation; either
8 version 2.1 of the License, or (at your option) any later version.
10 The GNU C Library is distributed in the hope that it will be useful,
11 but WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 Lesser General Public License for more details.
15 You should have received a copy of the GNU Lesser General Public
16 License along with the GNU C Library; if not, see
17 <http://www.gnu.org/licenses/>. */
21 /* Implements the function
23 char * [r3] strcpy (char *dest [r3], const char *src [r4])
27 char * [r3] stpcpy (char *dest [r3], const char *src [r4])
29 if USE_AS_STPCPY is defined. It tries to use aligned memory accesses
30 when possible using the following algorithm:
32 if (((((uintptr_t)dst & 0x7UL) == 0) && ((uintptr_t)src & 0x7UL) == 0))
33 goto aligned_doubleword_copy;
34 if (((uintptr_t)dst & 0x7UL) == ((uintptr_t)src & 0x7UL))
38 The aligned comparisons are made using cmpb instructions. */
/* Entry point FUNC_NAME: the two defines below name it __stpcpy or strcpy
   (presumably selected by USE_AS_STPCPY; the guarding conditional is not
   visible in this view).  Per the register aliases that follow, the
   destination is tracked in rRTN and the source in rSRC.  */
41 # define FUNC_NAME __stpcpy
43 # define FUNC_NAME strcpy
47 EALIGN (FUNC_NAME, 4, 0)
/* Two alternative definitions of rRTN follow (r3 or r12); NOTE(review): the
   conditional choosing between them is not visible here -- confirm.  */
52 #define rRTN r3 /* pointer to previous word/doubleword in dest */
54 #define rRTN r12 /* pointer to previous word/doubleword in dest */
56 #define rSRC r4 /* pointer to previous word/doubleword in src */
57 #define rMASK r5 /* mask 0xffffffff | 0xffffffffffffffff */
58 #define rWORD r6 /* current word from src */
59 #define rALT r7 /* alternate word from src */
60 #define rRTNAL r8 /* alignment of return pointer */
61 #define rSRCAL r9 /* alignment of source pointer */
62 #define rALCNT r10 /* bytes to read to reach 8-byte alignment */
63 #define rSUBAL r11 /* doubleword minus unaligned displacement */
66 /* Save the dst pointer to use as return value. */
70 clrldi. rTMP, rTMP, 61 /* keep only the low 3 bits of rTMP; the record form sets cr0 for the bne below */
71 bne L(check_alignment)
72 b L(aligned_doubleword_copy)
76 rldicl rRTNAL, rRTN, 0, 61 /* rRTNAL = rRTN & 7 */
77 rldicl rSRCAL, rSRC, 0, 61 /* rSRCAL = rSRC & 7 */
78 cmpld cr7, rSRCAL, rRTNAL /* same offset within a doubleword? */
79 beq cr7, L(same_alignment)
84 /* Src and dst with same alignment: align both to doubleword. */
87 subfic rSUBAL, rRTNAL, 8 /* rSUBAL = 8 - rRTNAL */
94 add rALCNT, rALCNT, rSUBAL
95 subf rALCNT, rRTN, rALCNT
96 addi rALCNT, rALCNT, 1
105 stb rWORD, -1(rALCNT)
111 b L(aligned_doubleword_copy)
117 /* For doubleword aligned memory, operate using doubleword loads and stores. */
119 L(aligned_doubleword_copy):
126 L(g0): ldu rALT, 8(rSRC) /* load next dw; ldu also advances rSRC by 8 */
128 cmpb rTMP, rALT, rMASK
133 L(g2): cmpb rTMP, rWORD, rMASK
134 cmpdi rTMP, 0 /* If rTMP is 0, no nulls have been found. */
138 /* We've hit the end of the string. Do the rest byte-by-byte. */
140 #ifdef __LITTLE_ENDIAN__
141 extrdi. rTMP, rALT, 8, 56 /* byte in bits 56..63 (least significant); sets cr0 */
144 extrdi. rTMP, rALT, 8, 48
147 extrdi. rTMP, rALT, 8, 40
150 extrdi. rTMP, rALT, 8, 32
153 extrdi. rTMP, rALT, 8, 24
156 extrdi. rTMP, rALT, 8, 16
159 extrdi. rTMP, rALT, 8, 8
162 extrdi rTMP, rALT, 8, 0 /* non-record form: cr0 is not updated here */
/* NOTE(review): the sequence below walks the bytes in the opposite order;
   presumably the big-endian alternative, whose #else is not visible here.  */
165 extrdi. rTMP, rALT, 8, 0 /* byte in bits 0..7 (most significant); sets cr0 */
168 extrdi. rTMP, rALT, 8, 8
171 extrdi. rTMP, rALT, 8, 16
174 extrdi. rTMP, rALT, 8, 24
177 extrdi. rTMP, rALT, 8, 32
180 extrdi. rTMP, rALT, 8, 40
183 extrdi. rTMP, rALT, 8, 48
192 cmpdi rSRCAL, 0 /* Check src alignment */
193 beq L(srcaligndstunalign)
194 /* src is unaligned */
195 rlwinm r10, rSRC, 3,26,28 /* Calculate padding in bits: (rSRC & 7) * 8. */
196 clrrdi rSRC, rSRC, 3 /* Align the addr to dw boundary */
197 ld rWORD, 0(rSRC) /* Load doubleword from memory. */
199 /* Discard bits not part of the string */
200 #ifdef __LITTLE_ENDIAN__
205 cmpb rTMP, rALT, rTMP /* Compare each byte against null */
206 /* Discard bits not part of the string */
207 #ifdef __LITTLE_ENDIAN__
213 bne L(bytebybyte) /* if it has null, copy byte by byte */
215 rlwinm r5, rRTN, 3,26,28 /* Calculate padding in bits. */
216 rldicl r9, rRTN, 0, 61 /* Calculate padding in bytes. */
219 cmpdi r5, 0 /* check dest alignment */
220 beq L(srcunaligndstalign)
222 /* both src and dst unaligned */
223 #ifdef __LITTLE_ENDIAN__
226 addi r11, r11, -8 /* Adjust byte pointer on loaded dw */
231 /* dst alignment is greater than src alignment? */
233 blt cr7, L(dst_align_small)
234 /* src alignment is less than dst */
236 /* Calculate the dst alignment difference */
240 /* Write till dst is aligned */
242 blt L(storebyte1) /* less than 4, store byte by byte */
243 beq L(equal1) /* if it's 4, store word */
244 addi rTMP, rALT, -4 /* greater than 4, so stb and stw */
247 #ifdef __LITTLE_ENDIAN__
248 addi r11, r11, 8 /* Adjust byte pointer on loaded dw */
256 subfic rALT, r9, 8 /* Check the remaining bytes */
262 #ifdef __LITTLE_ENDIAN__
263 addi r11, r11, 8 /* Adjust byte pointer on loaded dw */
275 /* Calculate the leftover bytes to be written */
278 subf r5, r5, r11 /* remaining bytes on second dw */
279 subfic r10, r5, 64 /* remaining bytes on first dw */
281 subf r8, r9, r8 /* recalculate padding */
282 L(srcunaligndstalign):
284 subfic r5, r10, 64 /* remaining bytes on second dw (NOTE(review): 64 - r10 reads as a bit count, cf. the shifts by r5/r10 further down -- confirm) */
292 /* Write till src is aligned */
294 #ifdef __LITTLE_ENDIAN__
295 addi r11, r11, 8 /* Adjust byte pointer on dw */
303 addi rSRC, rSRC, 8 /* Increment src pointer */
304 addi rRTN, rRTN, 1 /* Increment dst pointer */
305 rldicl r8, rRTN, 0, 61 /* Recalculate padding */
308 L(srcaligndstunalign):
311 li rTMP, 0 /* Check null */
312 cmpb rTMP, rWORD, rTMP
314 bne L(bytebybyte) /* Do byte by byte if there is NULL */
315 rlwinm r5, rRTN, 3,26,28 /* Calculate padding in bits: (rRTN & 7) * 8 */
318 /* write byte by byte till aligned */
319 #ifdef __LITTLE_ENDIAN__
331 #ifdef __LITTLE_ENDIAN__
332 addi r11, r11, 8 /* Adjust byte pointer on dw */
345 #ifdef __LITTLE_ENDIAN__
357 addi rSRC, rSRC, 8 /* Increment src pointer */
360 /* dst addr aligned to 8 */
362 ld rALT, 0(rSRC) /* load next dw */
363 cmpb rTMP, rALT, rTMP
364 cmpdi rTMP, 0 /* check for null on each new dw */
366 #ifdef __LITTLE_ENDIAN__
367 srd r9, rWORD, r10 /* bytes from first dw */
368 sld r11, rALT, r5 /* bytes from second dw */
373 or r11, r9, r11 /* make as a single dw */
374 std r11, 0(rRTN) /* store as std on aligned addr */
375 mr rWORD, rALT /* still few bytes left to be written */
376 addi rRTN, rRTN, 8 /* increment dst addr */
377 addi rSRC, rSRC, 8 /* increment src addr */
378 b L(storedouble) /* Loop till NULL */
382 /* We've hit the end of the string. Do the rest byte-by-byte. */
387 #ifdef __LITTLE_ENDIAN__
394 /* we can still use stw if leftover >= 4 */
395 #ifdef __LITTLE_ENDIAN__
408 #ifdef __LITTLE_ENDIAN__
415 /* remaining byte by byte part of first dw */
417 #ifdef __LITTLE_ENDIAN__
428 /* remaining byte by byte part of second dw */
435 #ifndef USE_AS_STPCPY
436 libc_hidden_builtin_def (strcpy)