1 /* Copyright (C) 2014-2015 Free Software Foundation, Inc.
2 This file is part of the GNU C Library.
4 The GNU C Library is free software; you can redistribute it and/or
5 modify it under the terms of the GNU Lesser General Public
6 License as published by the Free Software Foundation; either
7 version 2.1 of the License, or (at your option) any later version.
9 The GNU C Library is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
12 Lesser General Public License for more details.
14 You should have received a copy of the GNU Lesser General Public
15 License along with the GNU C Library; if not, see
16 <http://www.gnu.org/licenses/>. */
20 /* Implements the functions
22 char * [r3] strncpy (char *dst [r3], const char *src [r4], size_t n [r5])
26 char * [r3] stpncpy (char *dst [r3], const char *src [r4], size_t n [r5])
28 The algorithm is as follows:
29 > if src and dest are 8 byte aligned, perform double word copy
31 > copy byte by byte on unaligned addresses.
33 The aligned comparisons are made using cmpb instructions. */
35 /* The optimizations for performance focus on the following:
36 1. data alignment [gain from aligned memory access on read/write]
37 2. POWER7 gains performance with loop unrolling/unwinding
38 [gain by reduction of branch penalty].
39 3. The final pad with null bytes is done by calling an optimized
43 # define FUNC_NAME __stpncpy
45 # define FUNC_NAME strncpy
48 #define FRAMESIZE (FRAME_MIN_SIZE+32)
51 /* For builds with no IFUNC support, local calls should be made to internal
52 GLIBC symbol (created by libc_hidden_builtin_def). */
54 # define MEMSET __GI_memset
56 # define MEMSET memset
/* Entry of FUNC_NAME.  Per the prototype comment at the top of the file:
   r3 = dst, r4 = src, r5 = n.  The visible prologue saves LR, r18 and r19
   and creates a stack frame of FRAMESIZE bytes.  */
61 EALIGN(FUNC_NAME, 4, 0)
64 mflr r0 /* copy the link register LR into r0 */
65 or r10, r3, r4 /* merge dst and src address bits for the alignment test */
66 rldicl. r8, r10, 0, 61 /* low 3 bits of (dst | src) ; zero means both are doubleword aligned */
68 std r19, -8(r1) /* save callers register , r19 */
69 std r18, -16(r1) /* save callers register , r18 */
70 std r0, 16(r1) /* store the link register */
71 stdu r1, -FRAMESIZE(r1) /* create the stack frame */
73 mr r9, r3 /* working copy of dst in r9 */
74 mr r18, r3 /* save r3 for retCode of strncpy */
78 srdi r11, r5, 3 /* compute count for CTR ; count = n/8 */
79 cmpldi cr7, r11, 3 /* unsigned compare of count with 3 ; the result selects the unrolled path */
82 ld r10, 0(r4) /* load doubleword from src */
83 cmpb r8, r10, r8 /* byte-wise compare of the dword just read with r8 (expected to hold 0 here) */
84 cmpdi cr7, r8, 0 /* a zero cmpb result means no byte matched ; we continue */
87 std r10, 0(r3) /* copy doubleword at offset=0 */
88 ld r10, 8(r4) /* load next doubleword from offset=8 */
89 cmpb r8, r10, r8 /* byte-wise compare of the dword just read with r8 */
90 cmpdi cr7, r8, 0 /* a zero cmpb result means no byte matched ; we continue */
105 ld r8, 24(r4) /* load dword,perform loop unrolling again */
110 std r8, 24(r7) /* copy dword at offset=24 */
113 bdz L(leftDwords) /* decrement CTR ; when it reaches zero leave for L(leftDwords) */
126 mr r6, r4 /* keep a copy of src in r6 */
132 std r10, 8(r9) /* copy dword at offset=8 */
138 beq cr7, L(dWordUnroll)
140 addi r9, r9, 16 /* increment dst by 16 */
141 addi r4, r4, 16 /* increment src by 16 */
142 addi r5, r5, -16 /* decrement length 'n' by 16 */
143 addi r0, r11, -2 /* r0 = count - 2 , the remaining loop count */
146 ld r10, 0(r4) /* load first dword */
147 li r8, 0 /* load mask */
150 bne cr7, L(byte_by_byte)
/* Top of the CTR-controlled loop that is closed by the bdnz below.  */
156 L(loadDWordandCompare):
160 bne cr7, L(byte_by_byte)
167 bdnz L(loadDWordandCompare) /* decrement CTR ; loop again while it is non-zero */
171 ble cr7, L(verifyByte)
179 lbz r10, 1(r4) /* load byte from src */
180 cmpdi cr7, r10, 0 /* compare for NULL */
181 stb r10, 1(r19) /* store byte to dst */
182 beq cr7, L(updtDestComputeN2ndByte)
184 addi r4, r4, 4 /* advance src by 4 ; the loads below use offsets -2, -1 and 0 */
186 lbz r10, -2(r4) /* unrolled byte read : old src + 2 */
189 beq cr7, L(updtDestComputeN3rdByte)
191 lbz r10, -1(r4) /* unrolled byte read : old src + 3 */
195 beq cr7, L(ComputeNByte)
200 lbz r10, 0(r4) /* unrolled byte read : old src + 4 */
203 bne cr7, L(bytes_unroll)
207 subf r9, r19, r9 /* compute 'n' bytes to fill : r9 = r9 - r19 */
211 cmpdi cr7, r8, 0 /* compare if length is zero */
212 beq cr7, L(update3return)
214 mr r3, r19 /* memset arg 1 : start of the area to fill */
215 li r4, 0 /* memset arg 2 : fill with zero */
216 mr r5, r8 /* memset arg 3 : how many bytes to fill */
217 bl MEMSET /* call optimized memset */
221 #ifdef USE_AS_STPNCPY
222 addi r3, r19, -1 /* update return value : r19 - 1 */
226 #ifndef USE_AS_STPNCPY
227 mr r3, r18 /* set return value : the dst saved at entry */
229 addi r1, r1, FRAMESIZE /* restore stack pointer */
230 ld r0, 16(r1) /* read the saved link register */
231 ld r18, -16(r1) /* restore callers save register, r18 */
232 ld r19, -8(r1) /* restore callers save register, r19 */
233 mtlr r0 /* put the saved return address back into LR */
242 rldicl. r8, r5, 0, 62 /* r8 = low 2 bits of r5 (n & 3) ; cr0 reflects the result */
243 #ifdef USE_AS_STPNCPY
246 beq cr0, L(hop2return)
257 lbzu r10, 1(r4) /* load byte at src + 1 and advance src by 1 */
267 addi r1, r1, FRAMESIZE /* restore stack pointer */
268 #ifdef USE_AS_STPNCPY
269 mr r3, r19 /* set the return value */
271 mr r3, r18 /* set the return value */
273 ld r0, 16(r1) /* read the saved link register */
274 ld r18, -16(r1) /* restore callers save register, r18 */
275 ld r19, -8(r1) /* restore callers save register, r19 */
276 mtlr r0 /* put the saved return address back into LR */
287 bne cr7, L(dWordUnrollOFF)
/* Branch target used by the unrolled byte copy after the byte stored at
   offset 1 compared equal to zero.  */
291 L(updtDestComputeN2ndByte):
292 addi r19, r19, 2 /* update dst by 2 */
293 subf r9, r19, r9 /* compute distance covered : r9 = r9 - r19 */
/* Branch target used by the unrolled byte copy after its third byte load.  */
298 L(updtDestComputeN3rdByte):
299 addi r19, r19, 3 /* update dst by 3 */
300 subf r9, r19, r9 /* compute distance covered : r9 = r9 - r19 */
306 addi r9, r9, 24 /* increment dst by 24 */
307 addi r4, r4, 24 /* increment src by 24 */
308 addi r5, r5, -24 /* decrement length 'n' by 24 */
309 addi r0, r11, -3 /* r0 = count - 3 , the remaining loop count */
319 addi r9, r7, 40 /* increment dst by 40 */
320 addi r4, r6, 40 /* increment src by 40 */
321 addi r5, r5, -40 /* decrement length 'n' by 40 */
322 addi r0, r11, -5 /* r0 = count - 5 , the remaining loop count */
330 addi r9, r3, 8 /* increment dst by 8 */
331 addi r4, r4, 8 /* increment src by 8 */
332 addi r5, r5, -8 /* decrement length 'n' by 8 */
333 addi r0, r11, -1 /* r0 = count - 1 , the remaining loop count */
337 cmpdi r5, 16 /* Proceed byte by byte for less than 16 */
341 cmpdi r6, 0 /* Check src alignment */
342 beq L(srcaligndstunalign)
343 /* src is unaligned */
344 rlwinm r10, r4, 3,26,28 /* Calculate src padding in bits : (r4 & 7) * 8 */
345 clrrdi r4, r4, 3 /* Align the addr to dw boundary */
346 ld r8, 0(r4) /* Load doubleword from memory. */
348 /* Discard bits not part of the string */
349 #ifdef __LITTLE_ENDIAN__
354 cmpb r0, r7, r0 /* Compare each byte against null */
355 /* Discard bits not part of the string */
356 #ifdef __LITTLE_ENDIAN__
362 bne L(bytebybyte) /* if it has null, copy byte by byte */
364 rlwinm r12, r3, 3,26,28 /* Calculate dst padding in bits : (r3 & 7) * 8 */
365 rldicl r9, r3, 0, 61 /* Calculate dst padding in bytes : r3 & 7 */
368 cmpdi r12, 0 /* check dest alignment */
369 beq L(srcunaligndstalign)
371 /* both src and dst unaligned */
372 #ifdef __LITTLE_ENDIAN__
375 addi r11, r11, -8 /* Adjust byte pointer on loaded dw */
380 /* dst alignment is greater than src alignment? */
382 ble cr7, L(dst_align_small)
383 /* src alignment is less than dst */
385 /* Calculate the dst alignment difference */
389 /* Write until dst is aligned */
391 blt L(storebyte1) /* less than 4, store byte by byte */
392 beq L(equal1) /* if it's 4, store word */
393 addi r0, r7, -4 /* greater than 4, so stb and stw */
396 #ifdef __LITTLE_ENDIAN__
397 addi r11, r11, 8 /* Adjust byte pointer on loaded dw */
406 subfic r7, r9, 8 /* Check the remaining bytes : r7 = 8 - r9 */
412 #ifdef __LITTLE_ENDIAN__
413 addi r11, r11, 8 /* Adjust byte pointer on loaded dw */
426 /* calculate the Left over bytes to be written */
429 subf r12, r12, r11 /* remaining bytes on second dw : r12 = r11 - r12 */
430 subfic r10, r12, 64 /* remaining on first dw : r10 = 64 - r12 ; NOTE(review): the constant 64 suggests a bit count, not bytes - confirm */
432 subf r6, r9, r6 /* recalculate padding : r6 = r6 - r9 */
/* Reached by branch when the dst padding in r12 compared equal to zero
   (see the 'check dest alignment' test), and also by fall-through from the
   instruction just above this label.  */
433 L(srcunaligndstalign):
435 subfic r12, r10, 64 /* remaining on second dw : r12 = 64 - r10 ; NOTE(review): the constant 64 suggests a bit count, not bytes - confirm */
443 /* Write until src is aligned */
445 #ifdef __LITTLE_ENDIAN__
446 addi r11, r11, 8 /* Adjust byte pointer on dw */
455 addi r4, r4, 8 /* Increment src pointer */
456 addi r3, r3, 1 /* Increment dst pointer */
461 rldicl r6, r3, 0, 61 /* Recalculate padding : r6 = r3 & 7 */
/* Reached from the 'Check src alignment' test when r6 compared equal to
   zero, i.e. the source needs no alignment fix-up.  */
465 L(srcaligndstunalign):
471 li r0, 0 /* Check null */
474 bne L(byte_by_byte) /* Do byte by byte if there is NULL */
475 rlwinm r12, r3, 3,26,28 /* Calculate dst padding in bits : (r3 & 7) * 8 */
477 /* write byte by byte until aligned */
478 #ifdef __LITTLE_ENDIAN__
490 #ifdef __LITTLE_ENDIAN__
491 addi r11, r11, 8 /* Adjust byte pointer on dw */
505 #ifdef __LITTLE_ENDIAN__
518 addi r4, r4, 8 /* Increment src pointer */
521 /* dst addr aligned to 8 */
525 ld r7, 0(r4) /* load next dw */
527 cmpdi r0, 0 /* check for null on each new dw */
529 #ifdef __LITTLE_ENDIAN__
530 srd r9, r8, r10 /* bytes from first dw */
531 sld r11, r7, r12 /* bytes from second dw */
536 or r11, r9, r11 /* make as a single dw */
537 std r11, 0(r3) /* store as std on aligned addr */
538 mr r8, r7 /* the dw just loaded becomes the 'first dw' of the next round */
539 addi r3, r3, 8 /* increment dst addr */
540 addi r4, r4, 8 /* increment src addr */
542 b L(storedouble) /* Loop until NULL */
546 /* We've hit the end of the string. Do the rest byte-by-byte. */
551 #ifdef __LITTLE_ENDIAN__
560 /* we can still use stw if leftover >= 4 */
561 #ifdef __LITTLE_ENDIAN__
577 #ifdef __LITTLE_ENDIAN__
584 /* remaining byte by byte part of first dw */
586 #ifdef __LITTLE_ENDIAN__
599 /* remaining byte by byte part of second dw */
/* Each extrdi. below right-justifies one 8-bit field of r7 into r0 and,
   being the record form, also sets cr0.  Bit 0 is the most significant
   bit, so bits 56..63 are the least significant byte.  */
604 #ifdef __LITTLE_ENDIAN__
605 extrdi. r0, r7, 8, 56 /* r0 = r7 bits 56..63 */
611 extrdi. r0, r7, 8, 48 /* r0 = r7 bits 48..55 */
617 extrdi. r0, r7, 8, 40 /* r0 = r7 bits 40..47 */
623 extrdi. r0, r7, 8, 32 /* r0 = r7 bits 32..39 */
629 extrdi. r0, r7, 8, 24 /* r0 = r7 bits 24..31 */
635 extrdi. r0, r7, 8, 16 /* r0 = r7 bits 16..23 */
664 extrdi. r0, r7, 8, 16 /* r0 = r7 bits 16..23 */
670 extrdi. r0, r7, 8, 24 /* r0 = r7 bits 24..31 */
676 extrdi. r0, r7, 8, 32 /* r0 = r7 bits 32..39 */
682 extrdi. r0, r7, 8, 40 /* r0 = r7 bits 40..47 */
688 extrdi. r0, r7, 8, 48 /* r0 = r7 bits 48..55 */
699 #ifdef USE_AS_STPNCPY
/* Only the strncpy build (USE_AS_STPNCPY not defined) emits the hidden
   builtin definition for strncpy.  */
712 #ifndef USE_AS_STPNCPY
713 libc_hidden_builtin_def (strncpy)