1 /* Optimized strncpy implementation for POWER9 LE.
2 Copyright (C) 2020-2023 Free Software Foundation, Inc.
3 This file is part of the GNU C Library.
5 The GNU C Library is free software; you can redistribute it and/or
6 modify it under the terms of the GNU Lesser General Public
7 License as published by the Free Software Foundation; either
8 version 2.1 of the License, or (at your option) any later version.
10 The GNU C Library is distributed in the hope that it will be useful,
11 but WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 Lesser General Public License for more details.
15 You should have received a copy of the GNU Lesser General Public
16 License along with the GNU C Library; if not, see
17 <https://www.gnu.org/licenses/>. */
23 # define FUNC_NAME __stpncpy
25 # define FUNC_NAME STPNCPY
29 # define FUNC_NAME strncpy
31 # define FUNC_NAME STRNCPY
33 #endif /* !USE_AS_STPNCPY */
36 /* For builds without IFUNC support, local calls should be made to the
37 internal GLIBC symbol (created by libc_hidden_builtin_def). */
39 # define MEMSET_is_local
40 # define MEMSET __GI_memset
42 # define MEMSET memset
46 #define FRAMESIZE (FRAME_MIN_SIZE+8) /* Minimum frame plus 8 bytes for the r30 save slot, written at -8(r1) before the frame is created. NOTE(review): confirm this total keeps r1 aligned as the ABI requires. */
48 /* Implements the function
50 char * [r3] strncpy (char *dest [r3], const char *src [r4], size_t n [r5])
54 char * [r3] stpncpy (char *dest [r3], const char *src [r4], size_t n [r5])
56 if USE_AS_STPNCPY is defined.
58 The implementation can load bytes past a null terminator, but only
59 up to the next 16-byte aligned address, so it never crosses a page. */
62 #ifdef MEMSET_is_local
63 ENTRY_TOCLESS (FUNC_NAME, 4)
69 /* NULL string optimizations */
77 vspltisb v18,0 /* Zeroes in v18 */
81 /* Empty/1-byte string optimization */
85 /* Compute pointer to last byte copied into dest. */
95 rldicl r9,r7,0,60 /* How many bytes to get source 16B aligned? */
97 /* Get source 16B aligned */
102 vcmpequb v6,v0,v18 /* 0xff if byte is NULL, 0x00 otherwise */
103 vctzlsbb r7,v6 /* Number of trailing zeroes */
104 addi r8,r7,1 /* Add null terminator */
106 /* r8 = bytes including null
107 r9 = bytes to get source 16B aligned
109 no null, copy r9 bytes
111 there is a null, copy r8 bytes and return. */
115 cmpld cr6,r8,r5 /* r8 <= n? */
118 sldi r10,r5,56 /* stxvl wants size in top 8 bits */
119 stxvl 32+v0,r11,r10 /* Partial store */
121 #ifdef USE_AS_STPNCPY
122 /* Compute pointer to last byte copied into dest. */
128 sldi r10,r8,56 /* stxvl wants size in top 8 bits */
129 stxvl 32+v0,r11,r10 /* Partial store */
131 #ifdef USE_AS_STPNCPY
132 /* Compute pointer to last byte copied into dest. */
140 cmpld r9,r5 /* Check if length was reached. */
143 sldi r10,r9,56 /* stxvl wants size in top 8 bits */
144 stxvl 32+v0,r11,r10 /* Partial store */
151 cmpldi cr6,r5,64 /* Check if length was reached. */
152 ble cr6,L(final_loop)
155 vcmpequb. v6,v0,v18 /* Any zero bytes? */
156 bne cr6,L(prep_tail1)
159 vcmpequb. v6,v1,v18 /* Any zero bytes? */
160 bne cr6,L(prep_tail2)
163 vcmpequb. v6,v2,v18 /* Any zero bytes? */
164 bne cr6,L(prep_tail3)
167 vcmpequb. v6,v3,v18 /* Any zero bytes? */
168 bne cr6,L(prep_tail4)
184 vcmpequb. v6,v0,v18 /* Any zero bytes? */
185 ble cr5,L(prep_n_tail1)
186 bne cr6,L(count_tail1)
191 vcmpequb. v6,v1,v18 /* Any zero bytes? */
192 ble cr5,L(prep_n_tail2)
193 bne cr6,L(count_tail2)
198 vcmpequb. v6,v2,v18 /* Any zero bytes? */
199 ble cr5,L(prep_n_tail3)
200 bne cr6,L(count_tail3)
204 vcmpequb. v6,v3,v18 /* Any zero bytes? */
207 vctzlsbb r8,v6 /* Number of trailing zeroes */
208 cmpld r8,r5 /* r8 < n? */
215 sldi r10,r5,56 /* stxvl wants size in top 8 bits */
216 addi r11,r11,48 /* Offset */
217 stxvl 32+v3,r11,r10 /* Partial store */
218 #ifdef USE_AS_STPNCPY
219 /* Compute pointer to last byte copied into dest. */
225 beq cr6,L(n_tail1) /* Any zero bytes? */
226 vctzlsbb r8,v6 /* Number of trailing zeroes */
227 cmpld r8,r5 /* r8 < n? */
231 sldi r10,r5,56 /* stxvl wants size in top 8 bits */
232 stxvl 32+v0,r11,r10 /* Partial store */
233 #ifdef USE_AS_STPNCPY
234 /* Compute pointer to last byte copied into dest. */
240 beq cr6,L(n_tail2) /* Any zero bytes? */
241 vctzlsbb r8,v6 /* Number of trailing zeroes */
242 cmpld r8,r5 /* r8 < n? */
247 sldi r10,r5,56 /* stxvl wants size in top 8 bits */
248 addi r11,r11,16 /* offset */
249 stxvl 32+v1,r11,r10 /* Partial store */
250 #ifdef USE_AS_STPNCPY
251 /* Compute pointer to last byte copied into dest. */
257 beq cr6,L(n_tail3) /* Any zero bytes? */
258 vctzlsbb r8,v6 /* Number of trailing zeroes */
259 cmpld r8,r5 /* r8 < n? */
265 sldi r10,r5,56 /* stxvl wants size in top 8 bits */
266 addi r11,r11,32 /* Offset */
267 stxvl 32+v2,r11,r10 /* Partial store */
268 #ifdef USE_AS_STPNCPY
269 /* Compute pointer to last byte copied into dest. */
276 vctzlsbb r8,v6 /* Number of trailing zeroes */
278 addi r9,r8,1 /* Add null terminator */
279 sldi r10,r9,56 /* stxvl wants size in top 8 bits */
280 stxvl 32+v0,r11,r10 /* Partial store */
281 #ifdef USE_AS_STPNCPY
282 /* Compute pointer to last byte copied into dest. */
292 vctzlsbb r8,v6 /* Number of trailing zeroes */
294 addi r9,r8,1 /* Add null terminator */
296 sldi r10,r9,56 /* stxvl wants size in top 8 bits */
297 addi r11,r11,16 /* offset */
298 stxvl 32+v1,r11,r10 /* Partial store */
299 #ifdef USE_AS_STPNCPY
300 /* Compute pointer to last byte copied into dest. */
310 vctzlsbb r8,v6 /* Number of trailing zeroes */
312 addi r9,r8,1 /* Add null terminator */
315 sldi r10,r9,56 /* stxvl wants size in top 8 bits */
316 addi r11,r11,32 /* offset */
317 stxvl 32+v2,r11,r10 /* Partial store */
318 #ifdef USE_AS_STPNCPY
319 /* Compute pointer to last byte copied into dest. */
328 vctzlsbb r8,v6 /* Number of trailing zeroes */
330 addi r9,r8,1 /* Add null terminator */
334 sldi r10,r9,56 /* stxvl wants size in top 8 bits */
335 addi r11,r11,48 /* offset */
336 stxvl 32+v3,r11,r10 /* Partial store */
337 #ifdef USE_AS_STPNCPY
338 /* Compute pointer to last byte copied into dest. */
344 /* This code pads the remainder of dest with NULL bytes. For large sizes
345 memset gives better performance; the cutoff of 255 was chosen through experimentation.
349 bge L(zero_padding_memset)
351 L(zero_padding_loop):
352 cmpldi cr6,r5,16 /* Check if length was reached. */
353 ble cr6,L(zero_padding_end)
359 b L(zero_padding_loop)
362 sldi r10,r5,56 /* stxvl wants size in top 8 bits */
363 stxvl 32+v18,r11,r10 /* Partial store */
367 L(zero_padding_memset): /* Zero-pad by calling memset; the call is why a stack frame is set up below. */
368 std r30,-8(r1) /* Save r30 at -8(r1); the frame created below covers this slot. */
370 mr r30,r3 /* Keep the return value of strncpy in r30 while r3 is reused as an argument. */
371 /* Prepare the call to memset. */
372 mr r3,r11 /* Pointer to the area to be zero-filled. */
373 li r4,0 /* Byte to be written (zero). */
375 /* We delayed the creation of the stack frame, as well as the saving of
376 the link register, because only at this point are we sure that
377 doing so is actually needed. */
379 /* Save the link register. */
383 /* Create the stack frame. */
384 stdu r1,-FRAMESIZE(r1) /* Lower r1 by FRAMESIZE and store the old r1 at the new r1. */
385 cfi_adjust_cfa_offset(FRAMESIZE)
389 #ifndef MEMSET_is_local
393 ld r0,FRAMESIZE+16(r1) /* Load the doubleword at old r1 + 16 (presumably the saved link register; confirm against the save above). */
395 mr r3,r30 /* Restore the return value of strncpy, i.e.:
396 dest. For stpncpy, the return value is the
397 same as the return value of memset. */
398 ld r30,FRAMESIZE-8(r1) /* Restore r30 from the slot written at -8(r1) before the frame was created. */
399 /* Restore the stack frame. */
401 cfi_adjust_cfa_offset(-FRAMESIZE)
402 /* Restore the link register. */
408 #ifndef USE_AS_STPNCPY
409 libc_hidden_builtin_def (strncpy)