1 /* Optimized strstr implementation for PowerPC64/POWER7.
2 Copyright (C) 2015-2023 Free Software Foundation, Inc.
3 This file is part of the GNU C Library.
5 The GNU C Library is free software; you can redistribute it and/or
6 modify it under the terms of the GNU Lesser General Public
7 License as published by the Free Software Foundation; either
8 version 2.1 of the License, or (at your option) any later version.
10 The GNU C Library is distributed in the hope that it will be useful,
11 but WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 Lesser General Public License for more details.
15 You should have received a copy of the GNU Lesser General Public
16 License along with the GNU C Library; if not, see
17 <https://www.gnu.org/licenses/>. */
21 /* char *[r3] strstr (char *s [r3], char *pat [r4]) */
23 /* The performance gain is obtained using aligned memory access, load
24 * doubleword and usage of cmpb instruction for quicker comparison. */
/* Name under which this implementation is emitted. */
29 # define STRSTR strstr
33 /* For builds with no IFUNC support, local calls should be made to internal
34 GLIBC symbol (created by libc_hidden_builtin_def). */
/* STRLEN is the symbol used for the string-length helper. */
/* STRLEN_is_local is a marker that is tested later with ifndef. */
/* NOTE(review): the conditionals selecting between the two STRLEN definitions are not visible here - confirm. */
36 # define STRLEN __GI_strlen
37 # define STRLEN_is_local
39 # define STRLEN strlen
44 /* For builds with no IFUNC support, local calls should be made to internal
45 GLIBC symbol (created by libc_hidden_builtin_def). */
/* Same scheme for the bounded-length helper: symbol name plus an is_local marker. */
47 # define STRNLEN __GI_strnlen
48 # define STRNLEN_is_local
50 # define STRNLEN __strnlen
/* Same scheme for the character-search helper. */
56 # define STRCHR __GI_strchr
57 # define STRCHR_is_local
59 # define STRCHR strchr
/* Frame size: FRAME_MIN_SIZE plus 32 bytes, matching the four 8-byte saves of r28-r31 done in the prologue. */
63 #define FRAMESIZE (FRAME_MIN_SIZE+32)
65 /* Can't be ENTRY_TOCLESS due to calling __strstr_ppc which uses r2. */
/* Prologue: save LR and the non-volatile registers r28-r31, then allocate the frame. */
/* The saves are made at negative offsets from the incoming r1, before stdu moves r1 down. */
68 mflr r0 /* Copy the link register LR into r0. */
69 std r31, -8(r1) /* Save caller's register r31. */
70 std r30, -16(r1) /* Save caller's register r30. */
71 std r29, -24(r1) /* Save caller's register r29. */
72 std r28, -32(r1) /* Save caller's register r28. */
73 std r0, 16(r1) /* Store the link register value at 16(r1). */
/* stdu stores the old r1 at the new stack top and updates r1 in one instruction. */
79 stdu r1, -FRAMESIZE(r1) /* Create the stack frame. */
/* Tell the unwinder that the CFA is now FRAMESIZE bytes further from r1. */
80 cfi_adjust_cfa_offset(FRAMESIZE)
/* Early checks and dispatch to the three search strategies. */
/* NOTE(review): what each is_local guard encloses is not visible here - confirm against the full file. */
93 #ifndef STRLEN_is_local
/* NOTE(review): presumably r3 holds the search-string length here, so zero means an empty pattern - confirm. */
97 cmpdi cr7, r3, 0 /* If search str is null (r3 == 0). */
104 #ifndef STRNLEN_is_local
/* cmpd is a signed 64-bit compare. r31 is treated as len(r4) by the comment below. */
108 cmpd cr7, r3, r31 /* If len(r3) < len(r4). */
113 #ifndef STRCHR_is_local
118 /* If first char of search str is not present. */
121 /* Reg r28 is used to count the number of iterations. */
/* rldicl with shift 0 and mask start 52 keeps only the low 12 bits, i.e. the offset inside a 4096-byte page. */
123 rldicl r8, r3, 0, 52 /* Page cross check. */
124 cmpldi cr7, r8, 4096-16 /* Unsigned compare of the page offset with 4096-16. */
125 bgt cr7, L(bytebybyte) /* Offset above 4096-16: use the byte-by-byte path. */
/* Same page-offset test, applied to r30. */
127 rldicl r8, r30, 0, 52
128 cmpldi cr7, r8, 4096-16
129 bgt cr7, L(bytebybyte)
131 /* If len(r4) < 8 handle in a different way. */
132 /* Shift position based on null and use cmpb. */
/* NOTE(review): the compare that sets cr7 for this branch is not visible here. */
134 blt cr7, L(lessthan8)
136 /* Len(r4) >= 8 reaches here. */
137 mr r8, r3 /* Save r3 for future use. */
138 mr r4, r30 /* Restore r4 from r30. */
/* rlwinm with shift 3 and mask 26..28 yields (r30 & 7) * 8, the misalignment expressed in bits. */
140 rlwinm r10, r30, 3, 26, 28 /* Calculate padding in bits. */
141 clrrdi r4, r4, 3 /* Make r4 aligned to 8. */
144 cmpdi cr7, r10, 0 /* Check if it is already aligned. */
146 #ifdef __LITTLE_ENDIAN__
147 srd r6, r6, r10 /* Discard unwanted bits. */
153 #ifdef __LITTLE_ENDIAN__
154 sld r9, r9, r10 /* Discard unwanted bits. */
158 or r6, r6, r9 /* Form complete search str. */
/* Same misalignment computation, this time for r3. */
161 rlwinm r10, r3, 3, 26, 28
/* cmpb sets 0xff in every byte of the result where the two source bytes are equal. */
/* NOTE(review): presumably r0 holds zero at this point - confirm. */
164 cmpb r9, r0, r6 /* Check if input has null. */
167 cmpb r9, r0, r5 /* Check if input has null. */
168 #ifdef __LITTLE_ENDIAN__
/* The two initial values sum to 64. Presumably complementary shift counts - confirm. */
176 li r12, -8 /* Shift values. */
177 li r11, 72 /* Shift values. */
179 beq cr7, L(nextbyte1)
185 ldu r7, 8(r3) /* Load next dw and advance r3 by 8. */
186 addi r12, r12, 8 /* Shift one byte and compare. */
188 #ifdef __LITTLE_ENDIAN__
189 srd r9, r5, r12 /* Shift r5 right by r12 bits. */
195 /* Form single dw from few bytes on first load and second load. */
197 /* Check for null in the formed dw. */
201 /* Cmpb search str and input str. */
210 /* There is a match of 8 bytes, check next bytes. */
213 /* Update next starting point r8. */
220 rlwinm r10, r30, 3, 26, 28 /* Calculate padding in bits. */
223 cmpdi cr7, r10, 0 /* Check if it is already aligned. */
225 #ifdef __LITTLE_ENDIAN__
226 srd r6, r6, r10 /* Discard unwanted bits. */
238 #ifdef __LITTLE_ENDIAN__
239 sld r9, r9, r10 /* Discard unwanted bits. */
243 or r6, r6, r9 /* Form complete search str. */
253 #ifdef __LITTLE_ENDIAN__
260 /* Form single dw with few bytes from first and second load. */
265 /* Check for null in the formed dw. */
269 /* If the next 8 bytes do not match, start search again. */
273 /* If the next 8 bytes match, load and compare next 8. */
278 /* Start the search again. */
284 /* Count leading zeros and compare partial dw. */
285 #ifdef __LITTLE_ENDIAN__
301 /* Start search again if there is no match. */
303 /* If the words match, update return values. */
312 /* Count leading zeros and compare partial dw. */
313 #ifdef __LITTLE_ENDIAN__
341 /* When our iterations exceed ITERATIONS, fall back to default. */
/* r28 is the iteration counter. ITERATIONS is defined outside the lines visible here. */
343 cmpdi cr7, r28, ITERATIONS
347 #ifndef STRCHR_is_local
350 /* If first char of search str is not present. */
354 mr r4, r30 /* Restore r4 from r30. */
361 /* Handle search strings shorter than 8 bytes. */
/* NOTE(review): presumably r9 holds a copy of the search-string pointer here - confirm. */
/* rlwinm with shift 3 and mask 26..28 yields (r9 & 7) * 8, the misalignment expressed in bits. */
368 rlwinm r10, r9, 3, 26, 28 /* Calculate padding in bits. */
369 srdi r8, r10, 3 /* Padding in bytes (r10 / 8). */
370 clrrdi r9, r9, 3 /* Align r9 down to a multiple of 8. */
372 cmpdi cr7, r10, 0 /* Check if it is already aligned. */
374 #ifdef __LITTLE_ENDIAN__
375 srd r6, r6, r10 /* Discard unwanted bits. */
/* Compares the padding in bytes (r8) with r31 to decide whether a second doubleword is needed. */
380 cmpd cr7, r8, r31 /* Next load needed? */
384 #ifdef __LITTLE_ENDIAN__
385 sld r7, r7, r10 /* Discard unwanted bits. */
389 or r6, r6, r7 /* Form complete search str. */
/* Misalignment of r3 in bits, computed the same way as above. */
392 rlwinm r10, r3, 3, 26, 28
393 clrrdi r7, r3, 3 /* r7 = r3 aligned down to 8, r3 itself is unchanged. */
397 #ifdef __LITTLE_ENDIAN__
/* The two initial values sum to 64. Presumably complementary shift counts - confirm. */
420 li r12, -8 /* Shift values. */
421 li r11, 72 /* Shift values. */
423 addi r12, r12, 8 /* Advance the shift count by one byte (8 bits). */
425 #ifdef __LITTLE_ENDIAN__
446 /* When our iterations exceed ITERATIONS, fall back to default. */
/* r28 is the iteration counter. ITERATIONS is defined outside the lines visible here. */
448 cmpdi cr7, r28, ITERATIONS
452 #ifndef STRCHR_is_local
455 /* If first char of search str is not present. */
465 /* Reached null in r3, so skip next load. */
471 /* Update return values. */
476 /* Handling byte by byte. */
501 /* Handling return values. */
/* subf computes r3 = r3 - r31. r31 is the value compared as len(r4) in the early length check. */
504 subf r3, r31, r3 /* Subtract len of r4 (r31) from r3. */
/* NOTE(review): presumably r29 holds the original s argument - confirm. */
509 mr r3, r29 /* Return the value held in r29. */
514 li r3, 0 /* Return NULL. */
/* Epilogue: pop the frame, then reload LR and r28-r31 from the offsets used in the prologue. */
525 addi r1, r1, FRAMESIZE /* Restore stack pointer. */
526 cfi_adjust_cfa_offset(-FRAMESIZE)
527 ld r0, 16(r1) /* Reload the saved link register value. */
528 ld r28, -32(r1) /* Restore caller's saved register r28. */
529 ld r29, -24(r1) /* Restore caller's saved register r29. */
530 ld r30, -16(r1) /* Restore caller's saved register r30. */
531 ld r31, -8(r1) /* Restore caller's saved register r31. */
/* mtlr only copies r0 into LR. The return branch itself is a separate instruction. */
532 mtlr r0 /* Move the return address back into LR. */
/* Declares the hidden internal alias for strstr. */
535 libc_hidden_builtin_def (strstr)