1 /* Optimized strstr implementation for PowerPC64/POWER7.
2 Copyright (C) 2015-2017 Free Software Foundation, Inc.
3 This file is part of the GNU C Library.
5 The GNU C Library is free software; you can redistribute it and/or
6 modify it under the terms of the GNU Lesser General Public
7 License as published by the Free Software Foundation; either
8 version 2.1 of the License, or (at your option) any later version.
10 The GNU C Library is distributed in the hope that it will be useful,
11 but WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 Lesser General Public License for more details.
15 You should have received a copy of the GNU Lesser General Public
16 License along with the GNU C Library; if not, see
17 <http://www.gnu.org/licenses/>. */
21 /* Char * [r3] strstr (char *s [r3], char * pat[r4]) */
23 /* The performance gain is obtained using aligned memory access, load
24 * doubleword and usage of cmpb instruction for quicker comparison. */
29 /* STRLEN: symbol name this file uses for strlen.  For builds with no IFUNC
30 support, local calls should be made to the internal GLIBC symbol (created
by libc_hidden_builtin_def).
NOTE(review): two alternative definitions follow; the preprocessor
conditional that selects between them is not visible here.  */
32 # define STRLEN __GI_strlen
34 # define STRLEN strlen
39 /* STRNLEN: symbol name this file uses for strnlen.  For builds with no IFUNC
40 support, local calls should be made to the internal GLIBC symbol (created
by libc_hidden_builtin_def).  */
42 # define STRNLEN __GI_strnlen
44 # define STRNLEN __strnlen
/* STRCHR: symbol name this file uses for strchr, with the same choice
   between an internal (__GI_) name and the plain name as above.  */
50 # define STRCHR __GI_strchr
52 # define STRCHR strchr
/* Stack frame size: the minimum frame (FRAME_MIN_SIZE) plus 32 bytes.  The
   prologue stores four doublewords (r28-r31) at -8..-32 from the caller's
   r1, i.e. 32 bytes.  */
56 #define FRAMESIZE (FRAME_MIN_SIZE+32)
/* Prologue: copy LR into r0, save the caller's r28-r31 just below the
   caller's stack pointer and LR at 16(r1), then allocate a FRAMESIZE-byte
   stack frame.  */
60 mflr r0 /* Copy the link register (LR) into r0. */
61 std r31, -8(r1) /* Save caller's register r31. */
62 std r30, -16(r1) /* Save caller's register r30. */
63 std r29, -24(r1) /* Save caller's register r29. */
64 std r28, -32(r1) /* Save caller's register r28. */
65 std r0, 16(r1) /* Store the link register value at 16(r1). */
71 stdu r1, -FRAMESIZE(r1) /* Create the stack frame: store the old r1 at r1-FRAMESIZE and update r1 to that address. */
/* Record the FRAMESIZE change of the stack pointer in the unwind (CFI)
   information.  */
72 cfi_adjust_cfa_offset(FRAMESIZE)
/* Length checks and page-boundary checks made before the doubleword search.
   NOTE(review): the instructions that produce the values compared by the
   first two compares, and the branches acting on them, are not visible
   here -- confirm against the complete file.  */
87 cmpdi cr7, r3, 0 /* Is r3 zero, i.e. is the search string null (empty)?  Presumably r3 is a length here -- TODO confirm. */
96 cmpd cr7, r3, r31 /* Is len(r3) < len(r4)?  r31 holds the search string length. */
104 /* If the first char of the search string is not present. */
107 /* Register r28 is used to count the number of iterations. */
109 rldicl r8, r3, 0, 52 /* Page cross check: r8 = low 12 bits of r3 (offset within a 4096-byte page). */
110 cmpldi cr7, r8, 4096-16 /* Is the offset past the last 16 bytes' start (fewer than 16 bytes left in the page)? */
111 bgt cr7, L(bytebybyte) /* If so, use the byte-by-byte path. */
113 rldicl r8, r30, 0, 52 /* Same check for r30: r8 = low 12 bits of r30. */
114 cmpldi cr7, r8, 4096-16
115 bgt cr7, L(bytebybyte)
117 /* If len(r4) < 8, handle it in a different way. */
118 /* Shift position based on null and use cmpb. */
/* NOTE(review): the compare that sets cr7 for this branch is not visible
   here.  */
120 blt cr7, L(lessthan8)
122 /* Search strings with len(r4) >= 8 reach here.
   NOTE(review): the loads, labels and #else/#endif lines that belong to
   this path are not all visible here; the comments below describe only
   the instructions shown.  */
123 mr r8, r3 /* Keep a copy of r3 in r8 for future use. */
124 mr r4, r30 /* Restore r4 from r30. */
126 rlwinm r10, r30, 3, 26, 28 /* Calculate padding in bits: r10 = (r30 & 7) * 8. */
127 clrrdi r4, r4, 3 /* Clear the low 3 bits: make r4 aligned to 8. */
130 cmpdi cr7, r10, 0 /* Check if it is already aligned (padding == 0). */
132 #ifdef __LITTLE_ENDIAN__
133 srd r6, r6, r10 /* Shift right by the padding to discard unwanted bits. */
139 #ifdef __LITTLE_ENDIAN__
140 sld r9, r9, r10 /* Shift left by r10 to discard unwanted bits. */
144 or r6, r6, r9 /* Merge r6 and r9 to form the complete search string doubleword. */
147 rlwinm r10, r3, 3, 26, 28 /* r10 = (r3 & 7) * 8: misalignment of r3 in bits. */
150 cmpb r9, r0, r6 /* Byte-wise compare of r0 with r6 into r9: null check (assumes r0 is 0 -- TODO confirm). */
153 cmpb r9, r0, r5 /* Byte-wise compare of r0 with r5 into r9: null check (assumes r0 is 0 -- TODO confirm). */
154 #ifdef __LITTLE_ENDIAN__
162 li r12, -8 /* Initial shift value; advanced by 8 bits below. */
163 li r11, 72 /* Shift value. */
165 beq cr7, L(nextbyte1)
171 ldu r7, 8(r3) /* Load the next doubleword into r7 and advance r3 by 8. */
172 addi r12, r12, 8 /* Advance the shift by one byte (8 bits) and compare. */
174 #ifdef __LITTLE_ENDIAN__
175 srd r9, r5, r12 /* Shift r5 right by r12 bits. */
181 /* Form a single doubleword from a few bytes of the first load and of the second load. */
183 /* Check for a null byte in the formed doubleword. */
187 /* Use cmpb to compare the search string with the input string. */
196 /* There is a match of 8 bytes; check the next bytes. */
199 /* Update the next starting point, r8. */
206 rlwinm r10, r30, 3, 26, 28 /* Calculate padding in bits: r10 = (r30 & 7) * 8. */
209 cmpdi cr7, r10, 0 /* Check if it is already aligned (padding == 0). */
211 #ifdef __LITTLE_ENDIAN__
212 srd r6, r6, r10 /* Shift right by the padding to discard unwanted bits. */
224 #ifdef __LITTLE_ENDIAN__
225 sld r9, r9, r10 /* Shift left by r10 to discard unwanted bits. */
229 or r6, r6, r9 /* Merge r6 and r9 to form the complete search string doubleword. */
239 #ifdef __LITTLE_ENDIAN__
246 /* Form a single doubleword with a few bytes from the first and second loads. */
251 /* Check for a null byte in the formed doubleword. */
255 /* If the next 8 bytes don't match, start the search again. */
259 /* If the next 8 bytes match, load and compare the next 8. */
264 /* Start the search again. */
270 /* Count leading zeros and compare the partial doubleword. */
271 #ifdef __LITTLE_ENDIAN__
287 /* Start the search again if there is no match. */
289 /* If the words match, update the return values. */
298 /* Count leading zeros and compare the partial doubleword. */
299 #ifdef __LITTLE_ENDIAN__
327 /* When our iterations exceed ITERATIONS, fall back to the default. */
329 cmpdi cr7, r28, ITERATIONS /* Compare the iteration counter r28 with the limit. */
334 /* If the first char of the search string is not present. */
338 mr r4, r30 /* Restore r4 from r30. */
345 /* Handle search strings shorter than 8 bytes.
   NOTE(review): the loads, labels and #else/#endif lines that belong to
   this path are not all visible here; the comments below describe only
   the instructions shown.  */
352 rlwinm r10, r9, 3, 26, 28 /* Calculate padding in bits: r10 = (r9 & 7) * 8. */
353 srdi r8, r10, 3 /* Padding in bytes: r8 = r10 / 8. */
354 clrrdi r9, r9, 3 /* Clear the low 3 bits: align r9 to 8 (r9 presumably holds a copy of the search string pointer -- TODO confirm). */
356 cmpdi cr7, r10, 0 /* Check if it is already aligned (padding == 0). */
358 #ifdef __LITTLE_ENDIAN__
359 srd r6, r6, r10 /* Shift right by the padding to discard unwanted bits. */
364 cmpd cr7, r8, r31 /* Compare padding bytes (r8) with r31: is a next load needed? */
368 #ifdef __LITTLE_ENDIAN__
369 sld r7, r7, r10 /* Shift left by r10 to discard unwanted bits. */
373 or r6, r6, r7 /* Merge r6 and r7 to form the complete search string doubleword. */
376 rlwinm r10, r3, 3, 26, 28 /* r10 = (r3 & 7) * 8: misalignment of r3 in bits. */
377 clrrdi r7, r3, 3 /* r7 = r3 rounded down to an 8-byte boundary (r3 itself is unchanged). */
381 #ifdef __LITTLE_ENDIAN__
404 li r12, -8 /* Initial shift value; advanced by 8 bits below. */
405 li r11, 72 /* Shift value. */
407 addi r12, r12, 8 /* Advance r12 by 8 bits (mask for rotation). */
409 #ifdef __LITTLE_ENDIAN__
430 /* When our iterations exceed ITERATIONS, fall back to the default. */
432 cmpdi cr7, r28, ITERATIONS /* Compare the iteration counter r28 with the limit. */
437 /* If the first char of the search string is not present. */
447 /* Reached a null in r3, so skip the next load. */
453 /* Update the return values. */
458 /* Handling byte by byte.
   NOTE(review): the instructions of the byte-by-byte path and the labels
   of the return paths below are not visible here.  */
483 /* Handling return values. */
486 subf r3, r31, r3 /* r3 = r3 - r31: subtract the length of r4 from r3. */
491 mr r3, r29 /* Return the value held in r29. */
496 li r3, 0 /* Return NULL. */
/* Epilogue: release the FRAMESIZE-byte frame, then reload LR and r28-r31
   from the slots the prologue stored them in (16(r1) and -8..-32(r1)
   relative to the restored stack pointer).  */
507 addi r1, r1, FRAMESIZE /* Restore the stack pointer. */
/* Record the stack pointer change in the unwind (CFI) information.  */
508 cfi_adjust_cfa_offset(-FRAMESIZE)
509 ld r0, 16(r1) /* Reload the saved link register value into r0. */
510 ld r28, -32(r1) /* Restore caller's saved register r28. */
511 ld r29, -24(r1) /* Restore caller's saved register r29. */
512 ld r30, -16(r1) /* Restore caller's saved register r30. */
513 ld r31, -8(r1) /* Restore caller's saved register r31. */
514 mtlr r0 /* Move r0 back into the link register (mtlr itself does not branch). */
/* Create the internal GLIBC symbol for strstr (see the note on
   libc_hidden_builtin_def near the STRLEN definition).  */
517 libc_hidden_builtin_def (strstr)