1 /* Optimized strcasecmp implementation for PowerPC64.
2 Copyright (C) 2016-2023 Free Software Foundation, Inc.
3 This file is part of the GNU C Library.
5 The GNU C Library is free software; you can redistribute it and/or
6 modify it under the terms of the GNU Lesser General Public
7 License as published by the Free Software Foundation; either
8 version 2.1 of the License, or (at your option) any later version.
10 The GNU C Library is distributed in the hope that it will be useful,
11 but WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 Lesser General Public License for more details.
15 You should have received a copy of the GNU Lesser General Public
16 License along with the GNU C Library; if not, see
17 <https://www.gnu.org/licenses/>. */
20 #include <locale-defines.h>
22 /* int [r3] strcasecmp (const char *s1 [r3], const char *s2 [r4] ) */
24 #ifndef USE_AS_STRNCASECMP
25 # define __STRCASECMP __strcasecmp
26 # define STRCASECMP strcasecmp
28 # define __STRCASECMP __strncasecmp
29 # define STRCASECMP strncasecmp
31 /* Convert 16 bytes to lowercase and compare */
35 vcmpgtub v8, v8, v2; \
36 vsel v4, v7, v4, v8; \
39 vcmpgtub v8, v8, v2; \
40 vsel v5, v7, v5, v8; \
44 * Get 16 bytes for unaligned case.
45 * reg1: Vector to hold next 16 bytes.
46 * reg2: Address to read from.
47 * reg3: Permute control vector.
48 * v8: Tmp vector used to mask unwanted bytes.
49 * v9: Tmp vector, 0 when null is found in the first 16 bytes
51 #ifdef __LITTLE_ENDIAN__
52 #define GET16BYTES(reg1, reg2, reg3) \
55 vperm v8, v8, reg1, reg3; \
56 vcmpequb. v8, v0, v8; \
65 vperm reg1, v9, reg1, reg3;
67 #define GET16BYTES(reg1, reg2, reg3) \
70 vperm v8, reg1, v8, reg3; \
71 vcmpequb. v8, v0, v8; \
80 vperm reg1, reg1, v9, reg3;
83 /* Check null in v4, v5 and convert to lower. */
84 #define CHECKNULLANDCONVERT() \
85 vcmpequb. v7, v0, v5; \
87 vcmpequb. v7, v0, v4; \
97 #ifdef USE_AS_STRNCASECMP
102 #define rRTN r3 /* Return value */
103 #define rSTR1 r10 /* 1st string */
104 #define rSTR2 r4 /* 2nd string */
105 #define rCHAR1 r6 /* Byte read from 1st string */
106 #define rCHAR2 r7 /* Byte read from 2nd string */
107 #define rADDR1 r8 /* Address (table offset) of tolower(rCHAR1); same register as rLWR1 */
108 #define rADDR2 r12 /* Address (table offset) of tolower(rCHAR2); same register as rLWR2 */
109 #define rLWR1 r8 /* Word tolower(rCHAR1); loading it overwrites rADDR1 (both are r8) */
110 #define rLWR2 r12 /* Word tolower(rCHAR2); loading it overwrites rADDR2 (both are r12) */
112 #define rLOC r11 /* Default locale address */
114 cmpd cr7, rRTN, rSTR2 /* cr7 = comparison of r3 and r4, which hold s1 and s2 per the prototype comment above.  */
116 /* Get locale address. */
117 ld rTMP, __libc_tsd_LOCALE@got@tprel(r2) /* Load from the GOT (addressed via r2) the thread-pointer-relative offset of __libc_tsd_LOCALE.  */
118 add rLOC, rTMP, __libc_tsd_LOCALE@tls /* rLOC = thread pointer + that offset, i.e. the address of the TLS variable.  */
124 #ifdef USE_AS_STRNCASECMP
128 blt cr7, L(bytebybyte)
132 /* Check for a null byte in the initial characters.
133 At most 16 chars are checked, depending on the alignment.
134 If a null is present, proceed byte by byte. */
136 #ifdef __LITTLE_ENDIAN__
137 lvsr v10, 0, rSTR1 /* Compute mask. */
138 vperm v9, v8, v4, v10 /* Mask bits that are not part of string. */
141 vperm v9, v4, v8, v10
143 vcmpequb. v9, v0, v9 /* Check for null bytes. */
144 bne cr6, L(bytebybyte)
146 /* Calculate alignment. */
147 #ifdef __LITTLE_ENDIAN__
149 vperm v9, v8, v5, v6 /* Mask bits that are not part of string. */
154 vcmpequb. v9, v0, v9 /* Check for null bytes. */
155 bne cr6, L(bytebybyte)
156 /* Check if the locale has non-ASCII case mappings. */
158 addi r6, rTMP,LOCALE_DATA_VALUES+_NL_CTYPE_NONASCII_CASE*SIZEOF_VALUES
161 beq cr7, L(bytebybyte)
163 /* Load vector registers with values used for TOLOWER. */
164 /* Load v1 = 0xbf, v2 = 0x19, v3 = 0x20 in each byte. */
173 andi. rADDR1, rSTR1, 0xF
177 /* Compute 16 bytes from previous two loads. */
178 #ifdef __LITTLE_ENDIAN__
179 vperm v4, v9, v4, v10
181 vperm v4, v4, v9, v10
184 andi. rADDR2, rSTR2, 0xF
188 /* Compute 16 bytes from previous two loads. */
189 #ifdef __LITTLE_ENDIAN__
195 CHECKNULLANDCONVERT()
202 #ifdef USE_AS_STRNCASECMP
207 andi. rADDR2, rSTR2, 0xF
208 addi rSTR1, rSTR1, -16
209 addi rSTR2, rSTR2, -16
211 #ifdef __LITTLE_ENDIAN__
216 /* There are 2 loops depending on the input alignment.
217 Each loop gets 16 bytes from s1 and s2, check for null,
218 convert to lowercase and compare. Loop till difference
221 addi rSTR1, rSTR1, 16
222 addi rSTR2, rSTR2, 16
223 #ifdef USE_AS_STRNCASECMP
225 blt cr7, L(bytebybyte)
229 GET16BYTES(v5, rSTR2, v6)
230 CHECKNULLANDCONVERT()
235 addi rSTR1, rSTR1, 16
236 addi rSTR2, rSTR2, 16
237 #ifdef USE_AS_STRNCASECMP
239 blt cr7, L(bytebybyte)
244 CHECKNULLANDCONVERT()
247 /* Calculate and return the difference. */
251 #ifdef __LITTLE_ENDIAN__
252 /* Count trailing zeros. */
261 /* Count leading zeros. */
273 #ifdef __LITTLE_ENDIAN__
274 /* Shift registers based on leading zero count. */
277 /* Merge and move to GPR. */
281 /* Place the characters that are different in first position. */
283 srdi rSTR2, rSTR2, 56
285 srdi rSTR1, rSTR1, 56
292 sldi rSTR2, rSTR2, 56
293 srdi rSTR2, rSTR2, 56
296 subf rRTN, rSTR1, rSTR2
301 /* OK. We've hit the end of the string. We need to be careful that
302 we don't compare two strings as different because of junk beyond
303 the end of the strings... */
306 #ifdef __LITTLE_ENDIAN__
307 /* Count trailing zeros. */
313 vcmpequb. v6, v6, v10
316 /* Count leading zeros. */
319 vcmpequb. v6, v6, v10
328 /* Calculate shift count based on the count of zeros. */
331 vsldoi v9, v0, v10, 1
336 /* Shift and remove junk after null character. */
337 #ifdef __LITTLE_ENDIAN__
344 /* Convert and compare 16 bytes. */
354 /* Unrolling loop for POWER: loads are done with 'lbz' plus
355 offset and string descriptors are only updated in the end
356 of loop unrolling. */
357 ld rLOC, LOCALE_CTYPE_TOLOWER(rLOC)
358 lbz rCHAR1, 0(rSTR1) /* Load char from s1 */
359 lbz rCHAR2, 0(rSTR2) /* Load char from s2 */
360 #ifdef USE_AS_STRNCASECMP
361 rldicl rTMP, r5, 62, 2
363 beq cr7, L(lessthan4)
367 cmpdi rCHAR1, 0 /* cr0.eq = (*s1 == '\0') */
368 sldi rADDR1, rCHAR1, 2 /* Calculate address for tolower(*s1): index * 4 (word entries) */
369 sldi rADDR2, rCHAR2, 2 /* Calculate address for tolower(*s2): index * 4 (word entries) */
370 lwzx rLWR1, rLOC, rADDR1 /* Load tolower(*s1) from the table at rLOC */
371 lwzx rLWR2, rLOC, rADDR2 /* Load tolower(*s2) from the table at rLOC */
372 cmpw cr1, rLWR1, rLWR2 /* r = tolower(*s1) == tolower(*s2) ? */
373 crorc 4*cr1+eq,eq,4*cr1+eq /* cr1.eq = cr0.eq | !cr1.eq, i.e. (*s1 == '\0') || (r == 0): set when the loop must stop */
378 sldi rADDR1, rCHAR1, 2 /* rADDR1 = rCHAR1 * 4: byte offset of a word-sized table entry */
379 sldi rADDR2, rCHAR2, 2 /* rADDR2 = rCHAR2 * 4 */
380 lwzx rLWR1, rLOC, rADDR1 /* rLWR1 = word at rLOC + rADDR1 (tolower of the s1 char) */
381 lwzx rLWR2, rLOC, rADDR2 /* rLWR2 = word at rLOC + rADDR2 (tolower of the s2 char) */
382 cmpw cr1, rLWR1, rLWR2 /* cr1 = comparison of the two lowered values */
383 crorc 4*cr1+eq,eq,4*cr1+eq /* cr1.eq = cr0.eq | !cr1.eq; cr0 is presumably a '\0' test of rCHAR1 made on a line not shown here */
388 sldi rADDR1, rCHAR1, 2 /* rADDR1 = rCHAR1 * 4: byte offset of a word-sized table entry */
389 sldi rADDR2, rCHAR2, 2 /* rADDR2 = rCHAR2 * 4 */
390 lwzx rLWR1, rLOC, rADDR1 /* rLWR1 = word at rLOC + rADDR1 (tolower of the s1 char) */
391 lwzx rLWR2, rLOC, rADDR2 /* rLWR2 = word at rLOC + rADDR2 (tolower of the s2 char) */
392 cmpw cr1, rLWR1, rLWR2 /* cr1 = comparison of the two lowered values */
393 crorc 4*cr1+eq,eq,4*cr1+eq /* cr1.eq = cr0.eq | !cr1.eq; cr0 is presumably a '\0' test of rCHAR1 made on a line not shown here */
398 /* Increment both string descriptors */
401 sldi rADDR1, rCHAR1, 2 /* rADDR1 = rCHAR1 * 4: byte offset of a word-sized table entry */
402 sldi rADDR2, rCHAR2, 2 /* rADDR2 = rCHAR2 * 4 */
403 lwzx rLWR1, rLOC, rADDR1 /* rLWR1 = word at rLOC + rADDR1 (tolower of the s1 char) */
404 lwzx rLWR2, rLOC, rADDR2 /* rLWR2 = word at rLOC + rADDR2 (tolower of the s2 char) */
405 cmpw cr1, rLWR1, rLWR2 /* cr1 = comparison of the two lowered values */
406 crorc 4*cr1+eq,eq,4*cr1+eq /* cr1.eq = cr0.eq | !cr1.eq; cr0 is presumably a '\0' test of rCHAR1 made on a line not shown here */
408 lbz rCHAR1, 0(rSTR1) /* Load char from s1 */
409 lbz rCHAR2, 0(rSTR2) /* Load char from s2 */
410 #ifdef USE_AS_STRNCASECMP
415 #ifdef USE_AS_STRNCASECMP
423 sldi rADDR1, rCHAR1, 2 /* rADDR1 = rCHAR1 * 4: byte offset of a word-sized table entry */
424 sldi rADDR2, rCHAR2, 2 /* rADDR2 = rCHAR2 * 4 */
425 lwzx rLWR1, rLOC, rADDR1 /* rLWR1 = word at rLOC + rADDR1 (tolower of the s1 char) */
426 lwzx rLWR2, rLOC, rADDR2 /* rLWR2 = word at rLOC + rADDR2 (tolower of the s2 char) */
427 cmpw cr1, rLWR1, rLWR2 /* cr1 = comparison of the two lowered values */
428 crorc 4*cr1+eq,eq,4*cr1+eq /* cr1.eq = cr0.eq | !cr1.eq; cr0 is presumably a '\0' test of rCHAR1 made on a line not shown here */
437 subf r0, rLWR2, rLWR1 /* r0 = rLWR1 - rLWR2 (subf subtracts its first source from its second): lowered s1 char minus lowered s2 char */
442 weak_alias (__STRCASECMP, STRCASECMP)
443 libc_hidden_builtin_def (__STRCASECMP)