1 /* Highly optimized version for x86-64.
2 Copyright (C) 1999-2014 Free Software Foundation, Inc.
3 This file is part of the GNU C Library.
4 Based on i686 version contributed by Ulrich Drepper
5 <drepper@cygnus.com>, 1999.
6 Updated with SSE2 support contributed by Intel Corporation.
8 The GNU C Library is free software; you can redistribute it and/or
9 modify it under the terms of the GNU Lesser General Public
10 License as published by the Free Software Foundation; either
11 version 2.1 of the License, or (at your option) any later version.
13 The GNU C Library is distributed in the hope that it will be useful,
14 but WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16 Lesser General Public License for more details.
18 You should have received a copy of the GNU Lesser General Public
19 License along with the GNU C Library; if not, see
20 <http://www.gnu.org/licenses/>. */
23 #include "asm-syntax.h"
25 #undef UPDATE_STRNCMP_COUNTER
32 /* The simplified code below is not set up to handle strncmp() so far.
33 Should this become necessary it has to be implemented. For now
34 just report the problem. */
36 # error "strncmp not implemented so far"
39 /* Since the counter, %r11, is unsigned, we branch to strcmp_exitz
40 if the new counter > the old one or is 0. */
41 # define UPDATE_STRNCMP_COUNTER \
42 /* %r9 = number of bytes left to compare: counter (%r11) + %rcx - 16 */ \
43 lea -16(%rcx, %r11), %r9; \
45 jb LABEL(strcmp_exitz); /* new counter > old counter: nothing left to compare */ \
47 je LABEL(strcmp_exitz); /* new counter is 0: nothing left to compare */ \
50 #elif defined USE_AS_STRCASECMP_L
51 # include "locale-defines.h"
53 /* No support for strcasecmp outside libc so far since it is not needed. */
55 # error "strcasecmp_l not implemented so far"
58 # define UPDATE_STRNCMP_COUNTER
59 #elif defined USE_AS_STRNCASECMP_L
60 # include "locale-defines.h"
62 /* No support for strncasecmp outside libc so far since it is not needed. */
64 # error "strncasecmp_l not implemented so far"
67 # define UPDATE_STRNCMP_COUNTER \
68 /* %r9 = number of bytes left to compare: counter (%r11) + %rcx - 16 */ \
69 lea -16(%rcx, %r11), %r9; \
71 jb LABEL(strcmp_exitz); /* NOTE(review): presumably taken when the new counter exceeds the old one - confirm */ \
73 je LABEL(strcmp_exitz); /* NOTE(review): presumably taken when the new counter is 0 - confirm */ \
76 # define UPDATE_STRNCMP_COUNTER
78 # define STRCMP strcmp
85 .section .text.ssse3,"ax",@progbits
88 #ifdef USE_AS_STRCASECMP_L
90 # define ENTRY2(name) ENTRY (name)
91 # define END2(name) END (name)
95 movq __libc_tsd_LOCALE@gottpoff(%rip),%rax /* %rax = TLS offset of __libc_tsd_LOCALE, read from the GOT */
96 mov %fs:(%rax),%RDX_LP /* load the thread-local locale pointer into %RDX_LP */
98 // XXX These 5 padding bytes should be placed before the function
100 .byte 0x0f,0x1f,0x44,0x00,0x00 /* 5-byte NOP: nopl 0(%rax,%rax,1) */
102 # ifndef NO_NOLOCALE_ALIAS
103 weak_alias (__strcasecmp, strcasecmp)
104 libc_hidden_def (__strcasecmp)
106 /* FALLTHROUGH to strcasecmp_l, with the locale now in %RDX_LP. */
107 #elif defined USE_AS_STRNCASECMP_L
109 # define ENTRY2(name) ENTRY (name)
110 # define END2(name) END (name)
113 ENTRY2 (__strncasecmp)
114 movq __libc_tsd_LOCALE@gottpoff(%rip),%rax /* %rax = TLS offset of __libc_tsd_LOCALE, read from the GOT */
115 mov %fs:(%rax),%RCX_LP /* load the thread-local locale pointer into %RCX_LP */
117 // XXX These 5 padding bytes should be placed before the function
119 .byte 0x0f,0x1f,0x44,0x00,0x00 /* 5-byte NOP: nopl 0(%rax,%rax,1) */
121 # ifndef NO_NOLOCALE_ALIAS
122 weak_alias (__strncasecmp, strncasecmp)
123 libc_hidden_def (__strncasecmp)
125 /* FALLTHROUGH to strncasecmp_l, with the locale now in %RCX_LP. */
130 /* Simple version since we can't use SSE registers in ld.so. */
131 L(oop): movb (%rdi), %al
142 L(neq): movl $1, %eax
147 #else /* !IS_IN (libc) */
148 # ifdef USE_AS_STRCASECMP_L
149 /* We have to fall back on the C implementation for locales
150 with encodings not matching ASCII for single bytes. */
151 # if LOCALE_T___LOCALES != 0 || LC_CTYPE != 0
152 mov LOCALE_T___LOCALES+LC_CTYPE*LP_SIZE(%rdx), %RAX_LP
156 testl $1, LOCALE_DATA_VALUES+_NL_CTYPE_NONASCII_CASE*SIZEOF_VALUES(%rax)
157 jne __strcasecmp_l_nonascii
158 # elif defined USE_AS_STRNCASECMP_L
159 /* We have to fall back on the C implementation for locales
160 with encodings not matching ASCII for single bytes. */
161 # if LOCALE_T___LOCALES != 0 || LC_CTYPE != 0
162 mov LOCALE_T___LOCALES+LC_CTYPE*LP_SIZE(%rcx), %RAX_LP
166 testl $1, LOCALE_DATA_VALUES+_NL_CTYPE_NONASCII_CASE*SIZEOF_VALUES(%rax)
167 jne __strncasecmp_l_nonascii
171 * This implementation uses SSE to compare up to 16 bytes at a time.
173 # if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
175 je LABEL(strcmp_exitz)
182 /* Use 64bit AND here to avoid long NOP padding. */
183 and $0x3f, %rcx /* rsi alignment in cache line */
184 and $0x3f, %rax /* rdi alignment in cache line */
185 # if defined USE_AS_STRCASECMP_L || defined USE_AS_STRNCASECMP_L
186 .section .rodata.cst16,"aM",@progbits,16
189 .quad 0x4040404040404040
190 .quad 0x4040404040404040
192 .quad 0x5b5b5b5b5b5b5b5b
193 .quad 0x5b5b5b5b5b5b5b5b
195 .quad 0x2020202020202020
196 .quad 0x2020202020202020
198 movdqa .Lbelowupper(%rip), %xmm5
199 # define UCLOW_reg %xmm5
200 movdqa .Ltopupper(%rip), %xmm6
201 # define UCHIGH_reg %xmm6
202 movdqa .Ltouppermask(%rip), %xmm7
203 # define LCQWORD_reg %xmm7
206 ja LABEL(crosscache) /* rsi: 16-byte load will cross cache line */
208 ja LABEL(crosscache) /* rdi: 16-byte load will cross cache line */
211 movhpd 8(%rdi), %xmm1
212 movhpd 8(%rsi), %xmm2
213 # if defined USE_AS_STRCASECMP_L || defined USE_AS_STRNCASECMP_L
214 # define TOLOWER(reg1, reg2) /* builds per-byte range masks for reg1 and reg2; clobbers %xmm8-%xmm11 */ \
215 movdqa reg1, %xmm8; /* %xmm8 = copy of reg1 */ \
216 movdqa UCHIGH_reg, %xmm9; /* %xmm9 = upper-bound constant */ \
217 movdqa reg2, %xmm10; /* %xmm10 = copy of reg2 */ \
218 movdqa UCHIGH_reg, %xmm11; /* %xmm11 = upper-bound constant */ \
219 pcmpgtb UCLOW_reg, %xmm8; /* %xmm8 = 0xff where reg1 byte > lower bound (signed compare) */ \
220 pcmpgtb reg1, %xmm9; /* %xmm9 = 0xff where upper bound > reg1 byte (signed compare) */ \
221 pcmpgtb UCLOW_reg, %xmm10; /* %xmm10 = 0xff where reg2 byte > lower bound */ \
222 pcmpgtb reg2, %xmm11; /* %xmm11 = 0xff where upper bound > reg2 byte */ \
224 pand %xmm11, %xmm10; /* %xmm10 = 0xff where reg2 byte lies strictly between both bounds */ \
225 pand LCQWORD_reg, %xmm8; /* reduce the reg1 mask to the LCQWORD_reg bits */ \
226 pand LCQWORD_reg, %xmm10; /* reduce the reg2 mask to the LCQWORD_reg bits */ \
229 TOLOWER (%xmm1, %xmm2)
231 # define TOLOWER(reg1, reg2)
233 pxor %xmm0, %xmm0 /* clear %xmm0 for null char checks */
234 pcmpeqb %xmm1, %xmm0 /* %xmm0 = 0xff in every byte where %xmm1 holds a null char */
235 pcmpeqb %xmm2, %xmm1 /* compare first 16 bytes for equality (0xff where equal) */
236 psubb %xmm0, %xmm1 /* equality mask minus null mask: 0xff only where equal and non-null */
238 sub $0xffff, %edx /* if first 16 bytes are the same, edx was 0xffff and is now 0 */
239 jnz LABEL(less16bytes) /* If not, find the differing byte or the null char */
240 # if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
242 jbe LABEL(strcmp_exitz) /* finish comparison */
244 add $16, %rsi /* prepare to search next 16 bytes */
245 add $16, %rdi /* prepare to search next 16 bytes */
248 * Determine source and destination string offsets from 16-byte alignment.
249 * Use relative offset difference between the two to determine which case
254 and $0xfffffffffffffff0, %rsi /* force %rsi is 16 byte aligned */
255 and $0xfffffffffffffff0, %rdi /* force %rdi is 16 byte aligned */
256 mov $0xffff, %edx /* for equivalent offset */
258 and $0xf, %ecx /* offset of rsi */
259 and $0xf, %eax /* offset of rdi */
261 je LABEL(ashr_0) /* rsi and rdi relative offset same */
263 mov %edx, %r8d /* r8d is offset flag for exit tail */
269 lea LABEL(unaligned_table)(%rip), %r10
270 movslq (%r10, %r9,4), %r9
271 lea (%r10, %r9), %r10
272 jmp *%r10 /* jump to corresponding case */
275 * The following cases will be handled by ashr_0
276 * rcx(offset of rsi) rax(offset of rdi) relative offset corresponding case
277 * n(0~15) n(0~15) 15(15+ n-n) ashr_0
283 pxor %xmm0, %xmm0 /* clear %xmm0 for null char check */
284 pcmpeqb %xmm1, %xmm0 /* Any null chars? */
285 # if !defined USE_AS_STRCASECMP_L && !defined USE_AS_STRNCASECMP_L
286 pcmpeqb (%rdi), %xmm1 /* compare 16 bytes for equality */
289 TOLOWER (%xmm1, %xmm2)
290 pcmpeqb %xmm2, %xmm1 /* compare 16 bytes for equality */
292 psubb %xmm0, %xmm1 /* packed sub of comparison results*/
294 shr %cl, %edx /* adjust 0xffff for offset */
295 shr %cl, %r9d /* adjust for 16-byte offset */
298 * edx must equal r9d if the remaining (16-rcx) bytes are equal to
299 * those starting at (16-rax) and no null char was seen.
301 jne LABEL(less32bytes) /* mismatch or null char */
302 UPDATE_STRNCMP_COUNTER
305 pxor %xmm0, %xmm0 /* clear xmm0, may have changed above */
308 * Now both strings are aligned at 16-byte boundary. Loop over strings
309 * checking 32-bytes per iteration.
313 movdqa (%rsi, %rcx), %xmm1
314 movdqa (%rdi, %rcx), %xmm2
315 TOLOWER (%xmm1, %xmm2)
322 jnz LABEL(exit) /* mismatch or null char seen */
324 # if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
326 jbe LABEL(strcmp_exitz)
329 movdqa (%rsi, %rcx), %xmm1
330 movdqa (%rdi, %rcx), %xmm2
331 TOLOWER (%xmm1, %xmm2)
339 # if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
341 jbe LABEL(strcmp_exitz)
344 jmp LABEL(loop_ashr_0)
347 * The following cases will be handled by ashr_1
348 * rcx(offset of rsi) rax(offset of rdi) relative offset corresponding case
349 * n(15) n -15 0(15 +(n-15) - n) ashr_1
356 pcmpeqb %xmm1, %xmm0 /* Any null chars? */
357 pslldq $15, %xmm2 /* shift first string to align with second */
358 TOLOWER (%xmm1, %xmm2)
359 pcmpeqb %xmm1, %xmm2 /* compare 16 bytes for equality */
360 psubb %xmm0, %xmm2 /* packed sub of comparison results*/
362 shr %cl, %edx /* adjust 0xffff for offset */
363 shr %cl, %r9d /* adjust for 16-byte offset */
365 jnz LABEL(less32bytes) /* mismatch or null char seen */
367 UPDATE_STRNCMP_COUNTER
370 mov $16, %rcx /* index for loads*/
371 mov $1, %r9d /* byte position left over from less32bytes case */
373 * Setting up the %r10 value allows us to detect crossing a page boundary.
374 * When %r10 goes positive we have crossed a page boundary and
375 * need to do a nibble.
378 and $0xfff, %r10 /* offset into 4K page */
379 sub $0x1000, %r10 /* subtract 4K pagesize */
384 jg LABEL(nibble_ashr_1) /* cross page boundary */
386 LABEL(gobble_ashr_1):
387 movdqa (%rsi, %rcx), %xmm1
388 movdqa (%rdi, %rcx), %xmm2
389 movdqa %xmm2, %xmm4 /* store for next cycle */
394 por %xmm3, %xmm2 /* merge into one 16byte value */
396 palignr $1, %xmm3, %xmm2 /* merge into one 16byte value */
398 TOLOWER (%xmm1, %xmm2)
407 # if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
409 jbe LABEL(strcmp_exitz)
415 jg LABEL(nibble_ashr_1) /* cross page boundary */
417 movdqa (%rsi, %rcx), %xmm1
418 movdqa (%rdi, %rcx), %xmm2
419 movdqa %xmm2, %xmm4 /* store for next cycle */
424 por %xmm3, %xmm2 /* merge into one 16byte value */
426 palignr $1, %xmm3, %xmm2 /* merge into one 16byte value */
428 TOLOWER (%xmm1, %xmm2)
437 # if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
439 jbe LABEL(strcmp_exitz)
443 jmp LABEL(loop_ashr_1)
446 * Nibble avoids loads across a page boundary. This is to avoid a potential
447 * access into unmapped memory.
450 LABEL(nibble_ashr_1):
451 pcmpeqb %xmm3, %xmm0 /* check nibble for null char */
454 jnz LABEL(ashr_1_exittail) /* found a null char */
456 # if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
458 jbe LABEL(ashr_1_exittail)
462 sub $0x1000, %r10 /* subtract 4K from %r10 */
463 jmp LABEL(gobble_ashr_1) /* no null char in the nibble: continue with the 16-byte loads */
466 * Once a null char is found, determine if there is a string mismatch
467 * before the null char.
470 LABEL(ashr_1_exittail):
471 movdqa (%rsi, %rcx), %xmm1
477 * The following cases will be handled by ashr_2
478 * rcx(offset of rsi) rax(offset of rdi) relative offset corresponding case
479 * n(14~15) n -14 1(15 +(n-14) - n) ashr_2
488 TOLOWER (%xmm1, %xmm2)
495 jnz LABEL(less32bytes)
497 UPDATE_STRNCMP_COUNTER
500 mov $16, %rcx /* index for loads */
501 mov $2, %r9d /* byte position left over from less32bytes case */
503 * Setting up the %r10 value allows us to detect crossing a page boundary.
504 * When %r10 goes positive we have crossed a page boundary and
505 * need to do a nibble.
508 and $0xfff, %r10 /* offset into 4K page */
509 sub $0x1000, %r10 /* subtract 4K pagesize */
514 jg LABEL(nibble_ashr_2)
516 LABEL(gobble_ashr_2):
517 movdqa (%rsi, %rcx), %xmm1
518 movdqa (%rdi, %rcx), %xmm2
524 por %xmm3, %xmm2 /* merge into one 16byte value */
526 palignr $2, %xmm3, %xmm2 /* merge into one 16byte value */
528 TOLOWER (%xmm1, %xmm2)
537 # if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
539 jbe LABEL(strcmp_exitz)
546 jg LABEL(nibble_ashr_2) /* cross page boundary */
548 movdqa (%rsi, %rcx), %xmm1
549 movdqa (%rdi, %rcx), %xmm2
555 por %xmm3, %xmm2 /* merge into one 16byte value */
557 palignr $2, %xmm3, %xmm2 /* merge into one 16byte value */
559 TOLOWER (%xmm1, %xmm2)
568 # if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
570 jbe LABEL(strcmp_exitz)
575 jmp LABEL(loop_ashr_2)
578 LABEL(nibble_ashr_2):
579 pcmpeqb %xmm3, %xmm0 /* check nibble for null char */
582 jnz LABEL(ashr_2_exittail)
584 # if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
586 jbe LABEL(ashr_2_exittail)
591 jmp LABEL(gobble_ashr_2)
594 LABEL(ashr_2_exittail):
595 movdqa (%rsi, %rcx), %xmm1
601 * The following cases will be handled by ashr_3
602 * rcx(offset of rsi) rax(offset of rdi) relative offset corresponding case
603 * n(13~15) n -13 2(15 +(n-13) - n) ashr_3
612 TOLOWER (%xmm1, %xmm2)
619 jnz LABEL(less32bytes)
622 UPDATE_STRNCMP_COUNTER
625 mov $16, %rcx /* index for loads */
626 mov $3, %r9d /* byte position left over from less32bytes case */
628 * Setting up the %r10 value allows us to detect crossing a page boundary.
629 * When %r10 goes positive we have crossed a page boundary and
630 * need to do a nibble.
633 and $0xfff, %r10 /* offset into 4K page */
634 sub $0x1000, %r10 /* subtract 4K pagesize */
639 jg LABEL(nibble_ashr_3)
641 LABEL(gobble_ashr_3):
642 movdqa (%rsi, %rcx), %xmm1
643 movdqa (%rdi, %rcx), %xmm2
649 por %xmm3, %xmm2 /* merge into one 16byte value */
651 palignr $3, %xmm3, %xmm2 /* merge into one 16byte value */
653 TOLOWER (%xmm1, %xmm2)
662 # if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
664 jbe LABEL(strcmp_exitz)
671 jg LABEL(nibble_ashr_3) /* cross page boundary */
673 movdqa (%rsi, %rcx), %xmm1
674 movdqa (%rdi, %rcx), %xmm2
680 por %xmm3, %xmm2 /* merge into one 16byte value */
682 palignr $3, %xmm3, %xmm2 /* merge into one 16byte value */
684 TOLOWER (%xmm1, %xmm2)
693 # if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
695 jbe LABEL(strcmp_exitz)
700 jmp LABEL(loop_ashr_3)
703 LABEL(nibble_ashr_3):
704 pcmpeqb %xmm3, %xmm0 /* check nibble for null char */
707 jnz LABEL(ashr_3_exittail)
709 # if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
711 jbe LABEL(ashr_3_exittail)
716 jmp LABEL(gobble_ashr_3)
719 LABEL(ashr_3_exittail):
720 movdqa (%rsi, %rcx), %xmm1
726 * The following cases will be handled by ashr_4
727 * rcx(offset of rsi) rax(offset of rdi) relative offset corresponding case
728 * n(12~15) n -12 3(15 +(n-12) - n) ashr_4
737 TOLOWER (%xmm1, %xmm2)
744 jnz LABEL(less32bytes)
747 UPDATE_STRNCMP_COUNTER
750 mov $16, %rcx /* index for loads */
751 mov $4, %r9d /* byte position left over from less32bytes case */
753 * Setting up the %r10 value allows us to detect crossing a page boundary.
754 * When %r10 goes positive we have crossed a page boundary and
755 * need to do a nibble.
758 and $0xfff, %r10 /* offset into 4K page */
759 sub $0x1000, %r10 /* subtract 4K pagesize */
764 jg LABEL(nibble_ashr_4)
766 LABEL(gobble_ashr_4):
767 movdqa (%rsi, %rcx), %xmm1
768 movdqa (%rdi, %rcx), %xmm2
774 por %xmm3, %xmm2 /* merge into one 16byte value */
776 palignr $4, %xmm3, %xmm2 /* merge into one 16byte value */
778 TOLOWER (%xmm1, %xmm2)
787 # if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
789 jbe LABEL(strcmp_exitz)
796 jg LABEL(nibble_ashr_4) /* cross page boundary */
798 movdqa (%rsi, %rcx), %xmm1
799 movdqa (%rdi, %rcx), %xmm2
805 por %xmm3, %xmm2 /* merge into one 16byte value */
807 palignr $4, %xmm3, %xmm2 /* merge into one 16byte value */
809 TOLOWER (%xmm1, %xmm2)
818 # if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
820 jbe LABEL(strcmp_exitz)
825 jmp LABEL(loop_ashr_4)
828 LABEL(nibble_ashr_4):
829 pcmpeqb %xmm3, %xmm0 /* check nibble for null char */
832 jnz LABEL(ashr_4_exittail)
834 # if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
836 jbe LABEL(ashr_4_exittail)
841 jmp LABEL(gobble_ashr_4)
844 LABEL(ashr_4_exittail):
845 movdqa (%rsi, %rcx), %xmm1
851 * The following cases will be handled by ashr_5
852 * rcx(offset of rsi) rax(offset of rdi) relative offset corresponding case
853 * n(11~15) n - 11 4(15 +(n-11) - n) ashr_5
862 TOLOWER (%xmm1, %xmm2)
869 jnz LABEL(less32bytes)
872 UPDATE_STRNCMP_COUNTER
875 mov $16, %rcx /* index for loads */
876 mov $5, %r9d /* byte position left over from less32bytes case */
878 * Setting up the %r10 value allows us to detect crossing a page boundary.
879 * When %r10 goes positive we have crossed a page boundary and
880 * need to do a nibble.
883 and $0xfff, %r10 /* offset into 4K page */
884 sub $0x1000, %r10 /* subtract 4K pagesize */
889 jg LABEL(nibble_ashr_5)
891 LABEL(gobble_ashr_5):
892 movdqa (%rsi, %rcx), %xmm1
893 movdqa (%rdi, %rcx), %xmm2
899 por %xmm3, %xmm2 /* merge into one 16byte value */
901 palignr $5, %xmm3, %xmm2 /* merge into one 16byte value */
903 TOLOWER (%xmm1, %xmm2)
912 # if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
914 jbe LABEL(strcmp_exitz)
921 jg LABEL(nibble_ashr_5) /* cross page boundary */
923 movdqa (%rsi, %rcx), %xmm1
924 movdqa (%rdi, %rcx), %xmm2
930 por %xmm3, %xmm2 /* merge into one 16byte value */
932 palignr $5, %xmm3, %xmm2 /* merge into one 16byte value */
934 TOLOWER (%xmm1, %xmm2)
943 # if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
945 jbe LABEL(strcmp_exitz)
950 jmp LABEL(loop_ashr_5)
953 LABEL(nibble_ashr_5):
954 pcmpeqb %xmm3, %xmm0 /* check nibble for null char */
957 jnz LABEL(ashr_5_exittail)
959 # if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
961 jbe LABEL(ashr_5_exittail)
966 jmp LABEL(gobble_ashr_5)
969 LABEL(ashr_5_exittail):
970 movdqa (%rsi, %rcx), %xmm1
976 * The following cases will be handled by ashr_6
977 * rcx(offset of rsi) rax(offset of rdi) relative offset corresponding case
978 * n(10~15) n - 10 5(15 +(n-10) - n) ashr_6
987 TOLOWER (%xmm1, %xmm2)
994 jnz LABEL(less32bytes)
997 UPDATE_STRNCMP_COUNTER
1000 mov $16, %rcx /* index for loads */
1001 mov $6, %r9d /* byte position left over from less32bytes case */
1003 * Setting up the %r10 value allows us to detect crossing a page boundary.
1004 * When %r10 goes positive we have crossed a page boundary and
1005 * need to do a nibble.
1008 and $0xfff, %r10 /* offset into 4K page */
1009 sub $0x1000, %r10 /* subtract 4K pagesize */
1014 jg LABEL(nibble_ashr_6)
1016 LABEL(gobble_ashr_6):
1017 movdqa (%rsi, %rcx), %xmm1
1018 movdqa (%rdi, %rcx), %xmm2
1024 por %xmm3, %xmm2 /* merge into one 16byte value */
1026 palignr $6, %xmm3, %xmm2 /* merge into one 16byte value */
1028 TOLOWER (%xmm1, %xmm2)
1030 pcmpeqb %xmm1, %xmm0
1031 pcmpeqb %xmm2, %xmm1
1033 pmovmskb %xmm1, %edx
1037 # if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
1039 jbe LABEL(strcmp_exitz)
1046 jg LABEL(nibble_ashr_6) /* cross page boundary */
1048 movdqa (%rsi, %rcx), %xmm1
1049 movdqa (%rdi, %rcx), %xmm2
1055 por %xmm3, %xmm2 /* merge into one 16byte value */
1057 palignr $6, %xmm3, %xmm2 /* merge into one 16byte value */
1059 TOLOWER (%xmm1, %xmm2)
1061 pcmpeqb %xmm1, %xmm0
1062 pcmpeqb %xmm2, %xmm1
1064 pmovmskb %xmm1, %edx
1068 # if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
1070 jbe LABEL(strcmp_exitz)
1075 jmp LABEL(loop_ashr_6)
1078 LABEL(nibble_ashr_6):
1079 pcmpeqb %xmm3, %xmm0 /* check nibble for null char */
1080 pmovmskb %xmm0, %edx
1082 jnz LABEL(ashr_6_exittail)
1084 # if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
1086 jbe LABEL(ashr_6_exittail)
1091 jmp LABEL(gobble_ashr_6)
1094 LABEL(ashr_6_exittail):
1095 movdqa (%rsi, %rcx), %xmm1
1098 jmp LABEL(aftertail)
1101 * The following cases will be handled by ashr_7
1102 * rcx(offset of rsi) rax(offset of rdi) relative offset corresponding case
1103 * n(9~15) n - 9 6(15 +(n - 9) - n) ashr_7
1108 movdqa (%rdi), %xmm2
1109 movdqa (%rsi), %xmm1
1110 pcmpeqb %xmm1, %xmm0
1112 TOLOWER (%xmm1, %xmm2)
1113 pcmpeqb %xmm1, %xmm2
1115 pmovmskb %xmm2, %r9d
1119 jnz LABEL(less32bytes)
1120 movdqa (%rdi), %xmm3
1122 UPDATE_STRNCMP_COUNTER
1125 mov $16, %rcx /* index for loads */
1126 mov $7, %r9d /* byte position left over from less32bytes case */
1128 * Setting up the %r10 value allows us to detect crossing a page boundary.
1129 * When %r10 goes positive we have crossed a page boundary and
1130 * need to do a nibble.
1133 and $0xfff, %r10 /* offset into 4K page */
1134 sub $0x1000, %r10 /* subtract 4K pagesize */
1139 jg LABEL(nibble_ashr_7)
1141 LABEL(gobble_ashr_7):
1142 movdqa (%rsi, %rcx), %xmm1
1143 movdqa (%rdi, %rcx), %xmm2
1149 por %xmm3, %xmm2 /* merge into one 16byte value */
1151 palignr $7, %xmm3, %xmm2 /* merge into one 16byte value */
1153 TOLOWER (%xmm1, %xmm2)
1155 pcmpeqb %xmm1, %xmm0
1156 pcmpeqb %xmm2, %xmm1
1158 pmovmskb %xmm1, %edx
1162 # if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
1164 jbe LABEL(strcmp_exitz)
1171 jg LABEL(nibble_ashr_7) /* cross page boundary */
1173 movdqa (%rsi, %rcx), %xmm1
1174 movdqa (%rdi, %rcx), %xmm2
1180 por %xmm3, %xmm2 /* merge into one 16byte value */
1182 palignr $7, %xmm3, %xmm2 /* merge into one 16byte value */
1184 TOLOWER (%xmm1, %xmm2)
1186 pcmpeqb %xmm1, %xmm0
1187 pcmpeqb %xmm2, %xmm1
1189 pmovmskb %xmm1, %edx
1193 # if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
1195 jbe LABEL(strcmp_exitz)
1200 jmp LABEL(loop_ashr_7)
1203 LABEL(nibble_ashr_7):
1204 pcmpeqb %xmm3, %xmm0 /* check nibble for null char */
1205 pmovmskb %xmm0, %edx
1207 jnz LABEL(ashr_7_exittail)
1209 # if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
1211 jbe LABEL(ashr_7_exittail)
1216 jmp LABEL(gobble_ashr_7)
1219 LABEL(ashr_7_exittail):
1220 movdqa (%rsi, %rcx), %xmm1
1223 jmp LABEL(aftertail)
1226 * The following cases will be handled by ashr_8
1227 * rcx(offset of rsi) rax(offset of rdi) relative offset corresponding case
1228 * n(8~15) n - 8 7(15 +(n - 8) - n) ashr_8
1233 movdqa (%rdi), %xmm2
1234 movdqa (%rsi), %xmm1
1235 pcmpeqb %xmm1, %xmm0
1237 TOLOWER (%xmm1, %xmm2)
1238 pcmpeqb %xmm1, %xmm2
1240 pmovmskb %xmm2, %r9d
1244 jnz LABEL(less32bytes)
1245 movdqa (%rdi), %xmm3
1247 UPDATE_STRNCMP_COUNTER
1250 mov $16, %rcx /* index for loads */
1251 mov $8, %r9d /* byte position left over from less32bytes case */
1253 * Setting up the %r10 value allows us to detect crossing a page boundary.
1254 * When %r10 goes positive we have crossed a page boundary and
1255 * need to do a nibble.
1258 and $0xfff, %r10 /* offset into 4K page */
1259 sub $0x1000, %r10 /* subtract 4K pagesize */
1264 jg LABEL(nibble_ashr_8)
1266 LABEL(gobble_ashr_8):
1267 movdqa (%rsi, %rcx), %xmm1
1268 movdqa (%rdi, %rcx), %xmm2
1274 por %xmm3, %xmm2 /* merge into one 16byte value */
1276 palignr $8, %xmm3, %xmm2 /* merge into one 16byte value */
1278 TOLOWER (%xmm1, %xmm2)
1280 pcmpeqb %xmm1, %xmm0
1281 pcmpeqb %xmm2, %xmm1
1283 pmovmskb %xmm1, %edx
1287 # if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
1289 jbe LABEL(strcmp_exitz)
1296 jg LABEL(nibble_ashr_8) /* cross page boundary */
1298 movdqa (%rsi, %rcx), %xmm1
1299 movdqa (%rdi, %rcx), %xmm2
1305 por %xmm3, %xmm2 /* merge into one 16byte value */
1307 palignr $8, %xmm3, %xmm2 /* merge into one 16byte value */
1309 TOLOWER (%xmm1, %xmm2)
1311 pcmpeqb %xmm1, %xmm0
1312 pcmpeqb %xmm2, %xmm1
1314 pmovmskb %xmm1, %edx
1318 # if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
1320 jbe LABEL(strcmp_exitz)
1325 jmp LABEL(loop_ashr_8)
1328 LABEL(nibble_ashr_8):
1329 pcmpeqb %xmm3, %xmm0 /* check nibble for null char */
1330 pmovmskb %xmm0, %edx
1332 jnz LABEL(ashr_8_exittail)
1334 # if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
1336 jbe LABEL(ashr_8_exittail)
1341 jmp LABEL(gobble_ashr_8)
1344 LABEL(ashr_8_exittail):
1345 movdqa (%rsi, %rcx), %xmm1
1348 jmp LABEL(aftertail)
1351 * The following cases will be handled by ashr_9
1352 * rcx(offset of rsi) rax(offset of rdi) relative offset corresponding case
1353 * n(7~15) n - 7 8(15 +(n - 7) - n) ashr_9
1358 movdqa (%rdi), %xmm2
1359 movdqa (%rsi), %xmm1
1360 pcmpeqb %xmm1, %xmm0
1362 TOLOWER (%xmm1, %xmm2)
1363 pcmpeqb %xmm1, %xmm2
1365 pmovmskb %xmm2, %r9d
1369 jnz LABEL(less32bytes)
1370 movdqa (%rdi), %xmm3
1372 UPDATE_STRNCMP_COUNTER
1375 mov $16, %rcx /* index for loads */
1376 mov $9, %r9d /* byte position left over from less32bytes case */
1378 * Setting up the %r10 value allows us to detect crossing a page boundary.
1379 * When %r10 goes positive we have crossed a page boundary and
1380 * need to do a nibble.
1383 and $0xfff, %r10 /* offset into 4K page */
1384 sub $0x1000, %r10 /* subtract 4K pagesize */
1389 jg LABEL(nibble_ashr_9)
1391 LABEL(gobble_ashr_9):
1392 movdqa (%rsi, %rcx), %xmm1
1393 movdqa (%rdi, %rcx), %xmm2
1399 por %xmm3, %xmm2 /* merge into one 16byte value */
1401 palignr $9, %xmm3, %xmm2 /* merge into one 16byte value */
1403 TOLOWER (%xmm1, %xmm2)
1405 pcmpeqb %xmm1, %xmm0
1406 pcmpeqb %xmm2, %xmm1
1408 pmovmskb %xmm1, %edx
1412 # if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
1414 jbe LABEL(strcmp_exitz)
1421 jg LABEL(nibble_ashr_9) /* cross page boundary */
1423 movdqa (%rsi, %rcx), %xmm1
1424 movdqa (%rdi, %rcx), %xmm2
1430 por %xmm3, %xmm2 /* merge into one 16byte value */
1432 palignr $9, %xmm3, %xmm2 /* merge into one 16byte value */
1434 TOLOWER (%xmm1, %xmm2)
1436 pcmpeqb %xmm1, %xmm0
1437 pcmpeqb %xmm2, %xmm1
1439 pmovmskb %xmm1, %edx
1443 # if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
1445 jbe LABEL(strcmp_exitz)
1449 movdqa %xmm4, %xmm3 /* store for next cycle */
1450 jmp LABEL(loop_ashr_9)
1453 LABEL(nibble_ashr_9):
1454 pcmpeqb %xmm3, %xmm0 /* check nibble for null char */
1455 pmovmskb %xmm0, %edx
1457 jnz LABEL(ashr_9_exittail)
1459 # if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
1461 jbe LABEL(ashr_9_exittail)
1466 jmp LABEL(gobble_ashr_9)
1469 LABEL(ashr_9_exittail):
1470 movdqa (%rsi, %rcx), %xmm1
1473 jmp LABEL(aftertail)
1476 * The following cases will be handled by ashr_10
1477 * rcx(offset of rsi) rax(offset of rdi) relative offset corresponding case
1478 * n(6~15) n - 6 9(15 +(n - 6) - n) ashr_10
1483 movdqa (%rdi), %xmm2
1484 movdqa (%rsi), %xmm1
1485 pcmpeqb %xmm1, %xmm0
1487 TOLOWER (%xmm1, %xmm2)
1488 pcmpeqb %xmm1, %xmm2
1490 pmovmskb %xmm2, %r9d
1494 jnz LABEL(less32bytes)
1495 movdqa (%rdi), %xmm3
1497 UPDATE_STRNCMP_COUNTER
1500 mov $16, %rcx /* index for loads */
1501 mov $10, %r9d /* byte position left over from less32bytes case */
1503 * Setting up the %r10 value allows us to detect crossing a page boundary.
1504 * When %r10 goes positive we have crossed a page boundary and
1505 * need to do a nibble.
1508 and $0xfff, %r10 /* offset into 4K page */
1509 sub $0x1000, %r10 /* subtract 4K pagesize */
1512 LABEL(loop_ashr_10):
1514 jg LABEL(nibble_ashr_10)
1516 LABEL(gobble_ashr_10):
1517 movdqa (%rsi, %rcx), %xmm1
1518 movdqa (%rdi, %rcx), %xmm2
1524 por %xmm3, %xmm2 /* merge into one 16byte value */
1526 palignr $10, %xmm3, %xmm2 /* merge into one 16byte value */
1528 TOLOWER (%xmm1, %xmm2)
1530 pcmpeqb %xmm1, %xmm0
1531 pcmpeqb %xmm2, %xmm1
1533 pmovmskb %xmm1, %edx
1537 # if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
1539 jbe LABEL(strcmp_exitz)
1546 jg LABEL(nibble_ashr_10) /* cross page boundary */
1548 movdqa (%rsi, %rcx), %xmm1
1549 movdqa (%rdi, %rcx), %xmm2
1555 por %xmm3, %xmm2 /* merge into one 16byte value */
1557 palignr $10, %xmm3, %xmm2 /* merge into one 16byte value */
1559 TOLOWER (%xmm1, %xmm2)
1561 pcmpeqb %xmm1, %xmm0
1562 pcmpeqb %xmm2, %xmm1
1564 pmovmskb %xmm1, %edx
1568 # if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
1570 jbe LABEL(strcmp_exitz)
1575 jmp LABEL(loop_ashr_10)
1578 LABEL(nibble_ashr_10):
1579 pcmpeqb %xmm3, %xmm0 /* check nibble for null char */
1580 pmovmskb %xmm0, %edx
1582 jnz LABEL(ashr_10_exittail)
1584 # if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
1586 jbe LABEL(ashr_10_exittail)
1591 jmp LABEL(gobble_ashr_10)
1594 LABEL(ashr_10_exittail):
1595 movdqa (%rsi, %rcx), %xmm1
1598 jmp LABEL(aftertail)
1601 * The following cases will be handled by ashr_11
1602 * rcx(offset of rsi) rax(offset of rdi) relative offset corresponding case
1603 * n(5~15) n - 5 10(15 +(n - 5) - n) ashr_11
1608 movdqa (%rdi), %xmm2
1609 movdqa (%rsi), %xmm1
1610 pcmpeqb %xmm1, %xmm0
1612 TOLOWER (%xmm1, %xmm2)
1613 pcmpeqb %xmm1, %xmm2
1615 pmovmskb %xmm2, %r9d
1619 jnz LABEL(less32bytes)
1620 movdqa (%rdi), %xmm3
1622 UPDATE_STRNCMP_COUNTER
1625 mov $16, %rcx /* index for loads */
1626 mov $11, %r9d /* byte position left over from less32bytes case */
1628 * Setting up the %r10 value allows us to detect crossing a page boundary.
1629 * When %r10 goes positive we have crossed a page boundary and
1630 * need to do a nibble.
1633 and $0xfff, %r10 /* offset into 4K page */
1634 sub $0x1000, %r10 /* subtract 4K pagesize */
1637 LABEL(loop_ashr_11):
1639 jg LABEL(nibble_ashr_11)
1641 LABEL(gobble_ashr_11):
1642 movdqa (%rsi, %rcx), %xmm1
1643 movdqa (%rdi, %rcx), %xmm2
1649 por %xmm3, %xmm2 /* merge into one 16byte value */
1651 palignr $11, %xmm3, %xmm2 /* merge into one 16byte value */
1653 TOLOWER (%xmm1, %xmm2)
1655 pcmpeqb %xmm1, %xmm0
1656 pcmpeqb %xmm2, %xmm1
1658 pmovmskb %xmm1, %edx
1662 # if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
1664 jbe LABEL(strcmp_exitz)
1671 jg LABEL(nibble_ashr_11) /* cross page boundary */
1673 movdqa (%rsi, %rcx), %xmm1
1674 movdqa (%rdi, %rcx), %xmm2
1680 por %xmm3, %xmm2 /* merge into one 16byte value */
1682 palignr $11, %xmm3, %xmm2 /* merge into one 16byte value */
1684 TOLOWER (%xmm1, %xmm2)
1686 pcmpeqb %xmm1, %xmm0
1687 pcmpeqb %xmm2, %xmm1
1689 pmovmskb %xmm1, %edx
1693 # if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
1695 jbe LABEL(strcmp_exitz)
1700 jmp LABEL(loop_ashr_11)
1703 LABEL(nibble_ashr_11):
1704 pcmpeqb %xmm3, %xmm0 /* check nibble for null char */
1705 pmovmskb %xmm0, %edx
1707 jnz LABEL(ashr_11_exittail)
1709 # if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
1711 jbe LABEL(ashr_11_exittail)
1716 jmp LABEL(gobble_ashr_11)
1719 LABEL(ashr_11_exittail):
1720 movdqa (%rsi, %rcx), %xmm1
1723 jmp LABEL(aftertail)
1726 * The following cases will be handled by ashr_12
1727 * rcx(offset of rsi) rax(offset of rdi) relative offset corresponding case
1728 * n(4~15) n - 4 11(15 +(n - 4) - n) ashr_12
1733 movdqa (%rdi), %xmm2
1734 movdqa (%rsi), %xmm1
1735 pcmpeqb %xmm1, %xmm0
1737 TOLOWER (%xmm1, %xmm2)
1738 pcmpeqb %xmm1, %xmm2
1740 pmovmskb %xmm2, %r9d
1744 jnz LABEL(less32bytes)
1745 movdqa (%rdi), %xmm3
1747 UPDATE_STRNCMP_COUNTER
1750 mov $16, %rcx /* index for loads */
1751 mov $12, %r9d /* byte position left over from less32bytes case */
1753 * Setting up the %r10 value allows us to detect crossing a page boundary.
1754 * When %r10 goes positive we have crossed a page boundary and
1755 * need to do a nibble.
1758 and $0xfff, %r10 /* offset into 4K page */
1759 sub $0x1000, %r10 /* subtract 4K pagesize */
1762 LABEL(loop_ashr_12):
1764 jg LABEL(nibble_ashr_12)
1766 LABEL(gobble_ashr_12):
1767 movdqa (%rsi, %rcx), %xmm1
1768 movdqa (%rdi, %rcx), %xmm2
1774 por %xmm3, %xmm2 /* merge into one 16byte value */
1776 palignr $12, %xmm3, %xmm2 /* merge into one 16byte value */
1778 TOLOWER (%xmm1, %xmm2)
1780 pcmpeqb %xmm1, %xmm0
1781 pcmpeqb %xmm2, %xmm1
1783 pmovmskb %xmm1, %edx
1787 # if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
1789 jbe LABEL(strcmp_exitz)
1796 jg LABEL(nibble_ashr_12) /* cross page boundary */
1798 movdqa (%rsi, %rcx), %xmm1
1799 movdqa (%rdi, %rcx), %xmm2
1805 por %xmm3, %xmm2 /* merge into one 16byte value */
1807 palignr $12, %xmm3, %xmm2 /* merge into one 16byte value */
1809 TOLOWER (%xmm1, %xmm2)
1811 pcmpeqb %xmm1, %xmm0
1812 pcmpeqb %xmm2, %xmm1
1814 pmovmskb %xmm1, %edx
1818 # if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
1820 jbe LABEL(strcmp_exitz)
1825 jmp LABEL(loop_ashr_12)
1828 LABEL(nibble_ashr_12):
1829 pcmpeqb %xmm3, %xmm0 /* check nibble for null char */
1830 pmovmskb %xmm0, %edx
1832 jnz LABEL(ashr_12_exittail)
1834 # if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
1836 jbe LABEL(ashr_12_exittail)
1841 jmp LABEL(gobble_ashr_12)
1844 LABEL(ashr_12_exittail):
1845 movdqa (%rsi, %rcx), %xmm1
1848 jmp LABEL(aftertail)
1851 * The following cases will be handled by ashr_13
1852 * rcx(offset of rsi) rax(offset of rdi) relative offset corresponding case
1853 * n(3~15) n - 3 12(15 +(n - 3) - n) ashr_13
/* ashr_13 entry sequence.  NOTE(review): the label line and several
   instructions are not visible in this excerpt; the comments added here
   describe only the lines that are shown.
   Aligned 16-byte loads fetch the first chunk at %rdi and at %rsi.  */
1858 movdqa (%rdi), %xmm2
1859 movdqa (%rsi), %xmm1
/* Byte-wise compare of the %rsi chunk with %xmm0, result left in %xmm0.
   Presumably %xmm0 is all-zero here so this marks NUL bytes -- TODO
   confirm, the zeroing instruction is not shown.  */
1860 pcmpeqb %xmm1, %xmm0
/* TOLOWER is a macro defined elsewhere in the file.  */
1862 TOLOWER (%xmm1, %xmm2)
/* %xmm2 becomes the per-byte equality mask of the two chunks.  */
1863 pcmpeqb %xmm1, %xmm2
/* Collapse the per-byte result into a bit mask in %r9d.  */
1865 pmovmskb %xmm2, %r9d
/* NOTE(review): the instruction that sets the flags tested by this jnz is
   not visible in this excerpt.  */
1869 jnz LABEL(less32bytes)
/* Keep the first 16 bytes at %rdi in %xmm3; the loop below merges %xmm3
   with the next chunk and the nibble path tests it.  */
1870 movdqa (%rdi), %xmm3
/* The visible definitions near the top of the file give this macro a body
   for the length-limited variants (remaining-count update with a possible
   branch to strcmp_exitz) and leave it empty for strcasecmp_l.  */
1872 UPDATE_STRNCMP_COUNTER
1875 mov $16, %rcx /* index for loads */
1876 mov $13, %r9d /* byte position left over from less32bytes case */
1878 * Setup %r10 value allows us to detect crossing a page boundary.
1879 * When %r10 goes positive we have crossed a page boundary and
1880 * need to do a nibble.
1883 and $0xfff, %r10 /* offset into 4K page */
1884 sub $0x1000, %r10 /* subtract 4K pagesize */
/* Main loop: two identical compare steps per iteration, each preceded by
   a page-crossing check that diverts to nibble_ashr_13.  */
1887 LABEL(loop_ashr_13):
1889 jg LABEL(nibble_ashr_13)
/* First compare step: aligned loads of the next 16 bytes of each string
   at index %rcx.  */
1891 LABEL(gobble_ashr_13):
1892 movdqa (%rsi, %rcx), %xmm1
1893 movdqa (%rdi, %rcx), %xmm2
/* NOTE(review): the por and palignr lines below carry the same "merge"
   comment and look like alternatives selected by a preprocessor
   conditional whose directives are not visible here -- confirm.  */
1899 por %xmm3, %xmm2 /* merge into one 16byte value */
1901 palignr $13, %xmm3, %xmm2 /* merge into one 16byte value */
1903 TOLOWER (%xmm1, %xmm2)
/* Two byte-wise compares; the second one's result mask goes to %edx.  */
1905 pcmpeqb %xmm1, %xmm0
1906 pcmpeqb %xmm2, %xmm1
1908 pmovmskb %xmm1, %edx
/* Length-limited variants leave through strcmp_exitz on jbe; the
   flag-setting instruction is not visible in this excerpt.  */
1912 # if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
1914 jbe LABEL(strcmp_exitz)
/* Second compare step: same sequence as the first one.  */
1921 jg LABEL(nibble_ashr_13) /* cross page boundary */
1923 movdqa (%rsi, %rcx), %xmm1
1924 movdqa (%rdi, %rcx), %xmm2
1930 por %xmm3, %xmm2 /* merge into one 16byte value */
1932 palignr $13, %xmm3, %xmm2 /* merge into one 16byte value */
1934 TOLOWER (%xmm1, %xmm2)
1936 pcmpeqb %xmm1, %xmm0
1937 pcmpeqb %xmm2, %xmm1
1939 pmovmskb %xmm1, %edx
1943 # if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
1945 jbe LABEL(strcmp_exitz)
1950 jmp LABEL(loop_ashr_13)
/* Page-crossing path: test the bytes held in %xmm3 before the next chunk
   is loaded.  A hit goes to ashr_13_exittail; otherwise control returns
   to gobble_ashr_13.  */
1953 LABEL(nibble_ashr_13):
1954 pcmpeqb %xmm3, %xmm0 /* check nibble for null char */
1955 pmovmskb %xmm0, %edx
1957 jnz LABEL(ashr_13_exittail)
/* Length-limited variants also take the tail exit on jbe (flag-setting
   instruction not visible here).  */
1959 # if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
1961 jbe LABEL(ashr_13_exittail)
1966 jmp LABEL(gobble_ashr_13)
/* Tail exit: reload the %rsi chunk at index %rcx and continue at the
   shared aftertail code.  */
1969 LABEL(ashr_13_exittail):
1970 movdqa (%rsi, %rcx), %xmm1
1973 jmp LABEL(aftertail)
1976 * The following cases will be handled by ashr_14
1977 * rcx(offset of rsi) rax(offset of rdi) relative offset corresponding case
1978 * n(2~15) n - 2 13(15 +(n - 2) - n) ashr_14
/* ashr_14 entry sequence.  NOTE(review): the label line and several
   instructions are not visible in this excerpt; the comments added here
   describe only the lines that are shown.
   Aligned 16-byte loads fetch the first chunk at %rdi and at %rsi.  */
1983 movdqa (%rdi), %xmm2
1984 movdqa (%rsi), %xmm1
/* Byte-wise compare of the %rsi chunk with %xmm0, result left in %xmm0.
   Presumably %xmm0 is all-zero here so this marks NUL bytes -- TODO
   confirm, the zeroing instruction is not shown.  */
1985 pcmpeqb %xmm1, %xmm0
/* TOLOWER is a macro defined elsewhere in the file.  */
1987 TOLOWER (%xmm1, %xmm2)
/* %xmm2 becomes the per-byte equality mask of the two chunks.  */
1988 pcmpeqb %xmm1, %xmm2
/* Collapse the per-byte result into a bit mask in %r9d.  */
1990 pmovmskb %xmm2, %r9d
/* NOTE(review): the instruction that sets the flags tested by this jnz is
   not visible in this excerpt.  */
1994 jnz LABEL(less32bytes)
/* Keep the first 16 bytes at %rdi in %xmm3; the loop below merges %xmm3
   with the next chunk and the nibble path tests it.  */
1995 movdqa (%rdi), %xmm3
/* The visible definitions near the top of the file give this macro a body
   for the length-limited variants (remaining-count update with a possible
   branch to strcmp_exitz) and leave it empty for strcasecmp_l.  */
1997 UPDATE_STRNCMP_COUNTER
2000 mov $16, %rcx /* index for loads */
2001 mov $14, %r9d /* byte position left over from less32bytes case */
2003 * Setup %r10 value allows us to detect crossing a page boundary.
2004 * When %r10 goes positive we have crossed a page boundary and
2005 * need to do a nibble.
2008 and $0xfff, %r10 /* offset into 4K page */
2009 sub $0x1000, %r10 /* subtract 4K pagesize */
/* Main loop: two identical compare steps per iteration, each preceded by
   a page-crossing check that diverts to nibble_ashr_14.  */
2012 LABEL(loop_ashr_14):
2014 jg LABEL(nibble_ashr_14)
/* First compare step: aligned loads of the next 16 bytes of each string
   at index %rcx.  */
2016 LABEL(gobble_ashr_14):
2017 movdqa (%rsi, %rcx), %xmm1
2018 movdqa (%rdi, %rcx), %xmm2
/* NOTE(review): the por and palignr lines below carry the same "merge"
   comment and look like alternatives selected by a preprocessor
   conditional whose directives are not visible here -- confirm.  */
2024 por %xmm3, %xmm2 /* merge into one 16byte value */
2026 palignr $14, %xmm3, %xmm2 /* merge into one 16byte value */
2028 TOLOWER (%xmm1, %xmm2)
/* Two byte-wise compares; the second one's result mask goes to %edx.  */
2030 pcmpeqb %xmm1, %xmm0
2031 pcmpeqb %xmm2, %xmm1
2033 pmovmskb %xmm1, %edx
/* Length-limited variants leave through strcmp_exitz on jbe; the
   flag-setting instruction is not visible in this excerpt.  */
2037 # if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
2039 jbe LABEL(strcmp_exitz)
/* Second compare step: same sequence as the first one.  */
2046 jg LABEL(nibble_ashr_14) /* cross page boundary */
2048 movdqa (%rsi, %rcx), %xmm1
2049 movdqa (%rdi, %rcx), %xmm2
2055 por %xmm3, %xmm2 /* merge into one 16byte value */
2057 palignr $14, %xmm3, %xmm2 /* merge into one 16byte value */
2059 TOLOWER (%xmm1, %xmm2)
2061 pcmpeqb %xmm1, %xmm0
2062 pcmpeqb %xmm2, %xmm1
2064 pmovmskb %xmm1, %edx
2068 # if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
2070 jbe LABEL(strcmp_exitz)
2075 jmp LABEL(loop_ashr_14)
/* Page-crossing path: test the bytes held in %xmm3 before the next chunk
   is loaded.  A hit goes to ashr_14_exittail; otherwise control returns
   to gobble_ashr_14.  */
2078 LABEL(nibble_ashr_14):
2079 pcmpeqb %xmm3, %xmm0 /* check nibble for null char */
2080 pmovmskb %xmm0, %edx
2082 jnz LABEL(ashr_14_exittail)
/* Length-limited variants also take the tail exit on jbe (flag-setting
   instruction not visible here).  */
2084 # if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
2086 jbe LABEL(ashr_14_exittail)
2091 jmp LABEL(gobble_ashr_14)
/* Tail exit: reload the %rsi chunk at index %rcx and continue at the
   shared aftertail code.  */
2094 LABEL(ashr_14_exittail):
2095 movdqa (%rsi, %rcx), %xmm1
2098 jmp LABEL(aftertail)
2101 * The following cases will be handled by ashr_15
2102 * rcx(offset of rsi) rax(offset of rdi) relative offset corresponding case
2103 * n(1~15) n - 1 14(15 +(n - 1) - n) ashr_15
/* ashr_15 entry sequence.  NOTE(review): the label line and several
   instructions are not visible in this excerpt; the comments added here
   describe only the lines that are shown.
   Aligned 16-byte loads fetch the first chunk at %rdi and at %rsi.  */
2108 movdqa (%rdi), %xmm2
2109 movdqa (%rsi), %xmm1
/* Byte-wise compare of the %rsi chunk with %xmm0, result left in %xmm0.
   Presumably %xmm0 is all-zero here so this marks NUL bytes -- TODO
   confirm, the zeroing instruction is not shown.  */
2110 pcmpeqb %xmm1, %xmm0
/* TOLOWER is a macro defined elsewhere in the file.  */
2112 TOLOWER (%xmm1, %xmm2)
/* %xmm2 becomes the per-byte equality mask of the two chunks.  */
2113 pcmpeqb %xmm1, %xmm2
/* Collapse the per-byte result into a bit mask in %r9d.  */
2115 pmovmskb %xmm2, %r9d
/* NOTE(review): the instruction that sets the flags tested by this jnz is
   not visible in this excerpt.  */
2119 jnz LABEL(less32bytes)
/* Keep the first 16 bytes at %rdi in %xmm3; the loop below merges %xmm3
   with the next chunk and the nibble path tests it.  */
2121 movdqa (%rdi), %xmm3
/* The visible definitions near the top of the file give this macro a body
   for the length-limited variants (remaining-count update with a possible
   branch to strcmp_exitz) and leave it empty for strcasecmp_l.  */
2123 UPDATE_STRNCMP_COUNTER
2126 mov $16, %rcx /* index for loads */
2127 mov $15, %r9d /* byte position left over from less32bytes case */
2129 * Setup %r10 value allows us to detect crossing a page boundary.
2130 * When %r10 goes positive we have crossed a page boundary and
2131 * need to do a nibble.
2134 and $0xfff, %r10 /* offset into 4K page */
2136 sub $0x1000, %r10 /* subtract 4K pagesize */
/* Main loop: two identical compare steps per iteration, each preceded by
   a page-crossing check that diverts to nibble_ashr_15.  */
2139 LABEL(loop_ashr_15):
2141 jg LABEL(nibble_ashr_15)
/* First compare step: aligned loads of the next 16 bytes of each string
   at index %rcx.  */
2143 LABEL(gobble_ashr_15):
2144 movdqa (%rsi, %rcx), %xmm1
2145 movdqa (%rdi, %rcx), %xmm2
/* NOTE(review): the por and palignr lines below carry the same "merge"
   comment and look like alternatives selected by a preprocessor
   conditional whose directives are not visible here -- confirm.  */
2151 por %xmm3, %xmm2 /* merge into one 16byte value */
2153 palignr $15, %xmm3, %xmm2 /* merge into one 16byte value */
2155 TOLOWER (%xmm1, %xmm2)
/* Two byte-wise compares; the second one's result mask goes to %edx.  */
2157 pcmpeqb %xmm1, %xmm0
2158 pcmpeqb %xmm2, %xmm1
2160 pmovmskb %xmm1, %edx
/* Length-limited variants leave through strcmp_exitz on jbe; the
   flag-setting instruction is not visible in this excerpt.  */
2164 # if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
2166 jbe LABEL(strcmp_exitz)
/* Second compare step: same sequence as the first one.  */
2173 jg LABEL(nibble_ashr_15) /* cross page boundary */
2175 movdqa (%rsi, %rcx), %xmm1
2176 movdqa (%rdi, %rcx), %xmm2
2182 por %xmm3, %xmm2 /* merge into one 16byte value */
2184 palignr $15, %xmm3, %xmm2 /* merge into one 16byte value */
2186 TOLOWER (%xmm1, %xmm2)
2188 pcmpeqb %xmm1, %xmm0
2189 pcmpeqb %xmm2, %xmm1
2191 pmovmskb %xmm1, %edx
2195 # if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
2197 jbe LABEL(strcmp_exitz)
2202 jmp LABEL(loop_ashr_15)
/* Page-crossing path: test the bytes held in %xmm3 before the next chunk
   is loaded.  A hit goes to ashr_15_exittail; otherwise control returns
   to gobble_ashr_15.  */
2205 LABEL(nibble_ashr_15):
2206 pcmpeqb %xmm3, %xmm0 /* check nibble for null char */
2207 pmovmskb %xmm0, %edx
2209 jnz LABEL(ashr_15_exittail)
/* Length-limited variants also take the tail exit on jbe (flag-setting
   instruction not visible here).  */
2211 # if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
2213 jbe LABEL(ashr_15_exittail)
2218 jmp LABEL(gobble_ashr_15)
/* Tail exit of ashr_15 followed by the shared exit paths.  NOTE(review):
   several labels, branches and flag-setting instructions that belong
   between the lines below are not visible in this excerpt, so the
   comments describe individual instructions only.  */
2221 LABEL(ashr_15_exittail):
/* Reload the %rsi chunk at index %rcx.  */
2222 movdqa (%rsi, %rcx), %xmm1
/* Compare the reloaded chunk with the bytes held in %xmm3 and move the
   result mask to %edx.  */
2228 TOLOWER (%xmm1, %xmm3)
2229 pcmpeqb %xmm3, %xmm1
2231 pmovmskb %xmm1, %edx
/* Turn the loop index %rcx and the shift amount kept in %r9 back into
   real addresses for both operands.  */
2236 lea -16(%r9, %rcx), %rax /* locate the exact offset for rdi */
2238 lea (%rdi, %rax), %rdi /* locate the exact address for first operand(rdi) */
2239 lea (%rsi, %rcx), %rsi /* locate the exact address for second operand(rsi) */
/* NOTE(review): the existing comment says the swap depends on a flag in
   %r8d; the test that guards it is not visible here -- confirm.  */
2242 xchg %rsi, %rdi /* recover original order according to flag(%r8d) */
2247 bsf %rdx, %rdx /* find and store bit index in %rdx */
/* Length-limited variants leave through strcmp_exitz on jbe; the
   flag-setting instruction is not visible in this excerpt.  */
2249 # if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
2251 jbe LABEL(strcmp_exitz)
/* Load the byte at offset %rdx from each string, zero-extended.  */
2253 movzbl (%rsi, %rdx), %ecx
2254 movzbl (%rdi, %rdx), %eax
/* Case-insensitive variants replace both bytes with the 32-bit entries of
   the _nl_C_LC_CTYPE_tolower table, addressed from the table symbol plus
   128*4 and indexed by the byte value.  */
2256 # if defined USE_AS_STRCASECMP_L || defined USE_AS_STRNCASECMP_L
2257 leaq _nl_C_LC_CTYPE_tolower+128*4(%rip), %rdx
2258 movl (%rdx,%rcx,4), %ecx
2259 movl (%rdx,%rax,4), %eax
/* NOTE(review): the body of strcmp_exitz is not visible in this excerpt;
   the table lookup that follows presumably belongs to a later exit path
   whose label is missing here -- confirm against the full file.  */
2265 LABEL(strcmp_exitz):
2274 # if defined USE_AS_STRCASECMP_L || defined USE_AS_STRNCASECMP_L
2275 leaq _nl_C_LC_CTYPE_tolower+128*4(%rip), %rdx
2276 movl (%rdx,%rcx,4), %ecx
2277 movl (%rdx,%rax,4), %eax
/* Read-only dispatch table.  Each 32-bit entry is the distance from
   unaligned_table itself to one of the ashr_N handlers.  Entries run
   ashr_1 through ashr_15 at indices 0..14, and ashr_0 occupies the last
   slot (index 15).  NOTE(review): the code that indexes this table is not
   visible in this excerpt; presumably it adds the loaded entry to the
   table address to form the jump target -- confirm.  */
2284 .section .rodata,"a",@progbits
2286 LABEL(unaligned_table):
2287 .int LABEL(ashr_1) - LABEL(unaligned_table)
2288 .int LABEL(ashr_2) - LABEL(unaligned_table)
2289 .int LABEL(ashr_3) - LABEL(unaligned_table)
2290 .int LABEL(ashr_4) - LABEL(unaligned_table)
2291 .int LABEL(ashr_5) - LABEL(unaligned_table)
2292 .int LABEL(ashr_6) - LABEL(unaligned_table)
2293 .int LABEL(ashr_7) - LABEL(unaligned_table)
2294 .int LABEL(ashr_8) - LABEL(unaligned_table)
2295 .int LABEL(ashr_9) - LABEL(unaligned_table)
2296 .int LABEL(ashr_10) - LABEL(unaligned_table)
2297 .int LABEL(ashr_11) - LABEL(unaligned_table)
2298 .int LABEL(ashr_12) - LABEL(unaligned_table)
2299 .int LABEL(ashr_13) - LABEL(unaligned_table)
2300 .int LABEL(ashr_14) - LABEL(unaligned_table)
2301 .int LABEL(ashr_15) - LABEL(unaligned_table)
2302 .int LABEL(ashr_0) - LABEL(unaligned_table)
2303 #endif /* !IS_IN (libc) */
2304 libc_hidden_builtin_def (STRCMP)