1 /* Highly optimized version for x86-64.
2 Copyright (C) 1999, 2000, 2002, 2003, 2005, 2009, 2010
3 Free Software Foundation, Inc.
4 This file is part of the GNU C Library.
5 Based on i686 version contributed by Ulrich Drepper
6 <drepper@cygnus.com>, 1999.
7 Updated with SSE2 support contributed by Intel Corporation.
9 The GNU C Library is free software; you can redistribute it and/or
10 modify it under the terms of the GNU Lesser General Public
11 License as published by the Free Software Foundation; either
12 version 2.1 of the License, or (at your option) any later version.
14 The GNU C Library is distributed in the hope that it will be useful,
15 but WITHOUT ANY WARRANTY; without even the implied warranty of
16 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17 Lesser General Public License for more details.
19 You should have received a copy of the GNU Lesser General Public
20 License along with the GNU C Library; if not, write to the Free
21 Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
25 #include "asm-syntax.h"
29 #undef UPDATE_STRNCMP_COUNTER
36 /* The simplified code below is not set up to handle strncmp() so far.
37 Should this become necessary it has to be implemented. For now
38 just report the problem. */
40 # error "strncmp not implemented so far"
43 /* Since the counter, %r11, is unsigned, we branch to strcmp_exitz
44 if the new counter > the old one or is 0. */
45 # define UPDATE_STRNCMP_COUNTER \
46 /* calculate left number to compare */ \
47 lea -16(%rcx, %r11), %r9; \
49 jb LABEL(strcmp_exitz); \
51 je LABEL(strcmp_exitz); \
54 #elif defined USE_AS_STRCASECMP_L
55 # include "locale-defines.h"
57 /* No support for strcasecmp outside libc so far since it is not needed. */
59 # error "strcasecmp_l not implemented so far"
62 # define UPDATE_STRNCMP_COUNTER
64 # define UPDATE_STRNCMP_COUNTER
66 # define STRCMP strcmp
73 .section .text.ssse3,"ax",@progbits
76 #ifdef USE_AS_STRCASECMP_L
78 # define ENTRY2(name) ENTRY (name)
79 # define END2(name) END (name)
83 movq __libc_tsd_LOCALE@gottpoff(%rip),%rax
86 // XXX 5 byte should be before the function
88 .byte 0x0f,0x1f,0x44,0x00,0x00
90 # ifndef NO_NOLOCALE_ALIAS
91 weak_alias (__strcasecmp, strcasecmp)
92 libc_hidden_def (__strcasecmp)
94 /* FALLTHROUGH to strcasecmp_l. */
97 ENTRY (BP_SYM (STRCMP))
99 /* Simple version since we can't use SSE registers in ld.so. */
100 L(oop): movb (%rdi), %al /* %al = byte loaded through %rdi */
111 L(neq): movl $1, %eax /* %eax = 1; presumably the "not equal" result, the rest of this path is not visible here */
115 END (BP_SYM (STRCMP))
116 #else /* NOT_IN_libc */
117 # ifdef USE_AS_STRCASECMP_L
118 /* We have to fall back on the C implementation for locales
119 with encodings not matching ASCII for single bytes. */
120 # if LOCALE_T___LOCALES != 0 || LC_CTYPE != 0
121 movq LOCALE_T___LOCALES+LC_CTYPE*8(%rdx), %rax /* %rax = LC_CTYPE entry read through the locale pointer in %rdx */
125 testl $1, LOCALE_DATA_VALUES+_NL_CTYPE_NONASCII_CASE*SIZEOF_VALUES(%rax) /* mask must be non-zero: testl $0 always sets ZF, so the jne below was never taken */
126 jne __strcasecmp_l_nonascii /* flag set: use the C fallback */
130 * This implementation uses SSE to compare up to 16 bytes at a time.
132 # ifdef USE_AS_STRNCMP
134 je LABEL(strcmp_exitz)
141 /* Use 64bit AND here to avoid long NOP padding. */
142 and $0x3f, %rcx /* rsi alignment in cache line */
143 and $0x3f, %rax /* rdi alignment in cache line */
144 # ifdef USE_AS_STRCASECMP_L
145 .section .rodata.cst16,"aM",@progbits,16
148 .quad 0x4040404040404040
149 .quad 0x4040404040404040
151 .quad 0x5b5b5b5b5b5b5b5b
152 .quad 0x5b5b5b5b5b5b5b5b
154 .quad 0x2020202020202020
155 .quad 0x2020202020202020
157 movdqa .Lbelowupper(%rip), %xmm5
158 # define UCLOW_reg %xmm5
159 movdqa .Ltopupper(%rip), %xmm6
160 # define UCHIGH_reg %xmm6
161 movdqa .Ltouppermask(%rip), %xmm7
162 # define LCQWORD_reg %xmm7
165 ja LABEL(crosscache) /* rsi: 16-byte load will cross cache line */
167 ja LABEL(crosscache) /* rdi: 16-byte load will cross cache line */
170 movhpd 8(%rdi), %xmm1
171 movhpd 8(%rsi), %xmm2
172 # ifdef USE_AS_STRCASECMP_L
173 # define TOLOWER(reg1, reg2) \
174 movdqa reg1, %xmm8; \
175 movdqa UCHIGH_reg, %xmm9; \
176 movdqa reg2, %xmm10; \
177 movdqa UCHIGH_reg, %xmm11; \
178 pcmpgtb UCLOW_reg, %xmm8; \
179 pcmpgtb reg1, %xmm9; \
180 pcmpgtb UCLOW_reg, %xmm10; \
181 pcmpgtb reg2, %xmm11; \
183 pand %xmm11, %xmm10; \
184 pand LCQWORD_reg, %xmm8; \
185 pand LCQWORD_reg, %xmm10; \
188 TOLOWER (%xmm1, %xmm2)
190 # define TOLOWER(reg1, reg2)
192 pxor %xmm0, %xmm0 /* clear %xmm0 for null char checks */
193 pcmpeqb %xmm1, %xmm0 /* %xmm0: 0xff in every byte position where %xmm1 holds a null char */
194 pcmpeqb %xmm2, %xmm1 /* %xmm1: 0xff in every byte position where the first 16 bytes are equal */
195 psubb %xmm0, %xmm1 /* packed sub of comparison results: an equal null byte becomes 0 */
197 sub $0xffff, %edx /* if first 16 bytes are same, edx == 0xffff */
198 jnz LABEL(less16bytes) /* If not, find different value or null char */
199 # ifdef USE_AS_STRNCMP
201 jbe LABEL(strcmp_exitz) /* finish comparison */
203 add $16, %rsi /* prepare to search next 16 bytes */
204 add $16, %rdi /* prepare to search next 16 bytes */
207 * Determine source and destination string offsets from 16-byte alignment.
208 * Use relative offset difference between the two to determine which case
213 and $0xfffffffffffffff0, %rsi /* force %rsi to be 16-byte aligned */
214 and $0xfffffffffffffff0, %rdi /* force %rdi to be 16-byte aligned */
215 mov $0xffff, %edx /* for equivalent offset */
217 and $0xf, %ecx /* offset of rsi */
218 and $0xf, %eax /* offset of rdi */
220 je LABEL(ashr_0) /* rsi and rdi relative offset same */
222 mov %edx, %r8d /* r8d is offset flag for exit tail */
228 lea LABEL(unaligned_table)(%rip), %r10 /* %r10 = address of the jump table */
229 movslq (%r10, %r9,4), %r9 /* %r9 = sign-extended 32-bit table entry at index %r9 */
230 lea (%r10, %r9), %r10 /* the entry is added to the table address to form the target */
231 jmp *%r10 /* jump to corresponding case */
234 * The following cases will be handled by ashr_0
235 * rcx(offset of rsi) rax(offset of rdi) relative offset corresponding case
236 * n(0~15) n(0~15) 15(15+ n-n) ashr_0
242 pxor %xmm0, %xmm0 /* clear %xmm0 for null char check */
243 pcmpeqb %xmm1, %xmm0 /* Any null chars? */
244 # ifndef USE_AS_STRCASECMP_L
245 pcmpeqb (%rdi), %xmm1 /* compare 16 bytes for equality */
248 TOLOWER (%xmm1, %xmm2)
249 pcmpeqb %xmm2, %xmm1 /* compare 16 bytes for equality */
251 psubb %xmm0, %xmm1 /* packed sub of comparison results*/
253 shr %cl, %edx /* adjust 0xffff for offset */
254 shr %cl, %r9d /* adjust for 16-byte offset */
257 * edx must be the same as r9d if the remaining (16-rcx) bytes are equal to
258 * those starting at (16-rax) and no null char was seen.
260 jne LABEL(less32bytes) /* mismatch or null char */
261 UPDATE_STRNCMP_COUNTER
264 pxor %xmm0, %xmm0 /* clear xmm0, may have changed above */
267 * Now both strings are aligned at 16-byte boundary. Loop over strings
268 * checking 32-bytes per iteration.
272 movdqa (%rsi, %rcx), %xmm1
273 movdqa (%rdi, %rcx), %xmm2
274 TOLOWER (%xmm1, %xmm2)
281 jnz LABEL(exit) /* mismatch or null char seen */
283 # ifdef USE_AS_STRNCMP
285 jbe LABEL(strcmp_exitz)
288 movdqa (%rsi, %rcx), %xmm1
289 movdqa (%rdi, %rcx), %xmm2
290 TOLOWER (%xmm1, %xmm2)
298 # ifdef USE_AS_STRNCMP
300 jbe LABEL(strcmp_exitz)
303 jmp LABEL(loop_ashr_0)
306 * The following cases will be handled by ashr_1
307 * rcx(offset of rsi) rax(offset of rdi) relative offset corresponding case
308 * n(15) n -15 0(15 +(n-15) - n) ashr_1
315 pcmpeqb %xmm1, %xmm0 /* Any null chars? */
316 pslldq $15, %xmm2 /* shift first string to align with second */
317 TOLOWER (%xmm1, %xmm2)
318 pcmpeqb %xmm1, %xmm2 /* compare 16 bytes for equality */
319 psubb %xmm0, %xmm2 /* packed sub of comparison results*/
321 shr %cl, %edx /* adjust 0xffff for offset */
322 shr %cl, %r9d /* adjust for 16-byte offset */
324 jnz LABEL(less32bytes) /* mismatch or null char seen */
326 UPDATE_STRNCMP_COUNTER
329 mov $16, %rcx /* index for loads*/
330 mov $1, %r9d /* byte position left over from less32bytes case */
332 * Setup %r10 value allows us to detect crossing a page boundary.
333 * When %r10 goes positive we have crossed a page boundary and
334 * need to do a nibble.
337 and $0xfff, %r10 /* offset into 4K page */
338 sub $0x1000, %r10 /* subtract 4K pagesize */
343 jg LABEL(nibble_ashr_1) /* cross page boundary */
345 LABEL(gobble_ashr_1):
346 movdqa (%rsi, %rcx), %xmm1
347 movdqa (%rdi, %rcx), %xmm2
348 movdqa %xmm2, %xmm4 /* store for next cycle */
353 por %xmm3, %xmm2 /* merge into one 16byte value */
355 palignr $1, %xmm3, %xmm2 /* merge into one 16byte value */
357 TOLOWER (%xmm1, %xmm2)
366 # ifdef USE_AS_STRNCMP
368 jbe LABEL(strcmp_exitz)
374 jg LABEL(nibble_ashr_1) /* cross page boundary */
376 movdqa (%rsi, %rcx), %xmm1
377 movdqa (%rdi, %rcx), %xmm2
378 movdqa %xmm2, %xmm4 /* store for next cycle */
383 por %xmm3, %xmm2 /* merge into one 16byte value */
385 palignr $1, %xmm3, %xmm2 /* merge into one 16byte value */
387 TOLOWER (%xmm1, %xmm2)
396 # ifdef USE_AS_STRNCMP
398 jbe LABEL(strcmp_exitz)
402 jmp LABEL(loop_ashr_1)
405 * Nibble avoids loads across page boundary. This is to avoid a potential
406 * access into unmapped memory.
409 LABEL(nibble_ashr_1):
410 pcmpeqb %xmm3, %xmm0 /* check nibble for null char */
413 jnz LABEL(ashr_1_exittail) /* found a null char */
415 # ifdef USE_AS_STRNCMP
417 jbe LABEL(ashr_1_exittail)
421 sub $0x1000, %r10 /* subtract 4K from %r10 */
422 jmp LABEL(gobble_ashr_1)
425 * Once a null char is found, determine if there is a string mismatch
426 * before the null char.
429 LABEL(ashr_1_exittail):
430 movdqa (%rsi, %rcx), %xmm1
436 * The following cases will be handled by ashr_2
437 * rcx(offset of rsi) rax(offset of rdi) relative offset corresponding case
438 * n(14~15) n -14 1(15 +(n-14) - n) ashr_2
447 TOLOWER (%xmm1, %xmm2)
454 jnz LABEL(less32bytes)
456 UPDATE_STRNCMP_COUNTER
459 mov $16, %rcx /* index for loads */
460 mov $2, %r9d /* byte position left over from less32bytes case */
462 * Setup %r10 value allows us to detect crossing a page boundary.
463 * When %r10 goes positive we have crossed a page boundary and
464 * need to do a nibble.
467 and $0xfff, %r10 /* offset into 4K page */
468 sub $0x1000, %r10 /* subtract 4K pagesize */
473 jg LABEL(nibble_ashr_2)
475 LABEL(gobble_ashr_2):
476 movdqa (%rsi, %rcx), %xmm1
477 movdqa (%rdi, %rcx), %xmm2
483 por %xmm3, %xmm2 /* merge into one 16byte value */
485 palignr $2, %xmm3, %xmm2 /* merge into one 16byte value */
487 TOLOWER (%xmm1, %xmm2)
496 # ifdef USE_AS_STRNCMP
498 jbe LABEL(strcmp_exitz)
505 jg LABEL(nibble_ashr_2) /* cross page boundary */
507 movdqa (%rsi, %rcx), %xmm1
508 movdqa (%rdi, %rcx), %xmm2
514 por %xmm3, %xmm2 /* merge into one 16byte value */
516 palignr $2, %xmm3, %xmm2 /* merge into one 16byte value */
518 TOLOWER (%xmm1, %xmm2)
527 # ifdef USE_AS_STRNCMP
529 jbe LABEL(strcmp_exitz)
534 jmp LABEL(loop_ashr_2)
537 LABEL(nibble_ashr_2):
538 pcmpeqb %xmm3, %xmm0 /* check nibble for null char */
541 jnz LABEL(ashr_2_exittail)
543 # ifdef USE_AS_STRNCMP
545 jbe LABEL(ashr_2_exittail)
550 jmp LABEL(gobble_ashr_2)
553 LABEL(ashr_2_exittail):
554 movdqa (%rsi, %rcx), %xmm1
560 * The following cases will be handled by ashr_3
561 * rcx(offset of rsi) rax(offset of rdi) relative offset corresponding case
562 * n(13~15) n -13 2(15 +(n-13) - n) ashr_3
571 TOLOWER (%xmm1, %xmm2)
578 jnz LABEL(less32bytes)
581 UPDATE_STRNCMP_COUNTER
584 mov $16, %rcx /* index for loads */
585 mov $3, %r9d /* byte position left over from less32bytes case */
587 * Setup %r10 value allows us to detect crossing a page boundary.
588 * When %r10 goes positive we have crossed a page boundary and
589 * need to do a nibble.
592 and $0xfff, %r10 /* offset into 4K page */
593 sub $0x1000, %r10 /* subtract 4K pagesize */
598 jg LABEL(nibble_ashr_3)
600 LABEL(gobble_ashr_3):
601 movdqa (%rsi, %rcx), %xmm1
602 movdqa (%rdi, %rcx), %xmm2
608 por %xmm3, %xmm2 /* merge into one 16byte value */
610 palignr $3, %xmm3, %xmm2 /* merge into one 16byte value */
612 TOLOWER (%xmm1, %xmm2)
621 # ifdef USE_AS_STRNCMP
623 jbe LABEL(strcmp_exitz)
630 jg LABEL(nibble_ashr_3) /* cross page boundary */
632 movdqa (%rsi, %rcx), %xmm1
633 movdqa (%rdi, %rcx), %xmm2
639 por %xmm3, %xmm2 /* merge into one 16byte value */
641 palignr $3, %xmm3, %xmm2 /* merge into one 16byte value */
643 TOLOWER (%xmm1, %xmm2)
652 # ifdef USE_AS_STRNCMP
654 jbe LABEL(strcmp_exitz)
659 jmp LABEL(loop_ashr_3)
662 LABEL(nibble_ashr_3):
663 pcmpeqb %xmm3, %xmm0 /* check nibble for null char */
666 jnz LABEL(ashr_3_exittail)
668 # ifdef USE_AS_STRNCMP
670 jbe LABEL(ashr_3_exittail)
675 jmp LABEL(gobble_ashr_3)
678 LABEL(ashr_3_exittail):
679 movdqa (%rsi, %rcx), %xmm1
685 * The following cases will be handled by ashr_4
686 * rcx(offset of rsi) rax(offset of rdi) relative offset corresponding case
687 * n(12~15) n -12 3(15 +(n-12) - n) ashr_4
696 TOLOWER (%xmm1, %xmm2)
703 jnz LABEL(less32bytes)
706 UPDATE_STRNCMP_COUNTER
709 mov $16, %rcx /* index for loads */
710 mov $4, %r9d /* byte position left over from less32bytes case */
712 * Setup %r10 value allows us to detect crossing a page boundary.
713 * When %r10 goes positive we have crossed a page boundary and
714 * need to do a nibble.
717 and $0xfff, %r10 /* offset into 4K page */
718 sub $0x1000, %r10 /* subtract 4K pagesize */
723 jg LABEL(nibble_ashr_4)
725 LABEL(gobble_ashr_4):
726 movdqa (%rsi, %rcx), %xmm1
727 movdqa (%rdi, %rcx), %xmm2
733 por %xmm3, %xmm2 /* merge into one 16byte value */
735 palignr $4, %xmm3, %xmm2 /* merge into one 16byte value */
737 TOLOWER (%xmm1, %xmm2)
746 # ifdef USE_AS_STRNCMP
748 jbe LABEL(strcmp_exitz)
755 jg LABEL(nibble_ashr_4) /* cross page boundary */
757 movdqa (%rsi, %rcx), %xmm1
758 movdqa (%rdi, %rcx), %xmm2
764 por %xmm3, %xmm2 /* merge into one 16byte value */
766 palignr $4, %xmm3, %xmm2 /* merge into one 16byte value */
768 TOLOWER (%xmm1, %xmm2)
777 # ifdef USE_AS_STRNCMP
779 jbe LABEL(strcmp_exitz)
784 jmp LABEL(loop_ashr_4)
787 LABEL(nibble_ashr_4):
788 pcmpeqb %xmm3, %xmm0 /* check nibble for null char */
791 jnz LABEL(ashr_4_exittail)
793 # ifdef USE_AS_STRNCMP
795 jbe LABEL(ashr_4_exittail)
800 jmp LABEL(gobble_ashr_4)
803 LABEL(ashr_4_exittail):
804 movdqa (%rsi, %rcx), %xmm1
810 * The following cases will be handled by ashr_5
811 * rcx(offset of rsi) rax(offset of rdi) relative offset corresponding case
812 * n(11~15) n - 11 4(15 +(n-11) - n) ashr_5
821 TOLOWER (%xmm1, %xmm2)
828 jnz LABEL(less32bytes)
831 UPDATE_STRNCMP_COUNTER
834 mov $16, %rcx /* index for loads */
835 mov $5, %r9d /* byte position left over from less32bytes case */
837 * Setup %r10 value allows us to detect crossing a page boundary.
838 * When %r10 goes positive we have crossed a page boundary and
839 * need to do a nibble.
842 and $0xfff, %r10 /* offset into 4K page */
843 sub $0x1000, %r10 /* subtract 4K pagesize */
848 jg LABEL(nibble_ashr_5)
850 LABEL(gobble_ashr_5):
851 movdqa (%rsi, %rcx), %xmm1
852 movdqa (%rdi, %rcx), %xmm2
858 por %xmm3, %xmm2 /* merge into one 16byte value */
860 palignr $5, %xmm3, %xmm2 /* merge into one 16byte value */
862 TOLOWER (%xmm1, %xmm2)
871 # ifdef USE_AS_STRNCMP
873 jbe LABEL(strcmp_exitz)
880 jg LABEL(nibble_ashr_5) /* cross page boundary */
882 movdqa (%rsi, %rcx), %xmm1
883 movdqa (%rdi, %rcx), %xmm2
889 por %xmm3, %xmm2 /* merge into one 16byte value */
891 palignr $5, %xmm3, %xmm2 /* merge into one 16byte value */
893 TOLOWER (%xmm1, %xmm2)
902 # ifdef USE_AS_STRNCMP
904 jbe LABEL(strcmp_exitz)
909 jmp LABEL(loop_ashr_5)
912 LABEL(nibble_ashr_5):
913 pcmpeqb %xmm3, %xmm0 /* check nibble for null char */
916 jnz LABEL(ashr_5_exittail)
918 # ifdef USE_AS_STRNCMP
920 jbe LABEL(ashr_5_exittail)
925 jmp LABEL(gobble_ashr_5)
928 LABEL(ashr_5_exittail):
929 movdqa (%rsi, %rcx), %xmm1
935 * The following cases will be handled by ashr_6
936 * rcx(offset of rsi) rax(offset of rdi) relative offset corresponding case
937 * n(10~15) n - 10 5(15 +(n-10) - n) ashr_6
946 TOLOWER (%xmm1, %xmm2)
953 jnz LABEL(less32bytes)
956 UPDATE_STRNCMP_COUNTER
959 mov $16, %rcx /* index for loads */
960 mov $6, %r9d /* byte position left over from less32bytes case */
962 * Setup %r10 value allows us to detect crossing a page boundary.
963 * When %r10 goes positive we have crossed a page boundary and
964 * need to do a nibble.
967 and $0xfff, %r10 /* offset into 4K page */
968 sub $0x1000, %r10 /* subtract 4K pagesize */
973 jg LABEL(nibble_ashr_6)
975 LABEL(gobble_ashr_6):
976 movdqa (%rsi, %rcx), %xmm1
977 movdqa (%rdi, %rcx), %xmm2
983 por %xmm3, %xmm2 /* merge into one 16byte value */
985 palignr $6, %xmm3, %xmm2 /* merge into one 16byte value */
987 TOLOWER (%xmm1, %xmm2)
996 # ifdef USE_AS_STRNCMP
998 jbe LABEL(strcmp_exitz)
1005 jg LABEL(nibble_ashr_6) /* cross page boundary */
1007 movdqa (%rsi, %rcx), %xmm1
1008 movdqa (%rdi, %rcx), %xmm2
1014 por %xmm3, %xmm2 /* merge into one 16byte value */
1016 palignr $6, %xmm3, %xmm2 /* merge into one 16byte value */
1018 TOLOWER (%xmm1, %xmm2)
1020 pcmpeqb %xmm1, %xmm0
1021 pcmpeqb %xmm2, %xmm1
1023 pmovmskb %xmm1, %edx
1027 # ifdef USE_AS_STRNCMP
1029 jbe LABEL(strcmp_exitz)
1034 jmp LABEL(loop_ashr_6)
1037 LABEL(nibble_ashr_6):
1038 pcmpeqb %xmm3, %xmm0 /* check nibble for null char */
1039 pmovmskb %xmm0, %edx
1041 jnz LABEL(ashr_6_exittail)
1043 # ifdef USE_AS_STRNCMP
1045 jbe LABEL(ashr_6_exittail)
1050 jmp LABEL(gobble_ashr_6)
1053 LABEL(ashr_6_exittail):
1054 movdqa (%rsi, %rcx), %xmm1
1057 jmp LABEL(aftertail)
1060 * The following cases will be handled by ashr_7
1061 * rcx(offset of rsi) rax(offset of rdi) relative offset corresponding case
1062 * n(9~15) n - 9 6(15 +(n - 9) - n) ashr_7
1067 movdqa (%rdi), %xmm2
1068 movdqa (%rsi), %xmm1
1069 pcmpeqb %xmm1, %xmm0
1071 TOLOWER (%xmm1, %xmm2)
1072 pcmpeqb %xmm1, %xmm2
1074 pmovmskb %xmm2, %r9d
1078 jnz LABEL(less32bytes)
1079 movdqa (%rdi), %xmm3
1081 UPDATE_STRNCMP_COUNTER
1084 mov $16, %rcx /* index for loads */
1085 mov $7, %r9d /* byte position left over from less32bytes case */
1087 * Setup %r10 value allows us to detect crossing a page boundary.
1088 * When %r10 goes positive we have crossed a page boundary and
1089 * need to do a nibble.
1092 and $0xfff, %r10 /* offset into 4K page */
1093 sub $0x1000, %r10 /* subtract 4K pagesize */
1098 jg LABEL(nibble_ashr_7)
1100 LABEL(gobble_ashr_7):
1101 movdqa (%rsi, %rcx), %xmm1
1102 movdqa (%rdi, %rcx), %xmm2
1108 por %xmm3, %xmm2 /* merge into one 16byte value */
1110 palignr $7, %xmm3, %xmm2 /* merge into one 16byte value */
1112 TOLOWER (%xmm1, %xmm2)
1114 pcmpeqb %xmm1, %xmm0
1115 pcmpeqb %xmm2, %xmm1
1117 pmovmskb %xmm1, %edx
1121 # ifdef USE_AS_STRNCMP
1123 jbe LABEL(strcmp_exitz)
1130 jg LABEL(nibble_ashr_7) /* cross page boundary */
1132 movdqa (%rsi, %rcx), %xmm1
1133 movdqa (%rdi, %rcx), %xmm2
1139 por %xmm3, %xmm2 /* merge into one 16byte value */
1141 palignr $7, %xmm3, %xmm2 /* merge into one 16byte value */
1143 TOLOWER (%xmm1, %xmm2)
1145 pcmpeqb %xmm1, %xmm0
1146 pcmpeqb %xmm2, %xmm1
1148 pmovmskb %xmm1, %edx
1152 # ifdef USE_AS_STRNCMP
1154 jbe LABEL(strcmp_exitz)
1159 jmp LABEL(loop_ashr_7)
1162 LABEL(nibble_ashr_7):
1163 pcmpeqb %xmm3, %xmm0 /* check nibble for null char */
1164 pmovmskb %xmm0, %edx
1166 jnz LABEL(ashr_7_exittail)
1168 # ifdef USE_AS_STRNCMP
1170 jbe LABEL(ashr_7_exittail)
1175 jmp LABEL(gobble_ashr_7)
1178 LABEL(ashr_7_exittail):
1179 movdqa (%rsi, %rcx), %xmm1
1182 jmp LABEL(aftertail)
1185 * The following cases will be handled by ashr_8
1186 * rcx(offset of rsi) rax(offset of rdi) relative offset corresponding case
1187 * n(8~15) n - 8 7(15 +(n - 8) - n) ashr_8
1192 movdqa (%rdi), %xmm2
1193 movdqa (%rsi), %xmm1
1194 pcmpeqb %xmm1, %xmm0
1196 TOLOWER (%xmm1, %xmm2)
1197 pcmpeqb %xmm1, %xmm2
1199 pmovmskb %xmm2, %r9d
1203 jnz LABEL(less32bytes)
1204 movdqa (%rdi), %xmm3
1206 UPDATE_STRNCMP_COUNTER
1209 mov $16, %rcx /* index for loads */
1210 mov $8, %r9d /* byte position left over from less32bytes case */
1212 * Setup %r10 value allows us to detect crossing a page boundary.
1213 * When %r10 goes positive we have crossed a page boundary and
1214 * need to do a nibble.
1217 and $0xfff, %r10 /* offset into 4K page */
1218 sub $0x1000, %r10 /* subtract 4K pagesize */
1223 jg LABEL(nibble_ashr_8)
1225 LABEL(gobble_ashr_8):
1226 movdqa (%rsi, %rcx), %xmm1
1227 movdqa (%rdi, %rcx), %xmm2
1233 por %xmm3, %xmm2 /* merge into one 16byte value */
1235 palignr $8, %xmm3, %xmm2 /* merge into one 16byte value */
1237 TOLOWER (%xmm1, %xmm2)
1239 pcmpeqb %xmm1, %xmm0
1240 pcmpeqb %xmm2, %xmm1
1242 pmovmskb %xmm1, %edx
1246 # ifdef USE_AS_STRNCMP
1248 jbe LABEL(strcmp_exitz)
1255 jg LABEL(nibble_ashr_8) /* cross page boundary */
1257 movdqa (%rsi, %rcx), %xmm1
1258 movdqa (%rdi, %rcx), %xmm2
1264 por %xmm3, %xmm2 /* merge into one 16byte value */
1266 palignr $8, %xmm3, %xmm2 /* merge into one 16byte value */
1268 TOLOWER (%xmm1, %xmm2)
1270 pcmpeqb %xmm1, %xmm0
1271 pcmpeqb %xmm2, %xmm1
1273 pmovmskb %xmm1, %edx
1277 # ifdef USE_AS_STRNCMP
1279 jbe LABEL(strcmp_exitz)
1284 jmp LABEL(loop_ashr_8)
1287 LABEL(nibble_ashr_8):
1288 pcmpeqb %xmm3, %xmm0 /* check nibble for null char */
1289 pmovmskb %xmm0, %edx
1291 jnz LABEL(ashr_8_exittail)
1293 # ifdef USE_AS_STRNCMP
1295 jbe LABEL(ashr_8_exittail)
1300 jmp LABEL(gobble_ashr_8)
1303 LABEL(ashr_8_exittail):
1304 movdqa (%rsi, %rcx), %xmm1
1307 jmp LABEL(aftertail)
1310 * The following cases will be handled by ashr_9
1311 * rcx(offset of rsi) rax(offset of rdi) relative offset corresponding case
1312 * n(7~15) n - 7 8(15 +(n - 7) - n) ashr_9
1317 movdqa (%rdi), %xmm2
1318 movdqa (%rsi), %xmm1
1319 pcmpeqb %xmm1, %xmm0
1321 TOLOWER (%xmm1, %xmm2)
1322 pcmpeqb %xmm1, %xmm2
1324 pmovmskb %xmm2, %r9d
1328 jnz LABEL(less32bytes)
1329 movdqa (%rdi), %xmm3
1331 UPDATE_STRNCMP_COUNTER
1334 mov $16, %rcx /* index for loads */
1335 mov $9, %r9d /* byte position left over from less32bytes case */
1337 * Setup %r10 value allows us to detect crossing a page boundary.
1338 * When %r10 goes positive we have crossed a page boundary and
1339 * need to do a nibble.
1342 and $0xfff, %r10 /* offset into 4K page */
1343 sub $0x1000, %r10 /* subtract 4K pagesize */
1348 jg LABEL(nibble_ashr_9)
1350 LABEL(gobble_ashr_9):
1351 movdqa (%rsi, %rcx), %xmm1
1352 movdqa (%rdi, %rcx), %xmm2
1358 por %xmm3, %xmm2 /* merge into one 16byte value */
1360 palignr $9, %xmm3, %xmm2 /* merge into one 16byte value */
1362 TOLOWER (%xmm1, %xmm2)
1364 pcmpeqb %xmm1, %xmm0
1365 pcmpeqb %xmm2, %xmm1
1367 pmovmskb %xmm1, %edx
1371 # ifdef USE_AS_STRNCMP
1373 jbe LABEL(strcmp_exitz)
1380 jg LABEL(nibble_ashr_9) /* cross page boundary */
1382 movdqa (%rsi, %rcx), %xmm1
1383 movdqa (%rdi, %rcx), %xmm2
1389 por %xmm3, %xmm2 /* merge into one 16byte value */
1391 palignr $9, %xmm3, %xmm2 /* merge into one 16byte value */
1393 TOLOWER (%xmm1, %xmm2)
1395 pcmpeqb %xmm1, %xmm0
1396 pcmpeqb %xmm2, %xmm1
1398 pmovmskb %xmm1, %edx
1402 # ifdef USE_AS_STRNCMP
1404 jbe LABEL(strcmp_exitz)
1408 movdqa %xmm4, %xmm3 /* store for next cycle */
1409 jmp LABEL(loop_ashr_9)
1412 LABEL(nibble_ashr_9):
1413 pcmpeqb %xmm3, %xmm0 /* check nibble for null char */
1414 pmovmskb %xmm0, %edx
1416 jnz LABEL(ashr_9_exittail)
1418 # ifdef USE_AS_STRNCMP
1420 jbe LABEL(ashr_9_exittail)
1425 jmp LABEL(gobble_ashr_9)
1428 LABEL(ashr_9_exittail):
1429 movdqa (%rsi, %rcx), %xmm1
1432 jmp LABEL(aftertail)
1435 * The following cases will be handled by ashr_10
1436 * rcx(offset of rsi) rax(offset of rdi) relative offset corresponding case
1437 * n(6~15) n - 6 9(15 +(n - 6) - n) ashr_10
1442 movdqa (%rdi), %xmm2
1443 movdqa (%rsi), %xmm1
1444 pcmpeqb %xmm1, %xmm0
1446 TOLOWER (%xmm1, %xmm2)
1447 pcmpeqb %xmm1, %xmm2
1449 pmovmskb %xmm2, %r9d
1453 jnz LABEL(less32bytes)
1454 movdqa (%rdi), %xmm3
1456 UPDATE_STRNCMP_COUNTER
1459 mov $16, %rcx /* index for loads */
1460 mov $10, %r9d /* byte position left over from less32bytes case */
1462 * Setup %r10 value allows us to detect crossing a page boundary.
1463 * When %r10 goes positive we have crossed a page boundary and
1464 * need to do a nibble.
1467 and $0xfff, %r10 /* offset into 4K page */
1468 sub $0x1000, %r10 /* subtract 4K pagesize */
1471 LABEL(loop_ashr_10):
1473 jg LABEL(nibble_ashr_10)
1475 LABEL(gobble_ashr_10):
1476 movdqa (%rsi, %rcx), %xmm1
1477 movdqa (%rdi, %rcx), %xmm2
1483 por %xmm3, %xmm2 /* merge into one 16byte value */
1485 palignr $10, %xmm3, %xmm2 /* merge into one 16byte value */
1487 TOLOWER (%xmm1, %xmm2)
1489 pcmpeqb %xmm1, %xmm0
1490 pcmpeqb %xmm2, %xmm1
1492 pmovmskb %xmm1, %edx
1496 # ifdef USE_AS_STRNCMP
1498 jbe LABEL(strcmp_exitz)
1505 jg LABEL(nibble_ashr_10) /* cross page boundary */
1507 movdqa (%rsi, %rcx), %xmm1
1508 movdqa (%rdi, %rcx), %xmm2
1514 por %xmm3, %xmm2 /* merge into one 16byte value */
1516 palignr $10, %xmm3, %xmm2 /* merge into one 16byte value */
1518 TOLOWER (%xmm1, %xmm2)
1520 pcmpeqb %xmm1, %xmm0
1521 pcmpeqb %xmm2, %xmm1
1523 pmovmskb %xmm1, %edx
1527 # ifdef USE_AS_STRNCMP
1529 jbe LABEL(strcmp_exitz)
1534 jmp LABEL(loop_ashr_10)
1537 LABEL(nibble_ashr_10):
1538 pcmpeqb %xmm3, %xmm0 /* check nibble for null char */
1539 pmovmskb %xmm0, %edx
1541 jnz LABEL(ashr_10_exittail)
1543 # ifdef USE_AS_STRNCMP
1545 jbe LABEL(ashr_10_exittail)
1550 jmp LABEL(gobble_ashr_10)
1553 LABEL(ashr_10_exittail):
1554 movdqa (%rsi, %rcx), %xmm1
1557 jmp LABEL(aftertail)
1560 * The following cases will be handled by ashr_11
1561 * rcx(offset of rsi) rax(offset of rdi) relative offset corresponding case
1562 * n(5~15) n - 5 10(15 +(n - 5) - n) ashr_11
1567 movdqa (%rdi), %xmm2
1568 movdqa (%rsi), %xmm1
1569 pcmpeqb %xmm1, %xmm0
1571 TOLOWER (%xmm1, %xmm2)
1572 pcmpeqb %xmm1, %xmm2
1574 pmovmskb %xmm2, %r9d
1578 jnz LABEL(less32bytes)
1579 movdqa (%rdi), %xmm3
1581 UPDATE_STRNCMP_COUNTER
1584 mov $16, %rcx /* index for loads */
1585 mov $11, %r9d /* byte position left over from less32bytes case */
1587 * Setup %r10 value allows us to detect crossing a page boundary.
1588 * When %r10 goes positive we have crossed a page boundary and
1589 * need to do a nibble.
1592 and $0xfff, %r10 /* offset into 4K page */
1593 sub $0x1000, %r10 /* subtract 4K pagesize */
1596 LABEL(loop_ashr_11):
1598 jg LABEL(nibble_ashr_11)
1600 LABEL(gobble_ashr_11):
1601 movdqa (%rsi, %rcx), %xmm1
1602 movdqa (%rdi, %rcx), %xmm2
1608 por %xmm3, %xmm2 /* merge into one 16byte value */
1610 palignr $11, %xmm3, %xmm2 /* merge into one 16byte value */
1612 TOLOWER (%xmm1, %xmm2)
1614 pcmpeqb %xmm1, %xmm0
1615 pcmpeqb %xmm2, %xmm1
1617 pmovmskb %xmm1, %edx
1621 # ifdef USE_AS_STRNCMP
1623 jbe LABEL(strcmp_exitz)
1630 jg LABEL(nibble_ashr_11) /* cross page boundary */
1632 movdqa (%rsi, %rcx), %xmm1
1633 movdqa (%rdi, %rcx), %xmm2
1639 por %xmm3, %xmm2 /* merge into one 16byte value */
1641 palignr $11, %xmm3, %xmm2 /* merge into one 16byte value */
1643 TOLOWER (%xmm1, %xmm2)
1645 pcmpeqb %xmm1, %xmm0
1646 pcmpeqb %xmm2, %xmm1
1648 pmovmskb %xmm1, %edx
1652 # ifdef USE_AS_STRNCMP
1654 jbe LABEL(strcmp_exitz)
1659 jmp LABEL(loop_ashr_11)
1662 LABEL(nibble_ashr_11):
1663 pcmpeqb %xmm3, %xmm0 /* check nibble for null char */
1664 pmovmskb %xmm0, %edx
1666 jnz LABEL(ashr_11_exittail)
1668 # ifdef USE_AS_STRNCMP
1670 jbe LABEL(ashr_11_exittail)
1675 jmp LABEL(gobble_ashr_11)
1678 LABEL(ashr_11_exittail):
1679 movdqa (%rsi, %rcx), %xmm1
1682 jmp LABEL(aftertail)
1685 * The following cases will be handled by ashr_12
1686 * rcx(offset of rsi) rax(offset of rdi) relative offset corresponding case
1687 * n(4~15) n - 4 11(15 +(n - 4) - n) ashr_12
1692 movdqa (%rdi), %xmm2
1693 movdqa (%rsi), %xmm1
1694 pcmpeqb %xmm1, %xmm0
1696 TOLOWER (%xmm1, %xmm2)
1697 pcmpeqb %xmm1, %xmm2
1699 pmovmskb %xmm2, %r9d
1703 jnz LABEL(less32bytes)
1704 movdqa (%rdi), %xmm3
1706 UPDATE_STRNCMP_COUNTER
1709 mov $16, %rcx /* index for loads */
1710 mov $12, %r9d /* byte position left over from less32bytes case */
1712 * Setup %r10 value allows us to detect crossing a page boundary.
1713 * When %r10 goes positive we have crossed a page boundary and
1714 * need to do a nibble.
1717 and $0xfff, %r10 /* offset into 4K page */
1718 sub $0x1000, %r10 /* subtract 4K pagesize */
1721 LABEL(loop_ashr_12):
1723 jg LABEL(nibble_ashr_12)
1725 LABEL(gobble_ashr_12):
1726 movdqa (%rsi, %rcx), %xmm1
1727 movdqa (%rdi, %rcx), %xmm2
1733 por %xmm3, %xmm2 /* merge into one 16byte value */
1735 palignr $12, %xmm3, %xmm2 /* merge into one 16byte value */
1737 TOLOWER (%xmm1, %xmm2)
1739 pcmpeqb %xmm1, %xmm0
1740 pcmpeqb %xmm2, %xmm1
1742 pmovmskb %xmm1, %edx
1746 # ifdef USE_AS_STRNCMP
1748 jbe LABEL(strcmp_exitz)
1755 jg LABEL(nibble_ashr_12) /* cross page boundary */
1757 movdqa (%rsi, %rcx), %xmm1
1758 movdqa (%rdi, %rcx), %xmm2
1764 por %xmm3, %xmm2 /* merge into one 16byte value */
1766 palignr $12, %xmm3, %xmm2 /* merge into one 16byte value */
1768 TOLOWER (%xmm1, %xmm2)
1770 pcmpeqb %xmm1, %xmm0
1771 pcmpeqb %xmm2, %xmm1
1773 pmovmskb %xmm1, %edx
1777 # ifdef USE_AS_STRNCMP
1779 jbe LABEL(strcmp_exitz)
1784 jmp LABEL(loop_ashr_12)
1787 LABEL(nibble_ashr_12):
1788 pcmpeqb %xmm3, %xmm0 /* check nibble for null char */
1789 pmovmskb %xmm0, %edx
1791 jnz LABEL(ashr_12_exittail)
1793 # ifdef USE_AS_STRNCMP
1795 jbe LABEL(ashr_12_exittail)
1800 jmp LABEL(gobble_ashr_12)
1803 LABEL(ashr_12_exittail):
1804 movdqa (%rsi, %rcx), %xmm1
1807 jmp LABEL(aftertail)
1810 * The following cases will be handled by ashr_13
1811 * rcx(offset of rsi) rax(offset of rdi) relative offset corresponding case
1812 * n(3~15) n - 3 12(15 +(n - 3) - n) ashr_13
1817 movdqa (%rdi), %xmm2
1818 movdqa (%rsi), %xmm1
1819 pcmpeqb %xmm1, %xmm0
1821 TOLOWER (%xmm1, %xmm2)
1822 pcmpeqb %xmm1, %xmm2
1824 pmovmskb %xmm2, %r9d
1828 jnz LABEL(less32bytes)
1829 movdqa (%rdi), %xmm3
1831 UPDATE_STRNCMP_COUNTER
1834 mov $16, %rcx /* index for loads */
1835 mov $13, %r9d /* byte position left over from less32bytes case */
1837 * Setup %r10 value allows us to detect crossing a page boundary.
1838 * When %r10 goes positive we have crossed a page boundary and
1839 * need to do a nibble.
1842 and $0xfff, %r10 /* offset into 4K page */
1843 sub $0x1000, %r10 /* subtract 4K pagesize */
1846 LABEL(loop_ashr_13):
1848 jg LABEL(nibble_ashr_13)
1850 LABEL(gobble_ashr_13):
1851 movdqa (%rsi, %rcx), %xmm1
1852 movdqa (%rdi, %rcx), %xmm2
1858 por %xmm3, %xmm2 /* merge into one 16byte value */
1860 palignr $13, %xmm3, %xmm2 /* merge into one 16byte value */
1862 TOLOWER (%xmm1, %xmm2)
1864 pcmpeqb %xmm1, %xmm0
1865 pcmpeqb %xmm2, %xmm1
1867 pmovmskb %xmm1, %edx
1871 # ifdef USE_AS_STRNCMP
1873 jbe LABEL(strcmp_exitz)
1880 jg LABEL(nibble_ashr_13) /* cross page boundary */
1882 movdqa (%rsi, %rcx), %xmm1
1883 movdqa (%rdi, %rcx), %xmm2
1889 por %xmm3, %xmm2 /* merge into one 16byte value */
1891 palignr $13, %xmm3, %xmm2 /* merge into one 16byte value */
1893 TOLOWER (%xmm1, %xmm2)
1895 pcmpeqb %xmm1, %xmm0
1896 pcmpeqb %xmm2, %xmm1
1898 pmovmskb %xmm1, %edx
1902 # ifdef USE_AS_STRNCMP
1904 jbe LABEL(strcmp_exitz)
1909 jmp LABEL(loop_ashr_13)
1912 LABEL(nibble_ashr_13):
1913 pcmpeqb %xmm3, %xmm0 /* check nibble for null char */
1914 pmovmskb %xmm0, %edx
1916 jnz LABEL(ashr_13_exittail)
1918 # ifdef USE_AS_STRNCMP
1920 jbe LABEL(ashr_13_exittail)
1925 jmp LABEL(gobble_ashr_13)
1928 LABEL(ashr_13_exittail):
1929 movdqa (%rsi, %rcx), %xmm1
1932 jmp LABEL(aftertail)
1935 * The following cases will be handled by ashr_14
1936 * rcx(offset of rsi) rax(offset of rdi) relative offset corresponding case
1937 * n(2~15) n - 2 13(15 +(n - 2) - n) ashr_14
1942 movdqa (%rdi), %xmm2
1943 movdqa (%rsi), %xmm1
1944 pcmpeqb %xmm1, %xmm0
1946 TOLOWER (%xmm1, %xmm2)
1947 pcmpeqb %xmm1, %xmm2
1949 pmovmskb %xmm2, %r9d
1953 jnz LABEL(less32bytes)
1954 movdqa (%rdi), %xmm3
1956 UPDATE_STRNCMP_COUNTER
1959 mov $16, %rcx /* index for loads */
1960 mov $14, %r9d /* byte position left over from less32bytes case */
1962 * Setup %r10 value allows us to detect crossing a page boundary.
1963 * When %r10 goes positive we have crossed a page boundary and
1964 * need to do a nibble.
1967 and $0xfff, %r10 /* offset into 4K page */
1968 sub $0x1000, %r10 /* subtract 4K pagesize */
/*
 * Main loop of the shift-14 case.  The body appears twice, so one trip
 * through the loop handles two 16-byte blocks.  Each copy loads the
 * next aligned block of both operands at index %rcx, combines the %rdi
 * block with the bytes carried in %xmm3, and builds a compare mask in
 * %edx.
 * NOTE(review): the instructions that update %rcx, %r10 and %xmm3 and
 * the branch that acts on %edx are not visible here.
 */
1971 LABEL(loop_ashr_14):
/* NOTE(review): the flag-setting instruction for this branch is not
   visible; per the setup comment it fires once %r10 goes positive.  */
1973 jg LABEL(nibble_ashr_14)
/* Re-entry point used by nibble_ashr_14 when no terminator was found.  */
1975 LABEL(gobble_ashr_14):
1976 movdqa (%rsi, %rcx), %xmm1
1977 movdqa (%rdi, %rcx), %xmm2
/* NOTE(review): por and palignr both carry the same "merge" comment;
   presumably they belong to alternative code paths selected by
   preprocessor conditionals that are not visible here -- confirm.  */
1983 por %xmm3, %xmm2 /* merge into one 16byte value */
/* palignr $14: take %xmm2:%xmm3 as one 32-byte value (%xmm3 low),
   shift right by 14 bytes and keep the low 16 bytes in %xmm2, i.e. the
   top 2 bytes of %xmm3 followed by the low 14 bytes of %xmm2.  */
1985 palignr $14, %xmm3, %xmm2 /* merge into one 16byte value */
/* TOLOWER is a macro defined outside this view.  */
1987 TOLOWER (%xmm1, %xmm2)
/* %xmm0 = bytes of %xmm1 equal to %xmm0 (null bytes if %xmm0 was zero
   -- TODO confirm); %xmm1 = bytes where both operands are equal.  */
1989 pcmpeqb %xmm1, %xmm0
1990 pcmpeqb %xmm2, %xmm1
/* One mask bit per byte of %xmm1, collected into %edx.  */
1992 pmovmskb %xmm1, %edx
/* strncmp builds only.  NOTE(review): the counter update feeding this
   jbe and the matching "# endif" are not visible here.  */
1996 # ifdef USE_AS_STRNCMP
1998 jbe LABEL(strcmp_exitz)
/* Second copy of the body: same steps for the following block.  */
2005 jg LABEL(nibble_ashr_14) /* cross page boundary */
2007 movdqa (%rsi, %rcx), %xmm1
2008 movdqa (%rdi, %rcx), %xmm2
2014 por %xmm3, %xmm2 /* merge into one 16byte value */
2016 palignr $14, %xmm3, %xmm2 /* merge into one 16byte value */
2018 TOLOWER (%xmm1, %xmm2)
2020 pcmpeqb %xmm1, %xmm0
2021 pcmpeqb %xmm2, %xmm1
2023 pmovmskb %xmm1, %edx
/* strncmp builds only; same caveat as for the first copy.  */
2027 # ifdef USE_AS_STRNCMP
2029 jbe LABEL(strcmp_exitz)
/* Back to the top for the next pair of blocks.  */
2034 jmp LABEL(loop_ashr_14)
/*
 * Page-boundary slow path for the shift-14 case, reached through the
 * "jg LABEL(nibble_ashr_14)" branches in loop_ashr_14.  Before another
 * 16-byte load is issued, the bytes still held in %xmm3 are checked
 * for a terminator.
 */
2037 LABEL(nibble_ashr_14):
/* Each byte of %xmm0 becomes 0xff where %xmm3 equals %xmm0; this is a
   null-byte test only if %xmm0 is all zeroes on entry -- TODO confirm.  */
2038 pcmpeqb %xmm3, %xmm0 /* check nibble for null char */
/* Gather the top bit of every byte of %xmm0 into %edx.  */
2039 pmovmskb %xmm0, %edx
/* NOTE(review): the instruction that sets the flags for this branch is
   not visible here; presumably it tests the mask bits that belong to
   the carried-over bytes -- confirm.  */
2041 jnz LABEL(ashr_14_exittail)
/* strncmp builds only.  NOTE(review): the comparison feeding the jbe
   below and the matching "# endif" are not visible here.  */
2043 # ifdef USE_AS_STRNCMP
2045 jbe LABEL(ashr_14_exittail)
/* Neither exit was taken: resume the normal load/compare sequence.  */
2050 jmp LABEL(gobble_ashr_14)
/*
 * Exit stub for the shift-14 case, targeted by the branches in
 * nibble_ashr_14.  It loads the 16 bytes of the second operand at
 * index %rcx and hands over to aftertail.
 */
2053 LABEL(ashr_14_exittail):
/* movdqa needs a 16-byte aligned address.  */
2054 movdqa (%rsi, %rcx), %xmm1
/* NOTE(review): any adjustment of %xmm0/%xmm3 that aftertail relies on
   is not visible between the load and this jump -- confirm.  */
2057 jmp LABEL(aftertail)
2060 * The following cases will be handled by ashr_15
2061 * rcx(offset of rsi) rax(offset of rdi) relative offset corresponding case
2062 * n(1~15) n - 1 14(15 +(n - 1) - n) ashr_15
/*
 * Entry sequence of the ashr_15 case described in the comment above.
 * NOTE(review): the LABEL(ashr_15) line itself is not visible here.
 * It compares the first aligned 16-byte blocks of both operands and
 * then sets up %xmm3, %rcx, %r9d and %r10 for loop_ashr_15.
 */
/* Aligned 16-byte loads from the current %rdi and %rsi positions.  */
2067 movdqa (%rdi), %xmm2
2068 movdqa (%rsi), %xmm1
/* Byte-wise compare of %xmm1 against %xmm0, result left in %xmm0;
   presumably %xmm0 is zero so this marks null bytes -- TODO confirm.  */
2069 pcmpeqb %xmm1, %xmm0
/* TOLOWER is a macro defined outside this view.  */
2071 TOLOWER (%xmm1, %xmm2)
/* 0xff in every byte of %xmm2 that equals the matching byte of %xmm1.  */
2072 pcmpeqb %xmm1, %xmm2
/* One mask bit per byte, collected into %r9d.  */
2074 pmovmskb %xmm2, %r9d
/* NOTE(review): the instruction that sets the flags for this branch is
   not visible here.  */
2078 jnz LABEL(less32bytes)
/* Keep the first 16 bytes at %rdi in %xmm3; the loop merges them with
   the next block.  */
2080 movdqa (%rdi), %xmm3
/* In strncmp builds this macro recomputes the remaining count kept in
   %r11 and branches to strcmp_exitz when it is used up; in the other
   builds it expands to nothing.  */
2082 UPDATE_STRNCMP_COUNTER
2085 mov $16, %rcx /* index for loads */
2086 mov $15, %r9d /* byte position left over from less32bytes case */
2088 * Setup %r10 value allows us to detect crossing a page boundary.
2089 * When %r10 goes positive we have crossed a page boundary and
2090 * need to do a nibble.
/* NOTE(review): the instruction that gives %r10 its initial value is
   not visible here.  After the two steps below %r10 holds
   (value & 0xfff) - 0x1000, i.e. a number in [-0x1000, -1].  */
2093 and $0xfff, %r10 /* offset into 4K page */
2095 sub $0x1000, %r10 /* subtract 4K pagesize */
/*
 * Main loop of the shift-15 case.  The body appears twice, so one trip
 * through the loop handles two 16-byte blocks.  Each copy loads the
 * next aligned block of both operands at index %rcx, combines the %rdi
 * block with the byte carried in %xmm3, and builds a compare mask in
 * %edx.
 * NOTE(review): the instructions that update %rcx, %r10 and %xmm3 and
 * the branch that acts on %edx are not visible here.
 */
2098 LABEL(loop_ashr_15):
/* NOTE(review): the flag-setting instruction for this branch is not
   visible; per the setup comment it fires once %r10 goes positive.  */
2100 jg LABEL(nibble_ashr_15)
/* Re-entry point used by nibble_ashr_15 when no terminator was found.  */
2102 LABEL(gobble_ashr_15):
2103 movdqa (%rsi, %rcx), %xmm1
2104 movdqa (%rdi, %rcx), %xmm2
/* NOTE(review): por and palignr both carry the same "merge" comment;
   presumably they belong to alternative code paths selected by
   preprocessor conditionals that are not visible here -- confirm.  */
2110 por %xmm3, %xmm2 /* merge into one 16byte value */
/* palignr $15: take %xmm2:%xmm3 as one 32-byte value (%xmm3 low),
   shift right by 15 bytes and keep the low 16 bytes in %xmm2, i.e. the
   top byte of %xmm3 followed by the low 15 bytes of %xmm2.  */
2112 palignr $15, %xmm3, %xmm2 /* merge into one 16byte value */
/* TOLOWER is a macro defined outside this view.  */
2114 TOLOWER (%xmm1, %xmm2)
/* %xmm0 = bytes of %xmm1 equal to %xmm0 (null bytes if %xmm0 was zero
   -- TODO confirm); %xmm1 = bytes where both operands are equal.  */
2116 pcmpeqb %xmm1, %xmm0
2117 pcmpeqb %xmm2, %xmm1
/* One mask bit per byte of %xmm1, collected into %edx.  */
2119 pmovmskb %xmm1, %edx
/* strncmp builds only.  NOTE(review): the counter update feeding this
   jbe and the matching "# endif" are not visible here.  */
2123 # ifdef USE_AS_STRNCMP
2125 jbe LABEL(strcmp_exitz)
/* Second copy of the body: same steps for the following block.  */
2132 jg LABEL(nibble_ashr_15) /* cross page boundary */
2134 movdqa (%rsi, %rcx), %xmm1
2135 movdqa (%rdi, %rcx), %xmm2
2141 por %xmm3, %xmm2 /* merge into one 16byte value */
2143 palignr $15, %xmm3, %xmm2 /* merge into one 16byte value */
2145 TOLOWER (%xmm1, %xmm2)
2147 pcmpeqb %xmm1, %xmm0
2148 pcmpeqb %xmm2, %xmm1
2150 pmovmskb %xmm1, %edx
/* strncmp builds only; same caveat as for the first copy.  */
2154 # ifdef USE_AS_STRNCMP
2156 jbe LABEL(strcmp_exitz)
/* Back to the top for the next pair of blocks.  */
2161 jmp LABEL(loop_ashr_15)
/*
 * Page-boundary slow path for the shift-15 case, reached through the
 * "jg LABEL(nibble_ashr_15)" branches in loop_ashr_15.  Before another
 * 16-byte load is issued, the byte still held in %xmm3 is checked for
 * a terminator.
 */
2164 LABEL(nibble_ashr_15):
/* Each byte of %xmm0 becomes 0xff where %xmm3 equals %xmm0; this is a
   null-byte test only if %xmm0 is all zeroes on entry -- TODO confirm.  */
2165 pcmpeqb %xmm3, %xmm0 /* check nibble for null char */
/* Gather the top bit of every byte of %xmm0 into %edx.  */
2166 pmovmskb %xmm0, %edx
/* NOTE(review): the instruction that sets the flags for this branch is
   not visible here; presumably it tests the mask bit that belongs to
   the carried-over byte -- confirm.  */
2168 jnz LABEL(ashr_15_exittail)
/* strncmp builds only.  This branch is "je" while the corresponding
   branch in nibble_ashr_14 is "jbe".  NOTE(review): the flag-setting
   instruction and the matching "# endif" are not visible here, so the
   two cases may simply use different comparisons -- confirm.  */
2170 # ifdef USE_AS_STRNCMP
2172 je LABEL(ashr_15_exittail)
/* Neither exit was taken: resume the normal load/compare sequence.  */
2177 jmp LABEL(gobble_ashr_15)
/*
 * Exit stub for the shift-15 case, followed by the exit code that the
 * other cases jump into (aftertail, less32bytes, strcmp_exitz).
 * NOTE(review): apart from ashr_15_exittail and strcmp_exitz, the
 * label lines of this region are not visible here, and neither is the
 * final subtraction/return, so the exact entry points have to be
 * confirmed against the complete file.
 */
2180 LABEL(ashr_15_exittail):
/* 16 bytes of the second operand at index %rcx (aligned load).  */
2181 movdqa (%rsi, %rcx), %xmm1
/* TOLOWER is a macro defined outside this view.  */
2187 TOLOWER (%xmm1, %xmm3)
/* 0xff in every byte of %xmm1 that equals the matching byte of %xmm3,
   then one mask bit per byte collected into %edx.  */
2188 pcmpeqb %xmm3, %xmm1
2190 pmovmskb %xmm1, %edx
/* %rax = %r9 + %rcx - 16.  */
2195 lea -16(%r9, %rcx), %rax /* locate the exact offset for rdi */
/* Advance both pointers to the blocks that were being compared.  */
2197 lea (%rdi, %rax), %rdi /* locate the exact address for first operand(rdi) */
2198 lea (%rsi, %rcx), %rsi /* locate the exact address for second operand(rsi) */
/* NOTE(review): the test of %r8d that should guard this swap is not
   visible here.  */
2201 xchg %rsi, %rdi /* recover original order according to flag(%r8d) */
/* bsf yields the index of the lowest set bit of %rdx.  */
2206 bsf %rdx, %rdx /* find and store bit index in %rdx */
/* strncmp builds only.  NOTE(review): the counter update feeding this
   jbe and the matching "# endif" are not visible here.  */
2208 # ifdef USE_AS_STRNCMP
2210 jbe LABEL(strcmp_exitz)
/* Fetch the byte at index %rdx from each string, zero-extended.  */
2212 movzbl (%rsi, %rdx), %ecx
2213 movzbl (%rdi, %rdx), %eax
/* strcasecmp_l builds: replace both bytes by the 32-bit table entries
   found at _nl_C_LC_CTYPE_tolower + 128*4, indexed by byte value.
   NOTE(review): the matching "# endif" is not visible here.  */
2215 # ifdef USE_AS_STRCASECMP_L
2216 leaq _nl_C_LC_CTYPE_tolower+128*4(%rip), %rdx
2217 movl (%rdx,%rcx,4), %ecx
2218 movl (%rdx,%rax,4), %eax
/* Target of the strncmp "count used up" branches.  NOTE(review): its
   body is not visible here; presumably it returns 0 -- confirm.  */
2224 LABEL(strcmp_exitz):
/* Closes the function opened by the matching ENTRY macro.  */
2235 END (BP_SYM (STRCMP))
/*
 * Dispatch table for the ashr_N handlers, placed in read-only data.
 * Each entry is a 32-bit value holding the distance from the start of
 * the table to one handler, so the table contains no absolute
 * addresses.  Entries 0..14 refer to ashr_1..ashr_15 and entry 15
 * refers to ashr_0.
 * NOTE(review): the code that indexes this table is not visible here;
 * presumably it adds the table address back to the loaded entry --
 * confirm against the dispatch sequence.
 */
2237 .section .rodata,"a",@progbits
2239 LABEL(unaligned_table):
2240 .int LABEL(ashr_1) - LABEL(unaligned_table)
2241 .int LABEL(ashr_2) - LABEL(unaligned_table)
2242 .int LABEL(ashr_3) - LABEL(unaligned_table)
2243 .int LABEL(ashr_4) - LABEL(unaligned_table)
2244 .int LABEL(ashr_5) - LABEL(unaligned_table)
2245 .int LABEL(ashr_6) - LABEL(unaligned_table)
2246 .int LABEL(ashr_7) - LABEL(unaligned_table)
2247 .int LABEL(ashr_8) - LABEL(unaligned_table)
2248 .int LABEL(ashr_9) - LABEL(unaligned_table)
2249 .int LABEL(ashr_10) - LABEL(unaligned_table)
2250 .int LABEL(ashr_11) - LABEL(unaligned_table)
2251 .int LABEL(ashr_12) - LABEL(unaligned_table)
2252 .int LABEL(ashr_13) - LABEL(unaligned_table)
2253 .int LABEL(ashr_14) - LABEL(unaligned_table)
2254 .int LABEL(ashr_15) - LABEL(unaligned_table)
/* ashr_0 comes last, at index 15.  */
2255 .int LABEL(ashr_0) - LABEL(unaligned_table)
2256 #endif /* NOT_IN_libc */
/* libc_hidden_builtin_def is a macro defined outside this view.  */
2257 libc_hidden_builtin_def (STRCMP)