1 /* Highly optimized version for x86-64.
2 Copyright (C) 1999, 2000, 2002, 2003, 2005, 2009, 2010, 2011
3 Free Software Foundation, Inc.
4 This file is part of the GNU C Library.
5 Based on i686 version contributed by Ulrich Drepper
6 <drepper@cygnus.com>, 1999.
7 Updated with SSE2 support contributed by Intel Corporation.
9 The GNU C Library is free software; you can redistribute it and/or
10 modify it under the terms of the GNU Lesser General Public
11 License as published by the Free Software Foundation; either
12 version 2.1 of the License, or (at your option) any later version.
14 The GNU C Library is distributed in the hope that it will be useful,
15 but WITHOUT ANY WARRANTY; without even the implied warranty of
16 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17 Lesser General Public License for more details.
19 You should have received a copy of the GNU Lesser General Public
20 License along with the GNU C Library; if not, write to the Free
21 Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
25 #include "asm-syntax.h"
29 #undef UPDATE_STRNCMP_COUNTER
36 /* The simplified code below is not set up to handle strncmp() so far.
37 Should this become necessary it has to be implemented. For now
38 just report the problem. */
40 # error "strncmp not implemented so far"
43 /* Since the counter, %r11, is unsigned, we branch to strcmp_exitz
44 if the new counter > the old one or is 0. */
45 # define UPDATE_STRNCMP_COUNTER \
46 /* calculate left number to compare */ \
47 lea -16(%rcx, %r11), %r9; \
49 jb LABEL(strcmp_exitz); \
51 je LABEL(strcmp_exitz); \
54 #elif defined USE_AS_STRCASECMP_L
55 # include "locale-defines.h"
57 /* No support for strcasecmp outside libc so far since it is not needed. */
59 # error "strcasecmp_l not implemented so far"
62 # define UPDATE_STRNCMP_COUNTER
63 #elif defined USE_AS_STRNCASECMP_L
64 # include "locale-defines.h"
66 /* No support for strncasecmp outside libc so far since it is not needed. */
68 # error "strncasecmp_l not implemented so far"
71 # define UPDATE_STRNCMP_COUNTER \
72 /* calculate left number to compare */ \
73 lea -16(%rcx, %r11), %r9; \
75 jb LABEL(strcmp_exitz); \
77 je LABEL(strcmp_exitz); \
80 # define UPDATE_STRNCMP_COUNTER
82 # define STRCMP strcmp
89 .section .text.ssse3,"ax",@progbits
92 #ifdef USE_AS_STRCASECMP_L
94 # define ENTRY2(name) ENTRY (name)
95 # define END2(name) END (name)
99 movq __libc_tsd_LOCALE@gottpoff(%rip),%rax
102 // XXX The 5 bytes below should be placed before the function
104 .byte 0x0f,0x1f,0x44,0x00,0x00
106 # ifndef NO_NOLOCALE_ALIAS
107 weak_alias (__strcasecmp, strcasecmp)
108 libc_hidden_def (__strcasecmp)
110 /* FALLTHROUGH to strcasecmp_l. */
111 #elif defined USE_AS_STRNCASECMP_L
113 # define ENTRY2(name) ENTRY (name)
114 # define END2(name) END (name)
117 ENTRY2 (__strncasecmp)
118 movq __libc_tsd_LOCALE@gottpoff(%rip),%rax
121 // XXX The 5 bytes below should be placed before the function
123 .byte 0x0f,0x1f,0x44,0x00,0x00
125 # ifndef NO_NOLOCALE_ALIAS
126 weak_alias (__strncasecmp, strncasecmp)
127 libc_hidden_def (__strncasecmp)
129 /* FALLTHROUGH to strncasecmp_l. */
132 ENTRY (BP_SYM (STRCMP))
134 /* Simple version since we can't use SSE registers in ld.so. */
135 L(oop): movb (%rdi), %al /* al = current byte of the string at %rdi */
146 L(neq): movl $1, %eax /* eax = 1; presumably the not-equal exit path - confirm */
150 END (BP_SYM (STRCMP))
151 #else /* NOT_IN_libc */
152 # ifdef USE_AS_STRCASECMP_L
153 /* We have to fall back on the C implementation for locales
154 with encodings not matching ASCII for single bytes. */
155 # if LOCALE_T___LOCALES != 0 || LC_CTYPE != 0
156 movq LOCALE_T___LOCALES+LC_CTYPE*8(%rdx), %rax
160 testl $1, LOCALE_DATA_VALUES+_NL_CTYPE_NONASCII_CASE*SIZEOF_VALUES(%rax)
161 jne __strcasecmp_l_nonascii
162 # elif defined USE_AS_STRNCASECMP_L
163 /* We have to fall back on the C implementation for locales
164 with encodings not matching ASCII for single bytes. */
165 # if LOCALE_T___LOCALES != 0 || LC_CTYPE != 0
166 movq LOCALE_T___LOCALES+LC_CTYPE*8(%rcx), %rax
170 testl $1, LOCALE_DATA_VALUES+_NL_CTYPE_NONASCII_CASE*SIZEOF_VALUES(%rax)
171 jne __strncasecmp_l_nonascii
175 * This implementation uses SSE to compare up to 16 bytes at a time.
177 # if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
179 je LABEL(strcmp_exitz)
186 /* Use 64bit AND here to avoid long NOP padding. */
187 and $0x3f, %rcx /* rsi alignment in cache line */
188 and $0x3f, %rax /* rdi alignment in cache line */
189 # if defined USE_AS_STRCASECMP_L || defined USE_AS_STRNCASECMP_L
190 .section .rodata.cst16,"aM",@progbits,16
193 .quad 0x4040404040404040
194 .quad 0x4040404040404040
196 .quad 0x5b5b5b5b5b5b5b5b
197 .quad 0x5b5b5b5b5b5b5b5b
199 .quad 0x2020202020202020
200 .quad 0x2020202020202020
202 movdqa .Lbelowupper(%rip), %xmm5
203 # define UCLOW_reg %xmm5
204 movdqa .Ltopupper(%rip), %xmm6
205 # define UCHIGH_reg %xmm6
206 movdqa .Ltouppermask(%rip), %xmm7
207 # define LCQWORD_reg %xmm7
210 ja LABEL(crosscache) /* rsi: 16-byte load will cross cache line */
212 ja LABEL(crosscache) /* rdi: 16-byte load will cross cache line */
215 movhpd 8(%rdi), %xmm1
216 movhpd 8(%rsi), %xmm2
217 # if defined USE_AS_STRCASECMP_L || defined USE_AS_STRNCASECMP_L
218 # define TOLOWER(reg1, reg2) \
219 movdqa reg1, %xmm8; \
220 movdqa UCHIGH_reg, %xmm9; \
221 movdqa reg2, %xmm10; \
222 movdqa UCHIGH_reg, %xmm11; \
223 pcmpgtb UCLOW_reg, %xmm8; \
224 pcmpgtb reg1, %xmm9; \
225 pcmpgtb UCLOW_reg, %xmm10; \
226 pcmpgtb reg2, %xmm11; \
228 pand %xmm11, %xmm10; \
229 pand LCQWORD_reg, %xmm8; \
230 pand LCQWORD_reg, %xmm10; \
233 TOLOWER (%xmm1, %xmm2)
235 # define TOLOWER(reg1, reg2)
237 pxor %xmm0, %xmm0 /* clear %xmm0 for null char checks */
238 pcmpeqb %xmm1, %xmm0 /* Any null chars? */
239 pcmpeqb %xmm2, %xmm1 /* compare first 16 bytes for equality */
240 psubb %xmm0, %xmm1 /* packed sub of comparison results */
242 sub $0xffff, %edx /* if first 16 bytes are same, edx == 0xffff */
243 jnz LABEL(less16bytes) /* If not, find different value or null char */
244 # if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
246 jbe LABEL(strcmp_exitz) /* finish comparison */
248 add $16, %rsi /* prepare to search next 16 bytes */
249 add $16, %rdi /* prepare to search next 16 bytes */
252 * Determine source and destination string offsets from 16-byte alignment.
253 * Use relative offset difference between the two to determine which case
258 and $0xfffffffffffffff0, %rsi /* round %rsi down to a 16-byte boundary */
259 and $0xfffffffffffffff0, %rdi /* round %rdi down to a 16-byte boundary */
260 mov $0xffff, %edx /* for equivalent offset */
262 and $0xf, %ecx /* offset of rsi within its 16-byte block */
263 and $0xf, %eax /* offset of rdi within its 16-byte block */
265 je LABEL(ashr_0) /* rsi and rdi have the same relative offset */
267 mov %edx, %r8d /* r8d is offset flag for exit tail */
273 lea LABEL(unaligned_table)(%rip), %r10 /* r10 = address of unaligned_table */
274 movslq (%r10, %r9,4), %r9 /* r9 = sign-extended 32-bit table entry number %r9 */
275 lea (%r10, %r9), %r10 /* r10 = table address + entry, i.e. entries are table-relative */
276 jmp *%r10 /* jump to corresponding case */
279 * The following cases will be handled by ashr_0
280 * rcx(offset of rsi) rax(offset of rdi) relative offset corresponding case
281 * n(0~15) n(0~15) 15(15+ n-n) ashr_0
287 pxor %xmm0, %xmm0 /* clear %xmm0 for null char check */
288 pcmpeqb %xmm1, %xmm0 /* Any null chars? */
289 # if !defined USE_AS_STRCASECMP_L && !defined USE_AS_STRNCASECMP_L
290 pcmpeqb (%rdi), %xmm1 /* compare 16 bytes for equality */
293 TOLOWER (%xmm1, %xmm2)
294 pcmpeqb %xmm2, %xmm1 /* compare 16 bytes for equality */
296 psubb %xmm0, %xmm1 /* packed sub of comparison results*/
298 shr %cl, %edx /* adjust 0xffff for offset */
299 shr %cl, %r9d /* adjust for 16-byte offset */
302 * edx must be the same as r9d if the remaining (16-rcx) bytes are equal to
303 * those starting at (16-rax) and no null char was seen.
305 jne LABEL(less32bytes) /* mismatch or null char */
306 UPDATE_STRNCMP_COUNTER
309 pxor %xmm0, %xmm0 /* clear xmm0, may have changed above */
312 * Now both strings are aligned at 16-byte boundary. Loop over strings
313 * checking 32-bytes per iteration.
317 movdqa (%rsi, %rcx), %xmm1
318 movdqa (%rdi, %rcx), %xmm2
319 TOLOWER (%xmm1, %xmm2)
326 jnz LABEL(exit) /* mismatch or null char seen */
328 # if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
330 jbe LABEL(strcmp_exitz)
333 movdqa (%rsi, %rcx), %xmm1
334 movdqa (%rdi, %rcx), %xmm2
335 TOLOWER (%xmm1, %xmm2)
343 # if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
345 jbe LABEL(strcmp_exitz)
348 jmp LABEL(loop_ashr_0)
351 * The following cases will be handled by ashr_1
352 * rcx(offset of rsi) rax(offset of rdi) relative offset corresponding case
353 * n(15) n -15 0(15 +(n-15) - n) ashr_1
360 pcmpeqb %xmm1, %xmm0 /* Any null chars? */
361 pslldq $15, %xmm2 /* shift first string to align with second */
362 TOLOWER (%xmm1, %xmm2)
363 pcmpeqb %xmm1, %xmm2 /* compare 16 bytes for equality */
364 psubb %xmm0, %xmm2 /* packed sub of comparison results*/
366 shr %cl, %edx /* adjust 0xffff for offset */
367 shr %cl, %r9d /* adjust for 16-byte offset */
369 jnz LABEL(less32bytes) /* mismatch or null char seen */
371 UPDATE_STRNCMP_COUNTER
374 mov $16, %rcx /* index for loads*/
375 mov $1, %r9d /* byte position left over from less32bytes case */
377 * Setting up %r10 this way lets us detect crossing a page boundary.
378 * When %r10 goes positive we have crossed a page boundary and
379 * need to do a nibble.
382 and $0xfff, %r10 /* offset into 4K page */
383 sub $0x1000, %r10 /* subtract 4K pagesize */
388 jg LABEL(nibble_ashr_1) /* cross page boundary */
390 LABEL(gobble_ashr_1):
391 movdqa (%rsi, %rcx), %xmm1
392 movdqa (%rdi, %rcx), %xmm2
393 movdqa %xmm2, %xmm4 /* store for next cycle */
398 por %xmm3, %xmm2 /* merge into one 16byte value */
400 palignr $1, %xmm3, %xmm2 /* merge into one 16byte value */
402 TOLOWER (%xmm1, %xmm2)
411 # if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
413 jbe LABEL(strcmp_exitz)
419 jg LABEL(nibble_ashr_1) /* cross page boundary */
421 movdqa (%rsi, %rcx), %xmm1
422 movdqa (%rdi, %rcx), %xmm2
423 movdqa %xmm2, %xmm4 /* store for next cycle */
428 por %xmm3, %xmm2 /* merge into one 16byte value */
430 palignr $1, %xmm3, %xmm2 /* merge into one 16byte value */
432 TOLOWER (%xmm1, %xmm2)
441 # if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
443 jbe LABEL(strcmp_exitz)
447 jmp LABEL(loop_ashr_1)
450 * Nibble avoids loads across page boundary. This is to avoid a potential
451 * access into unmapped memory.
454 LABEL(nibble_ashr_1):
455 pcmpeqb %xmm3, %xmm0 /* check nibble for null char */
458 jnz LABEL(ashr_1_exittail) /* null char found */
460 # if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
462 jbe LABEL(ashr_1_exittail)
466 sub $0x1000, %r10 /* subtract 4K from %r10 */
467 jmp LABEL(gobble_ashr_1)
470 * Once a null char is found, determine if there is a string mismatch
471 * before the null char.
474 LABEL(ashr_1_exittail):
475 movdqa (%rsi, %rcx), %xmm1
481 * The following cases will be handled by ashr_2
482 * rcx(offset of rsi) rax(offset of rdi) relative offset corresponding case
483 * n(14~15) n -14 1(15 +(n-14) - n) ashr_2
492 TOLOWER (%xmm1, %xmm2)
499 jnz LABEL(less32bytes)
501 UPDATE_STRNCMP_COUNTER
504 mov $16, %rcx /* index for loads */
505 mov $2, %r9d /* byte position left over from less32bytes case */
507 * Setting up %r10 this way lets us detect crossing a page boundary.
508 * When %r10 goes positive we have crossed a page boundary and
509 * need to do a nibble.
512 and $0xfff, %r10 /* offset into 4K page */
513 sub $0x1000, %r10 /* subtract 4K pagesize */
518 jg LABEL(nibble_ashr_2)
520 LABEL(gobble_ashr_2):
521 movdqa (%rsi, %rcx), %xmm1
522 movdqa (%rdi, %rcx), %xmm2
528 por %xmm3, %xmm2 /* merge into one 16byte value */
530 palignr $2, %xmm3, %xmm2 /* merge into one 16byte value */
532 TOLOWER (%xmm1, %xmm2)
541 # if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
543 jbe LABEL(strcmp_exitz)
550 jg LABEL(nibble_ashr_2) /* cross page boundary */
552 movdqa (%rsi, %rcx), %xmm1
553 movdqa (%rdi, %rcx), %xmm2
559 por %xmm3, %xmm2 /* merge into one 16byte value */
561 palignr $2, %xmm3, %xmm2 /* merge into one 16byte value */
563 TOLOWER (%xmm1, %xmm2)
572 # if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
574 jbe LABEL(strcmp_exitz)
579 jmp LABEL(loop_ashr_2)
582 LABEL(nibble_ashr_2):
583 pcmpeqb %xmm3, %xmm0 /* check nibble for null char */
586 jnz LABEL(ashr_2_exittail)
588 # if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
590 jbe LABEL(ashr_2_exittail)
595 jmp LABEL(gobble_ashr_2)
598 LABEL(ashr_2_exittail):
599 movdqa (%rsi, %rcx), %xmm1
605 * The following cases will be handled by ashr_3
606 * rcx(offset of rsi) rax(offset of rdi) relative offset corresponding case
607 * n(13~15) n -13 2(15 +(n-13) - n) ashr_3
616 TOLOWER (%xmm1, %xmm2)
623 jnz LABEL(less32bytes)
626 UPDATE_STRNCMP_COUNTER
629 mov $16, %rcx /* index for loads */
630 mov $3, %r9d /* byte position left over from less32bytes case */
632 * Setting up %r10 this way lets us detect crossing a page boundary.
633 * When %r10 goes positive we have crossed a page boundary and
634 * need to do a nibble.
637 and $0xfff, %r10 /* offset into 4K page */
638 sub $0x1000, %r10 /* subtract 4K pagesize */
643 jg LABEL(nibble_ashr_3)
645 LABEL(gobble_ashr_3):
646 movdqa (%rsi, %rcx), %xmm1
647 movdqa (%rdi, %rcx), %xmm2
653 por %xmm3, %xmm2 /* merge into one 16byte value */
655 palignr $3, %xmm3, %xmm2 /* merge into one 16byte value */
657 TOLOWER (%xmm1, %xmm2)
666 # if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
668 jbe LABEL(strcmp_exitz)
675 jg LABEL(nibble_ashr_3) /* cross page boundary */
677 movdqa (%rsi, %rcx), %xmm1
678 movdqa (%rdi, %rcx), %xmm2
684 por %xmm3, %xmm2 /* merge into one 16byte value */
686 palignr $3, %xmm3, %xmm2 /* merge into one 16byte value */
688 TOLOWER (%xmm1, %xmm2)
697 # if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
699 jbe LABEL(strcmp_exitz)
704 jmp LABEL(loop_ashr_3)
707 LABEL(nibble_ashr_3):
708 pcmpeqb %xmm3, %xmm0 /* check nibble for null char */
711 jnz LABEL(ashr_3_exittail)
713 # if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
715 jbe LABEL(ashr_3_exittail)
720 jmp LABEL(gobble_ashr_3)
723 LABEL(ashr_3_exittail):
724 movdqa (%rsi, %rcx), %xmm1
730 * The following cases will be handled by ashr_4
731 * rcx(offset of rsi) rax(offset of rdi) relative offset corresponding case
732 * n(12~15) n -12 3(15 +(n-12) - n) ashr_4
741 TOLOWER (%xmm1, %xmm2)
748 jnz LABEL(less32bytes)
751 UPDATE_STRNCMP_COUNTER
754 mov $16, %rcx /* index for loads */
755 mov $4, %r9d /* byte position left over from less32bytes case */
757 * Setting up %r10 this way lets us detect crossing a page boundary.
758 * When %r10 goes positive we have crossed a page boundary and
759 * need to do a nibble.
762 and $0xfff, %r10 /* offset into 4K page */
763 sub $0x1000, %r10 /* subtract 4K pagesize */
768 jg LABEL(nibble_ashr_4)
770 LABEL(gobble_ashr_4):
771 movdqa (%rsi, %rcx), %xmm1
772 movdqa (%rdi, %rcx), %xmm2
778 por %xmm3, %xmm2 /* merge into one 16byte value */
780 palignr $4, %xmm3, %xmm2 /* merge into one 16byte value */
782 TOLOWER (%xmm1, %xmm2)
791 # if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
793 jbe LABEL(strcmp_exitz)
800 jg LABEL(nibble_ashr_4) /* cross page boundary */
802 movdqa (%rsi, %rcx), %xmm1
803 movdqa (%rdi, %rcx), %xmm2
809 por %xmm3, %xmm2 /* merge into one 16byte value */
811 palignr $4, %xmm3, %xmm2 /* merge into one 16byte value */
813 TOLOWER (%xmm1, %xmm2)
822 # if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
824 jbe LABEL(strcmp_exitz)
829 jmp LABEL(loop_ashr_4)
832 LABEL(nibble_ashr_4):
833 pcmpeqb %xmm3, %xmm0 /* check nibble for null char */
836 jnz LABEL(ashr_4_exittail)
838 # if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
840 jbe LABEL(ashr_4_exittail)
845 jmp LABEL(gobble_ashr_4)
848 LABEL(ashr_4_exittail):
849 movdqa (%rsi, %rcx), %xmm1
855 * The following cases will be handled by ashr_5
856 * rcx(offset of rsi) rax(offset of rdi) relative offset corresponding case
857 * n(11~15) n - 11 4(15 +(n-11) - n) ashr_5
866 TOLOWER (%xmm1, %xmm2)
873 jnz LABEL(less32bytes)
876 UPDATE_STRNCMP_COUNTER
879 mov $16, %rcx /* index for loads */
880 mov $5, %r9d /* byte position left over from less32bytes case */
882 * Setting up %r10 this way lets us detect crossing a page boundary.
883 * When %r10 goes positive we have crossed a page boundary and
884 * need to do a nibble.
887 and $0xfff, %r10 /* offset into 4K page */
888 sub $0x1000, %r10 /* subtract 4K pagesize */
893 jg LABEL(nibble_ashr_5)
895 LABEL(gobble_ashr_5):
896 movdqa (%rsi, %rcx), %xmm1
897 movdqa (%rdi, %rcx), %xmm2
903 por %xmm3, %xmm2 /* merge into one 16byte value */
905 palignr $5, %xmm3, %xmm2 /* merge into one 16byte value */
907 TOLOWER (%xmm1, %xmm2)
916 # if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
918 jbe LABEL(strcmp_exitz)
925 jg LABEL(nibble_ashr_5) /* cross page boundary */
927 movdqa (%rsi, %rcx), %xmm1
928 movdqa (%rdi, %rcx), %xmm2
934 por %xmm3, %xmm2 /* merge into one 16byte value */
936 palignr $5, %xmm3, %xmm2 /* merge into one 16byte value */
938 TOLOWER (%xmm1, %xmm2)
947 # if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
949 jbe LABEL(strcmp_exitz)
954 jmp LABEL(loop_ashr_5)
957 LABEL(nibble_ashr_5):
958 pcmpeqb %xmm3, %xmm0 /* check nibble for null char */
961 jnz LABEL(ashr_5_exittail)
963 # if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
965 jbe LABEL(ashr_5_exittail)
970 jmp LABEL(gobble_ashr_5)
973 LABEL(ashr_5_exittail):
974 movdqa (%rsi, %rcx), %xmm1
980 * The following cases will be handled by ashr_6
981 * rcx(offset of rsi) rax(offset of rdi) relative offset corresponding case
982 * n(10~15) n - 10 5(15 +(n-10) - n) ashr_6
991 TOLOWER (%xmm1, %xmm2)
998 jnz LABEL(less32bytes)
1001 UPDATE_STRNCMP_COUNTER
1004 mov $16, %rcx /* index for loads */
1005 mov $6, %r9d /* byte position left over from less32bytes case */
1007 * Setting up %r10 this way lets us detect crossing a page boundary.
1008 * When %r10 goes positive we have crossed a page boundary and
1009 * need to do a nibble.
1012 and $0xfff, %r10 /* offset into 4K page */
1013 sub $0x1000, %r10 /* subtract 4K pagesize */
1018 jg LABEL(nibble_ashr_6)
1020 LABEL(gobble_ashr_6):
1021 movdqa (%rsi, %rcx), %xmm1
1022 movdqa (%rdi, %rcx), %xmm2
1028 por %xmm3, %xmm2 /* merge into one 16byte value */
1030 palignr $6, %xmm3, %xmm2 /* merge into one 16byte value */
1032 TOLOWER (%xmm1, %xmm2)
1034 pcmpeqb %xmm1, %xmm0
1035 pcmpeqb %xmm2, %xmm1
1037 pmovmskb %xmm1, %edx
1041 # if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
1043 jbe LABEL(strcmp_exitz)
1050 jg LABEL(nibble_ashr_6) /* cross page boundary */
1052 movdqa (%rsi, %rcx), %xmm1
1053 movdqa (%rdi, %rcx), %xmm2
1059 por %xmm3, %xmm2 /* merge into one 16byte value */
1061 palignr $6, %xmm3, %xmm2 /* merge into one 16byte value */
1063 TOLOWER (%xmm1, %xmm2)
1065 pcmpeqb %xmm1, %xmm0
1066 pcmpeqb %xmm2, %xmm1
1068 pmovmskb %xmm1, %edx
1072 # if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
1074 jbe LABEL(strcmp_exitz)
1079 jmp LABEL(loop_ashr_6)
1082 LABEL(nibble_ashr_6):
1083 pcmpeqb %xmm3, %xmm0 /* check nibble for null char */
1084 pmovmskb %xmm0, %edx
1086 jnz LABEL(ashr_6_exittail)
1088 # if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
1090 jbe LABEL(ashr_6_exittail)
1095 jmp LABEL(gobble_ashr_6)
1098 LABEL(ashr_6_exittail):
1099 movdqa (%rsi, %rcx), %xmm1
1102 jmp LABEL(aftertail)
1105 * The following cases will be handled by ashr_7
1106 * rcx(offset of rsi) rax(offset of rdi) relative offset corresponding case
1107 * n(9~15) n - 9 6(15 +(n - 9) - n) ashr_7
1112 movdqa (%rdi), %xmm2
1113 movdqa (%rsi), %xmm1
1114 pcmpeqb %xmm1, %xmm0
1116 TOLOWER (%xmm1, %xmm2)
1117 pcmpeqb %xmm1, %xmm2
1119 pmovmskb %xmm2, %r9d
1123 jnz LABEL(less32bytes)
1124 movdqa (%rdi), %xmm3
1126 UPDATE_STRNCMP_COUNTER
1129 mov $16, %rcx /* index for loads */
1130 mov $7, %r9d /* byte position left over from less32bytes case */
1132 * Setting up %r10 this way lets us detect crossing a page boundary.
1133 * When %r10 goes positive we have crossed a page boundary and
1134 * need to do a nibble.
1137 and $0xfff, %r10 /* offset into 4K page */
1138 sub $0x1000, %r10 /* subtract 4K pagesize */
1143 jg LABEL(nibble_ashr_7)
1145 LABEL(gobble_ashr_7):
1146 movdqa (%rsi, %rcx), %xmm1
1147 movdqa (%rdi, %rcx), %xmm2
1153 por %xmm3, %xmm2 /* merge into one 16byte value */
1155 palignr $7, %xmm3, %xmm2 /* merge into one 16byte value */
1157 TOLOWER (%xmm1, %xmm2)
1159 pcmpeqb %xmm1, %xmm0
1160 pcmpeqb %xmm2, %xmm1
1162 pmovmskb %xmm1, %edx
1166 # if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
1168 jbe LABEL(strcmp_exitz)
1175 jg LABEL(nibble_ashr_7) /* cross page boundary */
1177 movdqa (%rsi, %rcx), %xmm1
1178 movdqa (%rdi, %rcx), %xmm2
1184 por %xmm3, %xmm2 /* merge into one 16byte value */
1186 palignr $7, %xmm3, %xmm2 /* merge into one 16byte value */
1188 TOLOWER (%xmm1, %xmm2)
1190 pcmpeqb %xmm1, %xmm0
1191 pcmpeqb %xmm2, %xmm1
1193 pmovmskb %xmm1, %edx
1197 # if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
1199 jbe LABEL(strcmp_exitz)
1204 jmp LABEL(loop_ashr_7)
1207 LABEL(nibble_ashr_7):
1208 pcmpeqb %xmm3, %xmm0 /* check nibble for null char */
1209 pmovmskb %xmm0, %edx
1211 jnz LABEL(ashr_7_exittail)
1213 # if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
1215 jbe LABEL(ashr_7_exittail)
1220 jmp LABEL(gobble_ashr_7)
1223 LABEL(ashr_7_exittail):
1224 movdqa (%rsi, %rcx), %xmm1
1227 jmp LABEL(aftertail)
1230 * The following cases will be handled by ashr_8
1231 * rcx(offset of rsi) rax(offset of rdi) relative offset corresponding case
1232 * n(8~15) n - 8 7(15 +(n - 8) - n) ashr_8
1237 movdqa (%rdi), %xmm2
1238 movdqa (%rsi), %xmm1
1239 pcmpeqb %xmm1, %xmm0
1241 TOLOWER (%xmm1, %xmm2)
1242 pcmpeqb %xmm1, %xmm2
1244 pmovmskb %xmm2, %r9d
1248 jnz LABEL(less32bytes)
1249 movdqa (%rdi), %xmm3
1251 UPDATE_STRNCMP_COUNTER
1254 mov $16, %rcx /* index for loads */
1255 mov $8, %r9d /* byte position left over from less32bytes case */
1257 * Setting up %r10 this way lets us detect crossing a page boundary.
1258 * When %r10 goes positive we have crossed a page boundary and
1259 * need to do a nibble.
1262 and $0xfff, %r10 /* offset into 4K page */
1263 sub $0x1000, %r10 /* subtract 4K pagesize */
1268 jg LABEL(nibble_ashr_8)
1270 LABEL(gobble_ashr_8):
1271 movdqa (%rsi, %rcx), %xmm1
1272 movdqa (%rdi, %rcx), %xmm2
1278 por %xmm3, %xmm2 /* merge into one 16byte value */
1280 palignr $8, %xmm3, %xmm2 /* merge into one 16byte value */
1282 TOLOWER (%xmm1, %xmm2)
1284 pcmpeqb %xmm1, %xmm0
1285 pcmpeqb %xmm2, %xmm1
1287 pmovmskb %xmm1, %edx
1291 # if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
1293 jbe LABEL(strcmp_exitz)
1300 jg LABEL(nibble_ashr_8) /* cross page boundary */
1302 movdqa (%rsi, %rcx), %xmm1
1303 movdqa (%rdi, %rcx), %xmm2
1309 por %xmm3, %xmm2 /* merge into one 16byte value */
1311 palignr $8, %xmm3, %xmm2 /* merge into one 16byte value */
1313 TOLOWER (%xmm1, %xmm2)
1315 pcmpeqb %xmm1, %xmm0
1316 pcmpeqb %xmm2, %xmm1
1318 pmovmskb %xmm1, %edx
1322 # if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
1324 jbe LABEL(strcmp_exitz)
1329 jmp LABEL(loop_ashr_8)
1332 LABEL(nibble_ashr_8):
1333 pcmpeqb %xmm3, %xmm0 /* check nibble for null char */
1334 pmovmskb %xmm0, %edx
1336 jnz LABEL(ashr_8_exittail)
1338 # if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
1340 jbe LABEL(ashr_8_exittail)
1345 jmp LABEL(gobble_ashr_8)
1348 LABEL(ashr_8_exittail):
1349 movdqa (%rsi, %rcx), %xmm1
1352 jmp LABEL(aftertail)
1355 * The following cases will be handled by ashr_9
1356 * rcx(offset of rsi) rax(offset of rdi) relative offset corresponding case
1357 * n(7~15) n - 7 8(15 +(n - 7) - n) ashr_9
1362 movdqa (%rdi), %xmm2
1363 movdqa (%rsi), %xmm1
1364 pcmpeqb %xmm1, %xmm0
1366 TOLOWER (%xmm1, %xmm2)
1367 pcmpeqb %xmm1, %xmm2
1369 pmovmskb %xmm2, %r9d
1373 jnz LABEL(less32bytes)
1374 movdqa (%rdi), %xmm3
1376 UPDATE_STRNCMP_COUNTER
1379 mov $16, %rcx /* index for loads */
1380 mov $9, %r9d /* byte position left over from less32bytes case */
1382 * Setting up %r10 this way lets us detect crossing a page boundary.
1383 * When %r10 goes positive we have crossed a page boundary and
1384 * need to do a nibble.
1387 and $0xfff, %r10 /* offset into 4K page */
1388 sub $0x1000, %r10 /* subtract 4K pagesize */
1393 jg LABEL(nibble_ashr_9)
1395 LABEL(gobble_ashr_9):
1396 movdqa (%rsi, %rcx), %xmm1
1397 movdqa (%rdi, %rcx), %xmm2
1403 por %xmm3, %xmm2 /* merge into one 16byte value */
1405 palignr $9, %xmm3, %xmm2 /* merge into one 16byte value */
1407 TOLOWER (%xmm1, %xmm2)
1409 pcmpeqb %xmm1, %xmm0
1410 pcmpeqb %xmm2, %xmm1
1412 pmovmskb %xmm1, %edx
1416 # if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
1418 jbe LABEL(strcmp_exitz)
1425 jg LABEL(nibble_ashr_9) /* cross page boundary */
1427 movdqa (%rsi, %rcx), %xmm1
1428 movdqa (%rdi, %rcx), %xmm2
1434 por %xmm3, %xmm2 /* merge into one 16byte value */
1436 palignr $9, %xmm3, %xmm2 /* merge into one 16byte value */
1438 TOLOWER (%xmm1, %xmm2)
1440 pcmpeqb %xmm1, %xmm0
1441 pcmpeqb %xmm2, %xmm1
1443 pmovmskb %xmm1, %edx
1447 # if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
1449 jbe LABEL(strcmp_exitz)
1453 movdqa %xmm4, %xmm3 /* store for next cycle */
1454 jmp LABEL(loop_ashr_9)
1457 LABEL(nibble_ashr_9):
1458 pcmpeqb %xmm3, %xmm0 /* check nibble for null char */
1459 pmovmskb %xmm0, %edx
1461 jnz LABEL(ashr_9_exittail)
1463 # if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
1465 jbe LABEL(ashr_9_exittail)
1470 jmp LABEL(gobble_ashr_9)
1473 LABEL(ashr_9_exittail):
1474 movdqa (%rsi, %rcx), %xmm1
1477 jmp LABEL(aftertail)
1480 * The following cases will be handled by ashr_10
1481 * rcx(offset of rsi) rax(offset of rdi) relative offset corresponding case
1482 * n(6~15) n - 6 9(15 +(n - 6) - n) ashr_10
1487 movdqa (%rdi), %xmm2
1488 movdqa (%rsi), %xmm1
1489 pcmpeqb %xmm1, %xmm0
1491 TOLOWER (%xmm1, %xmm2)
1492 pcmpeqb %xmm1, %xmm2
1494 pmovmskb %xmm2, %r9d
1498 jnz LABEL(less32bytes)
1499 movdqa (%rdi), %xmm3
1501 UPDATE_STRNCMP_COUNTER
1504 mov $16, %rcx /* index for loads */
1505 mov $10, %r9d /* byte position left over from less32bytes case */
1507 * Setting up %r10 this way lets us detect crossing a page boundary.
1508 * When %r10 goes positive we have crossed a page boundary and
1509 * need to do a nibble.
1512 and $0xfff, %r10 /* offset into 4K page */
1513 sub $0x1000, %r10 /* subtract 4K pagesize */
1516 LABEL(loop_ashr_10):
1518 jg LABEL(nibble_ashr_10)
1520 LABEL(gobble_ashr_10):
1521 movdqa (%rsi, %rcx), %xmm1
1522 movdqa (%rdi, %rcx), %xmm2
1528 por %xmm3, %xmm2 /* merge into one 16byte value */
1530 palignr $10, %xmm3, %xmm2 /* merge into one 16byte value */
1532 TOLOWER (%xmm1, %xmm2)
1534 pcmpeqb %xmm1, %xmm0
1535 pcmpeqb %xmm2, %xmm1
1537 pmovmskb %xmm1, %edx
1541 # if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
1543 jbe LABEL(strcmp_exitz)
1550 jg LABEL(nibble_ashr_10) /* cross page boundary */
1552 movdqa (%rsi, %rcx), %xmm1
1553 movdqa (%rdi, %rcx), %xmm2
1559 por %xmm3, %xmm2 /* merge into one 16byte value */
1561 palignr $10, %xmm3, %xmm2 /* merge into one 16byte value */
1563 TOLOWER (%xmm1, %xmm2)
1565 pcmpeqb %xmm1, %xmm0
1566 pcmpeqb %xmm2, %xmm1
1568 pmovmskb %xmm1, %edx
1572 # if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
1574 jbe LABEL(strcmp_exitz)
1579 jmp LABEL(loop_ashr_10)
1582 LABEL(nibble_ashr_10):
1583 pcmpeqb %xmm3, %xmm0 /* check nibble for null char */
1584 pmovmskb %xmm0, %edx
1586 jnz LABEL(ashr_10_exittail)
1588 # if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
1590 jbe LABEL(ashr_10_exittail)
1595 jmp LABEL(gobble_ashr_10)
1598 LABEL(ashr_10_exittail):
1599 movdqa (%rsi, %rcx), %xmm1
1602 jmp LABEL(aftertail)
1605 * The following cases will be handled by ashr_11
1606 * rcx(offset of rsi) rax(offset of rdi) relative offset corresponding case
1607 * n(5~15) n - 5 10(15 +(n - 5) - n) ashr_11
1612 movdqa (%rdi), %xmm2
1613 movdqa (%rsi), %xmm1
1614 pcmpeqb %xmm1, %xmm0
1616 TOLOWER (%xmm1, %xmm2)
1617 pcmpeqb %xmm1, %xmm2
1619 pmovmskb %xmm2, %r9d
1623 jnz LABEL(less32bytes)
1624 movdqa (%rdi), %xmm3
1626 UPDATE_STRNCMP_COUNTER
1629 mov $16, %rcx /* index for loads */
1630 mov $11, %r9d /* byte position left over from less32bytes case */
1632 * Setting up %r10 this way lets us detect crossing a page boundary.
1633 * When %r10 goes positive we have crossed a page boundary and
1634 * need to do a nibble.
1637 and $0xfff, %r10 /* offset into 4K page */
1638 sub $0x1000, %r10 /* subtract 4K pagesize */
1641 LABEL(loop_ashr_11):
1643 jg LABEL(nibble_ashr_11)
1645 LABEL(gobble_ashr_11):
1646 movdqa (%rsi, %rcx), %xmm1
1647 movdqa (%rdi, %rcx), %xmm2
1653 por %xmm3, %xmm2 /* merge into one 16byte value */
1655 palignr $11, %xmm3, %xmm2 /* merge into one 16byte value */
1657 TOLOWER (%xmm1, %xmm2)
1659 pcmpeqb %xmm1, %xmm0
1660 pcmpeqb %xmm2, %xmm1
1662 pmovmskb %xmm1, %edx
1666 # if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
1668 jbe LABEL(strcmp_exitz)
1675 jg LABEL(nibble_ashr_11) /* cross page boundary */
1677 movdqa (%rsi, %rcx), %xmm1
1678 movdqa (%rdi, %rcx), %xmm2
1684 por %xmm3, %xmm2 /* merge into one 16byte value */
1686 palignr $11, %xmm3, %xmm2 /* merge into one 16byte value */
1688 TOLOWER (%xmm1, %xmm2)
1690 pcmpeqb %xmm1, %xmm0
1691 pcmpeqb %xmm2, %xmm1
1693 pmovmskb %xmm1, %edx
1697 # if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
1699 jbe LABEL(strcmp_exitz)
1704 jmp LABEL(loop_ashr_11)
1707 LABEL(nibble_ashr_11):
1708 pcmpeqb %xmm3, %xmm0 /* check nibble for null char */
1709 pmovmskb %xmm0, %edx
1711 jnz LABEL(ashr_11_exittail)
1713 # if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
1715 jbe LABEL(ashr_11_exittail)
1720 jmp LABEL(gobble_ashr_11)
1723 LABEL(ashr_11_exittail):
1724 movdqa (%rsi, %rcx), %xmm1
1727 jmp LABEL(aftertail)
1730 * The following cases will be handled by ashr_12
1731 * rcx(offset of rsi) rax(offset of rdi) relative offset corresponding case
1732 * n(4~15) n - 4 11(15 +(n - 4) - n) ashr_12
1737 movdqa (%rdi), %xmm2
1738 movdqa (%rsi), %xmm1
1739 pcmpeqb %xmm1, %xmm0
1741 TOLOWER (%xmm1, %xmm2)
1742 pcmpeqb %xmm1, %xmm2
1744 pmovmskb %xmm2, %r9d
1748 jnz LABEL(less32bytes)
1749 movdqa (%rdi), %xmm3
1751 UPDATE_STRNCMP_COUNTER
1754 mov $16, %rcx /* index for loads */
1755 mov $12, %r9d /* byte position left over from less32bytes case */
1757 * Setting up %r10 this way lets us detect crossing a page boundary.
1758 * When %r10 goes positive we have crossed a page boundary and
1759 * need to do a nibble.
1762 and $0xfff, %r10 /* offset into 4K page */
1763 sub $0x1000, %r10 /* subtract 4K pagesize */
1766 LABEL(loop_ashr_12):
1768 jg LABEL(nibble_ashr_12)
1770 LABEL(gobble_ashr_12):
1771 movdqa (%rsi, %rcx), %xmm1
1772 movdqa (%rdi, %rcx), %xmm2
1778 por %xmm3, %xmm2 /* merge into one 16byte value */
1780 palignr $12, %xmm3, %xmm2 /* merge into one 16byte value */
1782 TOLOWER (%xmm1, %xmm2)
1784 pcmpeqb %xmm1, %xmm0
1785 pcmpeqb %xmm2, %xmm1
1787 pmovmskb %xmm1, %edx
1791 # if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
1793 jbe LABEL(strcmp_exitz)
1800 jg LABEL(nibble_ashr_12) /* cross page boundary */
1802 movdqa (%rsi, %rcx), %xmm1
1803 movdqa (%rdi, %rcx), %xmm2
1809 por %xmm3, %xmm2 /* merge into one 16byte value */
1811 palignr $12, %xmm3, %xmm2 /* merge into one 16byte value */
1813 TOLOWER (%xmm1, %xmm2)
1815 pcmpeqb %xmm1, %xmm0
1816 pcmpeqb %xmm2, %xmm1
1818 pmovmskb %xmm1, %edx
1822 # if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
1824 jbe LABEL(strcmp_exitz)
1829 jmp LABEL(loop_ashr_12)
1832 LABEL(nibble_ashr_12):
1833 pcmpeqb %xmm3, %xmm0 /* check nibble for null char */
1834 pmovmskb %xmm0, %edx
1836 jnz LABEL(ashr_12_exittail)
1838 # if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
1840 jbe LABEL(ashr_12_exittail)
1845 jmp LABEL(gobble_ashr_12)
1848 LABEL(ashr_12_exittail):
1849 movdqa (%rsi, %rcx), %xmm1
1852 jmp LABEL(aftertail)
1855 * The following cases will be handled by ashr_13
1856 * rcx(offset of rsi) rax(offset of rdi) relative offset corresponding case
1857 * n(3~15) n - 3 12(15 +(n - 3) - n) ashr_13
/* Entry code for the ashr_13 case (the label line itself is not visible
   in this excerpt): compare the first 16-byte chunks of both buffers,
   possibly branch to less32bytes, then set up the registers used by
   loop_ashr_13.
   NOTE(review): some instructions of this sequence appear to be missing
   from the listing; the comments below cover only what is shown.  */
1862 movdqa (%rdi), %xmm2
1863 movdqa (%rsi), %xmm1
/* Byte-wise match of the (%rsi) chunk against %xmm0 (presumably zero,
   i.e. a NUL check -- confirm).  */
1864 pcmpeqb %xmm1, %xmm0
/* TOLOWER is a macro defined elsewhere in the file; presumably it
   case-folds both operands for the case-insensitive variants -- confirm.  */
1866 TOLOWER (%xmm1, %xmm2)
/* Byte-wise equality of the two chunks, collected as a bit mask in %r9d.  */
1867 pcmpeqb %xmm1, %xmm2
1869 pmovmskb %xmm2, %r9d
/* NOTE(review): the flag-setting instruction for this branch is not
   visible here.  */
1873 jnz LABEL(less32bytes)
/* Keep the first (%rdi) chunk in %xmm3; the loop merges it with the next
   chunk.  */
1874 movdqa (%rdi), %xmm3
/* Macro defined near the top of the file: for the length-limited build it
   recomputes the remaining count and may branch to strcmp_exitz; the
   definition shown for the strcasecmp_l build is empty.  */
1876 UPDATE_STRNCMP_COUNTER
1879 mov $16, %rcx /* index for loads */
1880 mov $13, %r9d /* byte position left over from less32bytes case */
1882 * Setup %r10 value allows us to detect crossing a page boundary.
1883 * When %r10 goes positive we have crossed a page boundary and
1884 * need to do a nibble.
/* After these two instructions %r10 lies in the range [-4096, -1].  */
1887 and $0xfff, %r10 /* offset into 4K page */
1888 sub $0x1000, %r10 /* subtract 4K pagesize */
/* ashr_13 main loop: compare the two buffers 16 bytes at a time, with the
   (%rdi) data re-aligned by 13 bytes before it is matched against the
   (%rsi) data.
   NOTE(review): this listing appears to omit some lines (the instructions
   that set the flags tested by jg/jbe, and any preprocessor lines around
   the two merge variants); the comments below cover only what is shown.  */
1891 LABEL(loop_ashr_13):
/* Taken on a positive result; the setup code for this case describes %r10
   going positive as having crossed a page boundary.  */
1893 jg LABEL(nibble_ashr_13)
1895 LABEL(gobble_ashr_13):
/* Aligned 16-byte loads of the chunk at index %rcx from each buffer.  */
1896 movdqa (%rsi, %rcx), %xmm1
1897 movdqa (%rdi, %rcx), %xmm2
/* Two ways of forming the merged value are listed: the final por of a
   shift-based sequence, and a single palignr that yields the top 3 bytes
   of %xmm3 followed by the low 13 bytes of %xmm2.
   NOTE(review): presumably these are alternatives selected by a
   conditional that is not visible here -- confirm.  */
1903 por %xmm3, %xmm2 /* merge into one 16byte value */
1905 palignr $13, %xmm3, %xmm2 /* merge into one 16byte value */
/* TOLOWER is a macro defined elsewhere in the file; presumably it
   case-folds both operands for the case-insensitive variants -- confirm.  */
1907 TOLOWER (%xmm1, %xmm2)
/* %xmm0 becomes the byte-wise match of the (%rsi) chunk against %xmm0's
   previous contents (presumably all zero, making this a NUL check);
   %xmm1 then becomes the byte-wise equality mask of the two chunks.  */
1909 pcmpeqb %xmm1, %xmm0
1910 pcmpeqb %xmm2, %xmm1
/* Collapse the per-byte result into a bit mask in %edx.  */
1912 pmovmskb %xmm1, %edx
/* Length-limited variants only: leave through strcmp_exitz when the
   (not shown) unsigned counter update is below-or-equal.  */
1916 # if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
1918 jbe LABEL(strcmp_exitz)
/* Second copy of the same load/merge/compare sequence; it too diverts to
   the nibble path on a page-boundary crossing.  */
1925 jg LABEL(nibble_ashr_13) /* cross page boundary */
1927 movdqa (%rsi, %rcx), %xmm1
1928 movdqa (%rdi, %rcx), %xmm2
1934 por %xmm3, %xmm2 /* merge into one 16byte value */
1936 palignr $13, %xmm3, %xmm2 /* merge into one 16byte value */
1938 TOLOWER (%xmm1, %xmm2)
1940 pcmpeqb %xmm1, %xmm0
1941 pcmpeqb %xmm2, %xmm1
1943 pmovmskb %xmm1, %edx
1947 # if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
1949 jbe LABEL(strcmp_exitz)
/* Back to the top of the loop.  */
1954 jmp LABEL(loop_ashr_13)
/* Page-boundary path for ashr_13: before the loop body loads the next
   chunk, check the bytes held in %xmm3 for a terminator.  */
1957 LABEL(nibble_ashr_13):
1958 pcmpeqb %xmm3, %xmm0 /* check nibble for null char */
/* Per-byte result of that check as a bit mask in %edx.  */
1959 pmovmskb %xmm0, %edx
/* NOTE(review): the instruction that sets the flags for this jnz is not
   visible in this excerpt; a non-zero result goes to the exit-tail code.  */
1961 jnz LABEL(ashr_13_exittail)
/* Length-limited variants only: also take the exit-tail path when the
   (not shown) unsigned comparison is below-or-equal.  */
1963 # if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
1965 jbe LABEL(ashr_13_exittail)
/* Otherwise resume the normal loop body.  */
1970 jmp LABEL(gobble_ashr_13)
/* Exit path from the ashr_13 nibble check: reload the (%rsi) chunk at
   index %rcx and continue in the aftertail code, which the ashr_12 and
   ashr_14 exit tails also jump to.
   NOTE(review): any instructions between the load and the jump are not
   visible in this excerpt.  */
1973 LABEL(ashr_13_exittail):
1974 movdqa (%rsi, %rcx), %xmm1
1977 jmp LABEL(aftertail)
1980 * The following cases will be handled by ashr_14
1981 * rcx(offset of rsi) rax(offset of rdi) relative offset corresponding case
1982 * n(2~15) n - 2 13(15 +(n - 2) - n) ashr_14
/* Entry code for the ashr_14 case (the label line itself is not visible
   in this excerpt): compare the first 16-byte chunks of both buffers,
   possibly branch to less32bytes, then set up the registers used by
   loop_ashr_14.
   NOTE(review): some instructions of this sequence appear to be missing
   from the listing; the comments below cover only what is shown.  */
1987 movdqa (%rdi), %xmm2
1988 movdqa (%rsi), %xmm1
/* Byte-wise match of the (%rsi) chunk against %xmm0 (presumably zero,
   i.e. a NUL check -- confirm).  */
1989 pcmpeqb %xmm1, %xmm0
/* TOLOWER is a macro defined elsewhere in the file; presumably it
   case-folds both operands for the case-insensitive variants -- confirm.  */
1991 TOLOWER (%xmm1, %xmm2)
/* Byte-wise equality of the two chunks, collected as a bit mask in %r9d.  */
1992 pcmpeqb %xmm1, %xmm2
1994 pmovmskb %xmm2, %r9d
/* NOTE(review): the flag-setting instruction for this branch is not
   visible here.  */
1998 jnz LABEL(less32bytes)
/* Keep the first (%rdi) chunk in %xmm3; the loop merges it with the next
   chunk.  */
1999 movdqa (%rdi), %xmm3
/* Macro defined near the top of the file: for the length-limited build it
   recomputes the remaining count and may branch to strcmp_exitz; the
   definition shown for the strcasecmp_l build is empty.  */
2001 UPDATE_STRNCMP_COUNTER
2004 mov $16, %rcx /* index for loads */
2005 mov $14, %r9d /* byte position left over from less32bytes case */
2007 * Setup %r10 value allows us to detect crossing a page boundary.
2008 * When %r10 goes positive we have crossed a page boundary and
2009 * need to do a nibble.
/* After these two instructions %r10 lies in the range [-4096, -1].  */
2012 and $0xfff, %r10 /* offset into 4K page */
2013 sub $0x1000, %r10 /* subtract 4K pagesize */
/* ashr_14 main loop: compare the two buffers 16 bytes at a time, with the
   (%rdi) data re-aligned by 14 bytes before it is matched against the
   (%rsi) data.
   NOTE(review): this listing appears to omit some lines (the instructions
   that set the flags tested by jg/jbe, and any preprocessor lines around
   the two merge variants); the comments below cover only what is shown.  */
2016 LABEL(loop_ashr_14):
/* Taken on a positive result; the setup code for this case describes %r10
   going positive as having crossed a page boundary.  */
2018 jg LABEL(nibble_ashr_14)
2020 LABEL(gobble_ashr_14):
/* Aligned 16-byte loads of the chunk at index %rcx from each buffer.  */
2021 movdqa (%rsi, %rcx), %xmm1
2022 movdqa (%rdi, %rcx), %xmm2
/* Two ways of forming the merged value are listed: the final por of a
   shift-based sequence, and a single palignr that yields the top 2 bytes
   of %xmm3 followed by the low 14 bytes of %xmm2.
   NOTE(review): presumably these are alternatives selected by a
   conditional that is not visible here -- confirm.  */
2028 por %xmm3, %xmm2 /* merge into one 16byte value */
2030 palignr $14, %xmm3, %xmm2 /* merge into one 16byte value */
/* TOLOWER is a macro defined elsewhere in the file; presumably it
   case-folds both operands for the case-insensitive variants -- confirm.  */
2032 TOLOWER (%xmm1, %xmm2)
/* %xmm0 becomes the byte-wise match of the (%rsi) chunk against %xmm0's
   previous contents (presumably all zero, making this a NUL check);
   %xmm1 then becomes the byte-wise equality mask of the two chunks.  */
2034 pcmpeqb %xmm1, %xmm0
2035 pcmpeqb %xmm2, %xmm1
/* Collapse the per-byte result into a bit mask in %edx.  */
2037 pmovmskb %xmm1, %edx
/* Length-limited variants only: leave through strcmp_exitz when the
   (not shown) unsigned counter update is below-or-equal.  */
2041 # if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
2043 jbe LABEL(strcmp_exitz)
/* Second copy of the same load/merge/compare sequence; it too diverts to
   the nibble path on a page-boundary crossing.  */
2050 jg LABEL(nibble_ashr_14) /* cross page boundary */
2052 movdqa (%rsi, %rcx), %xmm1
2053 movdqa (%rdi, %rcx), %xmm2
2059 por %xmm3, %xmm2 /* merge into one 16byte value */
2061 palignr $14, %xmm3, %xmm2 /* merge into one 16byte value */
2063 TOLOWER (%xmm1, %xmm2)
2065 pcmpeqb %xmm1, %xmm0
2066 pcmpeqb %xmm2, %xmm1
2068 pmovmskb %xmm1, %edx
/* Same length-limited guard as in the first copy, written with the
   logical `||' used by every other guard of this kind in the file.  */
2072 # if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
2074 jbe LABEL(strcmp_exitz)
/* Back to the top of the loop.  */
2079 jmp LABEL(loop_ashr_14)
/* Page-boundary path for ashr_14: before the loop body loads the next
   chunk, check the bytes held in %xmm3 for a terminator.  */
2082 LABEL(nibble_ashr_14):
2083 pcmpeqb %xmm3, %xmm0 /* check nibble for null char */
/* Per-byte result of that check as a bit mask in %edx.  */
2084 pmovmskb %xmm0, %edx
/* NOTE(review): the instruction that sets the flags for this jnz is not
   visible in this excerpt; a non-zero result goes to the exit-tail code.  */
2086 jnz LABEL(ashr_14_exittail)
/* Length-limited variants only: also take the exit-tail path when the
   (not shown) unsigned comparison is below-or-equal.  */
2088 # if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
2090 jbe LABEL(ashr_14_exittail)
/* Otherwise resume the normal loop body.  */
2095 jmp LABEL(gobble_ashr_14)
/* Exit path from the ashr_14 nibble check: reload the (%rsi) chunk at
   index %rcx and continue in the aftertail code, which the ashr_12 and
   ashr_13 exit tails also jump to.
   NOTE(review): any instructions between the load and the jump are not
   visible in this excerpt.  */
2098 LABEL(ashr_14_exittail):
2099 movdqa (%rsi, %rcx), %xmm1
2102 jmp LABEL(aftertail)
2105 * The following cases will be handled by ashr_15
2106 * rcx(offset of rsi) rax(offset of rdi) relative offset corresponding case
2107 * n(1~15) n - 1 14(15 +(n - 1) - n) ashr_15
/* Entry code for the ashr_15 case (the label line itself is not visible
   in this excerpt): compare the first 16-byte chunks of both buffers,
   possibly branch to less32bytes, then set up the registers used by
   loop_ashr_15.
   NOTE(review): some instructions of this sequence appear to be missing
   from the listing; the comments below cover only what is shown.  */
2112 movdqa (%rdi), %xmm2
2113 movdqa (%rsi), %xmm1
/* Byte-wise match of the (%rsi) chunk against %xmm0 (presumably zero,
   i.e. a NUL check -- confirm).  */
2114 pcmpeqb %xmm1, %xmm0
/* TOLOWER is a macro defined elsewhere in the file; presumably it
   case-folds both operands for the case-insensitive variants -- confirm.  */
2116 TOLOWER (%xmm1, %xmm2)
/* Byte-wise equality of the two chunks, collected as a bit mask in %r9d.  */
2117 pcmpeqb %xmm1, %xmm2
2119 pmovmskb %xmm2, %r9d
/* NOTE(review): the flag-setting instruction for this branch is not
   visible here.  */
2123 jnz LABEL(less32bytes)
/* Keep the first (%rdi) chunk in %xmm3; the loop merges it with the next
   chunk.  */
2125 movdqa (%rdi), %xmm3
/* Macro defined near the top of the file: for the length-limited build it
   recomputes the remaining count and may branch to strcmp_exitz; the
   definition shown for the strcasecmp_l build is empty.  */
2127 UPDATE_STRNCMP_COUNTER
2130 mov $16, %rcx /* index for loads */
2131 mov $15, %r9d /* byte position left over from less32bytes case */
2133 * Setup %r10 value allows us to detect crossing a page boundary.
2134 * When %r10 goes positive we have crossed a page boundary and
2135 * need to do a nibble.
/* After these two instructions %r10 lies in the range [-4096, -1].  */
2138 and $0xfff, %r10 /* offset into 4K page */
2140 sub $0x1000, %r10 /* subtract 4K pagesize */
/* ashr_15 main loop: compare the two buffers 16 bytes at a time, with the
   (%rdi) data re-aligned by 15 bytes before it is matched against the
   (%rsi) data.
   NOTE(review): this listing appears to omit some lines (the instructions
   that set the flags tested by jg/jbe, and any preprocessor lines around
   the two merge variants); the comments below cover only what is shown.  */
2143 LABEL(loop_ashr_15):
/* Taken on a positive result; the setup code for this case describes %r10
   going positive as having crossed a page boundary.  */
2145 jg LABEL(nibble_ashr_15)
2147 LABEL(gobble_ashr_15):
/* Aligned 16-byte loads of the chunk at index %rcx from each buffer.  */
2148 movdqa (%rsi, %rcx), %xmm1
2149 movdqa (%rdi, %rcx), %xmm2
/* Two ways of forming the merged value are listed: the final por of a
   shift-based sequence, and a single palignr that yields the top byte of
   %xmm3 followed by the low 15 bytes of %xmm2.
   NOTE(review): presumably these are alternatives selected by a
   conditional that is not visible here -- confirm.  */
2155 por %xmm3, %xmm2 /* merge into one 16byte value */
2157 palignr $15, %xmm3, %xmm2 /* merge into one 16byte value */
/* TOLOWER is a macro defined elsewhere in the file; presumably it
   case-folds both operands for the case-insensitive variants -- confirm.  */
2159 TOLOWER (%xmm1, %xmm2)
/* %xmm0 becomes the byte-wise match of the (%rsi) chunk against %xmm0's
   previous contents (presumably all zero, making this a NUL check);
   %xmm1 then becomes the byte-wise equality mask of the two chunks.  */
2161 pcmpeqb %xmm1, %xmm0
2162 pcmpeqb %xmm2, %xmm1
/* Collapse the per-byte result into a bit mask in %edx.  */
2164 pmovmskb %xmm1, %edx
/* Length-limited variants only: leave through strcmp_exitz when the
   (not shown) unsigned counter update is below-or-equal.  */
2168 # if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
2170 jbe LABEL(strcmp_exitz)
/* Second copy of the same load/merge/compare sequence; it too diverts to
   the nibble path on a page-boundary crossing.  */
2177 jg LABEL(nibble_ashr_15) /* cross page boundary */
2179 movdqa (%rsi, %rcx), %xmm1
2180 movdqa (%rdi, %rcx), %xmm2
2186 por %xmm3, %xmm2 /* merge into one 16byte value */
2188 palignr $15, %xmm3, %xmm2 /* merge into one 16byte value */
2190 TOLOWER (%xmm1, %xmm2)
2192 pcmpeqb %xmm1, %xmm0
2193 pcmpeqb %xmm2, %xmm1
2195 pmovmskb %xmm1, %edx
2199 # if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
2201 jbe LABEL(strcmp_exitz)
/* Back to the top of the loop.  */
2206 jmp LABEL(loop_ashr_15)
/* Page-boundary path for ashr_15: before the loop body loads the next
   chunk, check the bytes held in %xmm3 for a terminator.  */
2209 LABEL(nibble_ashr_15):
2210 pcmpeqb %xmm3, %xmm0 /* check nibble for null char */
/* Per-byte result of that check as a bit mask in %edx.  */
2211 pmovmskb %xmm0, %edx
/* NOTE(review): the instruction that sets the flags for this jnz is not
   visible in this excerpt; a non-zero result goes to the exit-tail code.  */
2213 jnz LABEL(ashr_15_exittail)
/* Length-limited variants only: also take the exit-tail path when the
   (not shown) unsigned comparison is below-or-equal.  */
2215 # if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
2217 jbe LABEL(ashr_15_exittail)
/* Otherwise resume the normal loop body.  */
2222 jmp LABEL(gobble_ashr_15)
/* Exit path from the ashr_15 nibble check, followed by the tail code that
   locates the deciding byte and loads it from each buffer.
   NOTE(review): several labels and instructions of this tail are not
   visible in this excerpt (for example the targets named aftertail and
   less32bytes, and the final result computation); the comments below
   describe only the lines shown.  */
2225 LABEL(ashr_15_exittail):
/* Reload the (%rsi) chunk at index %rcx.  */
2226 movdqa (%rsi, %rcx), %xmm1
/* Compare that chunk with the bytes kept in %xmm3 and collect the
   per-byte result as a bit mask in %edx.  TOLOWER is a macro defined
   elsewhere; presumably it case-folds for the case-insensitive variants.  */
2232 TOLOWER (%xmm1, %xmm3)
2233 pcmpeqb %xmm3, %xmm1
2235 pmovmskb %xmm1, %edx
/* Turn the loop index back into addresses.  %r9 is the per-case byte
   position loaded during case setup (for example 15 for ashr_15).  */
2240 lea -16(%r9, %rcx), %rax /* locate the exact offset for rdi */
2242 lea (%rdi, %rax), %rdi /* locate the exact address for first operand(rdi) */
2243 lea (%rsi, %rcx), %rsi /* locate the exact address for second operand(rsi) */
/* NOTE(review): presumably guarded by a test of %r8d that this excerpt
   does not show -- confirm.  */
2246 xchg %rsi, %rdi /* recover original order according to flag(%r8d) */
/* Index of the lowest set bit of the mask, i.e. a byte offset into the
   16-byte chunk.  */
2251 bsf %rdx, %rdx /* find and store bit index in %rdx */
/* Length-limited variants only: leave through strcmp_exitz when the
   (not shown) unsigned counter update is below-or-equal.  */
2253 # if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
2255 jbe LABEL(strcmp_exitz)
/* Load the byte at that offset from each buffer, zero-extended.  */
2257 movzbl (%rsi, %rdx), %ecx
2258 movzbl (%rdi, %rdx), %eax
/* Case-insensitive variants only: replace each byte value by the 32-bit
   entry it selects in _nl_C_LC_CTYPE_tolower.  The base address is taken
   RIP-relative, 128 four-byte entries into the table.  */
2260 # if defined USE_AS_STRCASECMP_L || defined USE_AS_STRNCASECMP_L
2261 leaq _nl_C_LC_CTYPE_tolower+128*4(%rip), %rdx
2262 movl (%rdx,%rcx,4), %ecx
2263 movl (%rdx,%rax,4), %eax
/* Exit label reached by the length-limited variants when their unsigned
   counter checks are below-or-equal (see the jbe branches and the
   UPDATE_STRNCMP_COUNTER macro).
   NOTE(review): the instructions directly after this label, and the label
   that owns the table lookup below, are not visible in this excerpt.  */
2269 LABEL(strcmp_exitz):
/* Case-insensitive variants only: replace the byte values in %rcx and
   %rax by the 32-bit entries they select in _nl_C_LC_CTYPE_tolower.  The
   base address is taken RIP-relative, 128 four-byte entries into the
   table.  */
2278 # if defined USE_AS_STRCASECMP_L || defined USE_AS_STRNCASECMP_L
2279 leaq _nl_C_LC_CTYPE_tolower+128*4(%rip), %rdx
2280 movl (%rdx,%rcx,4), %ecx
2281 movl (%rdx,%rax,4), %eax
/* END is a macro defined elsewhere; presumably it closes the STRCMP
   function symbol -- confirm.  */
2286 END (BP_SYM (STRCMP))
/* Table of 32-bit offsets, each relative to the start of the table itself,
   to the sixteen ashr_N entry points.  Indices 0..14 hold ashr_1..ashr_15
   and the last entry (index 15) holds ashr_0.  The table is placed in the
   allocatable, non-writable .rodata section.
   NOTE(review): the code that indexes this table is outside this
   excerpt.  */
2288 .section .rodata,"a",@progbits
2290 LABEL(unaligned_table):
2291 .int LABEL(ashr_1) - LABEL(unaligned_table)
2292 .int LABEL(ashr_2) - LABEL(unaligned_table)
2293 .int LABEL(ashr_3) - LABEL(unaligned_table)
2294 .int LABEL(ashr_4) - LABEL(unaligned_table)
2295 .int LABEL(ashr_5) - LABEL(unaligned_table)
2296 .int LABEL(ashr_6) - LABEL(unaligned_table)
2297 .int LABEL(ashr_7) - LABEL(unaligned_table)
2298 .int LABEL(ashr_8) - LABEL(unaligned_table)
2299 .int LABEL(ashr_9) - LABEL(unaligned_table)
2300 .int LABEL(ashr_10) - LABEL(unaligned_table)
2301 .int LABEL(ashr_11) - LABEL(unaligned_table)
2302 .int LABEL(ashr_12) - LABEL(unaligned_table)
2303 .int LABEL(ashr_13) - LABEL(unaligned_table)
2304 .int LABEL(ashr_14) - LABEL(unaligned_table)
2305 .int LABEL(ashr_15) - LABEL(unaligned_table)
/* ashr_0 comes last, after ashr_15.  */
2306 .int LABEL(ashr_0) - LABEL(unaligned_table)
/* Closes a preprocessor conditional opened outside this excerpt; its
   trailing comment names NOT_IN_libc.  */
2307 #endif /* NOT_IN_libc */
/* libc_hidden_builtin_def is defined elsewhere; presumably it sets up the
   library-internal hidden definition of STRCMP -- confirm.  */
2308 libc_hidden_builtin_def (STRCMP)