1 /* Highly optimized version for x86-64.
2 Copyright (C) 1999-2015 Free Software Foundation, Inc.
3 This file is part of the GNU C Library.
4 Based on i686 version contributed by Ulrich Drepper
5 <drepper@cygnus.com>, 1999.
6 Updated with SSE2 support contributed by Intel Corporation.
8 The GNU C Library is free software; you can redistribute it and/or
9 modify it under the terms of the GNU Lesser General Public
10 License as published by the Free Software Foundation; either
11 version 2.1 of the License, or (at your option) any later version.
13 The GNU C Library is distributed in the hope that it will be useful,
14 but WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16 Lesser General Public License for more details.
18 You should have received a copy of the GNU Lesser General Public
19 License along with the GNU C Library; if not, see
20 <http://www.gnu.org/licenses/>. */
23 #include "asm-syntax.h"
25 #undef UPDATE_STRNCMP_COUNTER
32 /* Since the counter, %r11, is unsigned, we branch to strcmp_exitz
33 if the new counter > the old one or is 0. */
34 # define UPDATE_STRNCMP_COUNTER \
35 /* compute the count left to compare: %r9 = %r11 + %rcx - 16 */ \
36 lea -16(%rcx, %r11), %r9; \
38 jb LABEL(strcmp_exitz); /* unsigned below: leave via strcmp_exitz */ \
40 je LABEL(strcmp_exitz); /* equal/zero: leave via strcmp_exitz */ \
43 #elif defined USE_AS_STRCASECMP_L
44 # include "locale-defines.h"
46 # define UPDATE_STRNCMP_COUNTER
47 #elif defined USE_AS_STRNCASECMP_L
48 # include "locale-defines.h"
50 # define UPDATE_STRNCMP_COUNTER \
51 /* compute the count left to compare: %r9 = %r11 + %rcx - 16 */ \
52 lea -16(%rcx, %r11), %r9; \
54 jb LABEL(strcmp_exitz); /* unsigned below: leave via strcmp_exitz */ \
56 je LABEL(strcmp_exitz); /* equal/zero: leave via strcmp_exitz */ \
59 # define UPDATE_STRNCMP_COUNTER
61 # define STRCMP strcmp
68 .section .text.ssse3,"ax",@progbits
71 #ifdef USE_AS_STRCASECMP_L
73 # define ENTRY2(name) ENTRY (name)
74 # define END2(name) END (name)
78 movq __libc_tsd_LOCALE@gottpoff(%rip),%rax /* rax = GOT entry holding the TLS offset of __libc_tsd_LOCALE */
79 mov %fs:(%rax),%RDX_LP /* RDX_LP = value at %fs:(rax); presumably the locale argument for the _l variant - confirm */
81 // XXX these 5 bytes should be before the function
83 .byte 0x0f,0x1f,0x44,0x00,0x00
85 # ifndef NO_NOLOCALE_ALIAS
86 weak_alias (__strcasecmp, strcasecmp)
87 libc_hidden_def (__strcasecmp)
89 /* FALLTHROUGH to strcasecmp_l. */
90 #elif defined USE_AS_STRNCASECMP_L
92 # define ENTRY2(name) ENTRY (name)
93 # define END2(name) END (name)
96 ENTRY2 (__strncasecmp) /* ENTRY2 expands to ENTRY here */
97 movq __libc_tsd_LOCALE@gottpoff(%rip),%rax /* rax = GOT entry holding the TLS offset of __libc_tsd_LOCALE */
98 mov %fs:(%rax),%RCX_LP /* RCX_LP = value at %fs:(rax); presumably the locale argument for the _l variant - confirm */
100 // XXX these 5 bytes should be before the function
102 .byte 0x0f,0x1f,0x44,0x00,0x00
104 # ifndef NO_NOLOCALE_ALIAS
105 weak_alias (__strncasecmp, strncasecmp)
106 libc_hidden_def (__strncasecmp)
108 /* FALLTHROUGH to strncasecmp_l. */
112 #ifdef USE_AS_STRCASECMP_L
113 /* We have to fall back on the C implementation for locales
114 with encodings not matching ASCII for single bytes. */
115 # if LOCALE_T___LOCALES != 0 || LC_CTYPE != 0
116 mov LOCALE_T___LOCALES+LC_CTYPE*LP_SIZE(%rdx), %RAX_LP
120 testl $1, LOCALE_DATA_VALUES+_NL_CTYPE_NONASCII_CASE*SIZEOF_VALUES(%rax)
121 jne __strcasecmp_l_nonascii
122 #elif defined USE_AS_STRNCASECMP_L
123 /* We have to fall back on the C implementation for locales
124 with encodings not matching ASCII for single bytes. */
125 # if LOCALE_T___LOCALES != 0 || LC_CTYPE != 0
126 mov LOCALE_T___LOCALES+LC_CTYPE*LP_SIZE(%rcx), %RAX_LP
130 testl $1, LOCALE_DATA_VALUES+_NL_CTYPE_NONASCII_CASE*SIZEOF_VALUES(%rax)
131 jne __strncasecmp_l_nonascii
135 * This implementation uses SSE to compare up to 16 bytes at a time.
137 #if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
139 je LABEL(strcmp_exitz)
146 /* Use 64bit AND here to avoid long NOP padding. */
147 and $0x3f, %rcx /* rsi alignment in cache line */
148 and $0x3f, %rax /* rdi alignment in cache line */
149 #if defined USE_AS_STRCASECMP_L || defined USE_AS_STRNCASECMP_L
150 .section .rodata.cst16,"aM",@progbits,16 /* allocatable, mergeable section with 16-byte entries */
153 .quad 0x4040404040404040 /* 0x40 in every byte: the ASCII code just below A */
154 .quad 0x4040404040404040
156 .quad 0x5b5b5b5b5b5b5b5b /* 0x5b in every byte: the ASCII code just above Z */
157 .quad 0x5b5b5b5b5b5b5b5b
159 .quad 0x2020202020202020 /* 0x20 in every byte: the bit that separates ASCII upper and lower case */
160 .quad 0x2020202020202020
162 movdqa .Lbelowupper(%rip), %xmm5 /* xmm5 = 16-byte constant at .Lbelowupper */
163 # define UCLOW_reg %xmm5
164 movdqa .Ltopupper(%rip), %xmm6 /* xmm6 = 16-byte constant at .Ltopupper */
165 # define UCHIGH_reg %xmm6
166 movdqa .Ltouppermask(%rip), %xmm7 /* xmm7 = 16-byte constant at .Ltouppermask */
167 # define LCQWORD_reg %xmm7
170 ja LABEL(crosscache) /* rsi: 16-byte load will cross cache line */
172 ja LABEL(crosscache) /* rdi: 16-byte load will cross cache line */
175 movhpd 8(%rdi), %xmm1
176 movhpd 8(%rsi), %xmm2
177 #if defined USE_AS_STRCASECMP_L || defined USE_AS_STRNCASECMP_L
178 # define TOLOWER(reg1, reg2) /* build per-byte case masks for reg1 and reg2; uses xmm8-xmm11 as scratch */ \
179 movdqa reg1, %xmm8; /* xmm8 = copy of reg1 */ \
180 movdqa UCHIGH_reg, %xmm9; /* xmm9 = copy of UCHIGH_reg */ \
181 movdqa reg2, %xmm10; /* xmm10 = copy of reg2 */ \
182 movdqa UCHIGH_reg, %xmm11; /* xmm11 = copy of UCHIGH_reg */ \
183 pcmpgtb UCLOW_reg, %xmm8; /* xmm8 = 0xff in bytes where reg1 > UCLOW_reg (signed) */ \
184 pcmpgtb reg1, %xmm9; /* xmm9 = 0xff in bytes where UCHIGH_reg > reg1 (signed) */ \
185 pcmpgtb UCLOW_reg, %xmm10; /* xmm10 = 0xff in bytes where reg2 > UCLOW_reg (signed) */ \
186 pcmpgtb reg2, %xmm11; /* xmm11 = 0xff in bytes where UCHIGH_reg > reg2 (signed) */ \
188 pand %xmm11, %xmm10; /* xmm10 = reg2 bytes strictly between UCLOW_reg and UCHIGH_reg */ \
189 pand LCQWORD_reg, %xmm8; /* keep only the LCQWORD_reg bits of the reg1 mask */ \
190 pand LCQWORD_reg, %xmm10; /* keep only the LCQWORD_reg bits of the reg2 mask */ \
193 TOLOWER (%xmm1, %xmm2)
195 # define TOLOWER(reg1, reg2)
197 pxor %xmm0, %xmm0 /* clear %xmm0 for null char checks */
198 pcmpeqb %xmm1, %xmm0 /* xmm0 = 0xff in bytes where xmm1 holds a null char */
199 pcmpeqb %xmm2, %xmm1 /* xmm1 = 0xff in bytes where the first 16 bytes are equal */
200 psubb %xmm0, %xmm1 /* packed sub of comparison results */
202 sub $0xffff, %edx /* edx becomes 0 only if it was 0xffff (first 16 bytes are the same) */
203 jnz LABEL(less16bytes) /* If not, find different value or null char */
204 #if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
206 jbe LABEL(strcmp_exitz) /* finish comparison */
208 add $16, %rsi /* prepare to search next 16 bytes */
209 add $16, %rdi /* prepare to search next 16 bytes */
212 * Determine source and destination string offsets from 16-byte alignment.
213 * Use relative offset difference between the two to determine which case
218 and $0xfffffffffffffff0, %rsi /* force %rsi to be 16-byte aligned */
219 and $0xfffffffffffffff0, %rdi /* force %rdi to be 16-byte aligned */
220 mov $0xffff, %edx /* for equivalent offset */
222 and $0xf, %ecx /* offset of rsi (low 4 bits kept) */
223 and $0xf, %eax /* offset of rdi (low 4 bits kept) */
225 je LABEL(ashr_0) /* rsi and rdi relative offset same */
227 mov %edx, %r8d /* r8d is offset flag for exit tail */
233 lea LABEL(unaligned_table)(%rip), %r10 /* r10 = address of the jump table */
234 movslq (%r10, %r9,4), %r9 /* r9 = sign-extended 32-bit table entry at index r9 */
235 lea (%r10, %r9), %r10 /* entry is added to the table base to form the target */
236 jmp *%r10 /* jump to corresponding case */
239 * The following cases will be handled by ashr_0
240 * rcx(offset of rsi) rax(offset of rdi) relative offset corresponding case
241 * n(0~15) n(0~15) 15(15+ n-n) ashr_0
247 pxor %xmm0, %xmm0 /* clear %xmm0 for null char check */
248 pcmpeqb %xmm1, %xmm0 /* Any null chars? */
249 #if !defined USE_AS_STRCASECMP_L && !defined USE_AS_STRNCASECMP_L
250 pcmpeqb (%rdi), %xmm1 /* compare 16 bytes for equality */
253 TOLOWER (%xmm1, %xmm2)
254 pcmpeqb %xmm2, %xmm1 /* compare 16 bytes for equality */
256 psubb %xmm0, %xmm1 /* packed sub of comparison results*/
258 shr %cl, %edx /* adjust 0xffff for offset */
259 shr %cl, %r9d /* adjust for 16-byte offset */
262 * edx must be the same as r9d if the remaining (16-rcx) bytes are equal to
263 * those starting at (16-rax) and no null char was seen.
265 jne LABEL(less32bytes) /* mismatch or null char */
266 UPDATE_STRNCMP_COUNTER
269 pxor %xmm0, %xmm0 /* clear xmm0, may have changed above */
272 * Now both strings are aligned at 16-byte boundary. Loop over strings
273 * checking 32-bytes per iteration.
277 movdqa (%rsi, %rcx), %xmm1
278 movdqa (%rdi, %rcx), %xmm2
279 TOLOWER (%xmm1, %xmm2)
286 jnz LABEL(exit) /* mismatch or null char seen */
288 #if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
290 jbe LABEL(strcmp_exitz)
293 movdqa (%rsi, %rcx), %xmm1
294 movdqa (%rdi, %rcx), %xmm2
295 TOLOWER (%xmm1, %xmm2)
303 #if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
305 jbe LABEL(strcmp_exitz)
308 jmp LABEL(loop_ashr_0)
311 * The following cases will be handled by ashr_1
312 * rcx(offset of rsi) rax(offset of rdi) relative offset corresponding case
313 * n(15) n -15 0(15 +(n-15) - n) ashr_1
320 pcmpeqb %xmm1, %xmm0 /* Any null chars? */
321 pslldq $15, %xmm2 /* shift first string to align with second */
322 TOLOWER (%xmm1, %xmm2)
323 pcmpeqb %xmm1, %xmm2 /* compare 16 bytes for equality */
324 psubb %xmm0, %xmm2 /* packed sub of comparison results*/
326 shr %cl, %edx /* adjust 0xffff for offset */
327 shr %cl, %r9d /* adjust for 16-byte offset */
329 jnz LABEL(less32bytes) /* mismatch or null char seen */
331 UPDATE_STRNCMP_COUNTER
334 mov $16, %rcx /* index for loads*/
335 mov $1, %r9d /* byte position left over from less32bytes case */
337 * Set up %r10 so that we can detect crossing a page boundary.
338 * When %r10 goes positive we have crossed a page boundary and
339 * need to do a nibble.
342 and $0xfff, %r10 /* offset into 4K page */
343 sub $0x1000, %r10 /* subtract 4K pagesize */
348 jg LABEL(nibble_ashr_1) /* cross page boundary */
350 LABEL(gobble_ashr_1):
351 movdqa (%rsi, %rcx), %xmm1 /* aligned 16-byte load from %rsi + %rcx */
352 movdqa (%rdi, %rcx), %xmm2 /* aligned 16-byte load from %rdi + %rcx */
353 movdqa %xmm2, %xmm4 /* store for next cycle */
358 por %xmm3, %xmm2 /* merge into one 16-byte value */
360 palignr $1, %xmm3, %xmm2 /* merge into one 16-byte value: xmm2 = low 16 bytes of (xmm2:xmm3) shifted right by 1 byte */
362 TOLOWER (%xmm1, %xmm2) /* expands to nothing unless built as a case-insensitive variant */
371 #if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
373 jbe LABEL(strcmp_exitz)
379 jg LABEL(nibble_ashr_1) /* cross page boundary */
381 movdqa (%rsi, %rcx), %xmm1
382 movdqa (%rdi, %rcx), %xmm2
383 movdqa %xmm2, %xmm4 /* store for next cycle */
388 por %xmm3, %xmm2 /* merge into one 16byte value */
390 palignr $1, %xmm3, %xmm2 /* merge into one 16byte value */
392 TOLOWER (%xmm1, %xmm2)
401 #if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
403 jbe LABEL(strcmp_exitz)
407 jmp LABEL(loop_ashr_1)
410 * Nibble avoids loads across page boundary. This is to avoid a potential
411 * access into unmapped memory.
414 LABEL(nibble_ashr_1):
415 pcmpeqb %xmm3, %xmm0 /* check nibble for null char */
418 jnz LABEL(ashr_1_exittail) /* found a null char */
420 #if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
422 jbe LABEL(ashr_1_exittail)
426 sub $0x1000, %r10 /* subtract 4K from %r10 */
427 jmp LABEL(gobble_ashr_1) /* continue with the 16-byte loads */
430 * Once a null char is found, determine if there is a string mismatch
431 * before the null char.
434 LABEL(ashr_1_exittail):
435 movdqa (%rsi, %rcx), %xmm1
441 * The following cases will be handled by ashr_2
442 * rcx(offset of rsi) rax(offset of rdi) relative offset corresponding case
443 * n(14~15) n -14 1(15 +(n-14) - n) ashr_2
452 TOLOWER (%xmm1, %xmm2)
459 jnz LABEL(less32bytes)
461 UPDATE_STRNCMP_COUNTER
464 mov $16, %rcx /* index for loads */
465 mov $2, %r9d /* byte position left over from less32bytes case */
467 * Set up %r10 so that we can detect crossing a page boundary.
468 * When %r10 goes positive we have crossed a page boundary and
469 * need to do a nibble.
472 and $0xfff, %r10 /* offset into 4K page */
473 sub $0x1000, %r10 /* subtract 4K pagesize */
478 jg LABEL(nibble_ashr_2)
480 LABEL(gobble_ashr_2):
481 movdqa (%rsi, %rcx), %xmm1
482 movdqa (%rdi, %rcx), %xmm2
488 por %xmm3, %xmm2 /* merge into one 16byte value */
490 palignr $2, %xmm3, %xmm2 /* merge into one 16byte value */
492 TOLOWER (%xmm1, %xmm2)
501 #if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
503 jbe LABEL(strcmp_exitz)
510 jg LABEL(nibble_ashr_2) /* cross page boundary */
512 movdqa (%rsi, %rcx), %xmm1
513 movdqa (%rdi, %rcx), %xmm2
519 por %xmm3, %xmm2 /* merge into one 16byte value */
521 palignr $2, %xmm3, %xmm2 /* merge into one 16byte value */
523 TOLOWER (%xmm1, %xmm2)
532 #if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
534 jbe LABEL(strcmp_exitz)
539 jmp LABEL(loop_ashr_2)
542 LABEL(nibble_ashr_2):
543 pcmpeqb %xmm3, %xmm0 /* check nibble for null char */
546 jnz LABEL(ashr_2_exittail)
548 #if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
550 jbe LABEL(ashr_2_exittail)
555 jmp LABEL(gobble_ashr_2)
558 LABEL(ashr_2_exittail):
559 movdqa (%rsi, %rcx), %xmm1
565 * The following cases will be handled by ashr_3
566 * rcx(offset of rsi) rax(offset of rdi) relative offset corresponding case
567 * n(13~15) n -13 2(15 +(n-13) - n) ashr_3
576 TOLOWER (%xmm1, %xmm2)
583 jnz LABEL(less32bytes)
586 UPDATE_STRNCMP_COUNTER
589 mov $16, %rcx /* index for loads */
590 mov $3, %r9d /* byte position left over from less32bytes case */
592 * Set up %r10 so that we can detect crossing a page boundary.
593 * When %r10 goes positive we have crossed a page boundary and
594 * need to do a nibble.
597 and $0xfff, %r10 /* offset into 4K page */
598 sub $0x1000, %r10 /* subtract 4K pagesize */
603 jg LABEL(nibble_ashr_3)
605 LABEL(gobble_ashr_3):
606 movdqa (%rsi, %rcx), %xmm1
607 movdqa (%rdi, %rcx), %xmm2
613 por %xmm3, %xmm2 /* merge into one 16byte value */
615 palignr $3, %xmm3, %xmm2 /* merge into one 16byte value */
617 TOLOWER (%xmm1, %xmm2)
626 #if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
628 jbe LABEL(strcmp_exitz)
635 jg LABEL(nibble_ashr_3) /* cross page boundary */
637 movdqa (%rsi, %rcx), %xmm1
638 movdqa (%rdi, %rcx), %xmm2
644 por %xmm3, %xmm2 /* merge into one 16byte value */
646 palignr $3, %xmm3, %xmm2 /* merge into one 16byte value */
648 TOLOWER (%xmm1, %xmm2)
657 #if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
659 jbe LABEL(strcmp_exitz)
664 jmp LABEL(loop_ashr_3)
667 LABEL(nibble_ashr_3):
668 pcmpeqb %xmm3, %xmm0 /* check nibble for null char */
671 jnz LABEL(ashr_3_exittail)
673 #if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
675 jbe LABEL(ashr_3_exittail)
680 jmp LABEL(gobble_ashr_3)
683 LABEL(ashr_3_exittail):
684 movdqa (%rsi, %rcx), %xmm1
690 * The following cases will be handled by ashr_4
691 * rcx(offset of rsi) rax(offset of rdi) relative offset corresponding case
692 * n(12~15) n -12 3(15 +(n-12) - n) ashr_4
701 TOLOWER (%xmm1, %xmm2)
708 jnz LABEL(less32bytes)
711 UPDATE_STRNCMP_COUNTER
714 mov $16, %rcx /* index for loads */
715 mov $4, %r9d /* byte position left over from less32bytes case */
717 * Set up %r10 so that we can detect crossing a page boundary.
718 * When %r10 goes positive we have crossed a page boundary and
719 * need to do a nibble.
722 and $0xfff, %r10 /* offset into 4K page */
723 sub $0x1000, %r10 /* subtract 4K pagesize */
728 jg LABEL(nibble_ashr_4)
730 LABEL(gobble_ashr_4):
731 movdqa (%rsi, %rcx), %xmm1
732 movdqa (%rdi, %rcx), %xmm2
738 por %xmm3, %xmm2 /* merge into one 16byte value */
740 palignr $4, %xmm3, %xmm2 /* merge into one 16byte value */
742 TOLOWER (%xmm1, %xmm2)
751 #if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
753 jbe LABEL(strcmp_exitz)
760 jg LABEL(nibble_ashr_4) /* cross page boundary */
762 movdqa (%rsi, %rcx), %xmm1
763 movdqa (%rdi, %rcx), %xmm2
769 por %xmm3, %xmm2 /* merge into one 16byte value */
771 palignr $4, %xmm3, %xmm2 /* merge into one 16byte value */
773 TOLOWER (%xmm1, %xmm2)
782 #if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
784 jbe LABEL(strcmp_exitz)
789 jmp LABEL(loop_ashr_4)
792 LABEL(nibble_ashr_4):
793 pcmpeqb %xmm3, %xmm0 /* check nibble for null char */
796 jnz LABEL(ashr_4_exittail)
798 #if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
800 jbe LABEL(ashr_4_exittail)
805 jmp LABEL(gobble_ashr_4)
808 LABEL(ashr_4_exittail):
809 movdqa (%rsi, %rcx), %xmm1
815 * The following cases will be handled by ashr_5
816 * rcx(offset of rsi) rax(offset of rdi) relative offset corresponding case
817 * n(11~15) n - 11 4(15 +(n-11) - n) ashr_5
826 TOLOWER (%xmm1, %xmm2)
833 jnz LABEL(less32bytes)
836 UPDATE_STRNCMP_COUNTER
839 mov $16, %rcx /* index for loads */
840 mov $5, %r9d /* byte position left over from less32bytes case */
842 * Set up %r10 so that we can detect crossing a page boundary.
843 * When %r10 goes positive we have crossed a page boundary and
844 * need to do a nibble.
847 and $0xfff, %r10 /* offset into 4K page */
848 sub $0x1000, %r10 /* subtract 4K pagesize */
853 jg LABEL(nibble_ashr_5)
855 LABEL(gobble_ashr_5):
856 movdqa (%rsi, %rcx), %xmm1
857 movdqa (%rdi, %rcx), %xmm2
863 por %xmm3, %xmm2 /* merge into one 16byte value */
865 palignr $5, %xmm3, %xmm2 /* merge into one 16byte value */
867 TOLOWER (%xmm1, %xmm2)
876 #if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
878 jbe LABEL(strcmp_exitz)
885 jg LABEL(nibble_ashr_5) /* cross page boundary */
887 movdqa (%rsi, %rcx), %xmm1
888 movdqa (%rdi, %rcx), %xmm2
894 por %xmm3, %xmm2 /* merge into one 16byte value */
896 palignr $5, %xmm3, %xmm2 /* merge into one 16byte value */
898 TOLOWER (%xmm1, %xmm2)
907 #if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
909 jbe LABEL(strcmp_exitz)
914 jmp LABEL(loop_ashr_5)
917 LABEL(nibble_ashr_5):
918 pcmpeqb %xmm3, %xmm0 /* check nibble for null char */
921 jnz LABEL(ashr_5_exittail)
923 #if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
925 jbe LABEL(ashr_5_exittail)
930 jmp LABEL(gobble_ashr_5)
933 LABEL(ashr_5_exittail):
934 movdqa (%rsi, %rcx), %xmm1
940 * The following cases will be handled by ashr_6
941 * rcx(offset of rsi) rax(offset of rdi) relative offset corresponding case
942 * n(10~15) n - 10 5(15 +(n-10) - n) ashr_6
951 TOLOWER (%xmm1, %xmm2)
958 jnz LABEL(less32bytes)
961 UPDATE_STRNCMP_COUNTER
964 mov $16, %rcx /* index for loads */
965 mov $6, %r9d /* byte position left over from less32bytes case */
967 * Set up %r10 so that we can detect crossing a page boundary.
968 * When %r10 goes positive we have crossed a page boundary and
969 * need to do a nibble.
972 and $0xfff, %r10 /* offset into 4K page */
973 sub $0x1000, %r10 /* subtract 4K pagesize */
978 jg LABEL(nibble_ashr_6)
980 LABEL(gobble_ashr_6):
981 movdqa (%rsi, %rcx), %xmm1
982 movdqa (%rdi, %rcx), %xmm2
988 por %xmm3, %xmm2 /* merge into one 16byte value */
990 palignr $6, %xmm3, %xmm2 /* merge into one 16byte value */
992 TOLOWER (%xmm1, %xmm2)
1001 #if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
1003 jbe LABEL(strcmp_exitz)
1010 jg LABEL(nibble_ashr_6) /* cross page boundary */
1012 movdqa (%rsi, %rcx), %xmm1
1013 movdqa (%rdi, %rcx), %xmm2
1019 por %xmm3, %xmm2 /* merge into one 16byte value */
1021 palignr $6, %xmm3, %xmm2 /* merge into one 16byte value */
1023 TOLOWER (%xmm1, %xmm2)
1025 pcmpeqb %xmm1, %xmm0
1026 pcmpeqb %xmm2, %xmm1
1028 pmovmskb %xmm1, %edx
1032 #if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
1034 jbe LABEL(strcmp_exitz)
1039 jmp LABEL(loop_ashr_6)
1042 LABEL(nibble_ashr_6):
1043 pcmpeqb %xmm3, %xmm0 /* check nibble for null char */
1044 pmovmskb %xmm0, %edx
1046 jnz LABEL(ashr_6_exittail)
1048 #if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
1050 jbe LABEL(ashr_6_exittail)
1055 jmp LABEL(gobble_ashr_6)
1058 LABEL(ashr_6_exittail):
1059 movdqa (%rsi, %rcx), %xmm1
1062 jmp LABEL(aftertail)
1065 * The following cases will be handled by ashr_7
1066 * rcx(offset of rsi) rax(offset of rdi) relative offset corresponding case
1067 * n(9~15) n - 9 6(15 +(n - 9) - n) ashr_7
1072 movdqa (%rdi), %xmm2
1073 movdqa (%rsi), %xmm1
1074 pcmpeqb %xmm1, %xmm0
1076 TOLOWER (%xmm1, %xmm2)
1077 pcmpeqb %xmm1, %xmm2
1079 pmovmskb %xmm2, %r9d
1083 jnz LABEL(less32bytes)
1084 movdqa (%rdi), %xmm3
1086 UPDATE_STRNCMP_COUNTER
1089 mov $16, %rcx /* index for loads */
1090 mov $7, %r9d /* byte position left over from less32bytes case */
1092 * Set up %r10 so that we can detect crossing a page boundary.
1093 * When %r10 goes positive we have crossed a page boundary and
1094 * need to do a nibble.
1097 and $0xfff, %r10 /* offset into 4K page */
1098 sub $0x1000, %r10 /* subtract 4K pagesize */
1103 jg LABEL(nibble_ashr_7)
1105 LABEL(gobble_ashr_7):
1106 movdqa (%rsi, %rcx), %xmm1
1107 movdqa (%rdi, %rcx), %xmm2
1113 por %xmm3, %xmm2 /* merge into one 16byte value */
1115 palignr $7, %xmm3, %xmm2 /* merge into one 16byte value */
1117 TOLOWER (%xmm1, %xmm2)
1119 pcmpeqb %xmm1, %xmm0
1120 pcmpeqb %xmm2, %xmm1
1122 pmovmskb %xmm1, %edx
1126 #if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
1128 jbe LABEL(strcmp_exitz)
1135 jg LABEL(nibble_ashr_7) /* cross page boundary */
1137 movdqa (%rsi, %rcx), %xmm1
1138 movdqa (%rdi, %rcx), %xmm2
1144 por %xmm3, %xmm2 /* merge into one 16byte value */
1146 palignr $7, %xmm3, %xmm2 /* merge into one 16byte value */
1148 TOLOWER (%xmm1, %xmm2)
1150 pcmpeqb %xmm1, %xmm0
1151 pcmpeqb %xmm2, %xmm1
1153 pmovmskb %xmm1, %edx
1157 #if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
1159 jbe LABEL(strcmp_exitz)
1164 jmp LABEL(loop_ashr_7)
1167 LABEL(nibble_ashr_7):
1168 pcmpeqb %xmm3, %xmm0 /* check nibble for null char */
1169 pmovmskb %xmm0, %edx
1171 jnz LABEL(ashr_7_exittail)
1173 #if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
1175 jbe LABEL(ashr_7_exittail)
1180 jmp LABEL(gobble_ashr_7)
1183 LABEL(ashr_7_exittail):
1184 movdqa (%rsi, %rcx), %xmm1
1187 jmp LABEL(aftertail)
1190 * The following cases will be handled by ashr_8
1191 * rcx(offset of rsi) rax(offset of rdi) relative offset corresponding case
1192 * n(8~15) n - 8 7(15 +(n - 8) - n) ashr_8
1197 movdqa (%rdi), %xmm2
1198 movdqa (%rsi), %xmm1
1199 pcmpeqb %xmm1, %xmm0
1201 TOLOWER (%xmm1, %xmm2)
1202 pcmpeqb %xmm1, %xmm2
1204 pmovmskb %xmm2, %r9d
1208 jnz LABEL(less32bytes)
1209 movdqa (%rdi), %xmm3
1211 UPDATE_STRNCMP_COUNTER
1214 mov $16, %rcx /* index for loads */
1215 mov $8, %r9d /* byte position left over from less32bytes case */
1217 * Set up %r10 so that we can detect crossing a page boundary.
1218 * When %r10 goes positive we have crossed a page boundary and
1219 * need to do a nibble.
1222 and $0xfff, %r10 /* offset into 4K page */
1223 sub $0x1000, %r10 /* subtract 4K pagesize */
1228 jg LABEL(nibble_ashr_8)
1230 LABEL(gobble_ashr_8):
1231 movdqa (%rsi, %rcx), %xmm1
1232 movdqa (%rdi, %rcx), %xmm2
1238 por %xmm3, %xmm2 /* merge into one 16byte value */
1240 palignr $8, %xmm3, %xmm2 /* merge into one 16byte value */
1242 TOLOWER (%xmm1, %xmm2)
1244 pcmpeqb %xmm1, %xmm0
1245 pcmpeqb %xmm2, %xmm1
1247 pmovmskb %xmm1, %edx
1251 #if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
1253 jbe LABEL(strcmp_exitz)
1260 jg LABEL(nibble_ashr_8) /* cross page boundary */
1262 movdqa (%rsi, %rcx), %xmm1
1263 movdqa (%rdi, %rcx), %xmm2
1269 por %xmm3, %xmm2 /* merge into one 16byte value */
1271 palignr $8, %xmm3, %xmm2 /* merge into one 16byte value */
1273 TOLOWER (%xmm1, %xmm2)
1275 pcmpeqb %xmm1, %xmm0
1276 pcmpeqb %xmm2, %xmm1
1278 pmovmskb %xmm1, %edx
1282 #if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
1284 jbe LABEL(strcmp_exitz)
1289 jmp LABEL(loop_ashr_8)
1292 LABEL(nibble_ashr_8):
1293 pcmpeqb %xmm3, %xmm0 /* check nibble for null char */
1294 pmovmskb %xmm0, %edx
1296 jnz LABEL(ashr_8_exittail)
1298 #if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
1300 jbe LABEL(ashr_8_exittail)
1305 jmp LABEL(gobble_ashr_8)
1308 LABEL(ashr_8_exittail):
1309 movdqa (%rsi, %rcx), %xmm1
1312 jmp LABEL(aftertail)
1315 * The following cases will be handled by ashr_9
1316 * rcx(offset of rsi) rax(offset of rdi) relative offset corresponding case
1317 * n(7~15) n - 7 8(15 +(n - 7) - n) ashr_9
1322 movdqa (%rdi), %xmm2
1323 movdqa (%rsi), %xmm1
1324 pcmpeqb %xmm1, %xmm0
1326 TOLOWER (%xmm1, %xmm2)
1327 pcmpeqb %xmm1, %xmm2
1329 pmovmskb %xmm2, %r9d
1333 jnz LABEL(less32bytes)
1334 movdqa (%rdi), %xmm3
1336 UPDATE_STRNCMP_COUNTER
1339 mov $16, %rcx /* index for loads */
1340 mov $9, %r9d /* byte position left over from less32bytes case */
1342 * Set up %r10 so that we can detect crossing a page boundary.
1343 * When %r10 goes positive we have crossed a page boundary and
1344 * need to do a nibble.
1347 and $0xfff, %r10 /* offset into 4K page */
1348 sub $0x1000, %r10 /* subtract 4K pagesize */
1353 jg LABEL(nibble_ashr_9)
1355 LABEL(gobble_ashr_9):
1356 movdqa (%rsi, %rcx), %xmm1
1357 movdqa (%rdi, %rcx), %xmm2
1363 por %xmm3, %xmm2 /* merge into one 16byte value */
1365 palignr $9, %xmm3, %xmm2 /* merge into one 16byte value */
1367 TOLOWER (%xmm1, %xmm2)
1369 pcmpeqb %xmm1, %xmm0
1370 pcmpeqb %xmm2, %xmm1
1372 pmovmskb %xmm1, %edx
1376 #if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
1378 jbe LABEL(strcmp_exitz)
1385 jg LABEL(nibble_ashr_9) /* cross page boundary */
1387 movdqa (%rsi, %rcx), %xmm1
1388 movdqa (%rdi, %rcx), %xmm2
1394 por %xmm3, %xmm2 /* merge into one 16byte value */
1396 palignr $9, %xmm3, %xmm2 /* merge into one 16byte value */
1398 TOLOWER (%xmm1, %xmm2)
1400 pcmpeqb %xmm1, %xmm0
1401 pcmpeqb %xmm2, %xmm1
1403 pmovmskb %xmm1, %edx
1407 #if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
1409 jbe LABEL(strcmp_exitz)
1413 movdqa %xmm4, %xmm3 /* store for next cycle */
1414 jmp LABEL(loop_ashr_9)
1417 LABEL(nibble_ashr_9):
1418 pcmpeqb %xmm3, %xmm0 /* check nibble for null char */
1419 pmovmskb %xmm0, %edx
1421 jnz LABEL(ashr_9_exittail)
1423 #if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
1425 jbe LABEL(ashr_9_exittail)
1430 jmp LABEL(gobble_ashr_9)
1433 LABEL(ashr_9_exittail):
1434 movdqa (%rsi, %rcx), %xmm1
1437 jmp LABEL(aftertail)
1440 * The following cases will be handled by ashr_10
1441 * rcx(offset of rsi) rax(offset of rdi) relative offset corresponding case
1442 * n(6~15) n - 6 9(15 +(n - 6) - n) ashr_10
1447 movdqa (%rdi), %xmm2
1448 movdqa (%rsi), %xmm1
1449 pcmpeqb %xmm1, %xmm0
1451 TOLOWER (%xmm1, %xmm2)
1452 pcmpeqb %xmm1, %xmm2
1454 pmovmskb %xmm2, %r9d
1458 jnz LABEL(less32bytes)
1459 movdqa (%rdi), %xmm3
1461 UPDATE_STRNCMP_COUNTER
1464 mov $16, %rcx /* index for loads */
1465 mov $10, %r9d /* byte position left over from less32bytes case */
1467 * Set up %r10 so that we can detect crossing a page boundary.
1468 * When %r10 goes positive we have crossed a page boundary and
1469 * need to do a nibble.
1472 and $0xfff, %r10 /* offset into 4K page */
1473 sub $0x1000, %r10 /* subtract 4K pagesize */
1476 LABEL(loop_ashr_10):
1478 jg LABEL(nibble_ashr_10)
1480 LABEL(gobble_ashr_10):
1481 movdqa (%rsi, %rcx), %xmm1
1482 movdqa (%rdi, %rcx), %xmm2
1488 por %xmm3, %xmm2 /* merge into one 16byte value */
1490 palignr $10, %xmm3, %xmm2 /* merge into one 16byte value */
1492 TOLOWER (%xmm1, %xmm2)
1494 pcmpeqb %xmm1, %xmm0
1495 pcmpeqb %xmm2, %xmm1
1497 pmovmskb %xmm1, %edx
1501 #if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
1503 jbe LABEL(strcmp_exitz)
1510 jg LABEL(nibble_ashr_10) /* cross page boundary */
1512 movdqa (%rsi, %rcx), %xmm1
1513 movdqa (%rdi, %rcx), %xmm2
1519 por %xmm3, %xmm2 /* merge into one 16byte value */
1521 palignr $10, %xmm3, %xmm2 /* merge into one 16byte value */
1523 TOLOWER (%xmm1, %xmm2)
1525 pcmpeqb %xmm1, %xmm0
1526 pcmpeqb %xmm2, %xmm1
1528 pmovmskb %xmm1, %edx
1532 #if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
1534 jbe LABEL(strcmp_exitz)
1539 jmp LABEL(loop_ashr_10)
1542 LABEL(nibble_ashr_10):
1543 pcmpeqb %xmm3, %xmm0 /* check nibble for null char */
1544 pmovmskb %xmm0, %edx
1546 jnz LABEL(ashr_10_exittail)
1548 #if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
1550 jbe LABEL(ashr_10_exittail)
1555 jmp LABEL(gobble_ashr_10)
1558 LABEL(ashr_10_exittail):
1559 movdqa (%rsi, %rcx), %xmm1
1562 jmp LABEL(aftertail)
1565 * The following cases will be handled by ashr_11
1566 * rcx(offset of rsi) rax(offset of rdi) relative offset corresponding case
1567 * n(5~15) n - 5 10(15 +(n - 5) - n) ashr_11
1572 movdqa (%rdi), %xmm2
1573 movdqa (%rsi), %xmm1
1574 pcmpeqb %xmm1, %xmm0
1576 TOLOWER (%xmm1, %xmm2)
1577 pcmpeqb %xmm1, %xmm2
1579 pmovmskb %xmm2, %r9d
1583 jnz LABEL(less32bytes)
1584 movdqa (%rdi), %xmm3
1586 UPDATE_STRNCMP_COUNTER
1589 mov $16, %rcx /* index for loads */
1590 mov $11, %r9d /* byte position left over from less32bytes case */
1592 * Set up %r10 so that we can detect crossing a page boundary.
1593 * When %r10 goes positive we have crossed a page boundary and
1594 * need to do a nibble.
1597 and $0xfff, %r10 /* offset into 4K page */
1598 sub $0x1000, %r10 /* subtract 4K pagesize */
1601 LABEL(loop_ashr_11):
1603 jg LABEL(nibble_ashr_11)
1605 LABEL(gobble_ashr_11):
1606 movdqa (%rsi, %rcx), %xmm1
1607 movdqa (%rdi, %rcx), %xmm2
1613 por %xmm3, %xmm2 /* merge into one 16byte value */
1615 palignr $11, %xmm3, %xmm2 /* merge into one 16byte value */
1617 TOLOWER (%xmm1, %xmm2)
1619 pcmpeqb %xmm1, %xmm0
1620 pcmpeqb %xmm2, %xmm1
1622 pmovmskb %xmm1, %edx
1626 #if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
1628 jbe LABEL(strcmp_exitz)
1635 jg LABEL(nibble_ashr_11) /* cross page boundary */
1637 movdqa (%rsi, %rcx), %xmm1
1638 movdqa (%rdi, %rcx), %xmm2
1644 por %xmm3, %xmm2 /* merge into one 16byte value */
1646 palignr $11, %xmm3, %xmm2 /* merge into one 16byte value */
1648 TOLOWER (%xmm1, %xmm2)
1650 pcmpeqb %xmm1, %xmm0
1651 pcmpeqb %xmm2, %xmm1
1653 pmovmskb %xmm1, %edx
1657 #if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
1659 jbe LABEL(strcmp_exitz)
1664 jmp LABEL(loop_ashr_11)
1667 LABEL(nibble_ashr_11):
1668 pcmpeqb %xmm3, %xmm0 /* check nibble for null char */
1669 pmovmskb %xmm0, %edx
1671 jnz LABEL(ashr_11_exittail)
1673 #if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
1675 jbe LABEL(ashr_11_exittail)
1680 jmp LABEL(gobble_ashr_11)
1683 LABEL(ashr_11_exittail):
1684 movdqa (%rsi, %rcx), %xmm1
1687 jmp LABEL(aftertail)
1690 * The following cases will be handled by ashr_12
1691 * rcx(offset of rsi) rax(offset of rdi) relative offset corresponding case
1692 * n(4~15) n - 4 11(15 +(n - 4) - n) ashr_12
1697 movdqa (%rdi), %xmm2
1698 movdqa (%rsi), %xmm1
1699 pcmpeqb %xmm1, %xmm0
1701 TOLOWER (%xmm1, %xmm2)
1702 pcmpeqb %xmm1, %xmm2
1704 pmovmskb %xmm2, %r9d
1708 jnz LABEL(less32bytes)
1709 movdqa (%rdi), %xmm3
1711 UPDATE_STRNCMP_COUNTER
1714 mov $16, %rcx /* index for loads */
1715 mov $12, %r9d /* byte position left over from less32bytes case */
1717 * Set up %r10 so that we can detect crossing a page boundary.
1718 * When %r10 goes positive we have crossed a page boundary and
1719 * need to do a nibble.
1722 and $0xfff, %r10 /* offset into 4K page */
1723 sub $0x1000, %r10 /* subtract 4K pagesize */
1726 LABEL(loop_ashr_12):
1728 jg LABEL(nibble_ashr_12)
1730 LABEL(gobble_ashr_12):
1731 movdqa (%rsi, %rcx), %xmm1
1732 movdqa (%rdi, %rcx), %xmm2
1738 por %xmm3, %xmm2 /* merge into one 16byte value */
1740 palignr $12, %xmm3, %xmm2 /* merge into one 16byte value */
1742 TOLOWER (%xmm1, %xmm2)
1744 pcmpeqb %xmm1, %xmm0
1745 pcmpeqb %xmm2, %xmm1
1747 pmovmskb %xmm1, %edx
1751 #if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
1753 jbe LABEL(strcmp_exitz)
1760 jg LABEL(nibble_ashr_12) /* cross page boundary */
1762 movdqa (%rsi, %rcx), %xmm1
1763 movdqa (%rdi, %rcx), %xmm2
1769 por %xmm3, %xmm2 /* merge into one 16byte value */
1771 palignr $12, %xmm3, %xmm2 /* merge into one 16byte value */
1773 TOLOWER (%xmm1, %xmm2)
1775 pcmpeqb %xmm1, %xmm0
1776 pcmpeqb %xmm2, %xmm1
1778 pmovmskb %xmm1, %edx
1782 #if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
1784 jbe LABEL(strcmp_exitz)
1789 jmp LABEL(loop_ashr_12)
1792 LABEL(nibble_ashr_12):
1793 pcmpeqb %xmm3, %xmm0 /* check nibble for null char */
1794 pmovmskb %xmm0, %edx
1796 jnz LABEL(ashr_12_exittail)
1798 #if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
1800 jbe LABEL(ashr_12_exittail)
1805 jmp LABEL(gobble_ashr_12)
1808 LABEL(ashr_12_exittail):
1809 movdqa (%rsi, %rcx), %xmm1
1812 jmp LABEL(aftertail)
1815 * The following cases will be handled by ashr_13
1816 * rcx(offset of rsi) rax(offset of rdi) relative offset corresponding case
1817 * n(3~15) n - 3 12(15 +(n - 3) - n) ashr_13
1822 movdqa (%rdi), %xmm2
1823 movdqa (%rsi), %xmm1
1824 pcmpeqb %xmm1, %xmm0
1826 TOLOWER (%xmm1, %xmm2)
1827 pcmpeqb %xmm1, %xmm2
1829 pmovmskb %xmm2, %r9d
1833 jnz LABEL(less32bytes)
1834 movdqa (%rdi), %xmm3
1836 UPDATE_STRNCMP_COUNTER
1839 mov $16, %rcx /* index for loads */
1840 mov $13, %r9d /* byte position left over from less32bytes case */
1842 * Set up %r10 so that we can detect crossing a page boundary.
1843 * When %r10 goes positive we have crossed a page boundary and
1844 * need to do a nibble.
1847 and $0xfff, %r10 /* offset into 4K page */
1848 sub $0x1000, %r10 /* subtract 4K pagesize */
/*
 * Main compare loop of the ashr_13 case.  The body appears twice
 * (unrolled by two).  Each copy loads 16 bytes of each operand at index
 * %rcx, merges the previous %rdi block held in %xmm3 with the new one,
 * compares the result against the %rsi block and extracts a byte mask
 * into %edx.  A jg before each copy diverts to nibble_ashr_13 on a page
 * crossing (see the %r10 setup in the entry code).
 */
1851 LABEL(loop_ashr_13):
1853 jg LABEL(nibble_ashr_13)
/* First copy of the body; also the re-entry point used by nibble_ashr_13.  */
1855 LABEL(gobble_ashr_13):
1856 movdqa (%rsi, %rcx), %xmm1
1857 movdqa (%rdi, %rcx), %xmm2
/* NOTE(review): the por and palignr lines carry the same "merge" comment and
   look like alternative merge sequences chosen by a conditional - confirm.  */
1863 por %xmm3, %xmm2 /* merge into one 16byte value */
1865 palignr $13, %xmm3, %xmm2 /* merge into one 16byte value */
1867 TOLOWER (%xmm1, %xmm2)
1869 pcmpeqb %xmm1, %xmm0 /* compare the %rsi block with %xmm0 (presumably zero: NUL test) */
1870 pcmpeqb %xmm2, %xmm1 /* %xmm1 = per-byte equality of the two blocks */
1872 pmovmskb %xmm1, %edx /* one mask bit per byte */
1876 #if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
/* Length-limited builds only: conditional exit to strcmp_exitz.  */
1878 jbe LABEL(strcmp_exitz)
1885 jg LABEL(nibble_ashr_13) /* cross page boundary */
/* Second copy of the body.  */
1887 movdqa (%rsi, %rcx), %xmm1
1888 movdqa (%rdi, %rcx), %xmm2
1894 por %xmm3, %xmm2 /* merge into one 16byte value */
1896 palignr $13, %xmm3, %xmm2 /* merge into one 16byte value */
1898 TOLOWER (%xmm1, %xmm2)
1900 pcmpeqb %xmm1, %xmm0
1901 pcmpeqb %xmm2, %xmm1
1903 pmovmskb %xmm1, %edx
1907 #if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
1909 jbe LABEL(strcmp_exitz)
/* Next iteration.  */
1914 jmp LABEL(loop_ashr_13)
/*
 * Page-boundary path of the ashr_13 loop, reached from the loop's jg
 * branches.  The bytes still held in %xmm3 are checked for a terminator
 * before the loop body is re-entered at gobble_ashr_13, which loads the
 * next 16 bytes of each operand.
 */
1917 LABEL(nibble_ashr_13):
1918 pcmpeqb %xmm3, %xmm0 /* check nibble for null char */
1919 pmovmskb %xmm0, %edx /* one mask bit per byte of the compare result */
1921 jnz LABEL(ashr_13_exittail) /* finish through the tail path */
1923 #if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
/* Length-limited builds only: second conditional exit to the tail path.  */
1925 jbe LABEL(ashr_13_exittail)
/* Otherwise resume the loop body.  */
1930 jmp LABEL(gobble_ashr_13)
/*
 * Tail exit for the ashr_13 case: load the 16 bytes at (%rsi + %rcx)
 * into %xmm1, then continue at aftertail.
 */
1933 LABEL(ashr_13_exittail):
1934 movdqa (%rsi, %rcx), %xmm1
1937 jmp LABEL(aftertail)
1940 * The following cases will be handled by ashr_14
1941 * rcx(offset of rsi) rax(offset of rdi) relative offset corresponding case
1942 * n(2~15) n - 2 13(15 +(n - 2) - n) ashr_14
/*
 * Entry code of the ashr_14 case (one of the handlers listed in
 * unaligned_table).  The first 16-byte block of each operand is compared;
 * if the jnz below is taken control leaves through less32bytes.
 * Otherwise the state for loop_ashr_14 is set up: %xmm3 keeps the block
 * read from (%rdi), %rcx becomes the load index (16), %r9d is set to 14
 * and %r10 becomes the page-crossing counter described further down.
 * Both loads use movdqa, which requires 16-byte aligned addresses.
 */
1947 movdqa (%rdi), %xmm2
1948 movdqa (%rsi), %xmm1
/* NOTE(review): %xmm0 is presumably all-zero here, making this a NUL test - confirm.  */
1949 pcmpeqb %xmm1, %xmm0
/* TOLOWER is defined elsewhere; presumably a case fold for the case-insensitive builds - confirm.  */
1951 TOLOWER (%xmm1, %xmm2)
1952 pcmpeqb %xmm1, %xmm2 /* %xmm2 = per-byte equality of the two blocks */
1954 pmovmskb %xmm2, %r9d /* one mask bit per byte */
1958 jnz LABEL(less32bytes)
1959 movdqa (%rdi), %xmm3
/* Length-limited builds: update the %r11 counter, exit via strcmp_exitz
   when it runs out.  Expands to nothing in the other builds.  */
1961 UPDATE_STRNCMP_COUNTER
1964 mov $16, %rcx /* index for loads */
1965 mov $14, %r9d /* byte position left over from less32bytes case */
1967 * Setup %r10 value allows us to detect crossing a page boundary.
1968 * When %r10 goes positive we have crossed a page boundary and
1969 * need to do a nibble.
1972 and $0xfff, %r10 /* offset into 4K page */
1973 sub $0x1000, %r10 /* subtract 4K pagesize */
/*
 * Main compare loop of the ashr_14 case.  The body appears twice
 * (unrolled by two).  Each copy loads 16 bytes of each operand at index
 * %rcx, merges the previous %rdi block held in %xmm3 with the new one,
 * compares the result against the %rsi block and extracts a byte mask
 * into %edx.  A jg before each copy diverts to nibble_ashr_14 on a page
 * crossing (see the %r10 setup in the entry code).
 */
1976 LABEL(loop_ashr_14):
1978 jg LABEL(nibble_ashr_14)
/* First copy of the body; also the re-entry point used by nibble_ashr_14.  */
1980 LABEL(gobble_ashr_14):
1981 movdqa (%rsi, %rcx), %xmm1
1982 movdqa (%rdi, %rcx), %xmm2
/* NOTE(review): the por and palignr lines carry the same "merge" comment and
   look like alternative merge sequences chosen by a conditional - confirm.  */
1988 por %xmm3, %xmm2 /* merge into one 16byte value */
1990 palignr $14, %xmm3, %xmm2 /* merge into one 16byte value */
1992 TOLOWER (%xmm1, %xmm2)
1994 pcmpeqb %xmm1, %xmm0 /* compare the %rsi block with %xmm0 (presumably zero: NUL test) */
1995 pcmpeqb %xmm2, %xmm1 /* %xmm1 = per-byte equality of the two blocks */
1997 pmovmskb %xmm1, %edx /* one mask bit per byte */
2001 #if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
/* Length-limited builds only: conditional exit to strcmp_exitz.  */
2003 jbe LABEL(strcmp_exitz)
2010 jg LABEL(nibble_ashr_14) /* cross page boundary */
/* Second copy of the body.  */
2012 movdqa (%rsi, %rcx), %xmm1
2013 movdqa (%rdi, %rcx), %xmm2
2019 por %xmm3, %xmm2 /* merge into one 16byte value */
2021 palignr $14, %xmm3, %xmm2 /* merge into one 16byte value */
2023 TOLOWER (%xmm1, %xmm2)
2025 pcmpeqb %xmm1, %xmm0
2026 pcmpeqb %xmm2, %xmm1
2028 pmovmskb %xmm1, %edx
/* Logical ||, matching every other length-limited conditional in this file.  */
2032 #if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
2034 jbe LABEL(strcmp_exitz)
/* Next iteration.  */
2039 jmp LABEL(loop_ashr_14)
/*
 * Page-boundary path of the ashr_14 loop, reached from the loop's jg
 * branches.  The bytes still held in %xmm3 are checked for a terminator
 * before the loop body is re-entered at gobble_ashr_14, which loads the
 * next 16 bytes of each operand.
 */
2042 LABEL(nibble_ashr_14):
2043 pcmpeqb %xmm3, %xmm0 /* check nibble for null char */
2044 pmovmskb %xmm0, %edx /* one mask bit per byte of the compare result */
2046 jnz LABEL(ashr_14_exittail) /* finish through the tail path */
2048 #if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
/* Length-limited builds only: second conditional exit to the tail path.  */
2050 jbe LABEL(ashr_14_exittail)
/* Otherwise resume the loop body.  */
2055 jmp LABEL(gobble_ashr_14)
/*
 * Tail exit for the ashr_14 case: load the 16 bytes at (%rsi + %rcx)
 * into %xmm1, then continue at aftertail.
 */
2058 LABEL(ashr_14_exittail):
2059 movdqa (%rsi, %rcx), %xmm1
2062 jmp LABEL(aftertail)
2065 * The following cases will be handled by ashr_15
2066 * rcx(offset of rsi) rax(offset of rdi) relative offset corresponding case
2067 * n(1~15) n - 1 14(15 +(n - 1) - n) ashr_15
/*
 * Entry code of the ashr_15 case (one of the handlers listed in
 * unaligned_table).  Steps:
 *   1. compare the first 16-byte block of each operand (aligned loads);
 *   2. leave through less32bytes if the jnz below is taken;
 *   3. keep the (%rdi) block in %xmm3 for the merge done in the loop;
 *   4. set the load index %rcx to 16, %r9d to 15, and derive the
 *      page-crossing counter %r10 (original comment below).
 */
2072 movdqa (%rdi), %xmm2
2073 movdqa (%rsi), %xmm1
/* NOTE(review): %xmm0 is presumably all-zero here, making this a NUL test - confirm.  */
2074 pcmpeqb %xmm1, %xmm0
/* TOLOWER is defined elsewhere; presumably a case fold for the case-insensitive builds - confirm.  */
2076 TOLOWER (%xmm1, %xmm2)
2077 pcmpeqb %xmm1, %xmm2 /* %xmm2 = per-byte equality of the two blocks */
2079 pmovmskb %xmm2, %r9d /* one mask bit per byte */
2083 jnz LABEL(less32bytes)
2085 movdqa (%rdi), %xmm3
/* Length-limited builds: update the %r11 counter, exit via strcmp_exitz
   when it runs out.  Expands to nothing in the other builds.  */
2087 UPDATE_STRNCMP_COUNTER
2090 mov $16, %rcx /* index for loads */
2091 mov $15, %r9d /* byte position left over from less32bytes case */
2093 * Setup %r10 value allows us to detect crossing a page boundary.
2094 * When %r10 goes positive we have crossed a page boundary and
2095 * need to do a nibble.
2098 and $0xfff, %r10 /* offset into 4K page */
2100 sub $0x1000, %r10 /* subtract 4K pagesize */
/*
 * Main compare loop of the ashr_15 case.  The body appears twice
 * (unrolled by two).  Each copy loads 16 bytes of each operand at index
 * %rcx, merges the previous %rdi block held in %xmm3 with the new one,
 * compares the result against the %rsi block and extracts a byte mask
 * into %edx.  A jg before each copy diverts to nibble_ashr_15 on a page
 * crossing (see the %r10 setup in the entry code).
 */
2103 LABEL(loop_ashr_15):
2105 jg LABEL(nibble_ashr_15)
/* First copy of the body; also the re-entry point used by nibble_ashr_15.  */
2107 LABEL(gobble_ashr_15):
2108 movdqa (%rsi, %rcx), %xmm1
2109 movdqa (%rdi, %rcx), %xmm2
/* NOTE(review): the por and palignr lines carry the same "merge" comment and
   look like alternative merge sequences chosen by a conditional - confirm.  */
2115 por %xmm3, %xmm2 /* merge into one 16byte value */
2117 palignr $15, %xmm3, %xmm2 /* merge into one 16byte value */
2119 TOLOWER (%xmm1, %xmm2)
2121 pcmpeqb %xmm1, %xmm0 /* compare the %rsi block with %xmm0 (presumably zero: NUL test) */
2122 pcmpeqb %xmm2, %xmm1 /* %xmm1 = per-byte equality of the two blocks */
2124 pmovmskb %xmm1, %edx /* one mask bit per byte */
2128 #if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
/* Length-limited builds only: conditional exit to strcmp_exitz.  */
2130 jbe LABEL(strcmp_exitz)
2137 jg LABEL(nibble_ashr_15) /* cross page boundary */
/* Second copy of the body.  */
2139 movdqa (%rsi, %rcx), %xmm1
2140 movdqa (%rdi, %rcx), %xmm2
2146 por %xmm3, %xmm2 /* merge into one 16byte value */
2148 palignr $15, %xmm3, %xmm2 /* merge into one 16byte value */
2150 TOLOWER (%xmm1, %xmm2)
2152 pcmpeqb %xmm1, %xmm0
2153 pcmpeqb %xmm2, %xmm1
2155 pmovmskb %xmm1, %edx
2159 #if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
2161 jbe LABEL(strcmp_exitz)
/* Next iteration.  */
2166 jmp LABEL(loop_ashr_15)
/*
 * Page-boundary path of the ashr_15 loop, reached from the loop's jg
 * branches.  The bytes still held in %xmm3 are checked for a terminator
 * before the loop body is re-entered at gobble_ashr_15, which loads the
 * next 16 bytes of each operand.
 */
2169 LABEL(nibble_ashr_15):
2170 pcmpeqb %xmm3, %xmm0 /* check nibble for null char */
2171 pmovmskb %xmm0, %edx /* one mask bit per byte of the compare result */
2173 jnz LABEL(ashr_15_exittail) /* finish through the tail path */
2175 #if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
/* Length-limited builds only: second conditional exit to the tail path.  */
2177 jbe LABEL(ashr_15_exittail)
/* Otherwise resume the loop body.  */
2182 jmp LABEL(gobble_ashr_15)
/*
 * Tail exit for the ashr_15 case, followed by the code that turns the
 * compare mask into the two bytes that decide the result.
 * NOTE(review): unlike the ashr_12..ashr_14 tail exits there is no jump to
 * aftertail here, so this path appears to fall through into the code
 * below - confirm against the label layout.
 */
2185 LABEL(ashr_15_exittail):
2186 movdqa (%rsi, %rcx), %xmm1
2192 TOLOWER (%xmm1, %xmm3)
2193 pcmpeqb %xmm3, %xmm1 /* per-byte equality of the %rsi block and %xmm3 */
2195 pmovmskb %xmm1, %edx /* one mask bit per byte */
/* Turn the loop index back into real addresses for both operands.  */
2200 lea -16(%r9, %rcx), %rax /* locate the exact offset for rdi */
2202 lea (%rdi, %rax), %rdi /* locate the exact address for first operand(rdi) */
2203 lea (%rsi, %rcx), %rsi /* locate the exact address for second operand(rsi) */
2206 xchg %rsi, %rdi /* recover original order according to flag(%r8d) */
2211 bsf %rdx, %rdx /* find and store bit index in %rdx */
2213 #if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
/* Length-limited builds only: conditional exit to strcmp_exitz.  */
2215 jbe LABEL(strcmp_exitz)
/* Zero-extended loads of the byte at index %rdx from each operand.  */
2217 movzbl (%rsi, %rdx), %ecx
2218 movzbl (%rdi, %rdx), %eax
2220 #if defined USE_AS_STRCASECMP_L || defined USE_AS_STRNCASECMP_L
/* Case-insensitive builds: replace both bytes with the 32-bit table entry
   they index (presumably their lower-case form, going by the table name).  */
2221 leaq _nl_C_LC_CTYPE_tolower+128*4(%rip), %rdx
2222 movl (%rdx,%rcx,4), %ecx
2223 movl (%rdx,%rax,4), %eax
/*
 * strcmp_exitz is the target of the length-limit exits in the loops above
 * and in UPDATE_STRNCMP_COUNTER.
 * NOTE(review): the conditional table lookup that follows may belong to a
 * separate exit sequence rather than to this label - confirm against the
 * label layout.
 */
2229 LABEL(strcmp_exitz):
2238 #if defined USE_AS_STRCASECMP_L || defined USE_AS_STRNCASECMP_L
/* Case-insensitive builds: look up the 32-bit table entries indexed by
   %rcx and %rax (scaled by 4) and put them in %ecx and %eax.  */
2239 leaq _nl_C_LC_CTYPE_tolower+128*4(%rip), %rdx
2240 movl (%rdx,%rcx,4), %ecx
2241 movl (%rdx,%rax,4), %eax
/*
 * Read-only table of sixteen 32-bit entries.  Each entry is the distance
 * from the start of the table to one ashr_N handler, so the table holds
 * no absolute addresses.  The entries run from ashr_1 to ashr_15, with
 * ashr_0 in the last slot.
 * NOTE(review): the code that indexes this table is elsewhere in the
 * file; the entry order and 4-byte width must stay in step with it.
 */
2248 .section .rodata,"a",@progbits
2250 LABEL(unaligned_table):
2251 .int LABEL(ashr_1) - LABEL(unaligned_table)
2252 .int LABEL(ashr_2) - LABEL(unaligned_table)
2253 .int LABEL(ashr_3) - LABEL(unaligned_table)
2254 .int LABEL(ashr_4) - LABEL(unaligned_table)
2255 .int LABEL(ashr_5) - LABEL(unaligned_table)
2256 .int LABEL(ashr_6) - LABEL(unaligned_table)
2257 .int LABEL(ashr_7) - LABEL(unaligned_table)
2258 .int LABEL(ashr_8) - LABEL(unaligned_table)
2259 .int LABEL(ashr_9) - LABEL(unaligned_table)
2260 .int LABEL(ashr_10) - LABEL(unaligned_table)
2261 .int LABEL(ashr_11) - LABEL(unaligned_table)
2262 .int LABEL(ashr_12) - LABEL(unaligned_table)
2263 .int LABEL(ashr_13) - LABEL(unaligned_table)
2264 .int LABEL(ashr_14) - LABEL(unaligned_table)
2265 .int LABEL(ashr_15) - LABEL(unaligned_table)
2266 .int LABEL(ashr_0) - LABEL(unaligned_table) /* ashr_0 occupies the final slot */
/* Invoke the libc_hidden_builtin_def macro for the symbol named by STRCMP.
   NOTE(review): the macro is defined outside this file; presumably it emits
   the hidden alias used for calls from inside libc - confirm.  */
2267 libc_hidden_builtin_def (STRCMP)