1 /* Highly optimized version for x86-64.
2 Copyright (C) 1999, 2000, 2002, 2003, 2005, 2009
3 Free Software Foundation, Inc.
4 This file is part of the GNU C Library.
5 Based on i686 version contributed by Ulrich Drepper
6 <drepper@cygnus.com>, 1999.
7 Updated with SSE2 support contributed by Intel Corporation.
9 The GNU C Library is free software; you can redistribute it and/or
10 modify it under the terms of the GNU Lesser General Public
11 License as published by the Free Software Foundation; either
12 version 2.1 of the License, or (at your option) any later version.
14 The GNU C Library is distributed in the hope that it will be useful,
15 but WITHOUT ANY WARRANTY; without even the implied warranty of
16 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17 Lesser General Public License for more details.
19 You should have received a copy of the GNU Lesser General Public
20 License along with the GNU C Library; if not, write to the Free
21 Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
25 #include "asm-syntax.h"
29 #undef UPDATE_STRNCMP_COUNTER
36 /* Since the counter, %r11, is unsigned, we branch to strcmp_exitz
37 if the new counter is greater than the old one or is 0. */
38 # define UPDATE_STRNCMP_COUNTER \
39 /* calculate the number of bytes left to compare */ \
40 lea -16(%rcx, %r11), %r9; /* %r9 = %r11 + %rcx - 16 */ \
42 jb LABEL(strcmp_exitz); \
44 je LABEL(strcmp_exitz); \
48 # define UPDATE_STRNCMP_COUNTER
50 # define STRCMP strcmp
57 .section .text.ssse3,"ax",@progbits
60 ENTRY (BP_SYM (STRCMP))
62 /* Simple version since we can't use SSE registers in ld.so. */
63 L(oop): movb (%rdi), %al
79 #else /* NOT_IN_libc */
81 * This implementation uses SSE to compare up to 16 bytes at a time.
85 je LABEL(strcmp_exitz)
92 /* Use 64bit AND here to avoid long NOP padding. */
93 and $0x3f, %rcx /* rsi alignment in cache line */
94 and $0x3f, %rax /* rdi alignment in cache line */
96 ja LABEL(crosscache) /* rsi: 16-byte load will cross cache line */
98 ja LABEL(crosscache) /* rdi: 16-byte load will cross cache line */
101 movhpd 8(%rdi), %xmm1 /* high 8 bytes of %xmm1 = bytes 8..15 at %rdi */
102 movhpd 8(%rsi), %xmm2 /* high 8 bytes of %xmm2 = bytes 8..15 at %rsi */
103 pxor %xmm0, %xmm0 /* clear %xmm0 for null char checks */
104 pcmpeqb %xmm1, %xmm0 /* Any null chars? (0xff in each byte of %xmm0 where %xmm1 is 0) */
105 pcmpeqb %xmm2, %xmm1 /* compare first 16 bytes for equality */
106 psubb %xmm0, %xmm1 /* packed sub of comparison results */
108 sub $0xffff, %edx /* edx was 0xffff if the first 16 bytes are the same; the result is then 0 */
109 jnz LABEL(less16bytes) /* If not, find different value or null char */
110 #ifdef USE_AS_STRNCMP
112 jbe LABEL(strcmp_exitz) /* finish comparison */
114 add $16, %rsi /* prepare to search next 16 bytes */
115 add $16, %rdi /* prepare to search next 16 bytes */
118 * Determine source and destination string offsets from 16-byte alignment.
119 * Use relative offset difference between the two to determine which case
124 and $0xfffffffffffffff0, %rsi /* force %rsi to be 16-byte aligned */
125 and $0xfffffffffffffff0, %rdi /* force %rdi to be 16-byte aligned */
126 mov $0xffff, %edx /* for equivalent offset */
128 and $0xf, %ecx /* offset of rsi within its 16-byte block */
129 and $0xf, %eax /* offset of rdi within its 16-byte block */
131 je LABEL(ashr_0) /* rsi and rdi have the same relative offset */
133 mov %edx, %r8d /* r8d is offset flag for exit tail */
139 lea LABEL(unaligned_table)(%rip), %r10 /* %r10 = address of the jump table */
140 movslq (%r10, %r9,4), %r9 /* %r9 = sign-extended 32-bit table entry number %r9 */
141 lea (%r10, %r9), %r10 /* entries are offsets from the table, so add the table address */
142 jmp *%r10 /* jump to corresponding case */
145 * The following cases will be handled by ashr_0
146 * rcx(offset of rsi) rax(offset of rdi) relative offset corresponding case
147 * n(0~15) n(0~15) 15(15+ n-n) ashr_0
153 pxor %xmm0, %xmm0 /* clear %xmm0 for null char check */
154 pcmpeqb %xmm1, %xmm0 /* Any null chars? */
155 pcmpeqb (%rdi), %xmm1 /* compare 16 bytes for equality */
156 psubb %xmm0, %xmm1 /* packed sub of comparison results*/
158 shr %cl, %edx /* adjust 0xffff for offset */
159 shr %cl, %r9d /* adjust for 16-byte offset */
162 * edx must be the same as r9d if the remaining (16-rcx) bytes are equal to
163 * the bytes starting from (16-rax) and no null char was seen.
165 jne LABEL(less32bytes) /* mismatch or null char */
166 UPDATE_STRNCMP_COUNTER
169 pxor %xmm0, %xmm0 /* clear xmm0, may have changed above */
172 * Now both strings are aligned at 16-byte boundary. Loop over strings
173 * checking 32-bytes per iteration.
177 movdqa (%rsi, %rcx), %xmm1
178 movdqa (%rdi, %rcx), %xmm2
185 jnz LABEL(exit) /* mismatch or null char seen */
187 #ifdef USE_AS_STRNCMP
189 jbe LABEL(strcmp_exitz)
192 movdqa (%rsi, %rcx), %xmm1
193 movdqa (%rdi, %rcx), %xmm2
201 #ifdef USE_AS_STRNCMP
203 jbe LABEL(strcmp_exitz)
206 jmp LABEL(loop_ashr_0)
209 * The following cases will be handled by ashr_1
210 * rcx(offset of rsi) rax(offset of rdi) relative offset corresponding case
211 * n(15) n -15 0(15 +(n-15) - n) ashr_1
218 pcmpeqb %xmm1, %xmm0 /* Any null chars? */
219 pslldq $15, %xmm2 /* shift first string to align with second */
220 pcmpeqb %xmm1, %xmm2 /* compare 16 bytes for equality */
221 psubb %xmm0, %xmm2 /* packed sub of comparison results*/
223 shr %cl, %edx /* adjust 0xffff for offset */
224 shr %cl, %r9d /* adjust for 16-byte offset */
226 jnz LABEL(less32bytes) /* mismatch or null char seen */
228 UPDATE_STRNCMP_COUNTER
231 mov $16, %rcx /* index for loads*/
232 mov $1, %r9d /* byte position left over from less32bytes case */
234 * Setup %r10 value allows us to detect crossing a page boundary.
235 * When %r10 goes positive we have crossed a page boundary and
236 * need to do a nibble.
239 and $0xfff, %r10 /* offset into 4K page */
240 sub $0x1000, %r10 /* subtract 4K pagesize */
245 jg LABEL(nibble_ashr_1) /* cross page boundary */
247 LABEL(gobble_ashr_1):
248 movdqa (%rsi, %rcx), %xmm1
249 movdqa (%rdi, %rcx), %xmm2
250 movdqa %xmm2, %xmm4 /* store for next cycle */
255 por %xmm3, %xmm2 /* merge into one 16byte value */
257 palignr $1, %xmm3, %xmm2 /* merge into one 16byte value */
267 #ifdef USE_AS_STRNCMP
269 jbe LABEL(strcmp_exitz)
275 jg LABEL(nibble_ashr_1) /* cross page boundary */
277 movdqa (%rsi, %rcx), %xmm1
278 movdqa (%rdi, %rcx), %xmm2
279 movdqa %xmm2, %xmm4 /* store for next cycle */
284 por %xmm3, %xmm2 /* merge into one 16byte value */
286 palignr $1, %xmm3, %xmm2 /* merge into one 16byte value */
296 #ifdef USE_AS_STRNCMP
298 jbe LABEL(strcmp_exitz)
302 jmp LABEL(loop_ashr_1)
305 * Nibble avoids loads across page boundary. This is to avoid a potential
306 * access into unmapped memory.
309 LABEL(nibble_ashr_1):
310 pcmpeqb %xmm3, %xmm0 /* check nibble for null char */
313 jnz LABEL(ashr_1_exittail) /* null char found */
315 #ifdef USE_AS_STRNCMP
317 jbe LABEL(ashr_1_exittail)
321 sub $0x1000, %r10 /* subtract 4K from %r10 */
322 jmp LABEL(gobble_ashr_1) /* continue with the 16-byte loads */
325 * Once a null char is found, determine if there is a string mismatch
326 * before the null char.
329 LABEL(ashr_1_exittail):
330 movdqa (%rsi, %rcx), %xmm1
336 * The following cases will be handled by ashr_2
337 * rcx(offset of rsi) rax(offset of rdi) relative offset corresponding case
338 * n(14~15) n -14 1(15 +(n-14) - n) ashr_2
353 jnz LABEL(less32bytes)
355 UPDATE_STRNCMP_COUNTER
358 mov $16, %rcx /* index for loads */
359 mov $2, %r9d /* byte position left over from less32bytes case */
361 * Setup %r10 value allows us to detect crossing a page boundary.
362 * When %r10 goes positive we have crossed a page boundary and
363 * need to do a nibble.
366 and $0xfff, %r10 /* offset into 4K page */
367 sub $0x1000, %r10 /* subtract 4K pagesize */
372 jg LABEL(nibble_ashr_2)
374 LABEL(gobble_ashr_2):
375 movdqa (%rsi, %rcx), %xmm1
376 movdqa (%rdi, %rcx), %xmm2
382 por %xmm3, %xmm2 /* merge into one 16byte value */
384 palignr $2, %xmm3, %xmm2 /* merge into one 16byte value */
394 #ifdef USE_AS_STRNCMP
396 jbe LABEL(strcmp_exitz)
403 jg LABEL(nibble_ashr_2) /* cross page boundary */
405 movdqa (%rsi, %rcx), %xmm1
406 movdqa (%rdi, %rcx), %xmm2
412 por %xmm3, %xmm2 /* merge into one 16byte value */
414 palignr $2, %xmm3, %xmm2 /* merge into one 16byte value */
424 #ifdef USE_AS_STRNCMP
426 jbe LABEL(strcmp_exitz)
431 jmp LABEL(loop_ashr_2)
434 LABEL(nibble_ashr_2):
435 pcmpeqb %xmm3, %xmm0 /* check nibble for null char */
438 jnz LABEL(ashr_2_exittail)
440 #ifdef USE_AS_STRNCMP
442 jbe LABEL(ashr_2_exittail)
447 jmp LABEL(gobble_ashr_2)
450 LABEL(ashr_2_exittail):
451 movdqa (%rsi, %rcx), %xmm1
457 * The following cases will be handled by ashr_3
458 * rcx(offset of rsi) rax(offset of rdi) relative offset corresponding case
459 * n(13~15) n -13 2(15 +(n-13) - n) ashr_3
474 jnz LABEL(less32bytes)
477 UPDATE_STRNCMP_COUNTER
480 mov $16, %rcx /* index for loads */
481 mov $3, %r9d /* byte position left over from less32bytes case */
483 * Setup %r10 value allows us to detect crossing a page boundary.
484 * When %r10 goes positive we have crossed a page boundary and
485 * need to do a nibble.
488 and $0xfff, %r10 /* offset into 4K page */
489 sub $0x1000, %r10 /* subtract 4K pagesize */
494 jg LABEL(nibble_ashr_3)
496 LABEL(gobble_ashr_3):
497 movdqa (%rsi, %rcx), %xmm1
498 movdqa (%rdi, %rcx), %xmm2
504 por %xmm3, %xmm2 /* merge into one 16byte value */
506 palignr $3, %xmm3, %xmm2 /* merge into one 16byte value */
516 #ifdef USE_AS_STRNCMP
518 jbe LABEL(strcmp_exitz)
525 jg LABEL(nibble_ashr_3) /* cross page boundary */
527 movdqa (%rsi, %rcx), %xmm1
528 movdqa (%rdi, %rcx), %xmm2
534 por %xmm3, %xmm2 /* merge into one 16byte value */
536 palignr $3, %xmm3, %xmm2 /* merge into one 16byte value */
546 #ifdef USE_AS_STRNCMP
548 jbe LABEL(strcmp_exitz)
553 jmp LABEL(loop_ashr_3)
556 LABEL(nibble_ashr_3):
557 pcmpeqb %xmm3, %xmm0 /* check nibble for null char */
560 jnz LABEL(ashr_3_exittail)
562 #ifdef USE_AS_STRNCMP
564 jbe LABEL(ashr_3_exittail)
569 jmp LABEL(gobble_ashr_3)
572 LABEL(ashr_3_exittail):
573 movdqa (%rsi, %rcx), %xmm1
579 * The following cases will be handled by ashr_4
580 * rcx(offset of rsi) rax(offset of rdi) relative offset corresponding case
581 * n(12~15) n -12 3(15 +(n-12) - n) ashr_4
596 jnz LABEL(less32bytes)
599 UPDATE_STRNCMP_COUNTER
602 mov $16, %rcx /* index for loads */
603 mov $4, %r9d /* byte position left over from less32bytes case */
605 * Setup %r10 value allows us to detect crossing a page boundary.
606 * When %r10 goes positive we have crossed a page boundary and
607 * need to do a nibble.
610 and $0xfff, %r10 /* offset into 4K page */
611 sub $0x1000, %r10 /* subtract 4K pagesize */
616 jg LABEL(nibble_ashr_4)
618 LABEL(gobble_ashr_4):
619 movdqa (%rsi, %rcx), %xmm1
620 movdqa (%rdi, %rcx), %xmm2
626 por %xmm3, %xmm2 /* merge into one 16byte value */
628 palignr $4, %xmm3, %xmm2 /* merge into one 16byte value */
638 #ifdef USE_AS_STRNCMP
640 jbe LABEL(strcmp_exitz)
647 jg LABEL(nibble_ashr_4) /* cross page boundary */
649 movdqa (%rsi, %rcx), %xmm1
650 movdqa (%rdi, %rcx), %xmm2
656 por %xmm3, %xmm2 /* merge into one 16byte value */
658 palignr $4, %xmm3, %xmm2 /* merge into one 16byte value */
668 #ifdef USE_AS_STRNCMP
670 jbe LABEL(strcmp_exitz)
675 jmp LABEL(loop_ashr_4)
678 LABEL(nibble_ashr_4):
679 pcmpeqb %xmm3, %xmm0 /* check nibble for null char */
682 jnz LABEL(ashr_4_exittail)
684 #ifdef USE_AS_STRNCMP
686 jbe LABEL(ashr_4_exittail)
691 jmp LABEL(gobble_ashr_4)
694 LABEL(ashr_4_exittail):
695 movdqa (%rsi, %rcx), %xmm1
701 * The following cases will be handled by ashr_5
702 * rcx(offset of rsi) rax(offset of rdi) relative offset corresponding case
703 * n(11~15) n - 11 4(15 +(n-11) - n) ashr_5
718 jnz LABEL(less32bytes)
721 UPDATE_STRNCMP_COUNTER
724 mov $16, %rcx /* index for loads */
725 mov $5, %r9d /* byte position left over from less32bytes case */
727 * Setup %r10 value allows us to detect crossing a page boundary.
728 * When %r10 goes positive we have crossed a page boundary and
729 * need to do a nibble.
732 and $0xfff, %r10 /* offset into 4K page */
733 sub $0x1000, %r10 /* subtract 4K pagesize */
738 jg LABEL(nibble_ashr_5)
740 LABEL(gobble_ashr_5):
741 movdqa (%rsi, %rcx), %xmm1
742 movdqa (%rdi, %rcx), %xmm2
748 por %xmm3, %xmm2 /* merge into one 16byte value */
750 palignr $5, %xmm3, %xmm2 /* merge into one 16byte value */
760 #ifdef USE_AS_STRNCMP
762 jbe LABEL(strcmp_exitz)
769 jg LABEL(nibble_ashr_5) /* cross page boundary */
771 movdqa (%rsi, %rcx), %xmm1
772 movdqa (%rdi, %rcx), %xmm2
778 por %xmm3, %xmm2 /* merge into one 16byte value */
780 palignr $5, %xmm3, %xmm2 /* merge into one 16byte value */
790 #ifdef USE_AS_STRNCMP
792 jbe LABEL(strcmp_exitz)
797 jmp LABEL(loop_ashr_5)
800 LABEL(nibble_ashr_5):
801 pcmpeqb %xmm3, %xmm0 /* check nibble for null char */
804 jnz LABEL(ashr_5_exittail)
806 #ifdef USE_AS_STRNCMP
808 jbe LABEL(ashr_5_exittail)
813 jmp LABEL(gobble_ashr_5)
816 LABEL(ashr_5_exittail):
817 movdqa (%rsi, %rcx), %xmm1
823 * The following cases will be handled by ashr_6
824 * rcx(offset of rsi) rax(offset of rdi) relative offset corresponding case
825 * n(10~15) n - 10 5(15 +(n-10) - n) ashr_6
840 jnz LABEL(less32bytes)
843 UPDATE_STRNCMP_COUNTER
846 mov $16, %rcx /* index for loads */
847 mov $6, %r9d /* byte position left over from less32bytes case */
849 * Setup %r10 value allows us to detect crossing a page boundary.
850 * When %r10 goes positive we have crossed a page boundary and
851 * need to do a nibble.
854 and $0xfff, %r10 /* offset into 4K page */
855 sub $0x1000, %r10 /* subtract 4K pagesize */
860 jg LABEL(nibble_ashr_6)
862 LABEL(gobble_ashr_6):
863 movdqa (%rsi, %rcx), %xmm1
864 movdqa (%rdi, %rcx), %xmm2
870 por %xmm3, %xmm2 /* merge into one 16byte value */
872 palignr $6, %xmm3, %xmm2 /* merge into one 16byte value */
882 #ifdef USE_AS_STRNCMP
884 jbe LABEL(strcmp_exitz)
891 jg LABEL(nibble_ashr_6) /* cross page boundary */
893 movdqa (%rsi, %rcx), %xmm1
894 movdqa (%rdi, %rcx), %xmm2
900 por %xmm3, %xmm2 /* merge into one 16byte value */
902 palignr $6, %xmm3, %xmm2 /* merge into one 16byte value */
912 #ifdef USE_AS_STRNCMP
914 jbe LABEL(strcmp_exitz)
919 jmp LABEL(loop_ashr_6)
922 LABEL(nibble_ashr_6):
923 pcmpeqb %xmm3, %xmm0 /* check nibble for null char */
926 jnz LABEL(ashr_6_exittail)
928 #ifdef USE_AS_STRNCMP
930 jbe LABEL(ashr_6_exittail)
935 jmp LABEL(gobble_ashr_6)
938 LABEL(ashr_6_exittail):
939 movdqa (%rsi, %rcx), %xmm1
945 * The following cases will be handled by ashr_7
946 * rcx(offset of rsi) rax(offset of rdi) relative offset corresponding case
947 * n(9~15) n - 9 6(15 +(n - 9) - n) ashr_7
962 jnz LABEL(less32bytes)
965 UPDATE_STRNCMP_COUNTER
968 mov $16, %rcx /* index for loads */
969 mov $7, %r9d /* byte position left over from less32bytes case */
971 * Setup %r10 value allows us to detect crossing a page boundary.
972 * When %r10 goes positive we have crossed a page boundary and
973 * need to do a nibble.
976 and $0xfff, %r10 /* offset into 4K page */
977 sub $0x1000, %r10 /* subtract 4K pagesize */
982 jg LABEL(nibble_ashr_7)
984 LABEL(gobble_ashr_7):
985 movdqa (%rsi, %rcx), %xmm1
986 movdqa (%rdi, %rcx), %xmm2
992 por %xmm3, %xmm2 /* merge into one 16byte value */
994 palignr $7, %xmm3, %xmm2 /* merge into one 16byte value */
1000 pmovmskb %xmm1, %edx
1004 #ifdef USE_AS_STRNCMP
1006 jbe LABEL(strcmp_exitz)
1013 jg LABEL(nibble_ashr_7) /* cross page boundary */
1015 movdqa (%rsi, %rcx), %xmm1
1016 movdqa (%rdi, %rcx), %xmm2
1022 por %xmm3, %xmm2 /* merge into one 16byte value */
1024 palignr $7, %xmm3, %xmm2 /* merge into one 16byte value */
1027 pcmpeqb %xmm1, %xmm0
1028 pcmpeqb %xmm2, %xmm1
1030 pmovmskb %xmm1, %edx
1034 #ifdef USE_AS_STRNCMP
1036 jbe LABEL(strcmp_exitz)
1041 jmp LABEL(loop_ashr_7)
1044 LABEL(nibble_ashr_7):
1045 pcmpeqb %xmm3, %xmm0 /* check nibble for null char */
1046 pmovmskb %xmm0, %edx
1048 jnz LABEL(ashr_7_exittail)
1050 #ifdef USE_AS_STRNCMP
1052 jbe LABEL(ashr_7_exittail)
1057 jmp LABEL(gobble_ashr_7)
1060 LABEL(ashr_7_exittail):
1061 movdqa (%rsi, %rcx), %xmm1
1064 jmp LABEL(aftertail)
1067 * The following cases will be handled by ashr_8
1068 * rcx(offset of rsi) rax(offset of rdi) relative offset corresponding case
1069 * n(8~15) n - 8 7(15 +(n - 8) - n) ashr_8
1074 movdqa (%rdi), %xmm2
1075 movdqa (%rsi), %xmm1
1076 pcmpeqb %xmm1, %xmm0
1078 pcmpeqb %xmm1, %xmm2
1080 pmovmskb %xmm2, %r9d
1084 jnz LABEL(less32bytes)
1085 movdqa (%rdi), %xmm3
1087 UPDATE_STRNCMP_COUNTER
1090 mov $16, %rcx /* index for loads */
1091 mov $8, %r9d /* byte position left over from less32bytes case */
1093 * Setup %r10 value allows us to detect crossing a page boundary.
1094 * When %r10 goes positive we have crossed a page boundary and
1095 * need to do a nibble.
1098 and $0xfff, %r10 /* offset into 4K page */
1099 sub $0x1000, %r10 /* subtract 4K pagesize */
1104 jg LABEL(nibble_ashr_8)
1106 LABEL(gobble_ashr_8):
1107 movdqa (%rsi, %rcx), %xmm1
1108 movdqa (%rdi, %rcx), %xmm2
1114 por %xmm3, %xmm2 /* merge into one 16byte value */
1116 palignr $8, %xmm3, %xmm2 /* merge into one 16byte value */
1119 pcmpeqb %xmm1, %xmm0
1120 pcmpeqb %xmm2, %xmm1
1122 pmovmskb %xmm1, %edx
1126 #ifdef USE_AS_STRNCMP
1128 jbe LABEL(strcmp_exitz)
1135 jg LABEL(nibble_ashr_8) /* cross page boundary */
1137 movdqa (%rsi, %rcx), %xmm1
1138 movdqa (%rdi, %rcx), %xmm2
1144 por %xmm3, %xmm2 /* merge into one 16byte value */
1146 palignr $8, %xmm3, %xmm2 /* merge into one 16byte value */
1149 pcmpeqb %xmm1, %xmm0
1150 pcmpeqb %xmm2, %xmm1
1152 pmovmskb %xmm1, %edx
1156 #ifdef USE_AS_STRNCMP
1158 jbe LABEL(strcmp_exitz)
1163 jmp LABEL(loop_ashr_8)
1166 LABEL(nibble_ashr_8):
1167 pcmpeqb %xmm3, %xmm0 /* check nibble for null char */
1168 pmovmskb %xmm0, %edx
1170 jnz LABEL(ashr_8_exittail)
1172 #ifdef USE_AS_STRNCMP
1174 jbe LABEL(ashr_8_exittail)
1179 jmp LABEL(gobble_ashr_8)
1182 LABEL(ashr_8_exittail):
1183 movdqa (%rsi, %rcx), %xmm1
1186 jmp LABEL(aftertail)
1189 * The following cases will be handled by ashr_9
1190 * rcx(offset of rsi) rax(offset of rdi) relative offset corresponding case
1191 * n(7~15) n - 7 8(15 +(n - 7) - n) ashr_9
1196 movdqa (%rdi), %xmm2
1197 movdqa (%rsi), %xmm1
1198 pcmpeqb %xmm1, %xmm0
1200 pcmpeqb %xmm1, %xmm2
1202 pmovmskb %xmm2, %r9d
1206 jnz LABEL(less32bytes)
1207 movdqa (%rdi), %xmm3
1209 UPDATE_STRNCMP_COUNTER
1212 mov $16, %rcx /* index for loads */
1213 mov $9, %r9d /* byte position left over from less32bytes case */
1215 * Setup %r10 value allows us to detect crossing a page boundary.
1216 * When %r10 goes positive we have crossed a page boundary and
1217 * need to do a nibble.
1220 and $0xfff, %r10 /* offset into 4K page */
1221 sub $0x1000, %r10 /* subtract 4K pagesize */
1226 jg LABEL(nibble_ashr_9)
1228 LABEL(gobble_ashr_9):
1229 movdqa (%rsi, %rcx), %xmm1
1230 movdqa (%rdi, %rcx), %xmm2
1236 por %xmm3, %xmm2 /* merge into one 16byte value */
1238 palignr $9, %xmm3, %xmm2 /* merge into one 16byte value */
1241 pcmpeqb %xmm1, %xmm0
1242 pcmpeqb %xmm2, %xmm1
1244 pmovmskb %xmm1, %edx
1248 #ifdef USE_AS_STRNCMP
1250 jbe LABEL(strcmp_exitz)
1257 jg LABEL(nibble_ashr_9) /* cross page boundary */
1259 movdqa (%rsi, %rcx), %xmm1
1260 movdqa (%rdi, %rcx), %xmm2
1266 por %xmm3, %xmm2 /* merge into one 16byte value */
1268 palignr $9, %xmm3, %xmm2 /* merge into one 16byte value */
1271 pcmpeqb %xmm1, %xmm0
1272 pcmpeqb %xmm2, %xmm1
1274 pmovmskb %xmm1, %edx
1278 #ifdef USE_AS_STRNCMP
1280 jbe LABEL(strcmp_exitz)
1284 movdqa %xmm4, %xmm3 /* store for next cycle */
1285 jmp LABEL(loop_ashr_9)
1288 LABEL(nibble_ashr_9):
1289 pcmpeqb %xmm3, %xmm0 /* check nibble for null char */
1290 pmovmskb %xmm0, %edx
1292 jnz LABEL(ashr_9_exittail)
1294 #ifdef USE_AS_STRNCMP
1296 jbe LABEL(ashr_9_exittail)
1301 jmp LABEL(gobble_ashr_9)
1304 LABEL(ashr_9_exittail):
1305 movdqa (%rsi, %rcx), %xmm1
1308 jmp LABEL(aftertail)
1311 * The following cases will be handled by ashr_10
1312 * rcx(offset of rsi) rax(offset of rdi) relative offset corresponding case
1313 * n(6~15) n - 6 9(15 +(n - 6) - n) ashr_10
1318 movdqa (%rdi), %xmm2
1319 movdqa (%rsi), %xmm1
1320 pcmpeqb %xmm1, %xmm0
1322 pcmpeqb %xmm1, %xmm2
1324 pmovmskb %xmm2, %r9d
1328 jnz LABEL(less32bytes)
1329 movdqa (%rdi), %xmm3
1331 UPDATE_STRNCMP_COUNTER
1334 mov $16, %rcx /* index for loads */
1335 mov $10, %r9d /* byte position left over from less32bytes case */
1337 * Setup %r10 value allows us to detect crossing a page boundary.
1338 * When %r10 goes positive we have crossed a page boundary and
1339 * need to do a nibble.
1342 and $0xfff, %r10 /* offset into 4K page */
1343 sub $0x1000, %r10 /* subtract 4K pagesize */
1346 LABEL(loop_ashr_10):
1348 jg LABEL(nibble_ashr_10)
1350 LABEL(gobble_ashr_10):
1351 movdqa (%rsi, %rcx), %xmm1
1352 movdqa (%rdi, %rcx), %xmm2
1358 por %xmm3, %xmm2 /* merge into one 16byte value */
1360 palignr $10, %xmm3, %xmm2 /* merge into one 16byte value */
1363 pcmpeqb %xmm1, %xmm0
1364 pcmpeqb %xmm2, %xmm1
1366 pmovmskb %xmm1, %edx
1370 #ifdef USE_AS_STRNCMP
1372 jbe LABEL(strcmp_exitz)
1379 jg LABEL(nibble_ashr_10) /* cross page boundary */
1381 movdqa (%rsi, %rcx), %xmm1
1382 movdqa (%rdi, %rcx), %xmm2
1388 por %xmm3, %xmm2 /* merge into one 16byte value */
1390 palignr $10, %xmm3, %xmm2 /* merge into one 16byte value */
1393 pcmpeqb %xmm1, %xmm0
1394 pcmpeqb %xmm2, %xmm1
1396 pmovmskb %xmm1, %edx
1400 #ifdef USE_AS_STRNCMP
1402 jbe LABEL(strcmp_exitz)
1407 jmp LABEL(loop_ashr_10)
1410 LABEL(nibble_ashr_10):
1411 pcmpeqb %xmm3, %xmm0 /* check nibble for null char */
1412 pmovmskb %xmm0, %edx
1414 jnz LABEL(ashr_10_exittail)
1416 #ifdef USE_AS_STRNCMP
1418 jbe LABEL(ashr_10_exittail)
1423 jmp LABEL(gobble_ashr_10)
1426 LABEL(ashr_10_exittail):
1427 movdqa (%rsi, %rcx), %xmm1
1430 jmp LABEL(aftertail)
1433 * The following cases will be handled by ashr_11
1434 * rcx(offset of rsi) rax(offset of rdi) relative offset corresponding case
1435 * n(5~15) n - 5 10(15 +(n - 5) - n) ashr_11
1440 movdqa (%rdi), %xmm2
1441 movdqa (%rsi), %xmm1
1442 pcmpeqb %xmm1, %xmm0
1444 pcmpeqb %xmm1, %xmm2
1446 pmovmskb %xmm2, %r9d
1450 jnz LABEL(less32bytes)
1451 movdqa (%rdi), %xmm3
1453 UPDATE_STRNCMP_COUNTER
1456 mov $16, %rcx /* index for loads */
1457 mov $11, %r9d /* byte position left over from less32bytes case */
1459 * Setup %r10 value allows us to detect crossing a page boundary.
1460 * When %r10 goes positive we have crossed a page boundary and
1461 * need to do a nibble.
1464 and $0xfff, %r10 /* offset into 4K page */
1465 sub $0x1000, %r10 /* subtract 4K pagesize */
1468 LABEL(loop_ashr_11):
1470 jg LABEL(nibble_ashr_11)
1472 LABEL(gobble_ashr_11):
1473 movdqa (%rsi, %rcx), %xmm1
1474 movdqa (%rdi, %rcx), %xmm2
1480 por %xmm3, %xmm2 /* merge into one 16byte value */
1482 palignr $11, %xmm3, %xmm2 /* merge into one 16byte value */
1485 pcmpeqb %xmm1, %xmm0
1486 pcmpeqb %xmm2, %xmm1
1488 pmovmskb %xmm1, %edx
1492 #ifdef USE_AS_STRNCMP
1494 jbe LABEL(strcmp_exitz)
1501 jg LABEL(nibble_ashr_11) /* cross page boundary */
1503 movdqa (%rsi, %rcx), %xmm1
1504 movdqa (%rdi, %rcx), %xmm2
1510 por %xmm3, %xmm2 /* merge into one 16byte value */
1512 palignr $11, %xmm3, %xmm2 /* merge into one 16byte value */
1515 pcmpeqb %xmm1, %xmm0
1516 pcmpeqb %xmm2, %xmm1
1518 pmovmskb %xmm1, %edx
1522 #ifdef USE_AS_STRNCMP
1524 jbe LABEL(strcmp_exitz)
1529 jmp LABEL(loop_ashr_11)
1532 LABEL(nibble_ashr_11):
1533 pcmpeqb %xmm3, %xmm0 /* check nibble for null char */
1534 pmovmskb %xmm0, %edx
1536 jnz LABEL(ashr_11_exittail)
1538 #ifdef USE_AS_STRNCMP
1540 jbe LABEL(ashr_11_exittail)
1545 jmp LABEL(gobble_ashr_11)
1548 LABEL(ashr_11_exittail):
1549 movdqa (%rsi, %rcx), %xmm1
1552 jmp LABEL(aftertail)
1555 * The following cases will be handled by ashr_12
1556 * rcx(offset of rsi) rax(offset of rdi) relative offset corresponding case
1557 * n(4~15) n - 4 11(15 +(n - 4) - n) ashr_12
1562 movdqa (%rdi), %xmm2
1563 movdqa (%rsi), %xmm1
1564 pcmpeqb %xmm1, %xmm0
1566 pcmpeqb %xmm1, %xmm2
1568 pmovmskb %xmm2, %r9d
1572 jnz LABEL(less32bytes)
1573 movdqa (%rdi), %xmm3
1575 UPDATE_STRNCMP_COUNTER
1578 mov $16, %rcx /* index for loads */
1579 mov $12, %r9d /* byte position left over from less32bytes case */
1581 * Setup %r10 value allows us to detect crossing a page boundary.
1582 * When %r10 goes positive we have crossed a page boundary and
1583 * need to do a nibble.
1586 and $0xfff, %r10 /* offset into 4K page */
1587 sub $0x1000, %r10 /* subtract 4K pagesize */
1590 LABEL(loop_ashr_12):
1592 jg LABEL(nibble_ashr_12)
1594 LABEL(gobble_ashr_12):
1595 movdqa (%rsi, %rcx), %xmm1
1596 movdqa (%rdi, %rcx), %xmm2
1602 por %xmm3, %xmm2 /* merge into one 16byte value */
1604 palignr $12, %xmm3, %xmm2 /* merge into one 16byte value */
1607 pcmpeqb %xmm1, %xmm0
1608 pcmpeqb %xmm2, %xmm1
1610 pmovmskb %xmm1, %edx
1614 #ifdef USE_AS_STRNCMP
1616 jbe LABEL(strcmp_exitz)
1623 jg LABEL(nibble_ashr_12) /* cross page boundary */
1625 movdqa (%rsi, %rcx), %xmm1
1626 movdqa (%rdi, %rcx), %xmm2
1632 por %xmm3, %xmm2 /* merge into one 16byte value */
1634 palignr $12, %xmm3, %xmm2 /* merge into one 16byte value */
1637 pcmpeqb %xmm1, %xmm0
1638 pcmpeqb %xmm2, %xmm1
1640 pmovmskb %xmm1, %edx
1644 #ifdef USE_AS_STRNCMP
1646 jbe LABEL(strcmp_exitz)
1651 jmp LABEL(loop_ashr_12)
1654 LABEL(nibble_ashr_12):
1655 pcmpeqb %xmm3, %xmm0 /* check nibble for null char */
1656 pmovmskb %xmm0, %edx
1658 jnz LABEL(ashr_12_exittail)
1660 #ifdef USE_AS_STRNCMP
1662 jbe LABEL(ashr_12_exittail)
1667 jmp LABEL(gobble_ashr_12)
1670 LABEL(ashr_12_exittail):
1671 movdqa (%rsi, %rcx), %xmm1
1674 jmp LABEL(aftertail)
1677 * The following cases will be handled by ashr_13
1678 * rcx(offset of rsi) rax(offset of rdi) relative offset corresponding case
1679 * n(3~15) n - 3 12(15 +(n - 3) - n) ashr_13
1684 movdqa (%rdi), %xmm2
1685 movdqa (%rsi), %xmm1
1686 pcmpeqb %xmm1, %xmm0
1688 pcmpeqb %xmm1, %xmm2
1690 pmovmskb %xmm2, %r9d
1694 jnz LABEL(less32bytes)
1695 movdqa (%rdi), %xmm3
1697 UPDATE_STRNCMP_COUNTER
1700 mov $16, %rcx /* index for loads */
1701 mov $13, %r9d /* byte position left over from less32bytes case */
1703 * Setup %r10 value allows us to detect crossing a page boundary.
1704 * When %r10 goes positive we have crossed a page boundary and
1705 * need to do a nibble.
1708 and $0xfff, %r10 /* offset into 4K page */
1709 sub $0x1000, %r10 /* subtract 4K pagesize */
1712 LABEL(loop_ashr_13):
1714 jg LABEL(nibble_ashr_13)
1716 LABEL(gobble_ashr_13):
1717 movdqa (%rsi, %rcx), %xmm1
1718 movdqa (%rdi, %rcx), %xmm2
1724 por %xmm3, %xmm2 /* merge into one 16byte value */
1726 palignr $13, %xmm3, %xmm2 /* merge into one 16byte value */
1729 pcmpeqb %xmm1, %xmm0
1730 pcmpeqb %xmm2, %xmm1
1732 pmovmskb %xmm1, %edx
1736 #ifdef USE_AS_STRNCMP
1738 jbe LABEL(strcmp_exitz)
1745 jg LABEL(nibble_ashr_13) /* cross page boundary */
1747 movdqa (%rsi, %rcx), %xmm1
1748 movdqa (%rdi, %rcx), %xmm2
1754 por %xmm3, %xmm2 /* merge into one 16byte value */
1756 palignr $13, %xmm3, %xmm2 /* merge into one 16byte value */
1759 pcmpeqb %xmm1, %xmm0
1760 pcmpeqb %xmm2, %xmm1
1762 pmovmskb %xmm1, %edx
1766 #ifdef USE_AS_STRNCMP
1768 jbe LABEL(strcmp_exitz)
1773 jmp LABEL(loop_ashr_13)
1776 LABEL(nibble_ashr_13):
1777 pcmpeqb %xmm3, %xmm0 /* check nibble for null char */
1778 pmovmskb %xmm0, %edx
1780 jnz LABEL(ashr_13_exittail)
1782 #ifdef USE_AS_STRNCMP
1784 jbe LABEL(ashr_13_exittail)
1789 jmp LABEL(gobble_ashr_13)
1792 LABEL(ashr_13_exittail):
1793 movdqa (%rsi, %rcx), %xmm1
1796 jmp LABEL(aftertail)
1799 * The following cases will be handled by ashr_14
1800 * rcx(offset of rsi) rax(offset of rdi) relative offset corresponding case
1801 * n(2~15) n - 2 13(15 +(n - 2) - n) ashr_14
1806 movdqa (%rdi), %xmm2
1807 movdqa (%rsi), %xmm1
1808 pcmpeqb %xmm1, %xmm0
1810 pcmpeqb %xmm1, %xmm2
1812 pmovmskb %xmm2, %r9d
1816 jnz LABEL(less32bytes)
1817 movdqa (%rdi), %xmm3
1819 UPDATE_STRNCMP_COUNTER
1822 mov $16, %rcx /* index for loads */
1823 mov $14, %r9d /* byte position left over from less32bytes case */
1825 * Setup %r10 value allows us to detect crossing a page boundary.
1826 * When %r10 goes positive we have crossed a page boundary and
1827 * need to do a nibble.
1830 and $0xfff, %r10 /* offset into 4K page */
1831 sub $0x1000, %r10 /* subtract 4K pagesize */
1834 LABEL(loop_ashr_14):
1836 jg LABEL(nibble_ashr_14)
1838 LABEL(gobble_ashr_14):
1839 movdqa (%rsi, %rcx), %xmm1
1840 movdqa (%rdi, %rcx), %xmm2
1846 por %xmm3, %xmm2 /* merge into one 16byte value */
1848 palignr $14, %xmm3, %xmm2 /* merge into one 16byte value */
1851 pcmpeqb %xmm1, %xmm0
1852 pcmpeqb %xmm2, %xmm1
1854 pmovmskb %xmm1, %edx
1858 #ifdef USE_AS_STRNCMP
1860 jbe LABEL(strcmp_exitz)
1867 jg LABEL(nibble_ashr_14) /* cross page boundary */
1869 movdqa (%rsi, %rcx), %xmm1
1870 movdqa (%rdi, %rcx), %xmm2
1876 por %xmm3, %xmm2 /* merge into one 16byte value */
1878 palignr $14, %xmm3, %xmm2 /* merge into one 16byte value */
1881 pcmpeqb %xmm1, %xmm0
1882 pcmpeqb %xmm2, %xmm1
1884 pmovmskb %xmm1, %edx
1888 #ifdef USE_AS_STRNCMP
1890 jbe LABEL(strcmp_exitz)
1895 jmp LABEL(loop_ashr_14)
1898 LABEL(nibble_ashr_14):
1899 pcmpeqb %xmm3, %xmm0 /* check nibble for null char */
1900 pmovmskb %xmm0, %edx
1902 jnz LABEL(ashr_14_exittail)
1904 #ifdef USE_AS_STRNCMP
1906 jbe LABEL(ashr_14_exittail)
1911 jmp LABEL(gobble_ashr_14)
1914 LABEL(ashr_14_exittail):
1915 movdqa (%rsi, %rcx), %xmm1
1918 jmp LABEL(aftertail)
1921 * The following cases will be handled by ashr_15
1922 * rcx(offset of rsi) rax(offset of rdi) relative offset corresponding case
1923 * n(1~15) n - 1 14(15 +(n - 1) - n) ashr_15
1928 movdqa (%rdi), %xmm2
1929 movdqa (%rsi), %xmm1
1930 pcmpeqb %xmm1, %xmm0
1932 pcmpeqb %xmm1, %xmm2
1934 pmovmskb %xmm2, %r9d
1938 jnz LABEL(less32bytes)
1940 movdqa (%rdi), %xmm3
1942 UPDATE_STRNCMP_COUNTER
1945 mov $16, %rcx /* index for loads */
1946 mov $15, %r9d /* byte position left over from less32bytes case */
1948 * Setup %r10 value allows us to detect crossing a page boundary.
1949 * When %r10 goes positive we have crossed a page boundary and
1950 * need to do a nibble.
1953 and $0xfff, %r10 /* offset into 4K page */
1955 sub $0x1000, %r10 /* subtract 4K pagesize */
1958 LABEL(loop_ashr_15):
1960 jg LABEL(nibble_ashr_15)
1962 LABEL(gobble_ashr_15):
1963 movdqa (%rsi, %rcx), %xmm1
1964 movdqa (%rdi, %rcx), %xmm2
1970 por %xmm3, %xmm2 /* merge into one 16byte value */
1972 palignr $15, %xmm3, %xmm2 /* merge into one 16byte value */
1975 pcmpeqb %xmm1, %xmm0
1976 pcmpeqb %xmm2, %xmm1
1978 pmovmskb %xmm1, %edx
1982 #ifdef USE_AS_STRNCMP
1984 jbe LABEL(strcmp_exitz)
1991 jg LABEL(nibble_ashr_15) /* cross page boundary */
1993 movdqa (%rsi, %rcx), %xmm1
1994 movdqa (%rdi, %rcx), %xmm2
2000 por %xmm3, %xmm2 /* merge into one 16byte value */
2002 palignr $15, %xmm3, %xmm2 /* merge into one 16byte value */
2005 pcmpeqb %xmm1, %xmm0
2006 pcmpeqb %xmm2, %xmm1
2008 pmovmskb %xmm1, %edx
2012 #ifdef USE_AS_STRNCMP
2014 jbe LABEL(strcmp_exitz)
2019 jmp LABEL(loop_ashr_15)
2022 LABEL(nibble_ashr_15):
2023 pcmpeqb %xmm3, %xmm0 /* check nibble for null char */
2024 pmovmskb %xmm0, %edx
2026 jnz LABEL(ashr_15_exittail)
2028 #ifdef USE_AS_STRNCMP
2030 je LABEL(ashr_15_exittail)
2035 jmp LABEL(gobble_ashr_15)
2038 LABEL(ashr_15_exittail):
2039 movdqa (%rsi, %rcx), %xmm1
2045 pcmpeqb %xmm3, %xmm1
2047 pmovmskb %xmm1, %edx
2052 lea -16(%r9, %rcx), %rax /* locate the exact offset for rdi */
2054 lea (%rdi, %rax), %rdi /* locate the exact address for first operand(rdi) */
2055 lea (%rsi, %rcx), %rsi /* locate the exact address for second operand(rsi) */
2058 xchg %rsi, %rdi /* recover original order according to flag(%r8d) */
2063 bsf %rdx, %rdx /* find and store bit index in %rdx */
2065 #ifdef USE_AS_STRNCMP
2067 jbe LABEL(strcmp_exitz)
2069 movzbl (%rsi, %rdx), %ecx
2070 movzbl (%rdi, %rdx), %eax
2075 LABEL(strcmp_exitz):
2086 END (BP_SYM (STRCMP))
2088 .section .rodata,"a",@progbits
2090 LABEL(unaligned_table): /* each entry: 32-bit offset of a case label from this table, indexed by relative offset */
2091 .int LABEL(ashr_1) - LABEL(unaligned_table) /* relative offset 0 */
2092 .int LABEL(ashr_2) - LABEL(unaligned_table) /* relative offset 1 */
2093 .int LABEL(ashr_3) - LABEL(unaligned_table) /* relative offset 2 */
2094 .int LABEL(ashr_4) - LABEL(unaligned_table) /* relative offset 3 */
2095 .int LABEL(ashr_5) - LABEL(unaligned_table) /* relative offset 4 */
2096 .int LABEL(ashr_6) - LABEL(unaligned_table) /* relative offset 5 */
2097 .int LABEL(ashr_7) - LABEL(unaligned_table) /* relative offset 6 */
2098 .int LABEL(ashr_8) - LABEL(unaligned_table) /* relative offset 7 */
2099 .int LABEL(ashr_9) - LABEL(unaligned_table) /* relative offset 8 */
2100 .int LABEL(ashr_10) - LABEL(unaligned_table) /* relative offset 9 */
2101 .int LABEL(ashr_11) - LABEL(unaligned_table) /* relative offset 10 */
2102 .int LABEL(ashr_12) - LABEL(unaligned_table) /* relative offset 11 */
2103 .int LABEL(ashr_13) - LABEL(unaligned_table) /* relative offset 12 */
2104 .int LABEL(ashr_14) - LABEL(unaligned_table) /* relative offset 13 */
2105 .int LABEL(ashr_15) - LABEL(unaligned_table) /* relative offset 14 */
2106 .int LABEL(ashr_0) - LABEL(unaligned_table) /* relative offset 15 */
2107 #endif /* NOT_IN_libc */
2108 libc_hidden_builtin_def (STRCMP)