1 /* Highly optimized version for x86-64.
2 Copyright (C) 1999, 2000, 2002, 2003, 2005, 2009
3 Free Software Foundation, Inc.
4 This file is part of the GNU C Library.
5 Based on i686 version contributed by Ulrich Drepper
6 <drepper@cygnus.com>, 1999.
7 Updated with SSE2 support contributed by Intel Corporation.
9 The GNU C Library is free software; you can redistribute it and/or
10 modify it under the terms of the GNU Lesser General Public
11 License as published by the Free Software Foundation; either
12 version 2.1 of the License, or (at your option) any later version.
14 The GNU C Library is distributed in the hope that it will be useful,
15 but WITHOUT ANY WARRANTY; without even the implied warranty of
16 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17 Lesser General Public License for more details.
19 You should have received a copy of the GNU Lesser General Public
20 License along with the GNU C Library; if not, write to the Free
21 Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
25 #include "asm-syntax.h"
29 #undef UPDATE_STRNCMP_COUNTER
36 /* Since the counter, %r11, is unsigned, we branch to strcmp_exitz
37 if the new counter > the old one or is 0. */
/* UPDATE_STRNCMP_COUNTER is expanded in every alignment case right after the
   first 16-byte block has been checked.  The visible lea forms
   %r9 = %rcx + %r11 - 16 (lea does not modify the flags); both conditional
   jumps leave through strcmp_exitz.
   NOTE(review): the instructions that set the flags for the jb/je are not
   visible in this listing - confirm them against the complete macro.
   The second, empty definition and the STRCMP default presumably belong to
   the non-strncmp preprocessor branch; its #else/#endif are not visible
   here either.  */
38 # define UPDATE_STRNCMP_COUNTER \
39 /* calculate left number to compare */ \
40 lea -16(%rcx, %r11), %r9; \
42 jb LABEL(strcmp_exitz); \
44 je LABEL(strcmp_exitz); \
48 # define UPDATE_STRNCMP_COUNTER
50 # define STRCMP strcmp
/* Function entry.  The symbol name is whatever STRCMP expands to (strcmp
   unless it was defined differently before this point).  */
55 ENTRY (BP_SYM (STRCMP))
57 /* Simple version since we can't use SSE registers in ld.so. */
/* Byte loop head: load the current byte of the string at %rdi into %al.  */
58 L(oop): movb (%rdi), %al
74 #else /* NOT_IN_libc */
/* SSE path, first 16 bytes.  %rcx and %rax are reduced to the offset of
   %rsi / %rdi inside a 64-byte cache line (mask $0x3f); if a 16-byte load
   from either pointer would cross the line, control goes to crosscache.
   Otherwise the first 16 bytes are compared here and both pointers are
   advanced by 16 before execution continues with the code that follows.  */
76 * This implementation uses SSE to compare up to 16 bytes at a time.
80 je LABEL(strcmp_exitz)
87 /* Use 64bit AND here to avoid long NOP padding. */
88 and $0x3f, %rcx /* rsi alignment in cache line */
89 and $0x3f, %rax /* rdi alignment in cache line */
91 ja LABEL(crosscache) /* rsi: 16-byte load will cross cache line */
93 ja LABEL(crosscache) /* rdi: 16-byte load will cross cache line */
/* After the next four instructions a byte of %xmm1 is 0xff only where the
   two inputs were equal AND the byte was not NUL: %xmm0 is the NUL mask of
   %xmm1, %xmm1 becomes the equality mask, and psubb subtracts the former
   from the latter.  */
98 pxor %xmm0, %xmm0 /* clear %xmm0 for null char checks */
99 pcmpeqb %xmm1, %xmm0 /* Any null chars? */
100 pcmpeqb %xmm2, %xmm1 /* compare first 16 bytes for equality */
101 psubb %xmm0, %xmm1 /* packed sub of comparison results */
103 sub $0xffff, %edx /* if first 16 bytes are same, edx == 0xffff */
104 jnz LABEL(less16bytes) /* If not, find different value or null char */
105 #ifdef USE_AS_STRNCMP
107 jbe LABEL(strcmp_exitz) /* finish comparison */
109 add $16, %rsi /* prepare to search next 16 bytes */
110 add $16, %rdi /* prepare to search next 16 bytes */
/* Alignment dispatch.  Both pointers are rounded down to a 16-byte boundary
   while %ecx / %eax keep the low four bits of the offsets.  Equal offsets go
   straight to ashr_0; otherwise the handler is fetched from unaligned_table:
   the table stores 32-bit values relative to its own address, so the entry
   selected by %r9 is sign-extended and added back to the table base in %r10
   before the indirect jump.  */
113 * Determine source and destination string offsets from 16-byte alignment.
114 * Use relative offset difference between the two to determine which case
119 and $0xfffffffffffffff0, %rsi /* force %rsi to be 16-byte aligned */
120 and $0xfffffffffffffff0, %rdi /* force %rdi to be 16-byte aligned */
121 mov $0xffff, %edx /* for equivalent offset */
123 and $0xf, %ecx /* offset of rsi */
124 and $0xf, %eax /* offset of rdi */
126 je LABEL(ashr_0) /* rsi and rdi relative offset same */
128 mov %edx, %r8d /* r8d is offset flag for exit tail */
134 lea LABEL(unaligned_table)(%rip), %r10
135 movslq (%r10, %r9,4), %r9
136 lea (%r10, %r9), %r10
137 jmp *%r10 /* jump to corresponding case */
/* ashr_0: both strings share the same offset inside their 16-byte block.
   The first block is compared with the memory operand (%rdi); the 0xffff
   reference mask in %edx and the result mask in %r9d are both shifted right
   by %cl so that the bytes in front of the string start are ignored.
   loop_ashr_0 then walks the aligned data with two 16-byte load pairs per
   iteration, leaving through exit on a mismatch or NUL and, in the strncmp
   build, through strcmp_exitz.  */
140 * The following cases will be handled by ashr_0
141 * rcx(offset of rsi) rax(offset of rdi) relative offset corresponding case
142 * n(0~15) n(0~15) 15(15+ n-n) ashr_0
148 pxor %xmm0, %xmm0 /* clear %xmm0 for null char check */
149 pcmpeqb %xmm1, %xmm0 /* Any null chars? */
150 pcmpeqb (%rdi), %xmm1 /* compare 16 bytes for equality */
151 psubb %xmm0, %xmm1 /* packed sub of comparison results */
153 shr %cl, %edx /* adjust 0xffff for offset */
154 shr %cl, %r9d /* adjust for 16-byte offset */
157 * edx must be the same with r9d if in left byte (16-rcx) is equal to
158 * the start from (16-rax) and no null char was seen.
160 jne LABEL(less32bytes) /* mismatch or null char */
161 UPDATE_STRNCMP_COUNTER
164 pxor %xmm0, %xmm0 /* clear xmm0, may have changed above */
167 * Now both strings are aligned at 16-byte boundary. Loop over strings
168 * checking 32-bytes per iteration.
/* First 16-byte pair of the iteration.  */
172 movdqa (%rsi, %rcx), %xmm1
173 movdqa (%rdi, %rcx), %xmm2
180 jnz LABEL(exit) /* mismatch or null char seen */
182 #ifdef USE_AS_STRNCMP
184 jbe LABEL(strcmp_exitz)
/* Second 16-byte pair of the iteration.  */
187 movdqa (%rsi, %rcx), %xmm1
188 movdqa (%rdi, %rcx), %xmm2
196 #ifdef USE_AS_STRNCMP
198 jbe LABEL(strcmp_exitz)
201 jmp LABEL(loop_ashr_0)
/* ashr_1: handler for the offset pairs described in the table comment
   below.  The first %rdi block is shifted left by 15 bytes (pslldq) so it
   lines up with the %rsi block before the compare.  Once that block has
   passed, %rcx = 16 indexes the following aligned loads, %r9d = 1 is kept
   for the exit code, and %r10 = (offset in 4K page) - 0x1000 lets the loop
   detect a page crossing by sign.  gobble_ashr_1 loads the next 16 bytes of
   both strings and merges the saved part (%xmm3) into %xmm2;
   nibble_ashr_1 tests %xmm3 for a NUL before the page is crossed;
   ashr_1_exittail reloads %xmm1 for the final compare.  */
204 * The following cases will be handled by ashr_1
205 * rcx(offset of rsi) rax(offset of rdi) relative offset corresponding case
206 * n(15) n -15 0(15 +(n-15) - n) ashr_1
213 pcmpeqb %xmm1, %xmm0 /* Any null chars? */
214 pslldq $15, %xmm2 /* shift first string to align with second */
215 pcmpeqb %xmm1, %xmm2 /* compare 16 bytes for equality */
216 psubb %xmm0, %xmm2 /* packed sub of comparison results */
218 shr %cl, %edx /* adjust 0xffff for offset */
219 shr %cl, %r9d /* adjust for 16-byte offset */
221 jnz LABEL(less32bytes) /* mismatch or null char seen */
223 UPDATE_STRNCMP_COUNTER
226 mov $16, %rcx /* index for loads */
227 mov $1, %r9d /* byte position left over from less32bytes case */
229 * Setup %r10 value allows us to detect crossing a page boundary.
230 * When %r10 goes positive we have crossed a page boundary and
231 * need to do a nibble.
234 and $0xfff, %r10 /* offset into 4K page */
235 sub $0x1000, %r10 /* subtract 4K pagesize */
240 jg LABEL(nibble_ashr_1) /* cross page boundary */
242 LABEL(gobble_ashr_1):
243 movdqa (%rsi, %rcx), %xmm1
244 movdqa (%rdi, %rcx), %xmm2
245 movdqa %xmm2, %xmm4 /* store for next cycle */
249 por %xmm3, %xmm2 /* merge into one 16byte value */
258 #ifdef USE_AS_STRNCMP
260 jbe LABEL(strcmp_exitz)
266 jg LABEL(nibble_ashr_1) /* cross page boundary */
268 movdqa (%rsi, %rcx), %xmm1
269 movdqa (%rdi, %rcx), %xmm2
270 movdqa %xmm2, %xmm4 /* store for next cycle */
274 por %xmm3, %xmm2 /* merge into one 16byte value */
283 #ifdef USE_AS_STRNCMP
285 jbe LABEL(strcmp_exitz)
289 jmp LABEL(loop_ashr_1)
292 * Nibble avoids loads across page boundary. This is to avoid a potential
293 * access into unmapped memory.
296 LABEL(nibble_ashr_1):
297 pcmpeqb %xmm3, %xmm0 /* check nibble for null char */
300 jnz LABEL(ashr_1_exittail) /* find null char */
302 #ifdef USE_AS_STRNCMP
304 jbe LABEL(ashr_1_exittail)
308 sub $0x1000, %r10 /* subtract 4K from %r10 */
309 jmp LABEL(gobble_ashr_1)
312 * Once find null char, determine if there is a string mismatch
313 * before the null char.
316 LABEL(ashr_1_exittail):
317 movdqa (%rsi, %rcx), %xmm1
/* ashr_2: handler for the offset pairs described in the table comment
   below.  After the first block has passed (no branch to less32bytes),
   %rcx = 16 indexes the aligned loads, %r9d = 2 is kept for the exit code,
   and %r10 = (offset in 4K page) - 0x1000 turns positive when a page
   boundary is crossed.  gobble_ashr_2 loads the next 16 bytes of both
   strings, nibble_ashr_2 tests %xmm3 for a NUL before the crossing, and
   ashr_2_exittail reloads %xmm1 for the final compare.  */
323 * The following cases will be handled by ashr_2
324 * rcx(offset of rsi) rax(offset of rdi) relative offset corresponding case
325 * n(14~15) n -14 1(15 +(n-14) - n) ashr_2
340 jnz LABEL(less32bytes)
342 UPDATE_STRNCMP_COUNTER
345 mov $16, %rcx /* index for loads */
346 mov $2, %r9d /* byte position left over from less32bytes case */
348 * Setup %r10 value allows us to detect crossing a page boundary.
349 * When %r10 goes positive we have crossed a page boundary and
350 * need to do a nibble.
353 and $0xfff, %r10 /* offset into 4K page */
354 sub $0x1000, %r10 /* subtract 4K pagesize */
359 jg LABEL(nibble_ashr_2)
361 LABEL(gobble_ashr_2):
362 movdqa (%rsi, %rcx), %xmm1
363 movdqa (%rdi, %rcx), %xmm2
377 #ifdef USE_AS_STRNCMP
379 jbe LABEL(strcmp_exitz)
386 jg LABEL(nibble_ashr_2) /* cross page boundary */
388 movdqa (%rsi, %rcx), %xmm1
389 movdqa (%rdi, %rcx), %xmm2
403 #ifdef USE_AS_STRNCMP
405 jbe LABEL(strcmp_exitz)
410 jmp LABEL(loop_ashr_2)
413 LABEL(nibble_ashr_2):
414 pcmpeqb %xmm3, %xmm0 /* check nibble for null char */
417 jnz LABEL(ashr_2_exittail)
419 #ifdef USE_AS_STRNCMP
421 jbe LABEL(ashr_2_exittail)
426 jmp LABEL(gobble_ashr_2)
429 LABEL(ashr_2_exittail):
430 movdqa (%rsi, %rcx), %xmm1
/* ashr_3: handler for the offset pairs described in the table comment
   below.  After the first block has passed (no branch to less32bytes),
   %rcx = 16 indexes the aligned loads, %r9d = 3 is kept for the exit code,
   and %r10 = (offset in 4K page) - 0x1000 turns positive when a page
   boundary is crossed.  gobble_ashr_3 loads the next 16 bytes of both
   strings, nibble_ashr_3 tests %xmm3 for a NUL before the crossing, and
   ashr_3_exittail reloads %xmm1 for the final compare.  */
436 * The following cases will be handled by ashr_3
437 * rcx(offset of rsi) rax(offset of rdi) relative offset corresponding case
438 * n(13~15) n -13 2(15 +(n-13) - n) ashr_3
453 jnz LABEL(less32bytes)
456 UPDATE_STRNCMP_COUNTER
459 mov $16, %rcx /* index for loads */
460 mov $3, %r9d /* byte position left over from less32bytes case */
462 * Setup %r10 value allows us to detect crossing a page boundary.
463 * When %r10 goes positive we have crossed a page boundary and
464 * need to do a nibble.
467 and $0xfff, %r10 /* offset into 4K page */
468 sub $0x1000, %r10 /* subtract 4K pagesize */
473 jg LABEL(nibble_ashr_3)
475 LABEL(gobble_ashr_3):
476 movdqa (%rsi, %rcx), %xmm1
477 movdqa (%rdi, %rcx), %xmm2
491 #ifdef USE_AS_STRNCMP
493 jbe LABEL(strcmp_exitz)
500 jg LABEL(nibble_ashr_3) /* cross page boundary */
502 movdqa (%rsi, %rcx), %xmm1
503 movdqa (%rdi, %rcx), %xmm2
517 #ifdef USE_AS_STRNCMP
519 jbe LABEL(strcmp_exitz)
524 jmp LABEL(loop_ashr_3)
527 LABEL(nibble_ashr_3):
528 pcmpeqb %xmm3, %xmm0 /* check nibble for null char */
531 jnz LABEL(ashr_3_exittail)
533 #ifdef USE_AS_STRNCMP
535 jbe LABEL(ashr_3_exittail)
540 jmp LABEL(gobble_ashr_3)
543 LABEL(ashr_3_exittail):
544 movdqa (%rsi, %rcx), %xmm1
/* ashr_4: handler for the offset pairs described in the table comment
   below.  After the first block has passed (no branch to less32bytes),
   %rcx = 16 indexes the aligned loads, %r9d = 4 is kept for the exit code,
   and %r10 = (offset in 4K page) - 0x1000 turns positive when a page
   boundary is crossed.  gobble_ashr_4 loads the next 16 bytes of both
   strings, nibble_ashr_4 tests %xmm3 for a NUL before the crossing, and
   ashr_4_exittail reloads %xmm1 for the final compare.  */
550 * The following cases will be handled by ashr_4
551 * rcx(offset of rsi) rax(offset of rdi) relative offset corresponding case
552 * n(12~15) n -12 3(15 +(n-12) - n) ashr_4
567 jnz LABEL(less32bytes)
570 UPDATE_STRNCMP_COUNTER
573 mov $16, %rcx /* index for loads */
574 mov $4, %r9d /* byte position left over from less32bytes case */
576 * Setup %r10 value allows us to detect crossing a page boundary.
577 * When %r10 goes positive we have crossed a page boundary and
578 * need to do a nibble.
581 and $0xfff, %r10 /* offset into 4K page */
582 sub $0x1000, %r10 /* subtract 4K pagesize */
587 jg LABEL(nibble_ashr_4)
589 LABEL(gobble_ashr_4):
590 movdqa (%rsi, %rcx), %xmm1
591 movdqa (%rdi, %rcx), %xmm2
605 #ifdef USE_AS_STRNCMP
607 jbe LABEL(strcmp_exitz)
614 jg LABEL(nibble_ashr_4) /* cross page boundary */
616 movdqa (%rsi, %rcx), %xmm1
617 movdqa (%rdi, %rcx), %xmm2
631 #ifdef USE_AS_STRNCMP
633 jbe LABEL(strcmp_exitz)
638 jmp LABEL(loop_ashr_4)
641 LABEL(nibble_ashr_4):
642 pcmpeqb %xmm3, %xmm0 /* check nibble for null char */
645 jnz LABEL(ashr_4_exittail)
647 #ifdef USE_AS_STRNCMP
649 jbe LABEL(ashr_4_exittail)
654 jmp LABEL(gobble_ashr_4)
657 LABEL(ashr_4_exittail):
658 movdqa (%rsi, %rcx), %xmm1
/* ashr_5: handler for the offset pairs described in the table comment
   below.  After the first block has passed (no branch to less32bytes),
   %rcx = 16 indexes the aligned loads, %r9d = 5 is kept for the exit code,
   and %r10 = (offset in 4K page) - 0x1000 turns positive when a page
   boundary is crossed.  gobble_ashr_5 loads the next 16 bytes of both
   strings, nibble_ashr_5 tests %xmm3 for a NUL before the crossing, and
   ashr_5_exittail reloads %xmm1 for the final compare.  */
664 * The following cases will be handled by ashr_5
665 * rcx(offset of rsi) rax(offset of rdi) relative offset corresponding case
666 * n(11~15) n - 11 4(15 +(n-11) - n) ashr_5
681 jnz LABEL(less32bytes)
684 UPDATE_STRNCMP_COUNTER
687 mov $16, %rcx /* index for loads */
688 mov $5, %r9d /* byte position left over from less32bytes case */
690 * Setup %r10 value allows us to detect crossing a page boundary.
691 * When %r10 goes positive we have crossed a page boundary and
692 * need to do a nibble.
695 and $0xfff, %r10 /* offset into 4K page */
696 sub $0x1000, %r10 /* subtract 4K pagesize */
701 jg LABEL(nibble_ashr_5)
703 LABEL(gobble_ashr_5):
704 movdqa (%rsi, %rcx), %xmm1
705 movdqa (%rdi, %rcx), %xmm2
719 #ifdef USE_AS_STRNCMP
721 jbe LABEL(strcmp_exitz)
728 jg LABEL(nibble_ashr_5) /* cross page boundary */
730 movdqa (%rsi, %rcx), %xmm1
731 movdqa (%rdi, %rcx), %xmm2
745 #ifdef USE_AS_STRNCMP
747 jbe LABEL(strcmp_exitz)
752 jmp LABEL(loop_ashr_5)
755 LABEL(nibble_ashr_5):
756 pcmpeqb %xmm3, %xmm0 /* check nibble for null char */
759 jnz LABEL(ashr_5_exittail)
761 #ifdef USE_AS_STRNCMP
763 jbe LABEL(ashr_5_exittail)
768 jmp LABEL(gobble_ashr_5)
771 LABEL(ashr_5_exittail):
772 movdqa (%rsi, %rcx), %xmm1
/* ashr_6: handler for the offset pairs described in the table comment
   below.  After the first block has passed (no branch to less32bytes),
   %rcx = 16 indexes the aligned loads, %r9d = 6 is kept for the exit code,
   and %r10 = (offset in 4K page) - 0x1000 turns positive when a page
   boundary is crossed.  gobble_ashr_6 loads the next 16 bytes of both
   strings, nibble_ashr_6 tests %xmm3 for a NUL before the crossing, and
   ashr_6_exittail reloads %xmm1 for the final compare.  */
778 * The following cases will be handled by ashr_6
779 * rcx(offset of rsi) rax(offset of rdi) relative offset corresponding case
780 * n(10~15) n - 10 5(15 +(n-10) - n) ashr_6
795 jnz LABEL(less32bytes)
798 UPDATE_STRNCMP_COUNTER
801 mov $16, %rcx /* index for loads */
802 mov $6, %r9d /* byte position left over from less32bytes case */
804 * Setup %r10 value allows us to detect crossing a page boundary.
805 * When %r10 goes positive we have crossed a page boundary and
806 * need to do a nibble.
809 and $0xfff, %r10 /* offset into 4K page */
810 sub $0x1000, %r10 /* subtract 4K pagesize */
815 jg LABEL(nibble_ashr_6)
817 LABEL(gobble_ashr_6):
818 movdqa (%rsi, %rcx), %xmm1
819 movdqa (%rdi, %rcx), %xmm2
833 #ifdef USE_AS_STRNCMP
835 jbe LABEL(strcmp_exitz)
842 jg LABEL(nibble_ashr_6) /* cross page boundary */
844 movdqa (%rsi, %rcx), %xmm1
845 movdqa (%rdi, %rcx), %xmm2
859 #ifdef USE_AS_STRNCMP
861 jbe LABEL(strcmp_exitz)
866 jmp LABEL(loop_ashr_6)
869 LABEL(nibble_ashr_6):
870 pcmpeqb %xmm3, %xmm0 /* check nibble for null char */
873 jnz LABEL(ashr_6_exittail)
875 #ifdef USE_AS_STRNCMP
877 jbe LABEL(ashr_6_exittail)
882 jmp LABEL(gobble_ashr_6)
885 LABEL(ashr_6_exittail):
886 movdqa (%rsi, %rcx), %xmm1
/* ashr_7: handler for the offset pairs described in the table comment
   below.  After the first block has passed (no branch to less32bytes),
   %rcx = 16 indexes the aligned loads, %r9d = 7 is kept for the exit code,
   and %r10 = (offset in 4K page) - 0x1000 turns positive when a page
   boundary is crossed.  gobble_ashr_7 loads the next 16 bytes of both
   strings, nibble_ashr_7 tests %xmm3 for a NUL before the crossing, and
   ashr_7_exittail reloads %xmm1 and continues at aftertail.  */
892 * The following cases will be handled by ashr_7
893 * rcx(offset of rsi) rax(offset of rdi) relative offset corresponding case
894 * n(9~15) n - 9 6(15 +(n - 9) - n) ashr_7
909 jnz LABEL(less32bytes)
912 UPDATE_STRNCMP_COUNTER
915 mov $16, %rcx /* index for loads */
916 mov $7, %r9d /* byte position left over from less32bytes case */
918 * Setup %r10 value allows us to detect crossing a page boundary.
919 * When %r10 goes positive we have crossed a page boundary and
920 * need to do a nibble.
923 and $0xfff, %r10 /* offset into 4K page */
924 sub $0x1000, %r10 /* subtract 4K pagesize */
929 jg LABEL(nibble_ashr_7)
931 LABEL(gobble_ashr_7):
932 movdqa (%rsi, %rcx), %xmm1
933 movdqa (%rdi, %rcx), %xmm2
947 #ifdef USE_AS_STRNCMP
949 jbe LABEL(strcmp_exitz)
956 jg LABEL(nibble_ashr_7) /* cross page boundary */
958 movdqa (%rsi, %rcx), %xmm1
959 movdqa (%rdi, %rcx), %xmm2
973 #ifdef USE_AS_STRNCMP
975 jbe LABEL(strcmp_exitz)
980 jmp LABEL(loop_ashr_7)
983 LABEL(nibble_ashr_7):
984 pcmpeqb %xmm3, %xmm0 /* check nibble for null char */
987 jnz LABEL(ashr_7_exittail)
989 #ifdef USE_AS_STRNCMP
991 jbe LABEL(ashr_7_exittail)
996 jmp LABEL(gobble_ashr_7)
999 LABEL(ashr_7_exittail):
1000 movdqa (%rsi, %rcx), %xmm1
1003 jmp LABEL(aftertail)
/* ashr_8: handler for the offset pairs described in the table comment
   below.  The first aligned blocks of both strings are loaded, %xmm0
   receives the NUL mask of the %rsi block and %xmm2 the equality mask, whose
   sign bits are extracted into %r9d.  If that block passes, the %rdi block
   is kept in %xmm3, %rcx = 16 indexes the following loads, %r9d = 8 is kept
   for the exit code and %r10 = (offset in 4K page) - 0x1000 is the page
   crossing detector.  gobble_ashr_8 compares two further 16-byte pairs per
   pass; nibble_ashr_8 tests %xmm3 for a NUL before a page crossing;
   ashr_8_exittail reloads %xmm1 and continues at aftertail.  */
1006 * The following cases will be handled by ashr_8
1007 * rcx(offset of rsi) rax(offset of rdi) relative offset corresponding case
1008 * n(8~15) n - 8 7(15 +(n - 8) - n) ashr_8
1013 movdqa (%rdi), %xmm2
1014 movdqa (%rsi), %xmm1
1015 pcmpeqb %xmm1, %xmm0
1017 pcmpeqb %xmm1, %xmm2
1019 pmovmskb %xmm2, %r9d
1023 jnz LABEL(less32bytes)
1024 movdqa (%rdi), %xmm3
1026 UPDATE_STRNCMP_COUNTER
1029 mov $16, %rcx /* index for loads */
1030 mov $8, %r9d /* byte position left over from less32bytes case */
1032 * Setup %r10 value allows us to detect crossing a page boundary.
1033 * When %r10 goes positive we have crossed a page boundary and
1034 * need to do a nibble.
1037 and $0xfff, %r10 /* offset into 4K page */
1038 sub $0x1000, %r10 /* subtract 4K pagesize */
1043 jg LABEL(nibble_ashr_8)
1045 LABEL(gobble_ashr_8):
1046 movdqa (%rsi, %rcx), %xmm1
1047 movdqa (%rdi, %rcx), %xmm2
1054 pcmpeqb %xmm1, %xmm0
1055 pcmpeqb %xmm2, %xmm1
1057 pmovmskb %xmm1, %edx
1061 #ifdef USE_AS_STRNCMP
1063 jbe LABEL(strcmp_exitz)
1070 jg LABEL(nibble_ashr_8) /* cross page boundary */
1072 movdqa (%rsi, %rcx), %xmm1
1073 movdqa (%rdi, %rcx), %xmm2
1080 pcmpeqb %xmm1, %xmm0
1081 pcmpeqb %xmm2, %xmm1
1083 pmovmskb %xmm1, %edx
1087 #ifdef USE_AS_STRNCMP
1089 jbe LABEL(strcmp_exitz)
1094 jmp LABEL(loop_ashr_8)
1097 LABEL(nibble_ashr_8):
1098 pcmpeqb %xmm3, %xmm0 /* check nibble for null char */
1099 pmovmskb %xmm0, %edx
1101 jnz LABEL(ashr_8_exittail)
1103 #ifdef USE_AS_STRNCMP
1105 jbe LABEL(ashr_8_exittail)
1110 jmp LABEL(gobble_ashr_8)
1113 LABEL(ashr_8_exittail):
1114 movdqa (%rsi, %rcx), %xmm1
1117 jmp LABEL(aftertail)
/* ashr_9: handler for the offset pairs described in the table comment
   below.  The first aligned blocks of both strings are loaded, %xmm0
   receives the NUL mask of the %rsi block and %xmm2 the equality mask, whose
   sign bits are extracted into %r9d.  If that block passes, the %rdi block
   is kept in %xmm3, %rcx = 16 indexes the following loads, %r9d = 9 is kept
   for the exit code and %r10 = (offset in 4K page) - 0x1000 is the page
   crossing detector.  gobble_ashr_9 compares two further 16-byte pairs per
   pass and hands %xmm4 over to %xmm3 for the next one; nibble_ashr_9 tests
   %xmm3 for a NUL before a page crossing; ashr_9_exittail reloads %xmm1 and
   continues at aftertail.  */
1120 * The following cases will be handled by ashr_9
1121 * rcx(offset of rsi) rax(offset of rdi) relative offset corresponding case
1122 * n(7~15) n - 7 8(15 +(n - 7) - n) ashr_9
1127 movdqa (%rdi), %xmm2
1128 movdqa (%rsi), %xmm1
1129 pcmpeqb %xmm1, %xmm0
1131 pcmpeqb %xmm1, %xmm2
1133 pmovmskb %xmm2, %r9d
1137 jnz LABEL(less32bytes)
1138 movdqa (%rdi), %xmm3
1140 UPDATE_STRNCMP_COUNTER
1143 mov $16, %rcx /* index for loads */
1144 mov $9, %r9d /* byte position left over from less32bytes case */
1146 * Setup %r10 value allows us to detect crossing a page boundary.
1147 * When %r10 goes positive we have crossed a page boundary and
1148 * need to do a nibble.
1151 and $0xfff, %r10 /* offset into 4K page */
1152 sub $0x1000, %r10 /* subtract 4K pagesize */
1157 jg LABEL(nibble_ashr_9)
1159 LABEL(gobble_ashr_9):
1160 movdqa (%rsi, %rcx), %xmm1
1161 movdqa (%rdi, %rcx), %xmm2
1168 pcmpeqb %xmm1, %xmm0
1169 pcmpeqb %xmm2, %xmm1
1171 pmovmskb %xmm1, %edx
1175 #ifdef USE_AS_STRNCMP
1177 jbe LABEL(strcmp_exitz)
1184 jg LABEL(nibble_ashr_9) /* cross page boundary */
1186 movdqa (%rsi, %rcx), %xmm1
1187 movdqa (%rdi, %rcx), %xmm2
1194 pcmpeqb %xmm1, %xmm0
1195 pcmpeqb %xmm2, %xmm1
1197 pmovmskb %xmm1, %edx
1201 #ifdef USE_AS_STRNCMP
1203 jbe LABEL(strcmp_exitz)
1207 movdqa %xmm4, %xmm3 /* store for next cycle */
1208 jmp LABEL(loop_ashr_9)
1211 LABEL(nibble_ashr_9):
1212 pcmpeqb %xmm3, %xmm0 /* check nibble for null char */
1213 pmovmskb %xmm0, %edx
1215 jnz LABEL(ashr_9_exittail)
1217 #ifdef USE_AS_STRNCMP
1219 jbe LABEL(ashr_9_exittail)
1224 jmp LABEL(gobble_ashr_9)
1227 LABEL(ashr_9_exittail):
1228 movdqa (%rsi, %rcx), %xmm1
1231 jmp LABEL(aftertail)
/* ashr_10: handler for the offset pairs described in the table comment
   below.  The first aligned blocks of both strings are loaded, %xmm0
   receives the NUL mask of the %rsi block and %xmm2 the equality mask, whose
   sign bits are extracted into %r9d.  If that block passes, the %rdi block
   is kept in %xmm3, %rcx = 16 indexes the following loads, %r9d = 10 is
   kept for the exit code and %r10 = (offset in 4K page) - 0x1000 is the
   page crossing detector.  loop_ashr_10 / gobble_ashr_10 compare two
   further 16-byte pairs per pass; nibble_ashr_10 tests %xmm3 for a NUL
   before a page crossing; ashr_10_exittail reloads %xmm1 and continues at
   aftertail.  */
1234 * The following cases will be handled by ashr_10
1235 * rcx(offset of rsi) rax(offset of rdi) relative offset corresponding case
1236 * n(6~15) n - 6 9(15 +(n - 6) - n) ashr_10
1241 movdqa (%rdi), %xmm2
1242 movdqa (%rsi), %xmm1
1243 pcmpeqb %xmm1, %xmm0
1245 pcmpeqb %xmm1, %xmm2
1247 pmovmskb %xmm2, %r9d
1251 jnz LABEL(less32bytes)
1252 movdqa (%rdi), %xmm3
1254 UPDATE_STRNCMP_COUNTER
1257 mov $16, %rcx /* index for loads */
1258 mov $10, %r9d /* byte position left over from less32bytes case */
1260 * Setup %r10 value allows us to detect crossing a page boundary.
1261 * When %r10 goes positive we have crossed a page boundary and
1262 * need to do a nibble.
1265 and $0xfff, %r10 /* offset into 4K page */
1266 sub $0x1000, %r10 /* subtract 4K pagesize */
1269 LABEL(loop_ashr_10):
1271 jg LABEL(nibble_ashr_10)
1273 LABEL(gobble_ashr_10):
1274 movdqa (%rsi, %rcx), %xmm1
1275 movdqa (%rdi, %rcx), %xmm2
1282 pcmpeqb %xmm1, %xmm0
1283 pcmpeqb %xmm2, %xmm1
1285 pmovmskb %xmm1, %edx
1289 #ifdef USE_AS_STRNCMP
1291 jbe LABEL(strcmp_exitz)
1298 jg LABEL(nibble_ashr_10) /* cross page boundary */
1300 movdqa (%rsi, %rcx), %xmm1
1301 movdqa (%rdi, %rcx), %xmm2
1308 pcmpeqb %xmm1, %xmm0
1309 pcmpeqb %xmm2, %xmm1
1311 pmovmskb %xmm1, %edx
1315 #ifdef USE_AS_STRNCMP
1317 jbe LABEL(strcmp_exitz)
1322 jmp LABEL(loop_ashr_10)
1325 LABEL(nibble_ashr_10):
1326 pcmpeqb %xmm3, %xmm0 /* check nibble for null char */
1327 pmovmskb %xmm0, %edx
1329 jnz LABEL(ashr_10_exittail)
1331 #ifdef USE_AS_STRNCMP
1333 jbe LABEL(ashr_10_exittail)
1338 jmp LABEL(gobble_ashr_10)
1341 LABEL(ashr_10_exittail):
1342 movdqa (%rsi, %rcx), %xmm1
1345 jmp LABEL(aftertail)
/* ashr_11: handler for the offset pairs described in the table comment
   below.  The first aligned blocks of both strings are loaded, %xmm0
   receives the NUL mask of the %rsi block and %xmm2 the equality mask, whose
   sign bits are extracted into %r9d.  If that block passes, the %rdi block
   is kept in %xmm3, %rcx = 16 indexes the following loads, %r9d = 11 is
   kept for the exit code and %r10 = (offset in 4K page) - 0x1000 is the
   page crossing detector.  loop_ashr_11 / gobble_ashr_11 compare two
   further 16-byte pairs per pass; nibble_ashr_11 tests %xmm3 for a NUL
   before a page crossing; ashr_11_exittail reloads %xmm1 and continues at
   aftertail.  */
1348 * The following cases will be handled by ashr_11
1349 * rcx(offset of rsi) rax(offset of rdi) relative offset corresponding case
1350 * n(5~15) n - 5 10(15 +(n - 5) - n) ashr_11
1355 movdqa (%rdi), %xmm2
1356 movdqa (%rsi), %xmm1
1357 pcmpeqb %xmm1, %xmm0
1359 pcmpeqb %xmm1, %xmm2
1361 pmovmskb %xmm2, %r9d
1365 jnz LABEL(less32bytes)
1366 movdqa (%rdi), %xmm3
1368 UPDATE_STRNCMP_COUNTER
1371 mov $16, %rcx /* index for loads */
1372 mov $11, %r9d /* byte position left over from less32bytes case */
1374 * Setup %r10 value allows us to detect crossing a page boundary.
1375 * When %r10 goes positive we have crossed a page boundary and
1376 * need to do a nibble.
1379 and $0xfff, %r10 /* offset into 4K page */
1380 sub $0x1000, %r10 /* subtract 4K pagesize */
1383 LABEL(loop_ashr_11):
1385 jg LABEL(nibble_ashr_11)
1387 LABEL(gobble_ashr_11):
1388 movdqa (%rsi, %rcx), %xmm1
1389 movdqa (%rdi, %rcx), %xmm2
1396 pcmpeqb %xmm1, %xmm0
1397 pcmpeqb %xmm2, %xmm1
1399 pmovmskb %xmm1, %edx
1403 #ifdef USE_AS_STRNCMP
1405 jbe LABEL(strcmp_exitz)
1412 jg LABEL(nibble_ashr_11) /* cross page boundary */
1414 movdqa (%rsi, %rcx), %xmm1
1415 movdqa (%rdi, %rcx), %xmm2
1422 pcmpeqb %xmm1, %xmm0
1423 pcmpeqb %xmm2, %xmm1
1425 pmovmskb %xmm1, %edx
1429 #ifdef USE_AS_STRNCMP
1431 jbe LABEL(strcmp_exitz)
1436 jmp LABEL(loop_ashr_11)
1439 LABEL(nibble_ashr_11):
1440 pcmpeqb %xmm3, %xmm0 /* check nibble for null char */
1441 pmovmskb %xmm0, %edx
1443 jnz LABEL(ashr_11_exittail)
1445 #ifdef USE_AS_STRNCMP
1447 jbe LABEL(ashr_11_exittail)
1452 jmp LABEL(gobble_ashr_11)
1455 LABEL(ashr_11_exittail):
1456 movdqa (%rsi, %rcx), %xmm1
1459 jmp LABEL(aftertail)
/* ashr_12: handler for the offset pairs described in the table comment
   below.  The first aligned blocks of both strings are loaded, %xmm0
   receives the NUL mask of the %rsi block and %xmm2 the equality mask, whose
   sign bits are extracted into %r9d.  If that block passes, the %rdi block
   is kept in %xmm3, %rcx = 16 indexes the following loads, %r9d = 12 is
   kept for the exit code and %r10 = (offset in 4K page) - 0x1000 is the
   page crossing detector.  loop_ashr_12 / gobble_ashr_12 compare two
   further 16-byte pairs per pass; nibble_ashr_12 tests %xmm3 for a NUL
   before a page crossing; ashr_12_exittail reloads %xmm1 and continues at
   aftertail.  */
1462 * The following cases will be handled by ashr_12
1463 * rcx(offset of rsi) rax(offset of rdi) relative offset corresponding case
1464 * n(4~15) n - 4 11(15 +(n - 4) - n) ashr_12
1469 movdqa (%rdi), %xmm2
1470 movdqa (%rsi), %xmm1
1471 pcmpeqb %xmm1, %xmm0
1473 pcmpeqb %xmm1, %xmm2
1475 pmovmskb %xmm2, %r9d
1479 jnz LABEL(less32bytes)
1480 movdqa (%rdi), %xmm3
1482 UPDATE_STRNCMP_COUNTER
1485 mov $16, %rcx /* index for loads */
1486 mov $12, %r9d /* byte position left over from less32bytes case */
1488 * Setup %r10 value allows us to detect crossing a page boundary.
1489 * When %r10 goes positive we have crossed a page boundary and
1490 * need to do a nibble.
1493 and $0xfff, %r10 /* offset into 4K page */
1494 sub $0x1000, %r10 /* subtract 4K pagesize */
1497 LABEL(loop_ashr_12):
1499 jg LABEL(nibble_ashr_12)
1501 LABEL(gobble_ashr_12):
1502 movdqa (%rsi, %rcx), %xmm1
1503 movdqa (%rdi, %rcx), %xmm2
1510 pcmpeqb %xmm1, %xmm0
1511 pcmpeqb %xmm2, %xmm1
1513 pmovmskb %xmm1, %edx
1517 #ifdef USE_AS_STRNCMP
1519 jbe LABEL(strcmp_exitz)
1526 jg LABEL(nibble_ashr_12) /* cross page boundary */
1528 movdqa (%rsi, %rcx), %xmm1
1529 movdqa (%rdi, %rcx), %xmm2
1536 pcmpeqb %xmm1, %xmm0
1537 pcmpeqb %xmm2, %xmm1
1539 pmovmskb %xmm1, %edx
1543 #ifdef USE_AS_STRNCMP
1545 jbe LABEL(strcmp_exitz)
1550 jmp LABEL(loop_ashr_12)
1553 LABEL(nibble_ashr_12):
1554 pcmpeqb %xmm3, %xmm0 /* check nibble for null char */
1555 pmovmskb %xmm0, %edx
1557 jnz LABEL(ashr_12_exittail)
1559 #ifdef USE_AS_STRNCMP
1561 jbe LABEL(ashr_12_exittail)
1566 jmp LABEL(gobble_ashr_12)
1569 LABEL(ashr_12_exittail):
1570 movdqa (%rsi, %rcx), %xmm1
1573 jmp LABEL(aftertail)
/* ashr_13: handler for the offset pairs described in the table comment
   below.  The first aligned blocks of both strings are loaded, %xmm0
   receives the NUL mask of the %rsi block and %xmm2 the equality mask, whose
   sign bits are extracted into %r9d.  If that block passes, the %rdi block
   is kept in %xmm3, %rcx = 16 indexes the following loads, %r9d = 13 is
   kept for the exit code and %r10 = (offset in 4K page) - 0x1000 is the
   page crossing detector.  loop_ashr_13 / gobble_ashr_13 compare two
   further 16-byte pairs per pass; nibble_ashr_13 tests %xmm3 for a NUL
   before a page crossing; ashr_13_exittail reloads %xmm1 and continues at
   aftertail.  */
1576 * The following cases will be handled by ashr_13
1577 * rcx(offset of rsi) rax(offset of rdi) relative offset corresponding case
1578 * n(3~15) n - 3 12(15 +(n - 3) - n) ashr_13
1583 movdqa (%rdi), %xmm2
1584 movdqa (%rsi), %xmm1
1585 pcmpeqb %xmm1, %xmm0
1587 pcmpeqb %xmm1, %xmm2
1589 pmovmskb %xmm2, %r9d
1593 jnz LABEL(less32bytes)
1594 movdqa (%rdi), %xmm3
1596 UPDATE_STRNCMP_COUNTER
1599 mov $16, %rcx /* index for loads */
1600 mov $13, %r9d /* byte position left over from less32bytes case */
1602 * Setup %r10 value allows us to detect crossing a page boundary.
1603 * When %r10 goes positive we have crossed a page boundary and
1604 * need to do a nibble.
1607 and $0xfff, %r10 /* offset into 4K page */
1608 sub $0x1000, %r10 /* subtract 4K pagesize */
1611 LABEL(loop_ashr_13):
1613 jg LABEL(nibble_ashr_13)
1615 LABEL(gobble_ashr_13):
1616 movdqa (%rsi, %rcx), %xmm1
1617 movdqa (%rdi, %rcx), %xmm2
1624 pcmpeqb %xmm1, %xmm0
1625 pcmpeqb %xmm2, %xmm1
1627 pmovmskb %xmm1, %edx
1631 #ifdef USE_AS_STRNCMP
1633 jbe LABEL(strcmp_exitz)
1640 jg LABEL(nibble_ashr_13) /* cross page boundary */
1642 movdqa (%rsi, %rcx), %xmm1
1643 movdqa (%rdi, %rcx), %xmm2
1650 pcmpeqb %xmm1, %xmm0
1651 pcmpeqb %xmm2, %xmm1
1653 pmovmskb %xmm1, %edx
1657 #ifdef USE_AS_STRNCMP
1659 jbe LABEL(strcmp_exitz)
1664 jmp LABEL(loop_ashr_13)
1667 LABEL(nibble_ashr_13):
1668 pcmpeqb %xmm3, %xmm0 /* check nibble for null char */
1669 pmovmskb %xmm0, %edx
1671 jnz LABEL(ashr_13_exittail)
1673 #ifdef USE_AS_STRNCMP
1675 jbe LABEL(ashr_13_exittail)
1680 jmp LABEL(gobble_ashr_13)
1683 LABEL(ashr_13_exittail):
1684 movdqa (%rsi, %rcx), %xmm1
1687 jmp LABEL(aftertail)
/* ashr_14: handler for the offset pairs described in the table comment
   below.  The first aligned blocks of both strings are loaded, %xmm0
   receives the NUL mask of the %rsi block and %xmm2 the equality mask, whose
   sign bits are extracted into %r9d.  If that block passes, the %rdi block
   is kept in %xmm3, %rcx = 16 indexes the following loads, %r9d = 14 is
   kept for the exit code and %r10 = (offset in 4K page) - 0x1000 is the
   page crossing detector.  loop_ashr_14 / gobble_ashr_14 compare two
   further 16-byte pairs per pass; nibble_ashr_14 tests %xmm3 for a NUL
   before a page crossing; ashr_14_exittail reloads %xmm1 and continues at
   aftertail.  */
1690 * The following cases will be handled by ashr_14
1691 * rcx(offset of rsi) rax(offset of rdi) relative offset corresponding case
1692 * n(2~15) n - 2 13(15 +(n - 2) - n) ashr_14
1697 movdqa (%rdi), %xmm2
1698 movdqa (%rsi), %xmm1
1699 pcmpeqb %xmm1, %xmm0
1701 pcmpeqb %xmm1, %xmm2
1703 pmovmskb %xmm2, %r9d
1707 jnz LABEL(less32bytes)
1708 movdqa (%rdi), %xmm3
1710 UPDATE_STRNCMP_COUNTER
1713 mov $16, %rcx /* index for loads */
1714 mov $14, %r9d /* byte position left over from less32bytes case */
1716 * Setup %r10 value allows us to detect crossing a page boundary.
1717 * When %r10 goes positive we have crossed a page boundary and
1718 * need to do a nibble.
1721 and $0xfff, %r10 /* offset into 4K page */
1722 sub $0x1000, %r10 /* subtract 4K pagesize */
1725 LABEL(loop_ashr_14):
1727 jg LABEL(nibble_ashr_14)
1729 LABEL(gobble_ashr_14):
1730 movdqa (%rsi, %rcx), %xmm1
1731 movdqa (%rdi, %rcx), %xmm2
1738 pcmpeqb %xmm1, %xmm0
1739 pcmpeqb %xmm2, %xmm1
1741 pmovmskb %xmm1, %edx
1745 #ifdef USE_AS_STRNCMP
1747 jbe LABEL(strcmp_exitz)
1754 jg LABEL(nibble_ashr_14) /* cross page boundary */
1756 movdqa (%rsi, %rcx), %xmm1
1757 movdqa (%rdi, %rcx), %xmm2
1764 pcmpeqb %xmm1, %xmm0
1765 pcmpeqb %xmm2, %xmm1
1767 pmovmskb %xmm1, %edx
1771 #ifdef USE_AS_STRNCMP
1773 jbe LABEL(strcmp_exitz)
1778 jmp LABEL(loop_ashr_14)
1781 LABEL(nibble_ashr_14):
1782 pcmpeqb %xmm3, %xmm0 /* check nibble for null char */
1783 pmovmskb %xmm0, %edx
1785 jnz LABEL(ashr_14_exittail)
1787 #ifdef USE_AS_STRNCMP
1789 jbe LABEL(ashr_14_exittail)
1794 jmp LABEL(gobble_ashr_14)
1797 LABEL(ashr_14_exittail):
1798 movdqa (%rsi, %rcx), %xmm1
1801 jmp LABEL(aftertail)
/* ashr_15: handler for the offset pairs described in the table comment
   below.  The first aligned blocks of both strings are loaded, %xmm0
   receives the NUL mask of the %rsi block and %xmm2 the equality mask, whose
   sign bits are extracted into %r9d.  If that block passes, the %rdi block
   is kept in %xmm3, %rcx = 16 indexes the following loads, %r9d = 15 is
   kept for the exit code and %r10 = (offset in 4K page) - 0x1000 is the
   page crossing detector.  loop_ashr_15 / gobble_ashr_15 compare two
   further 16-byte pairs per pass; nibble_ashr_15 tests %xmm3 for a NUL
   before a page crossing; ashr_15_exittail reloads %xmm1 for the final
   compare.  */
1804 * The following cases will be handled by ashr_15
1805 * rcx(offset of rsi) rax(offset of rdi) relative offset corresponding case
1806 * n(1~15) n - 1 14(15 +(n - 1) - n) ashr_15
1811 movdqa (%rdi), %xmm2
1812 movdqa (%rsi), %xmm1
1813 pcmpeqb %xmm1, %xmm0
1815 pcmpeqb %xmm1, %xmm2
1817 pmovmskb %xmm2, %r9d
1821 jnz LABEL(less32bytes)
1823 movdqa (%rdi), %xmm3
1825 UPDATE_STRNCMP_COUNTER
1828 mov $16, %rcx /* index for loads */
1829 mov $15, %r9d /* byte position left over from less32bytes case */
1831 * Setup %r10 value allows us to detect crossing a page boundary.
1832 * When %r10 goes positive we have crossed a page boundary and
1833 * need to do a nibble.
1836 and $0xfff, %r10 /* offset into 4K page */
1838 sub $0x1000, %r10 /* subtract 4K pagesize */
1841 LABEL(loop_ashr_15):
1843 jg LABEL(nibble_ashr_15)
1845 LABEL(gobble_ashr_15):
1846 movdqa (%rsi, %rcx), %xmm1
1847 movdqa (%rdi, %rcx), %xmm2
1854 pcmpeqb %xmm1, %xmm0
1855 pcmpeqb %xmm2, %xmm1
1857 pmovmskb %xmm1, %edx
1861 #ifdef USE_AS_STRNCMP
1863 jbe LABEL(strcmp_exitz)
1870 jg LABEL(nibble_ashr_15) /* cross page boundary */
1872 movdqa (%rsi, %rcx), %xmm1
1873 movdqa (%rdi, %rcx), %xmm2
1880 pcmpeqb %xmm1, %xmm0
1881 pcmpeqb %xmm2, %xmm1
1883 pmovmskb %xmm1, %edx
1887 #ifdef USE_AS_STRNCMP
1889 jbe LABEL(strcmp_exitz)
1894 jmp LABEL(loop_ashr_15)
1897 LABEL(nibble_ashr_15):
1898 pcmpeqb %xmm3, %xmm0 /* check nibble for null char */
1899 pmovmskb %xmm0, %edx
1901 jnz LABEL(ashr_15_exittail)
1903 #ifdef USE_AS_STRNCMP
/* NOTE(review): this case branches with je where the other nibble checks use
   jbe; the instruction that sets the flags is not visible here, so confirm
   the condition against the complete source.  */
1905 je LABEL(ashr_15_exittail)
1910 jmp LABEL(gobble_ashr_15)
1913 LABEL(ashr_15_exittail):
1914 movdqa (%rsi, %rcx), %xmm1
/* Common exit code.  The equality mask of %xmm1 against %xmm3 is extracted
   into %edx; %rdi and %rsi are moved to the addresses of the 16-byte blocks
   being examined (using the per-case value in %r9 and the load index in
   %rcx); the pointers are swapped back according to the flag kept in %r8d;
   bsf then turns the mask in %rdx into the index of its lowest set bit, and
   the two bytes at that index are loaded zero-extended into %ecx and %eax.
   NOTE(review): presumably this is the code reached through the aftertail,
   exit, less32bytes and less16bytes labels used by the jumps above - those
   label lines are not visible in this listing.  */
1920 pcmpeqb %xmm3, %xmm1
1922 pmovmskb %xmm1, %edx
1927 lea -16(%r9, %rcx), %rax /* locate the exact offset for rdi */
1929 lea (%rdi, %rax), %rdi /* locate the exact address for first operand(rdi) */
1930 lea (%rsi, %rcx), %rsi /* locate the exact address for second operand(rsi) */
1933 xchg %rsi, %rdi /* recover original order according to flag(%r8d) */
1938 bsf %rdx, %rdx /* find and store bit index in %rdx */
1940 #ifdef USE_AS_STRNCMP
1942 jbe LABEL(strcmp_exitz)
1944 movzbl (%rsi, %rdx), %ecx
1945 movzbl (%rdi, %rdx), %eax
1950 LABEL(strcmp_exitz):
1961 END (BP_SYM (STRCMP))
/* Dispatch table read by the `movslq (%r10, %r9,4), %r9` lookup in the
   alignment dispatch code.  It holds sixteen 32-bit entries in read-only
   data, each the difference between a handler label and the table itself
   rather than an absolute address.  Index 0..14 selects ashr_1..ashr_15 and
   index 15 selects ashr_0, matching the "relative offset" column of the
   per-case comments.  */
1963 .section .rodata,"a",@progbits
1965 LABEL(unaligned_table):
1966 .int LABEL(ashr_1) - LABEL(unaligned_table)
1967 .int LABEL(ashr_2) - LABEL(unaligned_table)
1968 .int LABEL(ashr_3) - LABEL(unaligned_table)
1969 .int LABEL(ashr_4) - LABEL(unaligned_table)
1970 .int LABEL(ashr_5) - LABEL(unaligned_table)
1971 .int LABEL(ashr_6) - LABEL(unaligned_table)
1972 .int LABEL(ashr_7) - LABEL(unaligned_table)
1973 .int LABEL(ashr_8) - LABEL(unaligned_table)
1974 .int LABEL(ashr_9) - LABEL(unaligned_table)
1975 .int LABEL(ashr_10) - LABEL(unaligned_table)
1976 .int LABEL(ashr_11) - LABEL(unaligned_table)
1977 .int LABEL(ashr_12) - LABEL(unaligned_table)
1978 .int LABEL(ashr_13) - LABEL(unaligned_table)
1979 .int LABEL(ashr_14) - LABEL(unaligned_table)
1980 .int LABEL(ashr_15) - LABEL(unaligned_table)
1981 .int LABEL(ashr_0) - LABEL(unaligned_table)
1982 #endif /* NOT_IN_libc */
1983 libc_hidden_builtin_def (STRCMP)