2 Copyright (C) 2011 Free Software Foundation, Inc.
3 Contributed by Intel Corporation.
4 This file is part of the GNU C Library.
6 The GNU C Library is free software; you can redistribute it and/or
7 modify it under the terms of the GNU Lesser General Public
8 License as published by the Free Software Foundation; either
9 version 2.1 of the License, or (at your option) any later version.
11 The GNU C Library is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 Lesser General Public License for more details.
16 You should have received a copy of the GNU Lesser General Public
17 License along with the GNU C Library; if not, write to the Free
18 Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
/* Helpers that keep the call-frame information (CFI) in step with 4-byte
   pushes and pops of a register.
   CFI_PUSH(REG): the CFA offset grows by 4 and REG is recorded as saved at
   offset 0 from the current CFA register.  */
25 # define CFI_PUSH(REG) \
26 cfi_adjust_cfa_offset (4); \
27 cfi_rel_offset (REG, 0)
/* CFI_POP(REG): shrink the CFA offset by 4 again.
   NOTE(review): the definition below ends in a line continuation, but the
   continued line is absent (presumably `cfi_restore (REG)`); as written the
   backslash splices the following PUSH definition into this macro.  Confirm
   against the upstream file.
   PUSH(REG): pushl REG followed by the matching CFI_PUSH.  */
29 # define CFI_POP(REG) \
30 cfi_adjust_cfa_offset (-4); \
33 # define PUSH(REG) pushl REG; CFI_PUSH (REG)
/* POP(REG): popl REG followed by the matching CFI_POP.  */
34 # define POP(REG) popl REG; CFI_POP (REG)
/* ENTRANCE: save %esi, then %edi, on the stack.  */
36 # define ENTRANCE PUSH(%esi); PUSH(%edi)
/* RETURN: restore %edi and %esi (reverse order of ENTRANCE) and return.  The
   CFI_PUSH pair after `ret` re-declares both registers as saved, presumably so
   that the unwind information is right for the instructions that follow this
   return in the file.  */
37 # define RETURN POP(%edi); POP(%esi); ret; CFI_PUSH(%esi); CFI_PUSH(%edi);
42 /* Note: wcscmp uses signed comparison, not unsigned as in the strcmp function. */
47 * This implementation uses SSE to compare up to 16 bytes at a time.
/* Set-up, then compare double_words 4-15 (byte offsets 16, 32 and 48) of the
   strings at %edi and %esi using unaligned loads.  Every group of four
   double_words follows the same pattern: flag nulls in xmm0, build an
   equality mask, subtract the null mask from it, then test edx against
   0xffff and leave through L(less4_double_words_N) unless the whole group
   matched.
   NOTE(review): the instruction that fills edx from the psubb result is not
   shown next to these lines (presumably pmovmskb) -- confirm.  */
82 pxor %xmm0, %xmm0 /* xmm0 = 0, the reference value for null char checks */
85 and $63, %eax /* eax &= 63: esi alignment in cache line */
86 and $63, %edx /* edx &= 63: edi alignment in cache line */
/* Double_words 4-7 (offset 16).  */
132 movdqu 16(%edi), %xmm1
133 movdqu 16(%esi), %xmm2
134 pcmpeqd %xmm1, %xmm0 /* flag null double_words in xmm0 (xmm0 must be 0 beforehand) */
135 pcmpeqd %xmm2, %xmm1 /* per-lane equality mask for double_words 4-7 */
136 psubb %xmm0, %xmm1 /* 0xff bytes survive only where equal and non-null */
138 sub $0xffff, %edx /* edx -> 0 only if it was 0xffff (all 4 double_words matched) */
139 jnz L(less4_double_words_16)
/* Double_words 8-11 (offset 32).  */
141 movdqu 32(%edi), %xmm1
142 movdqu 32(%esi), %xmm2
143 pcmpeqd %xmm1, %xmm0 /* flag null double_words in xmm0 (xmm0 must be 0 beforehand) */
144 pcmpeqd %xmm2, %xmm1 /* per-lane equality mask for double_words 8-11 */
145 psubb %xmm0, %xmm1 /* 0xff bytes survive only where equal and non-null */
147 sub $0xffff, %edx /* edx -> 0 only if it was 0xffff (all 4 double_words matched) */
148 jnz L(less4_double_words_32)
/* Double_words 12-15 (offset 48).  */
150 movdqu 48(%edi), %xmm1
151 movdqu 48(%esi), %xmm2
152 pcmpeqd %xmm1, %xmm0 /* flag null double_words in xmm0 (xmm0 must be 0 beforehand) */
153 pcmpeqd %xmm2, %xmm1 /* per-lane equality mask for double_words 12-15 */
154 psubb %xmm0, %xmm1 /* 0xff bytes survive only where equal and non-null */
156 sub $0xffff, %edx /* edx -> 0 only if it was 0xffff (all 4 double_words matched) */
157 jnz L(less4_double_words_48)
161 jmp L(continue_48_48) /* reached only when none of the jnz exits above was taken */
/* Further unaligned compare runs.  Each group of four double_words is checked
   the same way: flag nulls in xmm0, build an equality mask, subtract the null
   mask, then test edx against 0xffff and exit through
   L(less4_double_words_N) unless the whole group matched.
   NOTE(review): the instruction that fills edx is not shown next to these
   lines (presumably pmovmskb of the psubb result) -- confirm.  */
/* Double_words 4-7 (offset 16).  */
199 movdqu 16(%edi), %xmm1
200 movdqu 16(%esi), %xmm2
201 pcmpeqd %xmm1, %xmm0 /* flag null double_words in xmm0 (xmm0 must be 0 beforehand) */
202 pcmpeqd %xmm2, %xmm1 /* per-lane equality mask for double_words 4-7 */
203 psubb %xmm0, %xmm1 /* 0xff bytes survive only where equal and non-null */
205 sub $0xffff, %edx /* edx -> 0 only if it was 0xffff (all 4 double_words matched) */
206 jnz L(less4_double_words_16)
/* Double_words 8-11 (offset 32).  */
208 movdqu 32(%edi), %xmm1
209 movdqu 32(%esi), %xmm2
210 pcmpeqd %xmm1, %xmm0 /* flag null double_words in xmm0 (xmm0 must be 0 beforehand) */
211 pcmpeqd %xmm2, %xmm1 /* per-lane equality mask for double_words 8-11 */
212 psubb %xmm0, %xmm1 /* 0xff bytes survive only where equal and non-null */
214 sub $0xffff, %edx /* edx -> 0 only if it was 0xffff (all 4 double_words matched) */
215 jnz L(less4_double_words_32)
/* Null probe of the 16 bytes at (%edi); the memory operand of pcmpeqd is
   read directly, with no separate load.  */
258 pcmpeqd (%edi), %xmm0 /* flag null double_words at (%edi) in xmm0 (xmm0 must be 0 beforehand) */
262 jnz L(less4_double_words1)
/* Double_words 4-15 with the %esi side loaded unaligned and the %edi side
   used directly as the pcmpeqd memory operand.  */
279 movdqu 16(%esi), %xmm2
280 pcmpeqd %xmm2, %xmm0 /* flag null double_words in xmm0 (xmm0 must be 0 beforehand) */
281 pcmpeqd 16(%edi), %xmm2 /* per-lane equality mask for double_words 4-7 */
282 psubb %xmm0, %xmm2 /* 0xff bytes survive only where equal and non-null */
284 sub $0xffff, %edx /* edx -> 0 only if it was 0xffff (all 4 double_words matched) */
285 jnz L(less4_double_words_16)
287 movdqu 32(%esi), %xmm2
288 pcmpeqd %xmm2, %xmm0 /* flag null double_words in xmm0 (xmm0 must be 0 beforehand) */
289 pcmpeqd 32(%edi), %xmm2 /* per-lane equality mask for double_words 8-11 */
290 psubb %xmm0, %xmm2 /* 0xff bytes survive only where equal and non-null */
292 sub $0xffff, %edx /* edx -> 0 only if it was 0xffff (all 4 double_words matched) */
293 jnz L(less4_double_words_32)
295 movdqu 48(%esi), %xmm2
296 pcmpeqd %xmm2, %xmm0 /* flag null double_words in xmm0 (xmm0 must be 0 beforehand) */
297 pcmpeqd 48(%edi), %xmm2 /* per-lane equality mask for double_words 12-15 */
298 psubb %xmm0, %xmm2 /* 0xff bytes survive only where equal and non-null */
300 sub $0xffff, %edx /* edx -> 0 only if it was 0xffff (all 4 double_words matched) */
301 jnz L(less4_double_words_48)
305 jmp L(continue_00_48) /* reached only when none of the jnz exits above was taken */
/* Unaligned compare runs that finish at L(continue_32_48) and
   L(continue_16_48).  Each group of four double_words: flag nulls in xmm0,
   build an equality mask, subtract the null mask, then test edx against
   0xffff and exit through L(less4_double_words_N) unless the group matched.
   NOTE(review): the instruction that fills edx is not shown next to these
   lines (presumably pmovmskb of the psubb result) -- confirm.  */
/* Double_words 8-11 (offset 32).  */
368 movdqu 32(%edi), %xmm1
369 movdqu 32(%esi), %xmm2
370 pcmpeqd %xmm1, %xmm0 /* flag null double_words in xmm0 (xmm0 must be 0 beforehand) */
371 pcmpeqd %xmm2, %xmm1 /* per-lane equality mask for double_words 8-11 */
372 psubb %xmm0, %xmm1 /* 0xff bytes survive only where equal and non-null */
374 sub $0xffff, %edx /* edx -> 0 only if it was 0xffff (all 4 double_words matched) */
375 jnz L(less4_double_words_32)
/* Double_words 12-15 (offset 48).  */
377 movdqu 48(%edi), %xmm1
378 movdqu 48(%esi), %xmm2
379 pcmpeqd %xmm1, %xmm0 /* flag null double_words in xmm0 (xmm0 must be 0 beforehand) */
380 pcmpeqd %xmm2, %xmm1 /* per-lane equality mask for double_words 12-15 */
381 psubb %xmm0, %xmm1 /* 0xff bytes survive only where equal and non-null */
383 sub $0xffff, %edx /* edx -> 0 only if it was 0xffff (all 4 double_words matched) */
384 jnz L(less4_double_words_48)
388 jmp L(continue_32_48) /* reached only when none of the jnz exits above was taken */
/* Double_words 4-7 (offset 16).  */
427 movdqu 16(%edi), %xmm1
428 movdqu 16(%esi), %xmm2
429 pcmpeqd %xmm1, %xmm0 /* flag null double_words in xmm0 (xmm0 must be 0 beforehand) */
430 pcmpeqd %xmm2, %xmm1 /* per-lane equality mask for double_words 4-7 */
431 psubb %xmm0, %xmm1 /* 0xff bytes survive only where equal and non-null */
433 sub $0xffff, %edx /* edx -> 0 only if it was 0xffff (all 4 double_words matched) */
434 jnz L(less4_double_words_16)
/* Double_words 12-15 (offset 48).  */
460 movdqu 48(%edi), %xmm1
461 movdqu 48(%esi), %xmm2
462 pcmpeqd %xmm1, %xmm0 /* flag null double_words in xmm0 (xmm0 must be 0 beforehand) */
463 pcmpeqd %xmm2, %xmm1 /* per-lane equality mask for double_words 12-15 */
464 psubb %xmm0, %xmm1 /* 0xff bytes survive only where equal and non-null */
466 sub $0xffff, %edx /* edx -> 0 only if it was 0xffff (all 4 double_words matched) */
467 jnz L(less4_double_words_48)
471 jmp L(continue_16_48) /* reached only when none of the jnz exits above was taken */
/* Compare double_words 0-15 using movdqa loads from %edi and pcmpeqd memory
   operands at %esi; both forms require 16-byte aligned addresses.  Each group
   of four: flag nulls in xmm0, build an equality mask, subtract the null
   mask, then test edx against 0xffff and exit through
   L(less4_double_words*) unless the group matched.
   NOTE(review): the load of xmm1 for the first group and the instruction
   that fills edx are not shown next to these lines -- confirm.  */
476 pcmpeqd %xmm1, %xmm0 /* flag null double_words in xmm0 (xmm0 must be 0 beforehand) */
477 pcmpeqd (%esi), %xmm1 /* per-lane equality mask for double_words 0-3 */
478 psubb %xmm0, %xmm1 /* 0xff bytes survive only where equal and non-null */
480 sub $0xffff, %edx /* edx -> 0 only if it was 0xffff (all 4 double_words matched) */
481 jnz L(less4_double_words)
/* Double_words 4-7 (offset 16).  */
483 movdqa 16(%edi), %xmm3
484 pcmpeqd %xmm3, %xmm0 /* flag null double_words in xmm0 (xmm0 must be 0 beforehand) */
485 pcmpeqd 16(%esi), %xmm3 /* per-lane equality mask for double_words 4-7 */
486 psubb %xmm0, %xmm3 /* 0xff bytes survive only where equal and non-null */
488 sub $0xffff, %edx /* edx -> 0 only if it was 0xffff (all 4 double_words matched) */
489 jnz L(less4_double_words_16)
/* Double_words 8-11 (offset 32).  */
491 movdqa 32(%edi), %xmm5
492 pcmpeqd %xmm5, %xmm0 /* flag null double_words in xmm0 (xmm0 must be 0 beforehand) */
493 pcmpeqd 32(%esi), %xmm5 /* per-lane equality mask for double_words 8-11 */
494 psubb %xmm0, %xmm5 /* 0xff bytes survive only where equal and non-null */
496 sub $0xffff, %edx /* edx -> 0 only if it was 0xffff (all 4 double_words matched) */
497 jnz L(less4_double_words_32)
/* Double_words 12-15 (offset 48).  */
499 movdqa 48(%edi), %xmm1
500 pcmpeqd %xmm1, %xmm0 /* flag null double_words in xmm0 (xmm0 must be 0 beforehand) */
501 pcmpeqd 48(%esi), %xmm1 /* per-lane equality mask for double_words 12-15 */
502 psubb %xmm0, %xmm1 /* 0xff bytes survive only where equal and non-null */
504 sub $0xffff, %edx /* edx -> 0 only if it was 0xffff (all 4 double_words matched) */
505 jnz L(less4_double_words_48)
509 jmp L(continue_00_00) /* reached only when none of the jnz exits above was taken */
/* Three compare runs of increasing length (1, 2 and 3 groups of four
   double_words) that all finish at L(continue_00_48).  The %esi side is held
   in xmm2; the %edi side is used directly as the pcmpeqd memory operand.
   Each group: flag nulls in xmm0, build an equality mask, subtract the null
   mask, then test edx against 0xffff and exit through
   L(less4_double_words*) unless the group matched.
   NOTE(review): the load of xmm2 for each offset-0 group and the instruction
   that fills edx are not shown next to these lines -- confirm.  */
/* Run 1: double_words 0-3.  */
514 pcmpeqd %xmm2, %xmm0 /* flag null double_words in xmm0 (xmm0 must be 0 beforehand) */
515 pcmpeqd (%edi), %xmm2 /* per-lane equality mask for double_words 0-3 */
516 psubb %xmm0, %xmm2 /* 0xff bytes survive only where equal and non-null */
518 sub $0xffff, %edx /* edx -> 0 only if it was 0xffff (all 4 double_words matched) */
519 jnz L(less4_double_words)
523 jmp L(continue_00_48) /* reached only when the jnz exit above was not taken */
/* Run 2: double_words 0-7.  */
528 pcmpeqd %xmm2, %xmm0 /* flag null double_words in xmm0 (xmm0 must be 0 beforehand) */
529 pcmpeqd (%edi), %xmm2 /* per-lane equality mask for double_words 0-3 */
530 psubb %xmm0, %xmm2 /* 0xff bytes survive only where equal and non-null */
532 sub $0xffff, %edx /* edx -> 0 only if it was 0xffff (all 4 double_words matched) */
533 jnz L(less4_double_words)
535 movdqu 16(%esi), %xmm2
536 pcmpeqd %xmm2, %xmm0 /* flag null double_words in xmm0 (xmm0 must be 0 beforehand) */
537 pcmpeqd 16(%edi), %xmm2 /* per-lane equality mask for double_words 4-7 */
538 psubb %xmm0, %xmm2 /* 0xff bytes survive only where equal and non-null */
540 sub $0xffff, %edx /* edx -> 0 only if it was 0xffff (all 4 double_words matched) */
541 jnz L(less4_double_words_16)
545 jmp L(continue_00_48) /* reached only when none of the jnz exits above was taken */
/* Run 3: double_words 0-11.  */
550 pcmpeqd %xmm2, %xmm0 /* flag null double_words in xmm0 (xmm0 must be 0 beforehand) */
551 pcmpeqd (%edi), %xmm2 /* per-lane equality mask for double_words 0-3 */
552 psubb %xmm0, %xmm2 /* 0xff bytes survive only where equal and non-null */
554 sub $0xffff, %edx /* edx -> 0 only if it was 0xffff (all 4 double_words matched) */
555 jnz L(less4_double_words)
557 movdqu 16(%esi), %xmm2
558 pcmpeqd %xmm2, %xmm0 /* flag null double_words in xmm0 (xmm0 must be 0 beforehand) */
559 pcmpeqd 16(%edi), %xmm2 /* per-lane equality mask for double_words 4-7 */
560 psubb %xmm0, %xmm2 /* 0xff bytes survive only where equal and non-null */
562 sub $0xffff, %edx /* edx -> 0 only if it was 0xffff (all 4 double_words matched) */
563 jnz L(less4_double_words_16)
565 movdqu 32(%esi), %xmm2
566 pcmpeqd %xmm2, %xmm0 /* flag null double_words in xmm0 (xmm0 must be 0 beforehand) */
567 pcmpeqd 32(%edi), %xmm2 /* per-lane equality mask for double_words 8-11 */
568 psubb %xmm0, %xmm2 /* 0xff bytes survive only where equal and non-null */
570 sub $0xffff, %edx /* edx -> 0 only if it was 0xffff (all 4 double_words matched) */
571 jnz L(less4_double_words_32)
575 jmp L(continue_00_48) /* reached only when none of the jnz exits above was taken */
/* Mirror image of the L(continue_00_48) runs: here the %edi side is loaded
   into xmm1 and the %esi side is used directly as the pcmpeqd memory operand.
   All runs finish at L(continue_48_00).  Each group of four double_words:
   flag nulls in xmm0, build an equality mask, subtract the null mask, then
   test edx against 0xffff and exit through L(less4_double_words*) unless the
   group matched.
   NOTE(review): the load of xmm1 for each offset-0 group and the instruction
   that fills edx are not shown next to these lines -- confirm.  */
/* Null probe of the 16 bytes at (%esi).  */
579 pcmpeqd (%esi), %xmm0 /* flag null double_words at (%esi) in xmm0 (xmm0 must be 0 beforehand) */
583 jnz L(less4_double_words1)
/* Double_words 4-15 (offsets 16, 32, 48).  */
600 movdqu 16(%edi), %xmm1
601 pcmpeqd %xmm1, %xmm0 /* flag null double_words in xmm0 (xmm0 must be 0 beforehand) */
602 pcmpeqd 16(%esi), %xmm1 /* per-lane equality mask for double_words 4-7 */
603 psubb %xmm0, %xmm1 /* 0xff bytes survive only where equal and non-null */
605 sub $0xffff, %edx /* edx -> 0 only if it was 0xffff (all 4 double_words matched) */
606 jnz L(less4_double_words_16)
608 movdqu 32(%edi), %xmm1
609 pcmpeqd %xmm1, %xmm0 /* flag null double_words in xmm0 (xmm0 must be 0 beforehand) */
610 pcmpeqd 32(%esi), %xmm1 /* per-lane equality mask for double_words 8-11 */
611 psubb %xmm0, %xmm1 /* 0xff bytes survive only where equal and non-null */
613 sub $0xffff, %edx /* edx -> 0 only if it was 0xffff (all 4 double_words matched) */
614 jnz L(less4_double_words_32)
616 movdqu 48(%edi), %xmm1
617 pcmpeqd %xmm1, %xmm0 /* flag null double_words in xmm0 (xmm0 must be 0 beforehand) */
618 pcmpeqd 48(%esi), %xmm1 /* per-lane equality mask for double_words 12-15 */
619 psubb %xmm0, %xmm1 /* 0xff bytes survive only where equal and non-null */
621 sub $0xffff, %edx /* edx -> 0 only if it was 0xffff (all 4 double_words matched) */
622 jnz L(less4_double_words_48)
626 jmp L(continue_48_00) /* reached only when none of the jnz exits above was taken */
/* Run of one group: double_words 0-3.  */
631 pcmpeqd %xmm1, %xmm0 /* flag null double_words in xmm0 (xmm0 must be 0 beforehand) */
632 pcmpeqd (%esi), %xmm1 /* per-lane equality mask for double_words 0-3 */
633 psubb %xmm0, %xmm1 /* 0xff bytes survive only where equal and non-null */
635 sub $0xffff, %edx /* edx -> 0 only if it was 0xffff (all 4 double_words matched) */
636 jnz L(less4_double_words)
640 jmp L(continue_48_00) /* reached only when the jnz exit above was not taken */
/* Run of two groups: double_words 0-7.  */
645 pcmpeqd %xmm1, %xmm0 /* flag null double_words in xmm0 (xmm0 must be 0 beforehand) */
646 pcmpeqd (%esi), %xmm1 /* per-lane equality mask for double_words 0-3 */
647 psubb %xmm0, %xmm1 /* 0xff bytes survive only where equal and non-null */
649 sub $0xffff, %edx /* edx -> 0 only if it was 0xffff (all 4 double_words matched) */
650 jnz L(less4_double_words)
652 movdqu 16(%edi), %xmm1
653 pcmpeqd %xmm1, %xmm0 /* flag null double_words in xmm0 (xmm0 must be 0 beforehand) */
654 pcmpeqd 16(%esi), %xmm1 /* per-lane equality mask for double_words 4-7 */
655 psubb %xmm0, %xmm1 /* 0xff bytes survive only where equal and non-null */
657 sub $0xffff, %edx /* edx -> 0 only if it was 0xffff (all 4 double_words matched) */
658 jnz L(less4_double_words_16)
662 jmp L(continue_48_00) /* reached only when none of the jnz exits above was taken */
/* Run of three groups: double_words 0-11.  */
667 pcmpeqd %xmm1, %xmm0 /* flag null double_words in xmm0 (xmm0 must be 0 beforehand) */
668 pcmpeqd (%esi), %xmm1 /* per-lane equality mask for double_words 0-3 */
669 psubb %xmm0, %xmm1 /* 0xff bytes survive only where equal and non-null */
671 sub $0xffff, %edx /* edx -> 0 only if it was 0xffff (all 4 double_words matched) */
672 jnz L(less4_double_words)
674 movdqu 16(%edi), %xmm1
675 pcmpeqd %xmm1, %xmm0 /* flag null double_words in xmm0 (xmm0 must be 0 beforehand) */
676 pcmpeqd 16(%esi), %xmm1 /* per-lane equality mask for double_words 4-7 */
677 psubb %xmm0, %xmm1 /* 0xff bytes survive only where equal and non-null */
679 sub $0xffff, %edx /* edx -> 0 only if it was 0xffff (all 4 double_words matched) */
680 jnz L(less4_double_words_16)
682 movdqu 32(%edi), %xmm1
683 pcmpeqd %xmm1, %xmm0 /* flag null double_words in xmm0 (xmm0 must be 0 beforehand) */
684 pcmpeqd 32(%esi), %xmm1 /* per-lane equality mask for double_words 8-11 */
685 psubb %xmm0, %xmm1 /* 0xff bytes survive only where equal and non-null */
687 sub $0xffff, %edx /* edx -> 0 only if it was 0xffff (all 4 double_words matched) */
688 jnz L(less4_double_words_32)
692 jmp L(continue_48_00) /* reached only when none of the jnz exits above was taken */
/* Compare runs in which both sides are held in registers before comparing.
   They finish at L(continue_48_48), L(continue_32_48) or L(continue_16_48).
   Each group of four double_words: flag nulls in xmm0, build an equality
   mask, subtract the null mask, then test edx against 0xffff and exit
   through L(less4_double_words*) unless the group matched.
   NOTE(review): the loads feeding the first group of every run and the
   instruction that fills edx are not shown next to these lines; the first
   group presumably covers offset 0, judging by its exit label -- confirm.  */
/* Run of one group, then L(continue_48_48).  */
698 pcmpeqd %xmm1, %xmm0 /* flag null double_words in xmm0 (xmm0 must be 0 beforehand) */
699 pcmpeqd %xmm2, %xmm1 /* per-lane equality mask for the 4 loaded double_words */
700 psubb %xmm0, %xmm1 /* 0xff bytes survive only where equal and non-null */
702 sub $0xffff, %edx /* edx -> 0 only if it was 0xffff (all 4 double_words matched) */
703 jnz L(less4_double_words)
707 jmp L(continue_48_48) /* reached only when the jnz exit above was not taken */
/* Run of two groups, then L(continue_48_48).  */
713 pcmpeqd %xmm1, %xmm0 /* flag null double_words in xmm0 (xmm0 must be 0 beforehand) */
714 pcmpeqd %xmm2, %xmm1 /* per-lane equality mask for the 4 loaded double_words */
715 psubb %xmm0, %xmm1 /* 0xff bytes survive only where equal and non-null */
717 sub $0xffff, %edx /* edx -> 0 only if it was 0xffff (all 4 double_words matched) */
718 jnz L(less4_double_words)
720 movdqu 16(%edi), %xmm3
721 movdqu 16(%esi), %xmm4
722 pcmpeqd %xmm3, %xmm0 /* flag null double_words in xmm0 (xmm0 must be 0 beforehand) */
723 pcmpeqd %xmm4, %xmm3 /* per-lane equality mask for double_words 4-7 */
724 psubb %xmm0, %xmm3 /* 0xff bytes survive only where equal and non-null */
726 sub $0xffff, %edx /* edx -> 0 only if it was 0xffff (all 4 double_words matched) */
727 jnz L(less4_double_words_16)
731 jmp L(continue_48_48) /* reached only when none of the jnz exits above was taken */
/* Run of three groups, then L(continue_48_48).  */
737 pcmpeqd %xmm1, %xmm0 /* flag null double_words in xmm0 (xmm0 must be 0 beforehand) */
738 pcmpeqd %xmm2, %xmm1 /* per-lane equality mask for the 4 loaded double_words */
739 psubb %xmm0, %xmm1 /* 0xff bytes survive only where equal and non-null */
741 sub $0xffff, %edx /* edx -> 0 only if it was 0xffff (all 4 double_words matched) */
742 jnz L(less4_double_words)
744 movdqu 16(%edi), %xmm3
745 movdqu 16(%esi), %xmm4
746 pcmpeqd %xmm3, %xmm0 /* flag null double_words in xmm0 (xmm0 must be 0 beforehand) */
747 pcmpeqd %xmm4, %xmm3 /* per-lane equality mask for double_words 4-7 */
748 psubb %xmm0, %xmm3 /* 0xff bytes survive only where equal and non-null */
750 sub $0xffff, %edx /* edx -> 0 only if it was 0xffff (all 4 double_words matched) */
751 jnz L(less4_double_words_16)
753 movdqu 32(%edi), %xmm1
754 movdqu 32(%esi), %xmm2
755 pcmpeqd %xmm1, %xmm0 /* flag null double_words in xmm0 (xmm0 must be 0 beforehand) */
756 pcmpeqd %xmm2, %xmm1 /* per-lane equality mask for double_words 8-11 */
757 psubb %xmm0, %xmm1 /* 0xff bytes survive only where equal and non-null */
759 sub $0xffff, %edx /* edx -> 0 only if it was 0xffff (all 4 double_words matched) */
760 jnz L(less4_double_words_32)
764 jmp L(continue_48_48) /* reached only when none of the jnz exits above was taken */
/* Run of two groups, then L(continue_32_48).  */
770 pcmpeqd %xmm1, %xmm0 /* flag null double_words in xmm0 (xmm0 must be 0 beforehand) */
771 pcmpeqd %xmm2, %xmm1 /* per-lane equality mask for the 4 loaded double_words */
772 psubb %xmm0, %xmm1 /* 0xff bytes survive only where equal and non-null */
774 sub $0xffff, %edx /* edx -> 0 only if it was 0xffff (all 4 double_words matched) */
775 jnz L(less4_double_words)
777 movdqu 16(%edi), %xmm1
778 movdqu 16(%esi), %xmm2
779 pcmpeqd %xmm1, %xmm0 /* flag null double_words in xmm0 (xmm0 must be 0 beforehand) */
780 pcmpeqd %xmm2, %xmm1 /* per-lane equality mask for double_words 4-7 */
781 psubb %xmm0, %xmm1 /* 0xff bytes survive only where equal and non-null */
783 sub $0xffff, %edx /* edx -> 0 only if it was 0xffff (all 4 double_words matched) */
784 jnz L(less4_double_words_16)
788 jmp L(continue_32_48) /* reached only when none of the jnz exits above was taken */
/* Run of one group, then L(continue_16_48).  */
794 pcmpeqd %xmm1, %xmm0 /* flag null double_words in xmm0 (xmm0 must be 0 beforehand) */
795 pcmpeqd %xmm2, %xmm1 /* per-lane equality mask for the 4 loaded double_words */
796 psubb %xmm0, %xmm1 /* 0xff bytes survive only where equal and non-null */
798 sub $0xffff, %edx /* edx -> 0 only if it was 0xffff (all 4 double_words matched) */
799 jnz L(less4_double_words)
803 jmp L(continue_16_48) /* reached only when the jnz exit above was not taken */
/* Run of one group, then L(continue_32_48).  */
809 pcmpeqd %xmm1, %xmm0 /* flag null double_words in xmm0 (xmm0 must be 0 beforehand) */
810 pcmpeqd %xmm2, %xmm1 /* per-lane equality mask for the 4 loaded double_words */
811 psubb %xmm0, %xmm1 /* 0xff bytes survive only where equal and non-null */
813 sub $0xffff, %edx /* edx -> 0 only if it was 0xffff (all 4 double_words matched) */
814 jnz L(less4_double_words)
818 jmp L(continue_32_48) /* reached only when the jnz exit above was not taken */
/* Exit paths taken by the jnz branches of the compare runs when a group of
   four double_words did not come out as "all equal, no null".  There is one
   family of labels per byte offset of the failing group (0, 16, 32, 48), and
   each family has the same shape: two jz branches at the entry label, then
   L(second_double_word*), L(next_two_double_words*) with one more jz, and
   L(fourth_double_word*).
   NOTE(review): the instructions that set the flags for these jz branches
   and the code under each label are not shown next to these lines; the
   per-label roles are inferred from the label names only -- confirm.  */
821 L(less4_double_words1): /* target of the null-probe branches (pcmpeqd (%edi)/(%esi), %xmm0) */
846 L(less4_double_words): /* failing group is at offset 0 */
849 jz L(next_two_double_words)
851 jz L(second_double_word)
858 L(second_double_word):
865 L(next_two_double_words):
867 jz L(fourth_double_word)
874 L(fourth_double_word):
881 L(less4_double_words_16): /* failing group is at offset 16 */
884 jz L(next_two_double_words_16)
886 jz L(second_double_word_16)
893 L(second_double_word_16):
900 L(next_two_double_words_16):
902 jz L(fourth_double_word_16)
909 L(fourth_double_word_16):
916 L(less4_double_words_32): /* failing group is at offset 32 */
919 jz L(next_two_double_words_32)
921 jz L(second_double_word_32)
928 L(second_double_word_32):
935 L(next_two_double_words_32):
937 jz L(fourth_double_word_32)
944 L(fourth_double_word_32):
951 L(less4_double_words_48): /* failing group is at offset 48 */
954 jz L(next_two_double_words_48)
956 jz L(second_double_word_48)
963 L(second_double_word_48):
970 L(next_two_double_words_48):
972 jz L(fourth_double_word_48)
979 L(fourth_double_word_48):