2 Copyright (C) 2011 Free Software Foundation, Inc.
3 Contributed by Intel Corporation.
4 This file is part of the GNU C Library.
6 The GNU C Library is free software; you can redistribute it and/or
7 modify it under the terms of the GNU Lesser General Public
8 License as published by the Free Software Foundation; either
9 version 2.1 of the License, or (at your option) any later version.
11 The GNU C Library is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 Lesser General Public License for more details.
16 You should have received a copy of the GNU Lesser General Public
17 License along with the GNU C Library; if not, see
18 <http://www.gnu.org/licenses/>. */
/* Unwind annotation that accompanies a 4-byte push of REG: grow the
   recorded CFA offset by 4, then record REG as saved at relative
   offset 0.  Emits no machine instruction of its own.  */
24 # define CFI_PUSH(REG) \
25 cfi_adjust_cfa_offset (4); \
26 cfi_rel_offset (REG, 0)
/* Unwind annotation that accompanies a 4-byte pop of REG: shrink the
   recorded CFA offset by 4, then mark REG as holding its original
   value again.  This is the inverse of a push annotation that adjusts
   the CFA offset by +4 and records REG with cfi_rel_offset.  The macro
   must end on a line without a trailing backslash; otherwise the next
   preprocessor line is spliced into this definition.  */
28 # define CFI_POP(REG) \
29 cfi_adjust_cfa_offset (-4); \
30 cfi_restore (REG)
/* PUSH/POP: push or pop one 32-bit register and emit the matching
   unwind annotation in the same statement sequence.  */
32 # define PUSH(REG) pushl REG; CFI_PUSH (REG)
33 # define POP(REG) popl REG; CFI_POP (REG)
/* ENTRANCE saves %esi then %edi on the stack.  RETURN pops them in the
   reverse order and executes `ret'.  The two CFI_PUSH annotations after
   the `ret' emit no instructions; they put the unwind state back to
   "both registers pushed" for whatever code is assembled after the
   RETURN expansion.  */
35 # define ENTRANCE PUSH(%esi); PUSH(%edi)
36 # define RETURN POP(%edi); POP(%esi); ret; CFI_PUSH(%esi); CFI_PUSH(%edi);
41 /* Note: wcscmp uses signed comparison, not unsigned as in the strcmp function. */
46 * This implementation uses SSE to compare up to 16 bytes at a time.
81 pxor %xmm0, %xmm0 /* clear %xmm0 for null char checks */
84 and $63, %eax /* esi alignment in cache line */
85 and $63, %edx /* edi alignment in cache line */
131 movdqu 16(%edi), %xmm1
132 movdqu 16(%esi), %xmm2
133 pcmpeqd %xmm1, %xmm0 /* Any null double_word? */
134 pcmpeqd %xmm2, %xmm1 /* compare first 4 double_words for equality */
135 psubb %xmm0, %xmm1 /* packed sub of comparison results*/
137 sub $0xffff, %edx /* if first 4 double_words are same, edx == 0xffff */
138 jnz L(less4_double_words_16)
140 movdqu 32(%edi), %xmm1
141 movdqu 32(%esi), %xmm2
142 pcmpeqd %xmm1, %xmm0 /* Any null double_word? */
143 pcmpeqd %xmm2, %xmm1 /* compare first 4 double_words for equality */
144 psubb %xmm0, %xmm1 /* packed sub of comparison results*/
146 sub $0xffff, %edx /* if first 4 double_words are same, edx == 0xffff */
147 jnz L(less4_double_words_32)
149 movdqu 48(%edi), %xmm1
150 movdqu 48(%esi), %xmm2
151 pcmpeqd %xmm1, %xmm0 /* Any null double_word? */
152 pcmpeqd %xmm2, %xmm1 /* compare first 4 double_words for equality */
153 psubb %xmm0, %xmm1 /* packed sub of comparison results*/
155 sub $0xffff, %edx /* if first 4 double_words are same, edx == 0xffff */
156 jnz L(less4_double_words_48)
160 jmp L(continue_48_48)
198 movdqu 16(%edi), %xmm1
199 movdqu 16(%esi), %xmm2
200 pcmpeqd %xmm1, %xmm0 /* Any null double_word? */
201 pcmpeqd %xmm2, %xmm1 /* compare first 4 double_words for equality */
202 psubb %xmm0, %xmm1 /* packed sub of comparison results*/
204 sub $0xffff, %edx /* if first 4 double_words are same, edx == 0xffff */
205 jnz L(less4_double_words_16)
207 movdqu 32(%edi), %xmm1
208 movdqu 32(%esi), %xmm2
209 pcmpeqd %xmm1, %xmm0 /* Any null double_word? */
210 pcmpeqd %xmm2, %xmm1 /* compare first 4 double_words for equality */
211 psubb %xmm0, %xmm1 /* packed sub of comparison results*/
213 sub $0xffff, %edx /* if first 4 double_words are same, edx == 0xffff */
214 jnz L(less4_double_words_32)
257 pcmpeqd (%edi), %xmm0
261 jnz L(less4_double_words1)
278 movdqu 16(%esi), %xmm2
279 pcmpeqd %xmm2, %xmm0 /* Any null double_word? */
280 pcmpeqd 16(%edi), %xmm2 /* compare first 4 double_words for equality */
281 psubb %xmm0, %xmm2 /* packed sub of comparison results*/
283 sub $0xffff, %edx /* if first 4 double_words are same, edx == 0xffff */
284 jnz L(less4_double_words_16)
286 movdqu 32(%esi), %xmm2
287 pcmpeqd %xmm2, %xmm0 /* Any null double_word? */
288 pcmpeqd 32(%edi), %xmm2 /* compare first 4 double_words for equality */
289 psubb %xmm0, %xmm2 /* packed sub of comparison results*/
291 sub $0xffff, %edx /* if first 4 double_words are same, edx == 0xffff */
292 jnz L(less4_double_words_32)
294 movdqu 48(%esi), %xmm2
295 pcmpeqd %xmm2, %xmm0 /* Any null double_word? */
296 pcmpeqd 48(%edi), %xmm2 /* compare first 4 double_words for equality */
297 psubb %xmm0, %xmm2 /* packed sub of comparison results*/
299 sub $0xffff, %edx /* if first 4 double_words are same, edx == 0xffff */
300 jnz L(less4_double_words_48)
304 jmp L(continue_00_48)
367 movdqu 32(%edi), %xmm1
368 movdqu 32(%esi), %xmm2
369 pcmpeqd %xmm1, %xmm0 /* Any null double_word? */
370 pcmpeqd %xmm2, %xmm1 /* compare first 4 double_words for equality */
371 psubb %xmm0, %xmm1 /* packed sub of comparison results*/
373 sub $0xffff, %edx /* if first 4 double_words are same, edx == 0xffff */
374 jnz L(less4_double_words_32)
376 movdqu 48(%edi), %xmm1
377 movdqu 48(%esi), %xmm2
378 pcmpeqd %xmm1, %xmm0 /* Any null double_word? */
379 pcmpeqd %xmm2, %xmm1 /* compare first 4 double_words for equality */
380 psubb %xmm0, %xmm1 /* packed sub of comparison results */
382 sub $0xffff, %edx /* if first 4 double_words are same, edx == 0xffff */
383 jnz L(less4_double_words_48)
387 jmp L(continue_32_48)
426 movdqu 16(%edi), %xmm1
427 movdqu 16(%esi), %xmm2
428 pcmpeqd %xmm1, %xmm0 /* Any null double_word? */
429 pcmpeqd %xmm2, %xmm1 /* compare first 4 double_words for equality */
430 psubb %xmm0, %xmm1 /* packed sub of comparison results*/
432 sub $0xffff, %edx /* if first 4 double_words are same, edx == 0xffff */
433 jnz L(less4_double_words_16)
459 movdqu 48(%edi), %xmm1
460 movdqu 48(%esi), %xmm2
461 pcmpeqd %xmm1, %xmm0 /* Any null double_word? */
462 pcmpeqd %xmm2, %xmm1 /* compare first 4 double_words for equality */
463 psubb %xmm0, %xmm1 /* packed sub of comparison results*/
465 sub $0xffff, %edx /* if first 4 double_words are same, edx == 0xffff */
466 jnz L(less4_double_words_48)
470 jmp L(continue_16_48)
475 pcmpeqd %xmm1, %xmm0 /* Any null double_word? */
476 pcmpeqd (%esi), %xmm1 /* compare first 4 double_words for equality */
477 psubb %xmm0, %xmm1 /* packed sub of comparison results*/
479 sub $0xffff, %edx /* if first 4 double_words are same, edx == 0xffff */
480 jnz L(less4_double_words)
482 movdqa 16(%edi), %xmm3
483 pcmpeqd %xmm3, %xmm0 /* Any null double_word? */
484 pcmpeqd 16(%esi), %xmm3 /* compare first 4 double_words for equality */
485 psubb %xmm0, %xmm3 /* packed sub of comparison results*/
487 sub $0xffff, %edx /* if first 4 double_words are same, edx == 0xffff */
488 jnz L(less4_double_words_16)
490 movdqa 32(%edi), %xmm5
491 pcmpeqd %xmm5, %xmm0 /* Any null double_word? */
492 pcmpeqd 32(%esi), %xmm5 /* compare first 4 double_words for equality */
493 psubb %xmm0, %xmm5 /* packed sub of comparison results*/
495 sub $0xffff, %edx /* if first 4 double_words are same, edx == 0xffff */
496 jnz L(less4_double_words_32)
498 movdqa 48(%edi), %xmm1
499 pcmpeqd %xmm1, %xmm0 /* Any null double_word? */
500 pcmpeqd 48(%esi), %xmm1 /* compare first 4 double_words for equality */
501 psubb %xmm0, %xmm1 /* packed sub of comparison results*/
503 sub $0xffff, %edx /* if first 4 double_words are same, edx == 0xffff */
504 jnz L(less4_double_words_48)
508 jmp L(continue_00_00)
513 pcmpeqd %xmm2, %xmm0 /* Any null double_word? */
514 pcmpeqd (%edi), %xmm2 /* compare first 4 double_words for equality */
515 psubb %xmm0, %xmm2 /* packed sub of comparison results*/
517 sub $0xffff, %edx /* if first 4 double_words are same, edx == 0xffff */
518 jnz L(less4_double_words)
522 jmp L(continue_00_48)
527 pcmpeqd %xmm2, %xmm0 /* Any null double_word? */
528 pcmpeqd (%edi), %xmm2 /* compare first 4 double_words for equality */
529 psubb %xmm0, %xmm2 /* packed sub of comparison results*/
531 sub $0xffff, %edx /* if first 4 double_words are same, edx == 0xffff */
532 jnz L(less4_double_words)
534 movdqu 16(%esi), %xmm2
535 pcmpeqd %xmm2, %xmm0 /* Any null double_word? */
536 pcmpeqd 16(%edi), %xmm2 /* compare first 4 double_words for equality */
537 psubb %xmm0, %xmm2 /* packed sub of comparison results*/
539 sub $0xffff, %edx /* if first 4 double_words are same, edx == 0xffff */
540 jnz L(less4_double_words_16)
544 jmp L(continue_00_48)
549 pcmpeqd %xmm2, %xmm0 /* Any null double_word? */
550 pcmpeqd (%edi), %xmm2 /* compare first 4 double_words for equality */
551 psubb %xmm0, %xmm2 /* packed sub of comparison results*/
553 sub $0xffff, %edx /* if first 4 double_words are same, edx == 0xffff */
554 jnz L(less4_double_words)
556 movdqu 16(%esi), %xmm2
557 pcmpeqd %xmm2, %xmm0 /* Any null double_word? */
558 pcmpeqd 16(%edi), %xmm2 /* compare first 4 double_words for equality */
559 psubb %xmm0, %xmm2 /* packed sub of comparison results*/
561 sub $0xffff, %edx /* if first 4 double_words are same, edx == 0xffff */
562 jnz L(less4_double_words_16)
564 movdqu 32(%esi), %xmm2
565 pcmpeqd %xmm2, %xmm0 /* Any null double_word? */
566 pcmpeqd 32(%edi), %xmm2 /* compare first 4 double_words for equality */
567 psubb %xmm0, %xmm2 /* packed sub of comparison results*/
569 sub $0xffff, %edx /* if first 4 double_words are same, edx == 0xffff */
570 jnz L(less4_double_words_32)
574 jmp L(continue_00_48)
578 pcmpeqd (%esi), %xmm0
582 jnz L(less4_double_words1)
599 movdqu 16(%edi), %xmm1
600 pcmpeqd %xmm1, %xmm0 /* Any null double_word? */
601 pcmpeqd 16(%esi), %xmm1 /* compare first 4 double_words for equality */
602 psubb %xmm0, %xmm1 /* packed sub of comparison results*/
604 sub $0xffff, %edx /* if first 4 double_words are same, edx == 0xffff */
605 jnz L(less4_double_words_16)
607 movdqu 32(%edi), %xmm1
608 pcmpeqd %xmm1, %xmm0 /* Any null double_word? */
609 pcmpeqd 32(%esi), %xmm1 /* compare first 4 double_words for equality */
610 psubb %xmm0, %xmm1 /* packed sub of comparison results*/
612 sub $0xffff, %edx /* if first 4 double_words are same, edx == 0xffff */
613 jnz L(less4_double_words_32)
615 movdqu 48(%edi), %xmm1
616 pcmpeqd %xmm1, %xmm0 /* Any null double_word? */
617 pcmpeqd 48(%esi), %xmm1 /* compare first 4 double_words for equality */
618 psubb %xmm0, %xmm1 /* packed sub of comparison results*/
620 sub $0xffff, %edx /* if first 4 double_words are same, edx == 0xffff */
621 jnz L(less4_double_words_48)
625 jmp L(continue_48_00)
630 pcmpeqd %xmm1, %xmm0 /* Any null double_word? */
631 pcmpeqd (%esi), %xmm1 /* compare first 4 double_words for equality */
632 psubb %xmm0, %xmm1 /* packed sub of comparison results*/
634 sub $0xffff, %edx /* if first 4 double_words are same, edx == 0xffff */
635 jnz L(less4_double_words)
639 jmp L(continue_48_00)
644 pcmpeqd %xmm1, %xmm0 /* Any null double_word? */
645 pcmpeqd (%esi), %xmm1 /* compare first 4 double_words for equality */
646 psubb %xmm0, %xmm1 /* packed sub of comparison results*/
648 sub $0xffff, %edx /* if first 4 double_words are same, edx == 0xffff */
649 jnz L(less4_double_words)
651 movdqu 16(%edi), %xmm1
652 pcmpeqd %xmm1, %xmm0 /* Any null double_word? */
653 pcmpeqd 16(%esi), %xmm1 /* compare first 4 double_words for equality */
654 psubb %xmm0, %xmm1 /* packed sub of comparison results*/
656 sub $0xffff, %edx /* if first 4 double_words are same, edx == 0xffff */
657 jnz L(less4_double_words_16)
661 jmp L(continue_48_00)
666 pcmpeqd %xmm1, %xmm0 /* Any null double_word? */
667 pcmpeqd (%esi), %xmm1 /* compare first 4 double_words for equality */
668 psubb %xmm0, %xmm1 /* packed sub of comparison results*/
670 sub $0xffff, %edx /* if first 4 double_words are same, edx == 0xffff */
671 jnz L(less4_double_words)
673 movdqu 16(%edi), %xmm1
674 pcmpeqd %xmm1, %xmm0 /* Any null double_word? */
675 pcmpeqd 16(%esi), %xmm1 /* compare first 4 double_words for equality */
676 psubb %xmm0, %xmm1 /* packed sub of comparison results*/
678 sub $0xffff, %edx /* if first 4 double_words are same, edx == 0xffff */
679 jnz L(less4_double_words_16)
681 movdqu 32(%edi), %xmm1
682 pcmpeqd %xmm1, %xmm0 /* Any null double_word? */
683 pcmpeqd 32(%esi), %xmm1 /* compare first 4 double_words for equality */
684 psubb %xmm0, %xmm1 /* packed sub of comparison results*/
686 sub $0xffff, %edx /* if first 4 double_words are same, edx == 0xffff */
687 jnz L(less4_double_words_32)
691 jmp L(continue_48_00)
697 pcmpeqd %xmm1, %xmm0 /* Any null double_word? */
698 pcmpeqd %xmm2, %xmm1 /* compare first 4 double_words for equality */
699 psubb %xmm0, %xmm1 /* packed sub of comparison results*/
701 sub $0xffff, %edx /* if first 4 double_words are same, edx == 0xffff */
702 jnz L(less4_double_words)
706 jmp L(continue_48_48)
712 pcmpeqd %xmm1, %xmm0 /* Any null double_word? */
713 pcmpeqd %xmm2, %xmm1 /* compare first 4 double_words for equality */
714 psubb %xmm0, %xmm1 /* packed sub of comparison results*/
716 sub $0xffff, %edx /* if first 4 double_words are same, edx == 0xffff */
717 jnz L(less4_double_words)
719 movdqu 16(%edi), %xmm3
720 movdqu 16(%esi), %xmm4
721 pcmpeqd %xmm3, %xmm0 /* Any null double_word? */
722 pcmpeqd %xmm4, %xmm3 /* compare first 4 double_words for equality */
723 psubb %xmm0, %xmm3 /* packed sub of comparison results*/
725 sub $0xffff, %edx /* if first 4 double_words are same, edx == 0xffff */
726 jnz L(less4_double_words_16)
730 jmp L(continue_48_48)
736 pcmpeqd %xmm1, %xmm0 /* Any null double_word? */
737 pcmpeqd %xmm2, %xmm1 /* compare first 4 double_words for equality */
738 psubb %xmm0, %xmm1 /* packed sub of comparison results*/
740 sub $0xffff, %edx /* if first 4 double_words are same, edx == 0xffff */
741 jnz L(less4_double_words)
743 movdqu 16(%edi), %xmm3
744 movdqu 16(%esi), %xmm4
745 pcmpeqd %xmm3, %xmm0 /* Any null double_word? */
746 pcmpeqd %xmm4, %xmm3 /* compare first 4 double_words for equality */
747 psubb %xmm0, %xmm3 /* packed sub of comparison results*/
749 sub $0xffff, %edx /* if first 4 double_words are same, edx == 0xffff */
750 jnz L(less4_double_words_16)
752 movdqu 32(%edi), %xmm1
753 movdqu 32(%esi), %xmm2
754 pcmpeqd %xmm1, %xmm0 /* Any null double_word? */
755 pcmpeqd %xmm2, %xmm1 /* compare first 4 double_words for equality */
756 psubb %xmm0, %xmm1 /* packed sub of comparison results*/
758 sub $0xffff, %edx /* if first 4 double_words are same, edx == 0xffff */
759 jnz L(less4_double_words_32)
763 jmp L(continue_48_48)
769 pcmpeqd %xmm1, %xmm0 /* Any null double_word? */
770 pcmpeqd %xmm2, %xmm1 /* compare first 4 double_words for equality */
771 psubb %xmm0, %xmm1 /* packed sub of comparison results*/
773 sub $0xffff, %edx /* if first 4 double_words are same, edx == 0xffff */
774 jnz L(less4_double_words)
776 movdqu 16(%edi), %xmm1
777 movdqu 16(%esi), %xmm2
778 pcmpeqd %xmm1, %xmm0 /* Any null double_word? */
779 pcmpeqd %xmm2, %xmm1 /* compare first 4 double_words for equality */
780 psubb %xmm0, %xmm1 /* packed sub of comparison results*/
782 sub $0xffff, %edx /* if first 4 double_words are same, edx == 0xffff */
783 jnz L(less4_double_words_16)
787 jmp L(continue_32_48)
793 pcmpeqd %xmm1, %xmm0 /* Any null double_word? */
794 pcmpeqd %xmm2, %xmm1 /* compare first 4 double_words for equality */
795 psubb %xmm0, %xmm1 /* packed sub of comparison results*/
797 sub $0xffff, %edx /* if first 4 double_words are same, edx == 0xffff */
798 jnz L(less4_double_words)
802 jmp L(continue_16_48)
808 pcmpeqd %xmm1, %xmm0 /* Any null double_word? */
809 pcmpeqd %xmm2, %xmm1 /* compare first 4 double_words for equality */
810 psubb %xmm0, %xmm1 /* packed sub of comparison results*/
812 sub $0xffff, %edx /* if first 4 double_words are same, edx == 0xffff */
813 jnz L(less4_double_words)
817 jmp L(continue_32_48)
820 L(less4_double_words1):
845 L(less4_double_words):
848 jz L(next_two_double_words)
850 jz L(second_double_word)
857 L(second_double_word):
864 L(next_two_double_words):
866 jz L(fourth_double_word)
873 L(fourth_double_word):
880 L(less4_double_words_16):
883 jz L(next_two_double_words_16)
885 jz L(second_double_word_16)
892 L(second_double_word_16):
899 L(next_two_double_words_16):
901 jz L(fourth_double_word_16)
908 L(fourth_double_word_16):
915 L(less4_double_words_32):
918 jz L(next_two_double_words_32)
920 jz L(second_double_word_32)
927 L(second_double_word_32):
934 L(next_two_double_words_32):
936 jz L(fourth_double_word_32)
943 L(fourth_double_word_32):
950 L(less4_double_words_48):
953 jz L(next_two_double_words_48)
955 jz L(second_double_word_48)
962 L(second_double_word_48):
969 L(next_two_double_words_48):
971 jz L(fourth_double_word_48)
978 L(fourth_double_word_48):