1 /* Optimized wcscmp for x86-64 with SSE2.
2 Copyright (C) 2011-2015 Free Software Foundation, Inc.
3 Contributed by Intel Corporation.
4 This file is part of the GNU C Library.
6 The GNU C Library is free software; you can redistribute it and/or
7 modify it under the terms of the GNU Lesser General Public
8 License as published by the Free Software Foundation; either
9 version 2.1 of the License, or (at your option) any later version.
11 The GNU C Library is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 Lesser General Public License for more details.
16 You should have received a copy of the GNU Lesser General Public
17 License along with the GNU C Library; if not, see
18 <http://www.gnu.org/licenses/>. */
22 /* Note: wcscmp uses signed comparison, not unsigned as in the strcmp function. */
27 * This implementation uses SSE2 to compare up to 16 bytes (four wide characters) at a time.
/* Entry setup (only partly present in this excerpt): zero xmm0 and reduce
   eax/edx to offsets within a 64-byte cache line.  The instructions that
   load eax and edx are not shown here.  */
31 pxor %xmm0, %xmm0 /* xmm0 = 0; later compared against data to detect null wide chars */
34 and $63, %eax /* rsi alignment in cache line (eax = eax mod 64) */
35 and $63, %edx /* rdi alignment in cache line (edx = edx mod 64) */
/* Unaligned compare of the 16-byte groups at offsets 16, 32 and 48 of
   rdi/rsi.  A group that fails the check branches to the matching
   less4_double_words_N handler; otherwise control goes to continue_48_48.
   edx is written by lines missing from this excerpt (presumably the byte
   mask of the psubb result; TODO confirm).  */
81 movdqu 16(%rdi), %xmm1
82 movdqu 16(%rsi), %xmm2
83 pcmpeqd %xmm1, %xmm0 /* Any null double_word? */
84 pcmpeqd %xmm2, %xmm1 /* compare the 4 double_words at offset 16 for equality */
85 psubb %xmm0, %xmm1 /* bytewise eq - null: 0xff only where equal and not null */
87 sub $0xffff, %edx /* ZF iff edx == 0xffff: the 4 double_words at offset 16 are same */
88 jnz L(less4_double_words_16)
90 movdqu 32(%rdi), %xmm1
91 movdqu 32(%rsi), %xmm2
92 pcmpeqd %xmm1, %xmm0 /* Any null double_word? */
93 pcmpeqd %xmm2, %xmm1 /* compare the 4 double_words at offset 32 for equality */
94 psubb %xmm0, %xmm1 /* bytewise eq - null: 0xff only where equal and not null */
96 sub $0xffff, %edx /* ZF iff edx == 0xffff: the 4 double_words at offset 32 are same */
97 jnz L(less4_double_words_32)
99 movdqu 48(%rdi), %xmm1
100 movdqu 48(%rsi), %xmm2
101 pcmpeqd %xmm1, %xmm0 /* Any null double_word? */
102 pcmpeqd %xmm2, %xmm1 /* compare the 4 double_words at offset 48 for equality */
103 psubb %xmm0, %xmm1 /* bytewise eq - null: 0xff only where equal and not null */
105 sub $0xffff, %edx /* ZF iff edx == 0xffff: the 4 double_words at offset 48 are same */
106 jnz L(less4_double_words_48)
110 jmp L(continue_48_48)
/* Unaligned compare of the 16-byte groups at offsets 16 and 32 of rdi/rsi;
   a failing group branches to its less4_double_words_N handler.  What
   follows the second check is not part of this excerpt.  edx is written by
   lines missing from this excerpt (presumably the byte mask of the psubb
   result; TODO confirm).  */
148 movdqu 16(%rdi), %xmm1
149 movdqu 16(%rsi), %xmm2
150 pcmpeqd %xmm1, %xmm0 /* Any null double_word? */
151 pcmpeqd %xmm2, %xmm1 /* compare the 4 double_words at offset 16 for equality */
152 psubb %xmm0, %xmm1 /* bytewise eq - null: 0xff only where equal and not null */
154 sub $0xffff, %edx /* ZF iff edx == 0xffff: the 4 double_words at offset 16 are same */
155 jnz L(less4_double_words_16)
157 movdqu 32(%rdi), %xmm1
158 movdqu 32(%rsi), %xmm2
159 pcmpeqd %xmm1, %xmm0 /* Any null double_word? */
160 pcmpeqd %xmm2, %xmm1 /* compare the 4 double_words at offset 32 for equality */
161 psubb %xmm0, %xmm1 /* bytewise eq - null: 0xff only where equal and not null */
163 sub $0xffff, %edx /* ZF iff edx == 0xffff: the 4 double_words at offset 32 are same */
164 jnz L(less4_double_words_32)
/* Path that reads rdi through pcmpeqd memory operands (legacy SSE memory
   operands fault unless 16-byte aligned, so rdi must be aligned here) and
   rsi through unaligned loads.  Checks offsets 16, 32 and 48, then goes to
   continue_00_48.  edx and the flags tested by the first jnz are written by
   lines missing from this excerpt (TODO confirm against the full file).  */
207 pcmpeqd (%rdi), %xmm0 /* compare xmm0 (expected zero) with the 4 double_words at rdi: null scan */
211 jnz L(less4_double_words1)
228 movdqu 16(%rsi), %xmm2
229 pcmpeqd %xmm2, %xmm0 /* Any null double_word? */
230 pcmpeqd 16(%rdi), %xmm2 /* compare the 4 double_words at offset 16 for equality */
231 psubb %xmm0, %xmm2 /* bytewise eq - null: 0xff only where equal and not null */
233 sub $0xffff, %edx /* ZF iff edx == 0xffff: the 4 double_words at offset 16 are same */
234 jnz L(less4_double_words_16)
236 movdqu 32(%rsi), %xmm2
237 pcmpeqd %xmm2, %xmm0 /* Any null double_word? */
238 pcmpeqd 32(%rdi), %xmm2 /* compare the 4 double_words at offset 32 for equality */
239 psubb %xmm0, %xmm2 /* bytewise eq - null: 0xff only where equal and not null */
241 sub $0xffff, %edx /* ZF iff edx == 0xffff: the 4 double_words at offset 32 are same */
242 jnz L(less4_double_words_32)
244 movdqu 48(%rsi), %xmm2
245 pcmpeqd %xmm2, %xmm0 /* Any null double_word? */
246 pcmpeqd 48(%rdi), %xmm2 /* compare the 4 double_words at offset 48 for equality */
247 psubb %xmm0, %xmm2 /* bytewise eq - null: 0xff only where equal and not null */
249 sub $0xffff, %edx /* ZF iff edx == 0xffff: the 4 double_words at offset 48 are same */
250 jnz L(less4_double_words_48)
254 jmp L(continue_00_48)
/* Unaligned compare of the 16-byte groups at offsets 32 and 48 of rdi/rsi;
   a failing group branches to its less4_double_words_N handler, otherwise
   control goes to continue_32_48.  edx is written by lines missing from
   this excerpt (presumably the byte mask of the psubb result; TODO
   confirm).  */
317 movdqu 32(%rdi), %xmm1
318 movdqu 32(%rsi), %xmm2
319 pcmpeqd %xmm1, %xmm0 /* Any null double_word? */
320 pcmpeqd %xmm2, %xmm1 /* compare the 4 double_words at offset 32 for equality */
321 psubb %xmm0, %xmm1 /* bytewise eq - null: 0xff only where equal and not null */
323 sub $0xffff, %edx /* ZF iff edx == 0xffff: the 4 double_words at offset 32 are same */
324 jnz L(less4_double_words_32)
326 movdqu 48(%rdi), %xmm1
327 movdqu 48(%rsi), %xmm2
328 pcmpeqd %xmm1, %xmm0 /* Any null double_word? */
329 pcmpeqd %xmm2, %xmm1 /* compare the 4 double_words at offset 48 for equality */
330 psubb %xmm0, %xmm1 /* bytewise eq - null: 0xff only where equal and not null */
332 sub $0xffff, %edx /* ZF iff edx == 0xffff: the 4 double_words at offset 48 are same */
333 jnz L(less4_double_words_48)
337 jmp L(continue_32_48)
/* Unaligned compare of the 16-byte groups at offsets 16 and 48 of rdi/rsi
   (whatever sits between the two checks is not part of this excerpt); a
   failing group branches to its less4_double_words_N handler, otherwise
   control goes to continue_16_48.  edx is written by lines missing from
   this excerpt (presumably the byte mask of the psubb result; TODO
   confirm).  */
376 movdqu 16(%rdi), %xmm1
377 movdqu 16(%rsi), %xmm2
378 pcmpeqd %xmm1, %xmm0 /* Any null double_word? */
379 pcmpeqd %xmm2, %xmm1 /* compare the 4 double_words at offset 16 for equality */
380 psubb %xmm0, %xmm1 /* bytewise eq - null: 0xff only where equal and not null */
382 sub $0xffff, %edx /* ZF iff edx == 0xffff: the 4 double_words at offset 16 are same */
383 jnz L(less4_double_words_16)
409 movdqu 48(%rdi), %xmm1
410 movdqu 48(%rsi), %xmm2
411 pcmpeqd %xmm1, %xmm0 /* Any null double_word? */
412 pcmpeqd %xmm2, %xmm1 /* compare the 4 double_words at offset 48 for equality */
413 psubb %xmm0, %xmm1 /* bytewise eq - null: 0xff only where equal and not null */
415 sub $0xffff, %edx /* ZF iff edx == 0xffff: the 4 double_words at offset 48 are same */
416 jnz L(less4_double_words_48)
420 jmp L(continue_16_48)
/* Fully aligned path: rdi is read with movdqa and rsi through pcmpeqd
   memory operands, both of which fault unless the address is 16-byte
   aligned.  Checks the groups at offsets 0, 16, 32 and 48, then goes to
   continue_00_00.  The load of xmm1 for the first group and every write to
   edx are on lines missing from this excerpt (TODO confirm).  */
425 pcmpeqd %xmm1, %xmm0 /* Any null double_word? */
426 pcmpeqd (%rsi), %xmm1 /* compare first 4 double_words for equality */
427 psubb %xmm0, %xmm1 /* bytewise eq - null: 0xff only where equal and not null */
429 sub $0xffff, %edx /* ZF iff edx == 0xffff: the first 4 double_words are same */
430 jnz L(less4_double_words)
432 movdqa 16(%rdi), %xmm3
433 pcmpeqd %xmm3, %xmm0 /* Any null double_word? */
434 pcmpeqd 16(%rsi), %xmm3 /* compare the 4 double_words at offset 16 for equality */
435 psubb %xmm0, %xmm3 /* bytewise eq - null: 0xff only where equal and not null */
437 sub $0xffff, %edx /* ZF iff edx == 0xffff: the 4 double_words at offset 16 are same */
438 jnz L(less4_double_words_16)
440 movdqa 32(%rdi), %xmm5
441 pcmpeqd %xmm5, %xmm0 /* Any null double_word? */
442 pcmpeqd 32(%rsi), %xmm5 /* compare the 4 double_words at offset 32 for equality */
443 psubb %xmm0, %xmm5 /* bytewise eq - null: 0xff only where equal and not null */
445 sub $0xffff, %edx /* ZF iff edx == 0xffff: the 4 double_words at offset 32 are same */
446 jnz L(less4_double_words_32)
448 movdqa 48(%rdi), %xmm1
449 pcmpeqd %xmm1, %xmm0 /* Any null double_word? */
450 pcmpeqd 48(%rsi), %xmm1 /* compare the 4 double_words at offset 48 for equality */
451 psubb %xmm0, %xmm1 /* bytewise eq - null: 0xff only where equal and not null */
453 sub $0xffff, %edx /* ZF iff edx == 0xffff: the 4 double_words at offset 48 are same */
454 jnz L(less4_double_words_48)
458 jmp L(continue_00_00)
/* Checks one 16-byte group at offset 0 (rdi via an aligned memory operand,
   rsi data already in xmm2), then goes to continue_00_48.  The load of xmm2
   and the write to edx are on lines missing from this excerpt (TODO
   confirm).  */
463 pcmpeqd %xmm2, %xmm0 /* Any null double_word? */
464 pcmpeqd (%rdi), %xmm2 /* compare first 4 double_words for equality */
465 psubb %xmm0, %xmm2 /* bytewise eq - null: 0xff only where equal and not null */
467 sub $0xffff, %edx /* ZF iff edx == 0xffff: the first 4 double_words are same */
468 jnz L(less4_double_words)
472 jmp L(continue_00_48)
/* Checks the 16-byte groups at offsets 0 and 16 (rdi via aligned memory
   operands, rsi via xmm2), then goes to continue_00_48.  The first load of
   xmm2 and the writes to edx are on lines missing from this excerpt (TODO
   confirm).  */
477 pcmpeqd %xmm2, %xmm0 /* Any null double_word? */
478 pcmpeqd (%rdi), %xmm2 /* compare first 4 double_words for equality */
479 psubb %xmm0, %xmm2 /* bytewise eq - null: 0xff only where equal and not null */
481 sub $0xffff, %edx /* ZF iff edx == 0xffff: the first 4 double_words are same */
482 jnz L(less4_double_words)
484 movdqu 16(%rsi), %xmm2
485 pcmpeqd %xmm2, %xmm0 /* Any null double_word? */
486 pcmpeqd 16(%rdi), %xmm2 /* compare the 4 double_words at offset 16 for equality */
487 psubb %xmm0, %xmm2 /* bytewise eq - null: 0xff only where equal and not null */
489 sub $0xffff, %edx /* ZF iff edx == 0xffff: the 4 double_words at offset 16 are same */
490 jnz L(less4_double_words_16)
494 jmp L(continue_00_48)
/* Checks the 16-byte groups at offsets 0, 16 and 32 (rdi via aligned memory
   operands, rsi via xmm2), then goes to continue_00_48.  The first load of
   xmm2 and the writes to edx are on lines missing from this excerpt (TODO
   confirm).  */
499 pcmpeqd %xmm2, %xmm0 /* Any null double_word? */
500 pcmpeqd (%rdi), %xmm2 /* compare first 4 double_words for equality */
501 psubb %xmm0, %xmm2 /* bytewise eq - null: 0xff only where equal and not null */
503 sub $0xffff, %edx /* ZF iff edx == 0xffff: the first 4 double_words are same */
504 jnz L(less4_double_words)
506 movdqu 16(%rsi), %xmm2
507 pcmpeqd %xmm2, %xmm0 /* Any null double_word? */
508 pcmpeqd 16(%rdi), %xmm2 /* compare the 4 double_words at offset 16 for equality */
509 psubb %xmm0, %xmm2 /* bytewise eq - null: 0xff only where equal and not null */
511 sub $0xffff, %edx /* ZF iff edx == 0xffff: the 4 double_words at offset 16 are same */
512 jnz L(less4_double_words_16)
514 movdqu 32(%rsi), %xmm2
515 pcmpeqd %xmm2, %xmm0 /* Any null double_word? */
516 pcmpeqd 32(%rdi), %xmm2 /* compare the 4 double_words at offset 32 for equality */
517 psubb %xmm0, %xmm2 /* bytewise eq - null: 0xff only where equal and not null */
519 sub $0xffff, %edx /* ZF iff edx == 0xffff: the 4 double_words at offset 32 are same */
520 jnz L(less4_double_words_32)
524 jmp L(continue_00_48)
/* Mirror of the rdi-aligned path: rsi is read through pcmpeqd memory
   operands (which fault unless 16-byte aligned) and rdi through unaligned
   loads.  Checks offsets 16, 32 and 48, then goes to continue_48_00.  edx
   and the flags tested by the first jnz are written by lines missing from
   this excerpt (TODO confirm against the full file).  */
528 pcmpeqd (%rsi), %xmm0 /* compare xmm0 (expected zero) with the 4 double_words at rsi: null scan */
532 jnz L(less4_double_words1)
549 movdqu 16(%rdi), %xmm1
550 pcmpeqd %xmm1, %xmm0 /* Any null double_word? */
551 pcmpeqd 16(%rsi), %xmm1 /* compare the 4 double_words at offset 16 for equality */
552 psubb %xmm0, %xmm1 /* bytewise eq - null: 0xff only where equal and not null */
554 sub $0xffff, %edx /* ZF iff edx == 0xffff: the 4 double_words at offset 16 are same */
555 jnz L(less4_double_words_16)
557 movdqu 32(%rdi), %xmm1
558 pcmpeqd %xmm1, %xmm0 /* Any null double_word? */
559 pcmpeqd 32(%rsi), %xmm1 /* compare the 4 double_words at offset 32 for equality */
560 psubb %xmm0, %xmm1 /* bytewise eq - null: 0xff only where equal and not null */
562 sub $0xffff, %edx /* ZF iff edx == 0xffff: the 4 double_words at offset 32 are same */
563 jnz L(less4_double_words_32)
565 movdqu 48(%rdi), %xmm1
566 pcmpeqd %xmm1, %xmm0 /* Any null double_word? */
567 pcmpeqd 48(%rsi), %xmm1 /* compare the 4 double_words at offset 48 for equality */
568 psubb %xmm0, %xmm1 /* bytewise eq - null: 0xff only where equal and not null */
570 sub $0xffff, %edx /* ZF iff edx == 0xffff: the 4 double_words at offset 48 are same */
571 jnz L(less4_double_words_48)
575 jmp L(continue_48_00)
/* Checks one 16-byte group at offset 0 (rsi via an aligned memory operand,
   rdi data already in xmm1), then goes to continue_48_00.  The load of xmm1
   and the write to edx are on lines missing from this excerpt (TODO
   confirm).  */
580 pcmpeqd %xmm1, %xmm0 /* Any null double_word? */
581 pcmpeqd (%rsi), %xmm1 /* compare first 4 double_words for equality */
582 psubb %xmm0, %xmm1 /* bytewise eq - null: 0xff only where equal and not null */
584 sub $0xffff, %edx /* ZF iff edx == 0xffff: the first 4 double_words are same */
585 jnz L(less4_double_words)
589 jmp L(continue_48_00)
/* Checks the 16-byte groups at offsets 0 and 16 (rsi via aligned memory
   operands, rdi via xmm1), then goes to continue_48_00.  The first load of
   xmm1 and the writes to edx are on lines missing from this excerpt (TODO
   confirm).  */
594 pcmpeqd %xmm1, %xmm0 /* Any null double_word? */
595 pcmpeqd (%rsi), %xmm1 /* compare first 4 double_words for equality */
596 psubb %xmm0, %xmm1 /* bytewise eq - null: 0xff only where equal and not null */
598 sub $0xffff, %edx /* ZF iff edx == 0xffff: the first 4 double_words are same */
599 jnz L(less4_double_words)
601 movdqu 16(%rdi), %xmm1
602 pcmpeqd %xmm1, %xmm0 /* Any null double_word? */
603 pcmpeqd 16(%rsi), %xmm1 /* compare the 4 double_words at offset 16 for equality */
604 psubb %xmm0, %xmm1 /* bytewise eq - null: 0xff only where equal and not null */
606 sub $0xffff, %edx /* ZF iff edx == 0xffff: the 4 double_words at offset 16 are same */
607 jnz L(less4_double_words_16)
611 jmp L(continue_48_00)
/* Checks the 16-byte groups at offsets 0, 16 and 32 (rsi via aligned memory
   operands, rdi via xmm1), then goes to continue_48_00.  The first load of
   xmm1 and the writes to edx are on lines missing from this excerpt (TODO
   confirm).  */
616 pcmpeqd %xmm1, %xmm0 /* Any null double_word? */
617 pcmpeqd (%rsi), %xmm1 /* compare first 4 double_words for equality */
618 psubb %xmm0, %xmm1 /* bytewise eq - null: 0xff only where equal and not null */
620 sub $0xffff, %edx /* ZF iff edx == 0xffff: the first 4 double_words are same */
621 jnz L(less4_double_words)
623 movdqu 16(%rdi), %xmm1
624 pcmpeqd %xmm1, %xmm0 /* Any null double_word? */
625 pcmpeqd 16(%rsi), %xmm1 /* compare the 4 double_words at offset 16 for equality */
626 psubb %xmm0, %xmm1 /* bytewise eq - null: 0xff only where equal and not null */
628 sub $0xffff, %edx /* ZF iff edx == 0xffff: the 4 double_words at offset 16 are same */
629 jnz L(less4_double_words_16)
631 movdqu 32(%rdi), %xmm1
632 pcmpeqd %xmm1, %xmm0 /* Any null double_word? */
633 pcmpeqd 32(%rsi), %xmm1 /* compare the 4 double_words at offset 32 for equality */
634 psubb %xmm0, %xmm1 /* bytewise eq - null: 0xff only where equal and not null */
636 sub $0xffff, %edx /* ZF iff edx == 0xffff: the 4 double_words at offset 32 are same */
637 jnz L(less4_double_words_32)
641 jmp L(continue_48_00)
/* Checks one 16-byte group at offset 0 with both operands already in
   registers (xmm1, xmm2), then goes to continue_48_48.  The loads of
   xmm1/xmm2 and the write to edx are on lines missing from this excerpt
   (TODO confirm).  */
647 pcmpeqd %xmm1, %xmm0 /* Any null double_word? */
648 pcmpeqd %xmm2, %xmm1 /* compare first 4 double_words for equality */
649 psubb %xmm0, %xmm1 /* bytewise eq - null: 0xff only where equal and not null */
651 sub $0xffff, %edx /* ZF iff edx == 0xffff: the first 4 double_words are same */
652 jnz L(less4_double_words)
656 jmp L(continue_48_48)
/* Checks the 16-byte groups at offsets 0 and 16 with unaligned loads of
   both strings, then goes to continue_48_48.  The loads of xmm1/xmm2 for
   the first group and the writes to edx are on lines missing from this
   excerpt (TODO confirm).  */
662 pcmpeqd %xmm1, %xmm0 /* Any null double_word? */
663 pcmpeqd %xmm2, %xmm1 /* compare first 4 double_words for equality */
664 psubb %xmm0, %xmm1 /* bytewise eq - null: 0xff only where equal and not null */
666 sub $0xffff, %edx /* ZF iff edx == 0xffff: the first 4 double_words are same */
667 jnz L(less4_double_words)
669 movdqu 16(%rdi), %xmm3
670 movdqu 16(%rsi), %xmm4
671 pcmpeqd %xmm3, %xmm0 /* Any null double_word? */
672 pcmpeqd %xmm4, %xmm3 /* compare the 4 double_words at offset 16 for equality */
673 psubb %xmm0, %xmm3 /* bytewise eq - null: 0xff only where equal and not null */
675 sub $0xffff, %edx /* ZF iff edx == 0xffff: the 4 double_words at offset 16 are same */
676 jnz L(less4_double_words_16)
680 jmp L(continue_48_48)
/* Checks the 16-byte groups at offsets 0, 16 and 32 with unaligned loads of
   both strings, then goes to continue_48_48.  The loads of xmm1/xmm2 for
   the first group and the writes to edx are on lines missing from this
   excerpt (TODO confirm).  */
686 pcmpeqd %xmm1, %xmm0 /* Any null double_word? */
687 pcmpeqd %xmm2, %xmm1 /* compare first 4 double_words for equality */
688 psubb %xmm0, %xmm1 /* bytewise eq - null: 0xff only where equal and not null */
690 sub $0xffff, %edx /* ZF iff edx == 0xffff: the first 4 double_words are same */
691 jnz L(less4_double_words)
693 movdqu 16(%rdi), %xmm3
694 movdqu 16(%rsi), %xmm4
695 pcmpeqd %xmm3, %xmm0 /* Any null double_word? */
696 pcmpeqd %xmm4, %xmm3 /* compare the 4 double_words at offset 16 for equality */
697 psubb %xmm0, %xmm3 /* bytewise eq - null: 0xff only where equal and not null */
699 sub $0xffff, %edx /* ZF iff edx == 0xffff: the 4 double_words at offset 16 are same */
700 jnz L(less4_double_words_16)
702 movdqu 32(%rdi), %xmm1
703 movdqu 32(%rsi), %xmm2
704 pcmpeqd %xmm1, %xmm0 /* Any null double_word? */
705 pcmpeqd %xmm2, %xmm1 /* compare the 4 double_words at offset 32 for equality */
706 psubb %xmm0, %xmm1 /* bytewise eq - null: 0xff only where equal and not null */
708 sub $0xffff, %edx /* ZF iff edx == 0xffff: the 4 double_words at offset 32 are same */
709 jnz L(less4_double_words_32)
713 jmp L(continue_48_48)
/* Checks the 16-byte groups at offsets 0 and 16 with unaligned loads of
   both strings, then goes to continue_32_48.  The loads of xmm1/xmm2 for
   the first group and the writes to edx are on lines missing from this
   excerpt (TODO confirm).  */
719 pcmpeqd %xmm1, %xmm0 /* Any null double_word? */
720 pcmpeqd %xmm2, %xmm1 /* compare first 4 double_words for equality */
721 psubb %xmm0, %xmm1 /* bytewise eq - null: 0xff only where equal and not null */
723 sub $0xffff, %edx /* ZF iff edx == 0xffff: the first 4 double_words are same */
724 jnz L(less4_double_words)
726 movdqu 16(%rdi), %xmm1
727 movdqu 16(%rsi), %xmm2
728 pcmpeqd %xmm1, %xmm0 /* Any null double_word? */
729 pcmpeqd %xmm2, %xmm1 /* compare the 4 double_words at offset 16 for equality */
730 psubb %xmm0, %xmm1 /* bytewise eq - null: 0xff only where equal and not null */
732 sub $0xffff, %edx /* ZF iff edx == 0xffff: the 4 double_words at offset 16 are same */
733 jnz L(less4_double_words_16)
737 jmp L(continue_32_48)
/* Checks one 16-byte group at offset 0 with both operands already in
   registers (xmm1, xmm2), then goes to continue_16_48.  The loads of
   xmm1/xmm2 and the write to edx are on lines missing from this excerpt
   (TODO confirm).  */
743 pcmpeqd %xmm1, %xmm0 /* Any null double_word? */
744 pcmpeqd %xmm2, %xmm1 /* compare first 4 double_words for equality */
745 psubb %xmm0, %xmm1 /* bytewise eq - null: 0xff only where equal and not null */
747 sub $0xffff, %edx /* ZF iff edx == 0xffff: the first 4 double_words are same */
748 jnz L(less4_double_words)
752 jmp L(continue_16_48)
/* Checks one 16-byte group at offset 0 with both operands already in
   registers (xmm1, xmm2), then goes to continue_32_48.  The loads of
   xmm1/xmm2 and the write to edx are on lines missing from this excerpt
   (TODO confirm).  */
758 pcmpeqd %xmm1, %xmm0 /* Any null double_word? */
759 pcmpeqd %xmm2, %xmm1 /* compare first 4 double_words for equality */
760 psubb %xmm0, %xmm1 /* bytewise eq - null: 0xff only where equal and not null */
762 sub $0xffff, %edx /* ZF iff edx == 0xffff: the first 4 double_words are same */
763 jnz L(less4_double_words)
767 jmp L(continue_32_48)
/* Handlers reached when a 16-byte group fails the equal-and-not-null check:
   less4_double_words for the group at offset 0, and the _16/_32/_48
   variants for the groups at those offsets.  Each variant has the same
   shape: two jz dispatches to next_two_double_words* / second_double_word*,
   then a jz to fourth_double_word*.  The instructions that set the flags
   and compute the result are missing from this excerpt; the label names
   suggest the dispatch selects which of the four double_words decides the
   result (TODO confirm against the full file).  */
770 L(less4_double_words1): /* target of the jnz after the pcmpeqd (%rdi)/(%rsi) null scans */
795 L(less4_double_words): /* group at offset 0 */
798 jz L(next_two_double_words)
800 jz L(second_double_word)
807 L(second_double_word):
814 L(next_two_double_words):
816 jz L(fourth_double_word)
823 L(fourth_double_word):
830 L(less4_double_words_16): /* group at offset 16 */
833 jz L(next_two_double_words_16)
835 jz L(second_double_word_16)
842 L(second_double_word_16):
849 L(next_two_double_words_16):
851 jz L(fourth_double_word_16)
858 L(fourth_double_word_16):
865 L(less4_double_words_32): /* group at offset 32 */
868 jz L(next_two_double_words_32)
870 jz L(second_double_word_32)
877 L(second_double_word_32):
884 L(next_two_double_words_32):
886 jz L(fourth_double_word_32)
893 L(fourth_double_word_32):
900 L(less4_double_words_48): /* group at offset 48 */
903 jz L(next_two_double_words_48)
905 jz L(second_double_word_48)
912 L(second_double_word_48):
919 L(next_two_double_words_48):
921 jz L(fourth_double_word_48)
928 L(fourth_double_word_48):
/* Symbol export via macros defined outside this file: going by their names,
   __wcscmp gets a hidden definition for internal callers and wcscmp is a
   weak alias of it (TODO confirm against the macro definitions).  */
949 libc_hidden_def (__wcscmp)
950 weak_alias (__wcscmp, wcscmp)