2 Copyright (C) 2010 Free Software Foundation, Inc.
3 Contributed by Intel Corporation.
4 This file is part of the GNU C Library.
6 The GNU C Library is free software; you can redistribute it and/or
7 modify it under the terms of the GNU Lesser General Public
8 License as published by the Free Software Foundation; either
9 version 2.1 of the License, or (at your option) any later version.
11 The GNU C Library is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 Lesser General Public License for more details.
16 You should have received a copy of the GNU Lesser General Public
17 License along with the GNU C Library; if not, write to the Free
18 Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
24 #include "asm-syntax.h"
/* MEMCMP expands to __memcmp_sse4_2.  NOTE(review): presumably this is the
   name of the routine's entry point, and the indented "# define" suggests
   an enclosing conditional -- neither is visible in this excerpt.  */
27 # define MEMCMP __memcmp_sse4_2
/* Stack helper macros.

   CFI_PUSH (REG) emits only unwind annotations, no instruction: it moves
   the CFA offset by 4 and records REG as saved at offset 0.

   CFI_POP (REG) moves the CFA offset back by 4.  NOTE(review): its last
   visible line ends in a continuation, so the macro has at least one more
   line that is not visible in this excerpt.

   PUSH (REG) and POP (REG) pair the pushl/popl instruction with the
   matching annotation macro.  */
30 #define CFI_PUSH(REG) \
31 cfi_adjust_cfa_offset (4); \
32 cfi_rel_offset (REG, 0)
34 #define CFI_POP(REG) \
35 cfi_adjust_cfa_offset (-4); \
38 #define PUSH(REG) pushl REG; CFI_PUSH (REG)
39 #define POP(REG) popl REG; CFI_POP (REG)
/* Restore %ebx and return.  The trailing CFI_PUSH emits no instruction; it
   re-describes %ebx as still pushed for the code that follows the ret.  */
45 #define RETURN POP (%ebx); ret; CFI_PUSH (%ebx)
/* Variant using relative table entries: JMPTBL (I, B) stores label I as
   its offset from base B, and BRANCH_TO_JMPTBL_ENTRY adds that offset
   back to the table address computed at run time.
   NOTE(review): presumably selected by a position-independent-code
   conditional; the conditional itself is not visible in this excerpt.  */
49 # define JMPTBL(I, B) I - B
51 /* Load an entry in a jump table into EBX and branch to it. TABLE is a
52 jump table with relative offsets. INDEX is a register that contains the
53 index into the jump table. SCALE is the scale of INDEX.
   NOTE(review): the macro's last visible line ends in a continuation; the
   branch instruction itself is not visible in this excerpt.  The thunk the
   macro calls is declared right after it (own link-once section, global,
   hidden, typed as a function); the thunk's instructions are not visible
   here either. */
54 # define BRANCH_TO_JMPTBL_ENTRY(TABLE, INDEX, SCALE) \
55 /* We first load PC into EBX. */ \
56 call __i686.get_pc_thunk.bx; \
57 /* Get the address of the jump table. */ \
58 addl $(TABLE - .), %ebx; \
59 /* Get the entry and convert the relative offset to the \
60 absolute address. */ \
61 addl (%ebx,INDEX,SCALE), %ebx; \
62 /* We loaded the jump table and adjusted EDX/ESI. Go. */ \
65 .section .gnu.linkonce.t.__i686.get_pc_thunk.bx,"ax",@progbits
66 .globl __i686.get_pc_thunk.bx
67 .hidden __i686.get_pc_thunk.bx
69 .type __i686.get_pc_thunk.bx,@function
70 __i686.get_pc_thunk.bx:
/* Variant using absolute table entries: JMPTBL (I, B) is just the label I;
   the base B is ignored.  */
74 # define JMPTBL(I, B) I
76 /* Branch to an entry of a jump table. TABLE is a jump table holding
77 absolute addresses (see JMPTBL above). INDEX is a register that contains
78 the index into the jump table. SCALE is the scale of INDEX. */
79 # define BRANCH_TO_JMPTBL_ENTRY(TABLE, INDEX, SCALE) \
80 jmp *TABLE(,INDEX,SCALE)
/* The code that follows is placed in its own allocatable, executable
   section.  */
83 .section .text.sse4.2,"ax",@progbits
/* Dispatch through L(table_64bytes), indexed by %ecx with 4 bytes per
   entry.  NOTE(review): presumably %ecx holds the byte count at this
   point; the code that sets it up is not visible in this excerpt.  */
98 BRANCH_TO_JMPTBL_ENTRY(L(table_64bytes), %ecx, 4)
/* NOTE(review): going by the label name, presumably the loop used when at
   least 64 bytes remain.  Visible here are unaligned 16-byte loads at
   offsets 16, 32 and 48 from %eax and %edx into %xmm1 and %xmm2; the
   other instructions of the loop are not visible in this excerpt.  */
178 L(64bytesormore_loop):
185 movdqu 16(%eax), %xmm1
186 movdqu 16(%edx), %xmm2
191 movdqu 32(%eax), %xmm1
192 movdqu 32(%edx), %xmm2
197 movdqu 48(%eax), %xmm1
198 movdqu 48(%edx), %xmm2
/* Loop again while the carry flag left by the preceding (not visible)
   instruction is clear.  */
205 jae L(64bytesormore_loop)
/* Loop finished: dispatch on %ecx through the same table as above.  */
209 BRANCH_TO_JMPTBL_ENTRY(L(table_64bytes), %ecx, 4)
/* Load fragments with negative offsets from %eax and %edx.
   NOTE(review): presumably these belong to the L(Nbytes) handlers listed
   in L(table_64bytes), with %eax/%edx pointing just past the bytes still
   to be compared.  The labels, the comparison and branch instructions,
   and the remaining loads are not visible in this excerpt.  Each group
   below is described only by the offsets it loads from.  */
/* Offsets -49 and -33, then the byte at -1.  */
249 movdqu -49(%eax), %xmm1
250 movdqu -49(%edx), %xmm2
256 movdqu -33(%eax), %xmm1
257 movdqu -33(%edx), %xmm2
282 movzbl -1(%eax), %ecx
/* Offsets -50 and -34, then the 16-bit words at -2.  */
291 movdqu -50(%eax), %xmm1
292 movdqu -50(%edx), %xmm2
298 movdqu -34(%eax), %xmm1
299 movdqu -34(%edx), %xmm2
324 movzwl -2(%eax), %ecx
325 movzwl -2(%edx), %ebx
/* Offsets -51 and -35, then the 16-bit words at -3 and the byte at -1.  */
336 movdqu -51(%eax), %xmm1
337 movdqu -51(%edx), %xmm2
343 movdqu -35(%eax), %xmm1
344 movdqu -35(%edx), %xmm2
369 movzwl -3(%eax), %ecx
370 movzwl -3(%edx), %ebx
376 movzbl -1(%eax), %eax
/* Offsets -52, -36 and -20.  */
384 movdqu -52(%eax), %xmm1
385 movdqu -52(%edx), %xmm2
391 movdqu -36(%eax), %xmm1
392 movdqu -36(%edx), %xmm2
398 movdqu -20(%eax), %xmm1
399 movdqu -20(%edx), %xmm2
/* Offsets -53, -37 and -21, then the byte at -1.  */
413 movdqu -53(%eax), %xmm1
414 movdqu -53(%edx), %xmm2
421 movdqu -37(%eax), %xmm1
422 movdqu -37(%edx), %xmm2
428 movdqu -21(%eax), %xmm1
429 movdqu -21(%edx), %xmm2
437 movzbl -1(%eax), %ecx
/* Offsets -54, -38 and -22, then the 16-bit words at -2.  */
445 movdqu -54(%eax), %xmm1
446 movdqu -54(%edx), %xmm2
453 movdqu -38(%eax), %xmm1
454 movdqu -38(%edx), %xmm2
460 movdqu -22(%eax), %xmm1
461 movdqu -22(%edx), %xmm2
470 movzwl -2(%eax), %ecx
471 movzwl -2(%edx), %ebx
/* Offsets -55, -39 and -23, then the 16-bit words at -3 and the byte
   at -1.  */
481 movdqu -55(%eax), %xmm1
482 movdqu -55(%edx), %xmm2
489 movdqu -39(%eax), %xmm1
490 movdqu -39(%edx), %xmm2
496 movdqu -23(%eax), %xmm1
497 movdqu -23(%edx), %xmm2
505 movzwl -3(%eax), %ecx
506 movzwl -3(%edx), %ebx
511 movzbl -1(%eax), %eax
/* Offsets -56, -40 and -24.  */
519 movdqu -56(%eax), %xmm1
520 movdqu -56(%edx), %xmm2
527 movdqu -40(%eax), %xmm1
528 movdqu -40(%edx), %xmm2
534 movdqu -24(%eax), %xmm1
535 movdqu -24(%edx), %xmm2
/* Offsets -57, -41 and -25, then the byte at -1.  */
554 movdqu -57(%eax), %xmm1
555 movdqu -57(%edx), %xmm2
562 movdqu -41(%eax), %xmm1
563 movdqu -41(%edx), %xmm2
569 movdqu -25(%eax), %xmm1
570 movdqu -25(%edx), %xmm2
582 movzbl -1(%eax), %ecx
/* Offsets -58, -42 and -26, then the 16-bit words at -2.  */
590 movdqu -58(%eax), %xmm1
591 movdqu -58(%edx), %xmm2
598 movdqu -42(%eax), %xmm1
599 movdqu -42(%edx), %xmm2
605 movdqu -26(%eax), %xmm1
606 movdqu -26(%edx), %xmm2
621 movzwl -2(%eax), %ecx
622 movzwl -2(%edx), %ebx
/* Offsets -59, -43 and -27, then the 16-bit words at -3 and the byte
   at -1.  */
632 movdqu -59(%eax), %xmm1
633 movdqu -59(%edx), %xmm2
640 movdqu -43(%eax), %xmm1
641 movdqu -43(%edx), %xmm2
647 movdqu -27(%eax), %xmm1
648 movdqu -27(%edx), %xmm2
660 movzwl -3(%eax), %ecx
661 movzwl -3(%edx), %ebx
666 movzbl -1(%eax), %eax
/* Offsets -60, -44 and -28.  */
674 movdqu -60(%eax), %xmm1
675 movdqu -60(%edx), %xmm2
682 movdqu -44(%eax), %xmm1
683 movdqu -44(%edx), %xmm2
689 movdqu -28(%eax), %xmm1
690 movdqu -28(%edx), %xmm2
/* Offsets -61, -45 and -29, then the byte at -1.  */
711 movdqu -61(%eax), %xmm1
712 movdqu -61(%edx), %xmm2
719 movdqu -45(%eax), %xmm1
720 movdqu -45(%edx), %xmm2
726 movdqu -29(%eax), %xmm1
727 movdqu -29(%edx), %xmm2
746 movzbl -1(%eax), %ecx
/* Offsets -62, -46 and -30, then the 16-bit words at -2.  */
754 movdqu -62(%eax), %xmm1
755 movdqu -62(%edx), %xmm2
762 movdqu -46(%eax), %xmm1
763 movdqu -46(%edx), %xmm2
769 movdqu -30(%eax), %xmm1
770 movdqu -30(%edx), %xmm2
786 movzwl -2(%eax), %ecx
787 movzwl -2(%edx), %ebx
/* Offsets -63, -47 and -31, then the 16-bit words at -3 and the byte
   at -1.  */
797 movdqu -63(%eax), %xmm1
798 movdqu -63(%edx), %xmm2
805 movdqu -47(%eax), %xmm1
806 movdqu -47(%edx), %xmm2
812 movdqu -31(%eax), %xmm1
813 movdqu -31(%edx), %xmm2
830 movzwl -3(%eax), %ecx
831 movzwl -3(%edx), %ebx
836 movzbl -1(%eax), %eax
/* Offsets -64, -48 and -32.  */
844 movdqu -64(%eax), %xmm1
845 movdqu -64(%edx), %xmm2
851 movdqu -48(%eax), %xmm1
852 movdqu -48(%edx), %xmm2
858 movdqu -32(%eax), %xmm1
859 movdqu -32(%edx), %xmm2
/* Jump table used by BRANCH_TO_JMPTBL_ENTRY: 65 entries of 4 bytes each,
   one per label L(0bytes) .. L(64bytes), in that order, so entry N leads
   to L(Nbytes).  Each entry is encoded with JMPTBL, relative to
   L(table_64bytes).  The table lives in an allocatable, non-executable
   section and is typed and sized as an object.
   NOTE(review): the line defining the L(table_64bytes) label itself is
   not visible in this excerpt.  */
934 .section .rodata.sse4.2,"a",@progbits
936 .type L(table_64bytes), @object
938 .int JMPTBL (L(0bytes), L(table_64bytes))
939 .int JMPTBL (L(1bytes), L(table_64bytes))
940 .int JMPTBL (L(2bytes), L(table_64bytes))
941 .int JMPTBL (L(3bytes), L(table_64bytes))
942 .int JMPTBL (L(4bytes), L(table_64bytes))
943 .int JMPTBL (L(5bytes), L(table_64bytes))
944 .int JMPTBL (L(6bytes), L(table_64bytes))
945 .int JMPTBL (L(7bytes), L(table_64bytes))
946 .int JMPTBL (L(8bytes), L(table_64bytes))
947 .int JMPTBL (L(9bytes), L(table_64bytes))
948 .int JMPTBL (L(10bytes), L(table_64bytes))
949 .int JMPTBL (L(11bytes), L(table_64bytes))
950 .int JMPTBL (L(12bytes), L(table_64bytes))
951 .int JMPTBL (L(13bytes), L(table_64bytes))
952 .int JMPTBL (L(14bytes), L(table_64bytes))
953 .int JMPTBL (L(15bytes), L(table_64bytes))
954 .int JMPTBL (L(16bytes), L(table_64bytes))
955 .int JMPTBL (L(17bytes), L(table_64bytes))
956 .int JMPTBL (L(18bytes), L(table_64bytes))
957 .int JMPTBL (L(19bytes), L(table_64bytes))
958 .int JMPTBL (L(20bytes), L(table_64bytes))
959 .int JMPTBL (L(21bytes), L(table_64bytes))
960 .int JMPTBL (L(22bytes), L(table_64bytes))
961 .int JMPTBL (L(23bytes), L(table_64bytes))
962 .int JMPTBL (L(24bytes), L(table_64bytes))
963 .int JMPTBL (L(25bytes), L(table_64bytes))
964 .int JMPTBL (L(26bytes), L(table_64bytes))
965 .int JMPTBL (L(27bytes), L(table_64bytes))
966 .int JMPTBL (L(28bytes), L(table_64bytes))
967 .int JMPTBL (L(29bytes), L(table_64bytes))
968 .int JMPTBL (L(30bytes), L(table_64bytes))
969 .int JMPTBL (L(31bytes), L(table_64bytes))
970 .int JMPTBL (L(32bytes), L(table_64bytes))
971 .int JMPTBL (L(33bytes), L(table_64bytes))
972 .int JMPTBL (L(34bytes), L(table_64bytes))
973 .int JMPTBL (L(35bytes), L(table_64bytes))
974 .int JMPTBL (L(36bytes), L(table_64bytes))
975 .int JMPTBL (L(37bytes), L(table_64bytes))
976 .int JMPTBL (L(38bytes), L(table_64bytes))
977 .int JMPTBL (L(39bytes), L(table_64bytes))
978 .int JMPTBL (L(40bytes), L(table_64bytes))
979 .int JMPTBL (L(41bytes), L(table_64bytes))
980 .int JMPTBL (L(42bytes), L(table_64bytes))
981 .int JMPTBL (L(43bytes), L(table_64bytes))
982 .int JMPTBL (L(44bytes), L(table_64bytes))
983 .int JMPTBL (L(45bytes), L(table_64bytes))
984 .int JMPTBL (L(46bytes), L(table_64bytes))
985 .int JMPTBL (L(47bytes), L(table_64bytes))
986 .int JMPTBL (L(48bytes), L(table_64bytes))
987 .int JMPTBL (L(49bytes), L(table_64bytes))
988 .int JMPTBL (L(50bytes), L(table_64bytes))
989 .int JMPTBL (L(51bytes), L(table_64bytes))
990 .int JMPTBL (L(52bytes), L(table_64bytes))
991 .int JMPTBL (L(53bytes), L(table_64bytes))
992 .int JMPTBL (L(54bytes), L(table_64bytes))
993 .int JMPTBL (L(55bytes), L(table_64bytes))
994 .int JMPTBL (L(56bytes), L(table_64bytes))
995 .int JMPTBL (L(57bytes), L(table_64bytes))
996 .int JMPTBL (L(58bytes), L(table_64bytes))
997 .int JMPTBL (L(59bytes), L(table_64bytes))
998 .int JMPTBL (L(60bytes), L(table_64bytes))
999 .int JMPTBL (L(61bytes), L(table_64bytes))
1000 .int JMPTBL (L(62bytes), L(table_64bytes))
1001 .int JMPTBL (L(63bytes), L(table_64bytes))
1002 .int JMPTBL (L(64bytes), L(table_64bytes))
1003 .size L(table_64bytes), .-L(table_64bytes)