2 Copyright (C) 2011-2013 Free Software Foundation, Inc.
3 Contributed by Intel Corporation.
4 This file is part of the GNU C Library.
6 The GNU C Library is free software; you can redistribute it and/or
7 modify it under the terms of the GNU Lesser General Public
8 License as published by the Free Software Foundation; either
9 version 2.1 of the License, or (at your option) any later version.
11 The GNU C Library is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 Lesser General Public License for more details.
16 You should have received a copy of the GNU Lesser General Public
17 License along with the GNU C Library; if not, see
18 <http://www.gnu.org/licenses/>. */
/* Stack helpers.  CFI_PUSH records a CFA adjustment of 4 and that REG
   is saved at offset 0; CFI_POP records the opposite adjustment of -4.
   PUSH and POP combine pushl/popl with the matching CFI macro.  */
26 # define CFI_PUSH(REG) \
27 cfi_adjust_cfa_offset (4); \
28 cfi_rel_offset (REG, 0)
30 # define CFI_POP(REG) \
31 cfi_adjust_cfa_offset (-4); \
34 # define PUSH(REG) pushl REG; CFI_PUSH (REG)
35 # define POP(REG) popl REG; CFI_POP (REG)
/* JMPTBL (I, B) in this variant yields I - B, the offset of label I
   from the table base B.  */
38 # define JMPTBL(I, B) I - B
40 /* Load an entry in a jump table into ECX and branch to it. TABLE is a
41 jump table with relative offsets. INDEX is a register that contains the
42 index into the jump table. SCALE is the scale of INDEX. */
44 # define BRANCH_TO_JMPTBL_ENTRY(TABLE, INDEX, SCALE) \
45 /* We first load PC into ECX. */ \
47 /* Get the address of the jump table. */ \
48 addl $(TABLE - .), %ecx; \
49 /* Get the entry and convert the relative offset to the \
50 absolute address. */ \
51 addl (%ecx,INDEX,SCALE), %ecx; \
52 /* We loaded the jump table and adjusted ECX. Go. */ \
55 # define JMPTBL(I, B) I
/* In this variant JMPTBL ignores B: the table entry is label I itself.  */
57 /* Branch to an entry in a jump table. TABLE is a jump table with
58 absolute addresses. INDEX is a register that contains the index into the
59 jump table. SCALE is the scale of INDEX. */
61 # define BRANCH_TO_JMPTBL_ENTRY(TABLE, INDEX, SCALE) \
62 jmp *TABLE(,INDEX,SCALE)
/* STRCAT expands to the symbol name __strcat_sse2.  */
66 # define STRCAT __strcat_sse2
73 # ifdef USE_AS_STRNCAT
80 # define USE_AS_STRCAT
/* RETURN is the epilogue sequence: pop the saved registers, then ret.
   The definition under USE_AS_STRNCAT pops %ebx and %esi; the second
   definition pops only %esi (its guarding directive is not visible
   here).  After ret, CFI_PUSH is emitted again for the same registers.
   NOTE(review): presumably this keeps the unwind information correct
   for code assembled after a RETURN -- confirm.  */
81 # ifdef USE_AS_STRNCAT
82 # define RETURN POP(%ebx); POP(%esi); ret; CFI_PUSH(%ebx); CFI_PUSH(%esi);
84 # define RETURN POP(%esi); ret; CFI_PUSH(%esi);
/* First scan.  pcmpeqb compares the 16-byte chunks at (%eax) and at
   offsets 16, 32, 48 and 64 from %eax against %xmm0-%xmm3.  The lea
   instructions advance %eax by a chunk offset (16, 32 or 48) plus
   %edx, and every path ends with a jump to L(StartStrcpyPart).
   movdqu 16(%esi), %xmm6 loads 16 bytes from offset 16 of %esi without
   requiring alignment.
   NOTE(review): presumably the xmm registers are zero and %edx is the
   index of the first NUL byte in the matching chunk, so %eax ends up
   at the end of the destination string -- confirm against the lines
   that are not visible here.  */
92 # ifdef USE_AS_STRNCAT
108 ja L(alignment_prolog)
116 movdqu 16(%esi), %xmm6
133 pcmpeqb (%eax), %xmm0
135 movdqu 16(%esi), %xmm6
151 pcmpeqb 16(%eax), %xmm0
156 pcmpeqb 32(%eax), %xmm1
161 pcmpeqb 48(%eax), %xmm2
166 pcmpeqb 64(%eax), %xmm3
173 jmp L(StartStrcpyPart)
178 lea 16(%eax, %edx), %eax
179 jmp L(StartStrcpyPart)
184 lea 32(%eax, %edx), %eax
185 jmp L(StartStrcpyPart)
190 lea 48(%eax, %edx), %eax
191 jmp L(StartStrcpyPart)
198 jmp L(StartStrcpyPart)
/* Early tail checks followed by a second scan.  Each jnz branch
   (to L(CopyFrom1To16BytesTail1) and L(CopyFrom1To32Bytes1)) is
   preceded, under USE_AS_STRNCAT, by a jbe branch to the matching
   ...Case2OrCase3 handler.  The second scan has the same shape as the
   first one (pcmpeqb on (%eax) .. 64(%eax), lea by 16/32/48 plus
   %edx) but its paths jump to L(StartStrcpyPart_1).
   NOTE(review): the instructions that set the flags for jbe/jnz are
   not visible here; presumably jbe tests the remaining length and jnz
   a pmovmskb mask -- confirm.  */
208 # ifdef USE_AS_STRNCAT
210 jbe L(CopyFrom1To16BytesTail1Case2OrCase3)
213 jnz L(CopyFrom1To16BytesTail1)
217 # ifdef USE_AS_STRNCAT
219 jbe L(CopyFrom1To32Bytes1Case2OrCase3)
222 jnz L(CopyFrom1To32Bytes1)
228 # ifdef USE_AS_STRNCAT
239 pcmpeqb (%eax), %xmm0
253 pcmpeqb 16(%eax), %xmm0
258 pcmpeqb 32(%eax), %xmm1
263 pcmpeqb 48(%eax), %xmm2
268 pcmpeqb 64(%eax), %xmm3
275 jmp L(StartStrcpyPart_1)
280 lea 16(%eax, %edx), %eax
281 jmp L(StartStrcpyPart_1)
286 lea 32(%eax, %edx), %eax
287 jmp L(StartStrcpyPart_1)
292 lea 48(%eax, %edx), %eax
293 jmp L(StartStrcpyPart_1)
/* Copy part.  The first two 16-byte source chunks, (%esi) and
   16(%esi), are compared with pcmpeqb; the jnz branches hand them to
   L(CopyFrom1To16BytesTail) and L(CopyFrom1To32Bytes).  The code after
   that is unrolled: each step loads the next chunk from
   16(%esi, %ecx) with movaps (which requires a 16-byte aligned
   address), stores the previously loaded chunk to (%eax, %ecx) with
   movdqu (no alignment required), and branches to
   L(CopyFrom1To16Bytes) on jnz.  The chunk registers rotate
   xmm1 -> xmm2 -> xmm3 -> xmm4 -> xmm1 -> xmm2 -> xmm3.  Under
   USE_AS_STRNCAT every step also has a jbe branch to
   L(CopyFrom1To16BytesCase2OrCase3).  The block ends by preloading
   16/32/48(%esi) into %xmm5, %xmm3 and %xmm7 and branching to
   L(UnalignedLeaveCase2OrCase3) or L(Unaligned64Leave).  */
302 L(StartStrcpyPart_1):
309 # ifdef USE_AS_STRNCAT
313 pcmpeqb (%esi), %xmm1
314 # ifdef USE_AS_STRNCAT
319 # ifdef USE_AS_STRNCAT
321 jbe L(CopyFrom1To16BytesTailCase2OrCase3)
324 jnz L(CopyFrom1To16BytesTail)
326 pcmpeqb 16(%esi), %xmm0
328 # ifdef USE_AS_STRNCAT
330 jbe L(CopyFrom1To32BytesCase2OrCase3)
333 jnz L(CopyFrom1To32Bytes)
335 movdqu (%esi, %ecx), %xmm1 /* copy 16 bytes */
342 movdqa (%esi, %ecx), %xmm1
343 movaps 16(%esi, %ecx), %xmm2
344 movdqu %xmm1, (%eax, %ecx)
348 # ifdef USE_AS_STRNCAT
350 jbe L(CopyFrom1To16BytesCase2OrCase3)
353 jnz L(CopyFrom1To16Bytes)
/* Also reached through a jmp L(Unalign16BothBigN) elsewhere in the file.  */
354 L(Unalign16BothBigN):
355 movaps 16(%esi, %ecx), %xmm3
356 movdqu %xmm2, (%eax, %ecx)
360 # ifdef USE_AS_STRNCAT
362 jbe L(CopyFrom1To16BytesCase2OrCase3)
365 jnz L(CopyFrom1To16Bytes)
367 movaps 16(%esi, %ecx), %xmm4
368 movdqu %xmm3, (%eax, %ecx)
372 # ifdef USE_AS_STRNCAT
374 jbe L(CopyFrom1To16BytesCase2OrCase3)
377 jnz L(CopyFrom1To16Bytes)
379 movaps 16(%esi, %ecx), %xmm1
380 movdqu %xmm4, (%eax, %ecx)
384 # ifdef USE_AS_STRNCAT
386 jbe L(CopyFrom1To16BytesCase2OrCase3)
389 jnz L(CopyFrom1To16Bytes)
391 movaps 16(%esi, %ecx), %xmm2
392 movdqu %xmm1, (%eax, %ecx)
396 # ifdef USE_AS_STRNCAT
398 jbe L(CopyFrom1To16BytesCase2OrCase3)
401 jnz L(CopyFrom1To16Bytes)
403 movaps 16(%esi, %ecx), %xmm3
404 movdqu %xmm2, (%eax, %ecx)
408 # ifdef USE_AS_STRNCAT
410 jbe L(CopyFrom1To16BytesCase2OrCase3)
413 jnz L(CopyFrom1To16Bytes)
415 movdqu %xmm3, (%eax, %ecx)
417 lea 16(%esi, %ecx), %esi
421 # ifdef USE_AS_STRNCAT
422 lea 128(%ebx, %edx), %ebx
426 movaps 16(%esi), %xmm5
427 movaps 32(%esi), %xmm3
429 movaps 48(%esi), %xmm7
435 # ifdef USE_AS_STRNCAT
437 jbe L(UnalignedLeaveCase2OrCase3)
440 jnz L(Unaligned64Leave)
/* 64-byte loop.  An iteration stores %xmm4-%xmm7 to -64(%eax),
   -48(%eax), -32(%eax) and -16(%eax) with movdqu and reloads chunks
   from 16/32/48(%esi) with movaps (aligned loads).  jz branches back
   to L(Unaligned64Loop_start); under USE_AS_STRNCAT a jbe branch
   leaves through L(UnalignedLeaveCase2OrCase3) first.  After the
   loop, the jnz chain selects L(CopyFrom1To16BytesUnaligned_0/_16/_32);
   otherwise %xmm5 and %xmm6 are stored at 16(%eax) and 32(%eax) and
   the remaining bytes are dispatched through L(ExitTable), indexed by
   %edx with scale 4.
   NOTE(review): the instructions that set the flags for these branches
   are not visible here -- confirm what each jnz tests.  */
443 L(Unaligned64Loop_start):
446 movdqu %xmm4, -64(%eax)
449 movdqu %xmm5, -48(%eax)
450 movaps 16(%esi), %xmm5
452 movaps 32(%esi), %xmm3
453 movdqu %xmm6, -32(%eax)
455 movdqu %xmm7, -16(%eax)
456 movaps 48(%esi), %xmm7
461 # ifdef USE_AS_STRNCAT
463 jbe L(UnalignedLeaveCase2OrCase3)
466 jz L(Unaligned64Loop_start)
476 jnz L(CopyFrom1To16BytesUnaligned_0)
478 jnz L(CopyFrom1To16BytesUnaligned_16)
485 jnz L(CopyFrom1To16BytesUnaligned_32)
489 movdqu %xmm5, 16(%eax)
490 movdqu %xmm6, 32(%eax)
493 BRANCH_TO_JMPTBL_ENTRY (L(ExitTable), %edx, 4)
/* Second entry sequence, opened by # ifdef USE_AS_STRNCAT.  It
   repeats the two leading chunk checks (pcmpeqb on (%esi) and
   16(%esi)) and the first unrolled load/store step, but with only the
   jnz branches and no jbe length branches, then joins the main
   unrolled sequence at L(Unalign16BothBigN).
   NOTE(review): presumably this is the path taken when the length
   limit cannot be reached within these first chunks -- confirm.  */
495 # ifdef USE_AS_STRNCAT
498 pcmpeqb (%esi), %xmm1
502 jnz L(CopyFrom1To16BytesTail)
504 pcmpeqb 16(%esi), %xmm0
507 jnz L(CopyFrom1To32Bytes)
509 movdqu (%esi, %ecx), %xmm1 /* copy 16 bytes */
516 movdqa (%esi, %ecx), %xmm1
517 movaps 16(%esi, %ecx), %xmm2
518 movdqu %xmm1, (%eax, %ecx)
523 jnz L(CopyFrom1To16Bytes)
524 jmp L(Unalign16BothBigN)
527 /*------------end of main part-------------------------------*/
/* Exit stubs for the copy part.  Every stub ends by dispatching
   through L(ExitTable), indexed by %edx with scale 4.
   L(CopyFrom1To32Bytes1) is immediately followed by
   L(CopyFrom1To16BytesTail1) and shares its dispatch.
   L(CopyFrom1To16BytesUnaligned_32) stores %xmm5 at 16(%eax) before
   dispatching.
   NOTE(review): the pointer/index adjustments each stub performs
   before the dispatch are not visible here.  */
531 L(CopyFrom1To16Bytes):
535 BRANCH_TO_JMPTBL_ENTRY (L(ExitTable), %edx, 4)
538 L(CopyFrom1To16BytesTail):
541 BRANCH_TO_JMPTBL_ENTRY (L(ExitTable), %edx, 4)
544 L(CopyFrom1To32Bytes1):
547 L(CopyFrom1To16BytesTail1):
549 BRANCH_TO_JMPTBL_ENTRY (L(ExitTable), %edx, 4)
552 L(CopyFrom1To32Bytes):
557 BRANCH_TO_JMPTBL_ENTRY (L(ExitTable), %edx, 4)
560 L(CopyFrom1To16BytesUnaligned_0):
562 BRANCH_TO_JMPTBL_ENTRY (L(ExitTable), %edx, 4)
565 L(CopyFrom1To16BytesUnaligned_16):
570 BRANCH_TO_JMPTBL_ENTRY (L(ExitTable), %edx, 4)
573 L(CopyFrom1To16BytesUnaligned_32):
576 movdqu %xmm5, 16(%eax)
579 BRANCH_TO_JMPTBL_ENTRY (L(ExitTable), %edx, 4)
/* Exit stubs used only under USE_AS_STRNCAT.
   - L(CopyFrom1To16BytesExit) dispatches through L(ExitTable) on %edx.
   - Each ...Case2 stub takes jb to L(CopyFrom1To16BytesExit) and
     otherwise dispatches through L(ExitStrncatTable) on %ebx.
   - Each ...Case2OrCase3 stub takes jnz to the matching ...Case2 stub
     and otherwise dispatches through L(ExitStrncatTable) on %ebx.
   NOTE(review): presumably %edx is the index of the NUL byte and %ebx
   the number of bytes still allowed by the length limit, so Case2 is
   "both a NUL and the limit fall in this chunk" and Case3 is "limit
   reached without a NUL" -- confirm against the compare instructions
   that are not visible here.  */
581 # ifdef USE_AS_STRNCAT
584 L(CopyFrom1To16BytesExit):
585 BRANCH_TO_JMPTBL_ENTRY (L(ExitTable), %edx, 4)
590 L(CopyFrom1To16BytesCase2):
596 jb L(CopyFrom1To16BytesExit)
597 BRANCH_TO_JMPTBL_ENTRY (L(ExitStrncatTable), %ebx, 4)
600 L(CopyFrom1To32BytesCase2):
607 jb L(CopyFrom1To16BytesExit)
608 BRANCH_TO_JMPTBL_ENTRY (L(ExitStrncatTable), %ebx, 4)
610 L(CopyFrom1To16BytesTailCase2):
615 jb L(CopyFrom1To16BytesExit)
616 BRANCH_TO_JMPTBL_ENTRY (L(ExitStrncatTable), %ebx, 4)
618 L(CopyFrom1To16BytesTail1Case2):
621 jb L(CopyFrom1To16BytesExit)
622 BRANCH_TO_JMPTBL_ENTRY (L(ExitStrncatTable), %ebx, 4)
624 /* Case2 or Case3, Case3 */
627 L(CopyFrom1To16BytesCase2OrCase3):
629 jnz L(CopyFrom1To16BytesCase2)
630 L(CopyFrom1To16BytesCase3):
634 BRANCH_TO_JMPTBL_ENTRY (L(ExitStrncatTable), %ebx, 4)
637 L(CopyFrom1To32BytesCase2OrCase3):
639 jnz L(CopyFrom1To32BytesCase2)
642 BRANCH_TO_JMPTBL_ENTRY (L(ExitStrncatTable), %ebx, 4)
645 L(CopyFrom1To16BytesTailCase2OrCase3):
647 jnz L(CopyFrom1To16BytesTailCase2)
650 BRANCH_TO_JMPTBL_ENTRY (L(ExitStrncatTable), %ebx, 4)
653 L(CopyFrom1To32Bytes1Case2OrCase3):
657 L(CopyFrom1To16BytesTail1Case2OrCase3):
659 jnz L(CopyFrom1To16BytesTail1Case2)
660 BRANCH_TO_JMPTBL_ENTRY (L(ExitStrncatTable), %ebx, 4)
/* Exit bodies (their labels are not visible here).  The visible
   instructions copy the final bytes from %esi to %eax with fixed-size
   moves: movlpd transfers 8 bytes at the given offset, movdqu
   transfers 16 bytes without requiring alignment.  Where two movdqu
   pairs are used at offsets 0 and 13, 14 or 15, the two 16-byte
   ranges overlap.  Each body contains a section guarded by
   # ifdef USE_AS_STRNCAT.  mov STR3(%esp), %eax reloads %eax from the
   stack slot STR3.
   NOTE(review): presumably STR3 holds the original destination
   pointer, which becomes the return value -- confirm against the
   definition of STR3.  */
664 # ifdef USE_AS_STRNCAT
673 # ifdef USE_AS_STRNCAT
678 # ifdef USE_AS_STRNCAT
686 # ifdef USE_AS_STRNCAT
697 # ifdef USE_AS_STRNCAT
704 # ifdef USE_AS_STRNCAT
712 # ifdef USE_AS_STRNCAT
723 # ifdef USE_AS_STRNCAT
729 # ifdef USE_AS_STRNCAT
738 # ifdef USE_AS_STRNCAT
751 # ifdef USE_AS_STRNCAT
764 # ifdef USE_AS_STRNCAT
775 # ifdef USE_AS_STRNCAT
781 # ifdef USE_AS_STRNCAT
790 # ifdef USE_AS_STRNCAT
803 # ifdef USE_AS_STRNCAT
816 # ifdef USE_AS_STRNCAT
829 # ifdef USE_AS_STRNCAT
835 movlpd 5(%esi), %xmm1
837 movlpd %xmm1, 5(%eax)
842 # ifdef USE_AS_STRNCAT
848 movlpd 6(%esi), %xmm1
850 movlpd %xmm1, 6(%eax)
855 # ifdef USE_AS_STRNCAT
861 movlpd 7(%esi), %xmm1
863 movlpd %xmm1, 7(%eax)
868 # ifdef USE_AS_STRNCAT
879 # ifdef USE_AS_STRNCAT
885 # ifdef USE_AS_STRNCAT
894 # ifdef USE_AS_STRNCAT
907 # ifdef USE_AS_STRNCAT
920 # ifdef USE_AS_STRNCAT
933 # ifdef USE_AS_STRNCAT
940 # ifdef USE_AS_STRNCAT
950 # ifdef USE_AS_STRNCAT
956 movlpd 14(%esi), %xmm3
958 movlpd %xmm3, 14(%eax)
963 # ifdef USE_AS_STRNCAT
969 movlpd 15(%esi), %xmm3
971 movlpd %xmm3, 15(%eax)
976 # ifdef USE_AS_STRNCAT
982 movlpd 16(%esi), %xmm2
984 movlpd %xmm2, 16(%eax)
989 # ifdef USE_AS_STRNCAT
995 movlpd 16(%esi), %xmm2
996 # ifdef USE_AS_STRNCAT
1000 movlpd %xmm2, 16(%eax)
1002 mov STR3(%esp), %eax
1006 # ifdef USE_AS_STRNCAT
1011 movdqu (%esi), %xmm0
1012 movlpd 16(%esi), %xmm2
1014 movdqu %xmm0, (%eax)
1015 movlpd %xmm2, 16(%eax)
1017 mov STR3(%esp), %eax
1021 # ifdef USE_AS_STRNCAT
1026 movdqu (%esi), %xmm0
1027 movlpd 16(%esi), %xmm2
1029 movdqu %xmm0, (%eax)
1030 movlpd %xmm2, 16(%eax)
1032 mov STR3(%esp), %eax
1036 # ifdef USE_AS_STRNCAT
1041 movdqu (%esi), %xmm0
1042 movlpd 16(%esi), %xmm2
1044 movdqu %xmm0, (%eax)
1045 movlpd %xmm2, 16(%eax)
1047 mov STR3(%esp), %eax
1051 # ifdef USE_AS_STRNCAT
1056 movdqu (%esi), %xmm0
1057 movdqu 13(%esi), %xmm2
1058 movdqu %xmm0, (%eax)
1059 movdqu %xmm2, 13(%eax)
1060 mov STR3(%esp), %eax
1064 # ifdef USE_AS_STRNCAT
1069 movdqu (%esi), %xmm0
1070 movdqu 14(%esi), %xmm2
1071 movdqu %xmm0, (%eax)
1072 movdqu %xmm2, 14(%eax)
1073 mov STR3(%esp), %eax
1077 # ifdef USE_AS_STRNCAT
1082 movdqu (%esi), %xmm0
1083 movdqu 15(%esi), %xmm2
1084 movdqu %xmm0, (%eax)
1085 movdqu %xmm2, 15(%eax)
1086 mov STR3(%esp), %eax
1090 # ifdef USE_AS_STRNCAT
1095 movdqu (%esi), %xmm0
1096 movdqu 16(%esi), %xmm2
1097 movdqu %xmm0, (%eax)
1098 movdqu %xmm2, 16(%eax)
1099 mov STR3(%esp), %eax
/* Leave paths of the 64-byte loop.  jnz selects
   L(Unaligned64LeaveCase2); otherwise execution continues in
   L(Unaligned64LeaveCase3).
   Case3 stores %xmm4-%xmm7 to (%eax), 16(%eax), 32(%eax) and
   48(%eax) one chunk at a time, branching to
   L(CopyFrom1To16BytesCase3) before each store, then reloads %eax
   from STR3(%esp).
   Case2 builds a byte mask for each of %xmm4-%xmm7 (pcmpeqb against
   %xmm0, then pmovmskb into %edx), stores the previous chunk, and
   branches to L(CopyFrom1To16BytesCase2OrCase3) or
   L(CopyFrom1To16Bytes).  For the last chunk it advances %eax and
   %esi by 16 plus %ecx and dispatches through L(ExitTable) (via jb to
   L(CopyFrom1To16BytesExit)) or through L(ExitStrncatTable) on %ebx.
   NOTE(review): the first branch of each path uses a signed condition
   (jl / jle) while the later ones use jb / jbe.  The instructions
   that set the flags are not visible here, so confirm the signed
   forms are intentional.  */
1105 L(UnalignedLeaveCase2OrCase3):
1107 jnz L(Unaligned64LeaveCase2)
1108 L(Unaligned64LeaveCase3):
1112 jl L(CopyFrom1To16BytesCase3)
1113 movdqu %xmm4, (%eax)
1115 jb L(CopyFrom1To16BytesCase3)
1116 movdqu %xmm5, 16(%eax)
1118 jb L(CopyFrom1To16BytesCase3)
1119 movdqu %xmm6, 32(%eax)
1121 jb L(CopyFrom1To16BytesCase3)
1122 movdqu %xmm7, 48(%eax)
1125 mov STR3(%esp), %eax
1129 L(Unaligned64LeaveCase2):
1131 pcmpeqb %xmm4, %xmm0
1132 pmovmskb %xmm0, %edx
1134 jle L(CopyFrom1To16BytesCase2OrCase3)
1136 jnz L(CopyFrom1To16Bytes)
1138 pcmpeqb %xmm5, %xmm0
1139 pmovmskb %xmm0, %edx
1140 movdqu %xmm4, (%eax)
1143 jbe L(CopyFrom1To16BytesCase2OrCase3)
1145 jnz L(CopyFrom1To16Bytes)
1147 pcmpeqb %xmm6, %xmm0
1148 pmovmskb %xmm0, %edx
1149 movdqu %xmm5, 16(%eax)
1152 jbe L(CopyFrom1To16BytesCase2OrCase3)
1154 jnz L(CopyFrom1To16Bytes)
1156 pcmpeqb %xmm7, %xmm0
1157 pmovmskb %xmm0, %edx
1158 movdqu %xmm6, 32(%eax)
1159 lea 16(%eax, %ecx), %eax
1160 lea 16(%esi, %ecx), %esi
1163 jb L(CopyFrom1To16BytesExit)
1164 BRANCH_TO_JMPTBL_ENTRY (L(ExitStrncatTable), %ebx, 4)
/* Entries of the jump table referenced as L(ExitTable): 32 entries of
   4 bytes each (.int), built with JMPTBL relative to L(ExitTable).
   The entry at index k (counting from 0) targets L(Exit<k+1>), so
   index 0 selects L(Exit1) and index 31 selects L(Exit32).  The table
   is indexed by %edx with scale 4 at the BRANCH_TO_JMPTBL_ENTRY call
   sites.  */
1175 .int JMPTBL(L(Exit1), L(ExitTable))
1176 .int JMPTBL(L(Exit2), L(ExitTable))
1177 .int JMPTBL(L(Exit3), L(ExitTable))
1178 .int JMPTBL(L(Exit4), L(ExitTable))
1179 .int JMPTBL(L(Exit5), L(ExitTable))
1180 .int JMPTBL(L(Exit6), L(ExitTable))
1181 .int JMPTBL(L(Exit7), L(ExitTable))
1182 .int JMPTBL(L(Exit8), L(ExitTable))
1183 .int JMPTBL(L(Exit9), L(ExitTable))
1184 .int JMPTBL(L(Exit10), L(ExitTable))
1185 .int JMPTBL(L(Exit11), L(ExitTable))
1186 .int JMPTBL(L(Exit12), L(ExitTable))
1187 .int JMPTBL(L(Exit13), L(ExitTable))
1188 .int JMPTBL(L(Exit14), L(ExitTable))
1189 .int JMPTBL(L(Exit15), L(ExitTable))
1190 .int JMPTBL(L(Exit16), L(ExitTable))
1191 .int JMPTBL(L(Exit17), L(ExitTable))
1192 .int JMPTBL(L(Exit18), L(ExitTable))
1193 .int JMPTBL(L(Exit19), L(ExitTable))
1194 .int JMPTBL(L(Exit20), L(ExitTable))
1195 .int JMPTBL(L(Exit21), L(ExitTable))
1196 .int JMPTBL(L(Exit22), L(ExitTable))
1197 .int JMPTBL(L(Exit23), L(ExitTable))
1198 .int JMPTBL(L(Exit24), L(ExitTable))
1199 .int JMPTBL(L(Exit25), L(ExitTable))
1200 .int JMPTBL(L(Exit26), L(ExitTable))
1201 .int JMPTBL(L(Exit27), L(ExitTable))
1202 .int JMPTBL(L(Exit28), L(ExitTable))
1203 .int JMPTBL(L(Exit29), L(ExitTable))
1204 .int JMPTBL(L(Exit30), L(ExitTable))
1205 .int JMPTBL(L(Exit31), L(ExitTable))
1206 .int JMPTBL(L(Exit32), L(ExitTable))
/* Jump table present only under USE_AS_STRNCAT: 33 entries of 4 bytes
   each (.int), built with JMPTBL relative to L(ExitStrncatTable).
   The entry at index k targets L(StrncatExit<k>), for k = 0 .. 32.
   The table is indexed by %ebx with scale 4 at the
   BRANCH_TO_JMPTBL_ENTRY call sites.  */
1207 # ifdef USE_AS_STRNCAT
1208 L(ExitStrncatTable):
1209 .int JMPTBL(L(StrncatExit0), L(ExitStrncatTable))
1210 .int JMPTBL(L(StrncatExit1), L(ExitStrncatTable))
1211 .int JMPTBL(L(StrncatExit2), L(ExitStrncatTable))
1212 .int JMPTBL(L(StrncatExit3), L(ExitStrncatTable))
1213 .int JMPTBL(L(StrncatExit4), L(ExitStrncatTable))
1214 .int JMPTBL(L(StrncatExit5), L(ExitStrncatTable))
1215 .int JMPTBL(L(StrncatExit6), L(ExitStrncatTable))
1216 .int JMPTBL(L(StrncatExit7), L(ExitStrncatTable))
1217 .int JMPTBL(L(StrncatExit8), L(ExitStrncatTable))
1218 .int JMPTBL(L(StrncatExit9), L(ExitStrncatTable))
1219 .int JMPTBL(L(StrncatExit10), L(ExitStrncatTable))
1220 .int JMPTBL(L(StrncatExit11), L(ExitStrncatTable))
1221 .int JMPTBL(L(StrncatExit12), L(ExitStrncatTable))
1222 .int JMPTBL(L(StrncatExit13), L(ExitStrncatTable))
1223 .int JMPTBL(L(StrncatExit14), L(ExitStrncatTable))
1224 .int JMPTBL(L(StrncatExit15), L(ExitStrncatTable))
1225 .int JMPTBL(L(StrncatExit16), L(ExitStrncatTable))
1226 .int JMPTBL(L(StrncatExit17), L(ExitStrncatTable))
1227 .int JMPTBL(L(StrncatExit18), L(ExitStrncatTable))
1228 .int JMPTBL(L(StrncatExit19), L(ExitStrncatTable))
1229 .int JMPTBL(L(StrncatExit20), L(ExitStrncatTable))
1230 .int JMPTBL(L(StrncatExit21), L(ExitStrncatTable))
1231 .int JMPTBL(L(StrncatExit22), L(ExitStrncatTable))
1232 .int JMPTBL(L(StrncatExit23), L(ExitStrncatTable))
1233 .int JMPTBL(L(StrncatExit24), L(ExitStrncatTable))
1234 .int JMPTBL(L(StrncatExit25), L(ExitStrncatTable))
1235 .int JMPTBL(L(StrncatExit26), L(ExitStrncatTable))
1236 .int JMPTBL(L(StrncatExit27), L(ExitStrncatTable))
1237 .int JMPTBL(L(StrncatExit28), L(ExitStrncatTable))
1238 .int JMPTBL(L(StrncatExit29), L(ExitStrncatTable))
1239 .int JMPTBL(L(StrncatExit30), L(ExitStrncatTable))
1240 .int JMPTBL(L(StrncatExit31), L(ExitStrncatTable))
1241 .int JMPTBL(L(StrncatExit32), L(ExitStrncatTable))