2 Copyright (C) 2011-2023 Free Software Foundation, Inc.
3 This file is part of the GNU C Library.
5 The GNU C Library is free software; you can redistribute it and/or
6 modify it under the terms of the GNU Lesser General Public
7 License as published by the Free Software Foundation; either
8 version 2.1 of the License, or (at your option) any later version.
10 The GNU C Library is distributed in the hope that it will be useful,
11 but WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 Lesser General Public License for more details.
15 You should have received a copy of the GNU Lesser General Public
16 License along with the GNU C Library; if not, see
17 <https://www.gnu.org/licenses/>. */
/* Unwind annotation meant to accompany a 4-byte push of REG: the CFA
   offset is adjusted by +4 and REG is recorded at relative offset 0.
   Only cfi_* annotations appear here; the push itself is issued by the
   user of this macro.  */
25 # define CFI_PUSH(REG) \
26 cfi_adjust_cfa_offset (4); \
27 cfi_rel_offset (REG, 0)
29 # define CFI_POP(REG) \
30 cfi_adjust_cfa_offset (-4); \
33 # define PUSH(REG) pushl REG; CFI_PUSH (REG)
34 # define POP(REG) popl REG; CFI_POP (REG)
37 # define JMPTBL(I, B) I - B
39 /* Load an entry in a jump table into ECX and branch to it. TABLE is a
40 jump table with relative offsets. INDEX is a register that contains the
41 index into the jump table. SCALE is the scale of INDEX. */
43 # define BRANCH_TO_JMPTBL_ENTRY(TABLE, INDEX, SCALE) \
44 /* We first load PC into ECX. */ \
46 /* Get the address of the jump table. */ \
47 addl $(TABLE - .), %ecx; \
48 /* Get the entry and convert the relative offset to the \
49 absolute address. */ \
50 addl (%ecx,INDEX,SCALE), %ecx; \
51 /* We loaded the jump table and adjusted ECX. Go. */ \
52 _CET_NOTRACK jmp *%ecx
54 # define JMPTBL(I, B) I
56 /* Branch to an entry in a jump table. TABLE is a jump table with
57 absolute offsets. INDEX is a register that contains the index into the
58 jump table. SCALE is the scale of INDEX. */
60 # define BRANCH_TO_JMPTBL_ENTRY(TABLE, INDEX, SCALE) \
61 _CET_NOTRACK jmp *TABLE(,INDEX,SCALE)
65 # define STRCAT __strcat_sse2
72 # ifdef USE_AS_STRNCAT
79 # define USE_AS_STRCAT
80 # ifdef USE_AS_STRNCAT
/* RETURN pops the saved register(s) and executes `ret'.  The CFI_PUSH
   annotations placed after `ret' are never reached on this path;
   presumably they put the unwind state back to "registers still
   pushed" for whatever code follows the expansion -- confirm against
   the CFI macro definitions.  The first form also restores %ebx, the
   second only %esi.  */
81 # define RETURN POP(%ebx); POP(%esi); ret; CFI_PUSH(%ebx); CFI_PUSH(%esi);
83 # define RETURN POP(%esi); ret; CFI_PUSH(%esi);
91 # ifdef USE_AS_STRNCAT
107 ja L(alignment_prolog)
115 movdqu 16(%esi), %xmm6
132 pcmpeqb (%eax), %xmm0
134 movdqu 16(%esi), %xmm6
150 pcmpeqb 16(%eax), %xmm0
155 pcmpeqb 32(%eax), %xmm1
160 pcmpeqb 48(%eax), %xmm2
165 pcmpeqb 64(%eax), %xmm3
172 jmp L(StartStrcpyPart)
177 lea 16(%eax, %edx), %eax
178 jmp L(StartStrcpyPart)
183 lea 32(%eax, %edx), %eax
184 jmp L(StartStrcpyPart)
189 lea 48(%eax, %edx), %eax
190 jmp L(StartStrcpyPart)
197 jmp L(StartStrcpyPart)
207 # ifdef USE_AS_STRNCAT
209 jbe L(CopyFrom1To16BytesTail1Case2OrCase3)
212 jnz L(CopyFrom1To16BytesTail1)
216 # ifdef USE_AS_STRNCAT
218 jbe L(CopyFrom1To32Bytes1Case2OrCase3)
221 jnz L(CopyFrom1To32Bytes1)
227 # ifdef USE_AS_STRNCAT
240 pcmpeqb (%eax), %xmm0
254 pcmpeqb 16(%eax), %xmm0
259 pcmpeqb 32(%eax), %xmm1
264 pcmpeqb 48(%eax), %xmm2
269 pcmpeqb 64(%eax), %xmm3
276 jmp L(StartStrcpyPart_1)
281 lea 16(%eax, %edx), %eax
282 jmp L(StartStrcpyPart_1)
287 lea 32(%eax, %edx), %eax
288 jmp L(StartStrcpyPart_1)
293 lea 48(%eax, %edx), %eax
294 jmp L(StartStrcpyPart_1)
303 L(StartStrcpyPart_1):
310 # ifdef USE_AS_STRNCAT
314 pcmpeqb (%esi), %xmm1
315 # ifdef USE_AS_STRNCAT
320 # ifdef USE_AS_STRNCAT
322 jbe L(CopyFrom1To16BytesTailCase2OrCase3)
325 jnz L(CopyFrom1To16BytesTail)
327 pcmpeqb 16(%esi), %xmm0
329 # ifdef USE_AS_STRNCAT
331 jbe L(CopyFrom1To32BytesCase2OrCase3)
334 jnz L(CopyFrom1To32Bytes)
336 movdqu (%esi, %ecx), %xmm1 /* copy 16 bytes */
343 movdqa (%esi, %ecx), %xmm1
344 movaps 16(%esi, %ecx), %xmm2
345 movdqu %xmm1, (%eax, %ecx)
349 # ifdef USE_AS_STRNCAT
351 jbe L(CopyFrom1To16BytesCase2OrCase3)
354 jnz L(CopyFrom1To16Bytes)
355 L(Unalign16BothBigN):
356 movaps 16(%esi, %ecx), %xmm3
357 movdqu %xmm2, (%eax, %ecx)
361 # ifdef USE_AS_STRNCAT
363 jbe L(CopyFrom1To16BytesCase2OrCase3)
366 jnz L(CopyFrom1To16Bytes)
368 movaps 16(%esi, %ecx), %xmm4
369 movdqu %xmm3, (%eax, %ecx)
373 # ifdef USE_AS_STRNCAT
375 jbe L(CopyFrom1To16BytesCase2OrCase3)
378 jnz L(CopyFrom1To16Bytes)
380 movaps 16(%esi, %ecx), %xmm1
381 movdqu %xmm4, (%eax, %ecx)
385 # ifdef USE_AS_STRNCAT
387 jbe L(CopyFrom1To16BytesCase2OrCase3)
390 jnz L(CopyFrom1To16Bytes)
392 movaps 16(%esi, %ecx), %xmm2
393 movdqu %xmm1, (%eax, %ecx)
397 # ifdef USE_AS_STRNCAT
399 jbe L(CopyFrom1To16BytesCase2OrCase3)
402 jnz L(CopyFrom1To16Bytes)
404 movaps 16(%esi, %ecx), %xmm3
405 movdqu %xmm2, (%eax, %ecx)
409 # ifdef USE_AS_STRNCAT
411 jbe L(CopyFrom1To16BytesCase2OrCase3)
414 jnz L(CopyFrom1To16Bytes)
416 movdqu %xmm3, (%eax, %ecx)
418 lea 16(%esi, %ecx), %esi
422 # ifdef USE_AS_STRNCAT
423 lea 128(%ebx, %edx), %ebx
427 movaps 16(%esi), %xmm5
428 movaps 32(%esi), %xmm3
430 movaps 48(%esi), %xmm7
436 # ifdef USE_AS_STRNCAT
438 jbe L(UnalignedLeaveCase2OrCase3)
441 jnz L(Unaligned64Leave)
444 L(Unaligned64Loop_start):
447 movdqu %xmm4, -64(%eax)
450 movdqu %xmm5, -48(%eax)
451 movaps 16(%esi), %xmm5
453 movaps 32(%esi), %xmm3
454 movdqu %xmm6, -32(%eax)
456 movdqu %xmm7, -16(%eax)
457 movaps 48(%esi), %xmm7
462 # ifdef USE_AS_STRNCAT
464 jbe L(UnalignedLeaveCase2OrCase3)
467 jz L(Unaligned64Loop_start)
477 jnz L(CopyFrom1To16BytesUnaligned_0)
479 jnz L(CopyFrom1To16BytesUnaligned_16)
486 jnz L(CopyFrom1To16BytesUnaligned_32)
490 movdqu %xmm5, 16(%eax)
491 movdqu %xmm6, 32(%eax)
494 BRANCH_TO_JMPTBL_ENTRY (L(ExitTable), %edx, 4)
496 # ifdef USE_AS_STRNCAT
499 pcmpeqb (%esi), %xmm1
503 jnz L(CopyFrom1To16BytesTail)
505 pcmpeqb 16(%esi), %xmm0
508 jnz L(CopyFrom1To32Bytes)
510 movdqu (%esi, %ecx), %xmm1 /* copy 16 bytes */
517 movdqa (%esi, %ecx), %xmm1
518 movaps 16(%esi, %ecx), %xmm2
519 movdqu %xmm1, (%eax, %ecx)
524 jnz L(CopyFrom1To16Bytes)
525 jmp L(Unalign16BothBigN)
528 /*------------end of main part-------------------------------*/
532 L(CopyFrom1To16Bytes):
536 BRANCH_TO_JMPTBL_ENTRY (L(ExitTable), %edx, 4)
539 L(CopyFrom1To16BytesTail):
542 BRANCH_TO_JMPTBL_ENTRY (L(ExitTable), %edx, 4)
545 L(CopyFrom1To32Bytes1):
548 L(CopyFrom1To16BytesTail1):
550 BRANCH_TO_JMPTBL_ENTRY (L(ExitTable), %edx, 4)
553 L(CopyFrom1To32Bytes):
558 BRANCH_TO_JMPTBL_ENTRY (L(ExitTable), %edx, 4)
561 L(CopyFrom1To16BytesUnaligned_0):
563 BRANCH_TO_JMPTBL_ENTRY (L(ExitTable), %edx, 4)
566 L(CopyFrom1To16BytesUnaligned_16):
571 BRANCH_TO_JMPTBL_ENTRY (L(ExitTable), %edx, 4)
574 L(CopyFrom1To16BytesUnaligned_32):
577 movdqu %xmm5, 16(%eax)
580 BRANCH_TO_JMPTBL_ENTRY (L(ExitTable), %edx, 4)
582 # ifdef USE_AS_STRNCAT
585 L(CopyFrom1To16BytesExit):
586 BRANCH_TO_JMPTBL_ENTRY (L(ExitTable), %edx, 4)
591 L(CopyFrom1To16BytesCase2):
597 jb L(CopyFrom1To16BytesExit)
598 BRANCH_TO_JMPTBL_ENTRY (L(ExitStrncatTable), %ebx, 4)
601 L(CopyFrom1To32BytesCase2):
608 jb L(CopyFrom1To16BytesExit)
609 BRANCH_TO_JMPTBL_ENTRY (L(ExitStrncatTable), %ebx, 4)
611 L(CopyFrom1To16BytesTailCase2):
616 jb L(CopyFrom1To16BytesExit)
617 BRANCH_TO_JMPTBL_ENTRY (L(ExitStrncatTable), %ebx, 4)
619 L(CopyFrom1To16BytesTail1Case2):
622 jb L(CopyFrom1To16BytesExit)
623 BRANCH_TO_JMPTBL_ENTRY (L(ExitStrncatTable), %ebx, 4)
625 /* Case2 or Case3, Case3 */
628 L(CopyFrom1To16BytesCase2OrCase3):
630 jnz L(CopyFrom1To16BytesCase2)
631 L(CopyFrom1To16BytesCase3):
635 BRANCH_TO_JMPTBL_ENTRY (L(ExitStrncatTable), %ebx, 4)
638 L(CopyFrom1To32BytesCase2OrCase3):
640 jnz L(CopyFrom1To32BytesCase2)
643 BRANCH_TO_JMPTBL_ENTRY (L(ExitStrncatTable), %ebx, 4)
646 L(CopyFrom1To16BytesTailCase2OrCase3):
648 jnz L(CopyFrom1To16BytesTailCase2)
651 BRANCH_TO_JMPTBL_ENTRY (L(ExitStrncatTable), %ebx, 4)
654 L(CopyFrom1To32Bytes1Case2OrCase3):
658 L(CopyFrom1To16BytesTail1Case2OrCase3):
660 jnz L(CopyFrom1To16BytesTail1Case2)
661 BRANCH_TO_JMPTBL_ENTRY (L(ExitStrncatTable), %ebx, 4)
665 # ifdef USE_AS_STRNCAT
674 # ifdef USE_AS_STRNCAT
679 # ifdef USE_AS_STRNCAT
687 # ifdef USE_AS_STRNCAT
698 # ifdef USE_AS_STRNCAT
705 # ifdef USE_AS_STRNCAT
713 # ifdef USE_AS_STRNCAT
724 # ifdef USE_AS_STRNCAT
730 # ifdef USE_AS_STRNCAT
739 # ifdef USE_AS_STRNCAT
752 # ifdef USE_AS_STRNCAT
765 # ifdef USE_AS_STRNCAT
776 # ifdef USE_AS_STRNCAT
782 # ifdef USE_AS_STRNCAT
791 # ifdef USE_AS_STRNCAT
804 # ifdef USE_AS_STRNCAT
817 # ifdef USE_AS_STRNCAT
830 # ifdef USE_AS_STRNCAT
836 movlpd 5(%esi), %xmm1
838 movlpd %xmm1, 5(%eax)
843 # ifdef USE_AS_STRNCAT
849 movlpd 6(%esi), %xmm1
851 movlpd %xmm1, 6(%eax)
856 # ifdef USE_AS_STRNCAT
862 movlpd 7(%esi), %xmm1
864 movlpd %xmm1, 7(%eax)
869 # ifdef USE_AS_STRNCAT
880 # ifdef USE_AS_STRNCAT
886 # ifdef USE_AS_STRNCAT
895 # ifdef USE_AS_STRNCAT
908 # ifdef USE_AS_STRNCAT
921 # ifdef USE_AS_STRNCAT
934 # ifdef USE_AS_STRNCAT
941 # ifdef USE_AS_STRNCAT
951 # ifdef USE_AS_STRNCAT
957 movlpd 14(%esi), %xmm3
959 movlpd %xmm3, 14(%eax)
964 # ifdef USE_AS_STRNCAT
970 movlpd 15(%esi), %xmm3
972 movlpd %xmm3, 15(%eax)
977 # ifdef USE_AS_STRNCAT
983 movlpd 16(%esi), %xmm2
985 movlpd %xmm2, 16(%eax)
990 # ifdef USE_AS_STRNCAT
996 movlpd 16(%esi), %xmm2
997 # ifdef USE_AS_STRNCAT
1000 movdqu %xmm0, (%eax)
1001 movlpd %xmm2, 16(%eax)
1003 mov STR3(%esp), %eax
1007 # ifdef USE_AS_STRNCAT
1012 movdqu (%esi), %xmm0
1013 movlpd 16(%esi), %xmm2
1015 movdqu %xmm0, (%eax)
1016 movlpd %xmm2, 16(%eax)
1018 mov STR3(%esp), %eax
1022 # ifdef USE_AS_STRNCAT
1027 movdqu (%esi), %xmm0
1028 movlpd 16(%esi), %xmm2
1030 movdqu %xmm0, (%eax)
1031 movlpd %xmm2, 16(%eax)
1033 mov STR3(%esp), %eax
1037 # ifdef USE_AS_STRNCAT
1042 movdqu (%esi), %xmm0
1043 movlpd 16(%esi), %xmm2
1045 movdqu %xmm0, (%eax)
1046 movlpd %xmm2, 16(%eax)
1048 mov STR3(%esp), %eax
1052 # ifdef USE_AS_STRNCAT
1057 movdqu (%esi), %xmm0
1058 movdqu 13(%esi), %xmm2
1059 movdqu %xmm0, (%eax)
1060 movdqu %xmm2, 13(%eax)
1061 mov STR3(%esp), %eax
1065 # ifdef USE_AS_STRNCAT
1070 movdqu (%esi), %xmm0
1071 movdqu 14(%esi), %xmm2
1072 movdqu %xmm0, (%eax)
1073 movdqu %xmm2, 14(%eax)
1074 mov STR3(%esp), %eax
1078 # ifdef USE_AS_STRNCAT
1083 movdqu (%esi), %xmm0
1084 movdqu 15(%esi), %xmm2
1085 movdqu %xmm0, (%eax)
1086 movdqu %xmm2, 15(%eax)
1087 mov STR3(%esp), %eax
1091 # ifdef USE_AS_STRNCAT
1096 movdqu (%esi), %xmm0
1097 movdqu 16(%esi), %xmm2
1098 movdqu %xmm0, (%eax)
1099 movdqu %xmm2, 16(%eax)
1100 mov STR3(%esp), %eax
1103 # ifdef USE_AS_STRNCAT
/* Leave path for the 64-byte loop.  A non-zero result (jnz) diverts to
   L(Unaligned64LeaveCase2); otherwise control falls into
   L(Unaligned64LeaveCase3).  */
1106 L(UnalignedLeaveCase2OrCase3):
1108 jnz L(Unaligned64LeaveCase2)
/* Case 3: store %xmm4, %xmm5, %xmm6 and %xmm7 to (%eax), 16(%eax),
   32(%eax) and 48(%eax) in that order.  Each store is preceded by a
   conditional branch to L(CopyFrom1To16BytesCase3), so only as many
   16-byte blocks are written as the preceding checks allow.
   NOTE(review): the first check is the signed `jl' while the later
   ones are the unsigned `jb'; presumably the first follows an addition
   to a count that may already be negative -- confirm against the
   flag-setting instructions.  */
1109 L(Unaligned64LeaveCase3):
1113 jl L(CopyFrom1To16BytesCase3)
1114 movdqu %xmm4, (%eax)
1116 jb L(CopyFrom1To16BytesCase3)
1117 movdqu %xmm5, 16(%eax)
1119 jb L(CopyFrom1To16BytesCase3)
1120 movdqu %xmm6, 32(%eax)
1122 jb L(CopyFrom1To16BytesCase3)
1123 movdqu %xmm7, 48(%eax)
/* Load STR3(%esp) into %eax -- presumably the value to return; confirm
   against the definition of STR3.  */
1126 mov STR3(%esp), %eax
/* Case 2: walk the four 16-byte blocks held in %xmm4..%xmm7.  For each
   block, pcmpeqb compares it bytewise against %xmm0 and pmovmskb
   gathers the per-byte result into %edx.  %xmm0 is presumably all-zero
   on entry, so the mask marks NUL bytes -- confirm at the callers.
   Since pcmpeqb writes its result into %xmm0, a compare that matched
   nothing leaves %xmm0 all-zero again, which is what allows it to be
   reused for the next block without being cleared.

   A block is stored to the destination only after the compare for the
   next block has been issued: %xmm4 -> (%eax), %xmm5 -> 16(%eax),
   %xmm6 -> 32(%eax).  After each compare the code can leave through
   L(CopyFrom1To16BytesCase2OrCase3) or L(CopyFrom1To16Bytes).
   NOTE(review): the first length branch is the signed `jle', the later
   ones the unsigned `jbe'/`jb'; presumably intentional -- confirm
   against the flag-setting instructions.  */
1130 L(Unaligned64LeaveCase2):
1132 pcmpeqb %xmm4, %xmm0
1133 pmovmskb %xmm0, %edx
1135 jle L(CopyFrom1To16BytesCase2OrCase3)
1137 jnz L(CopyFrom1To16Bytes)
1139 pcmpeqb %xmm5, %xmm0
1140 pmovmskb %xmm0, %edx
1141 movdqu %xmm4, (%eax)
1144 jbe L(CopyFrom1To16BytesCase2OrCase3)
1146 jnz L(CopyFrom1To16Bytes)
1148 pcmpeqb %xmm6, %xmm0
1149 pmovmskb %xmm0, %edx
1150 movdqu %xmm5, 16(%eax)
1153 jbe L(CopyFrom1To16BytesCase2OrCase3)
1155 jnz L(CopyFrom1To16Bytes)
1157 pcmpeqb %xmm7, %xmm0
1158 pmovmskb %xmm0, %edx
1159 movdqu %xmm6, 32(%eax)
/* Advance both the destination (%eax) and the source (%esi) by
   16 + %ecx before handling the last block.  */
1160 lea 16(%eax, %ecx), %eax
1161 lea 16(%esi, %ecx), %esi
/* Last block: leave through L(CopyFrom1To16BytesExit), or dispatch
   through L(ExitStrncatTable) using %ebx as the index.  */
1164 jb L(CopyFrom1To16BytesExit)
1165 BRANCH_TO_JMPTBL_ENTRY (L(ExitStrncatTable), %ebx, 4)
/* Dispatch slots of L(ExitTable): slot i (counting from 0) targets
   L(Exit<i+1>), covering L(Exit1) through L(Exit32).  Each slot is one
   .int, matching the scale of 4 passed by the
   BRANCH_TO_JMPTBL_ENTRY (L(ExitTable), %edx, 4) call sites.  JMPTBL
   decides whether a slot holds an offset relative to the table or the
   target itself.  */
1176 .int JMPTBL(L(Exit1), L(ExitTable))
1177 .int JMPTBL(L(Exit2), L(ExitTable))
1178 .int JMPTBL(L(Exit3), L(ExitTable))
1179 .int JMPTBL(L(Exit4), L(ExitTable))
1180 .int JMPTBL(L(Exit5), L(ExitTable))
1181 .int JMPTBL(L(Exit6), L(ExitTable))
1182 .int JMPTBL(L(Exit7), L(ExitTable))
1183 .int JMPTBL(L(Exit8), L(ExitTable))
1184 .int JMPTBL(L(Exit9), L(ExitTable))
1185 .int JMPTBL(L(Exit10), L(ExitTable))
1186 .int JMPTBL(L(Exit11), L(ExitTable))
1187 .int JMPTBL(L(Exit12), L(ExitTable))
1188 .int JMPTBL(L(Exit13), L(ExitTable))
1189 .int JMPTBL(L(Exit14), L(ExitTable))
1190 .int JMPTBL(L(Exit15), L(ExitTable))
1191 .int JMPTBL(L(Exit16), L(ExitTable))
1192 .int JMPTBL(L(Exit17), L(ExitTable))
1193 .int JMPTBL(L(Exit18), L(ExitTable))
1194 .int JMPTBL(L(Exit19), L(ExitTable))
1195 .int JMPTBL(L(Exit20), L(ExitTable))
1196 .int JMPTBL(L(Exit21), L(ExitTable))
1197 .int JMPTBL(L(Exit22), L(ExitTable))
1198 .int JMPTBL(L(Exit23), L(ExitTable))
1199 .int JMPTBL(L(Exit24), L(ExitTable))
1200 .int JMPTBL(L(Exit25), L(ExitTable))
1201 .int JMPTBL(L(Exit26), L(ExitTable))
1202 .int JMPTBL(L(Exit27), L(ExitTable))
1203 .int JMPTBL(L(Exit28), L(ExitTable))
1204 .int JMPTBL(L(Exit29), L(ExitTable))
1205 .int JMPTBL(L(Exit30), L(ExitTable))
1206 .int JMPTBL(L(Exit31), L(ExitTable))
1207 .int JMPTBL(L(Exit32), L(ExitTable))
1208 # ifdef USE_AS_STRNCAT
/* Dispatch table reached through
   BRANCH_TO_JMPTBL_ENTRY (L(ExitStrncatTable), %ebx, 4): slot i targets
   L(StrncatExit<i>) for i = 0 through 32, i.e. 33 slots of one .int
   each.  Unlike L(ExitTable), this table has a slot for index 0.  */
1209 L(ExitStrncatTable):
1210 .int JMPTBL(L(StrncatExit0), L(ExitStrncatTable))
1211 .int JMPTBL(L(StrncatExit1), L(ExitStrncatTable))
1212 .int JMPTBL(L(StrncatExit2), L(ExitStrncatTable))
1213 .int JMPTBL(L(StrncatExit3), L(ExitStrncatTable))
1214 .int JMPTBL(L(StrncatExit4), L(ExitStrncatTable))
1215 .int JMPTBL(L(StrncatExit5), L(ExitStrncatTable))
1216 .int JMPTBL(L(StrncatExit6), L(ExitStrncatTable))
1217 .int JMPTBL(L(StrncatExit7), L(ExitStrncatTable))
1218 .int JMPTBL(L(StrncatExit8), L(ExitStrncatTable))
1219 .int JMPTBL(L(StrncatExit9), L(ExitStrncatTable))
1220 .int JMPTBL(L(StrncatExit10), L(ExitStrncatTable))
1221 .int JMPTBL(L(StrncatExit11), L(ExitStrncatTable))
1222 .int JMPTBL(L(StrncatExit12), L(ExitStrncatTable))
1223 .int JMPTBL(L(StrncatExit13), L(ExitStrncatTable))
1224 .int JMPTBL(L(StrncatExit14), L(ExitStrncatTable))
1225 .int JMPTBL(L(StrncatExit15), L(ExitStrncatTable))
1226 .int JMPTBL(L(StrncatExit16), L(ExitStrncatTable))
1227 .int JMPTBL(L(StrncatExit17), L(ExitStrncatTable))
1228 .int JMPTBL(L(StrncatExit18), L(ExitStrncatTable))
1229 .int JMPTBL(L(StrncatExit19), L(ExitStrncatTable))
1230 .int JMPTBL(L(StrncatExit20), L(ExitStrncatTable))
1231 .int JMPTBL(L(StrncatExit21), L(ExitStrncatTable))
1232 .int JMPTBL(L(StrncatExit22), L(ExitStrncatTable))
1233 .int JMPTBL(L(StrncatExit23), L(ExitStrncatTable))
1234 .int JMPTBL(L(StrncatExit24), L(ExitStrncatTable))
1235 .int JMPTBL(L(StrncatExit25), L(ExitStrncatTable))
1236 .int JMPTBL(L(StrncatExit26), L(ExitStrncatTable))
1237 .int JMPTBL(L(StrncatExit27), L(ExitStrncatTable))
1238 .int JMPTBL(L(StrncatExit28), L(ExitStrncatTable))
1239 .int JMPTBL(L(StrncatExit29), L(ExitStrncatTable))
1240 .int JMPTBL(L(StrncatExit30), L(ExitStrncatTable))
1241 .int JMPTBL(L(StrncatExit31), L(ExitStrncatTable))
1242 .int JMPTBL(L(StrncatExit32), L(ExitStrncatTable))