1 /* strcpy with SSE2 and unaligned load
2 Copyright (C) 2011-2024 Free Software Foundation, Inc.
3 This file is part of the GNU C Library.
5 The GNU C Library is free software; you can redistribute it and/or
6 modify it under the terms of the GNU Lesser General Public
7 License as published by the Free Software Foundation; either
8 version 2.1 of the License, or (at your option) any later version.
10 The GNU C Library is distributed in the hope that it will be useful,
11 but WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 Lesser General Public License for more details.
15 You should have received a copy of the GNU Lesser General Public
16 License along with the GNU C Library; if not, see
17 <https://www.gnu.org/licenses/>. */
/* Unwind annotation to pair with a 4-byte push of REG: grow the CFA
   offset by 4 and record REG as saved at offset 0 from the current
   stack pointer.  */
25 # define CFI_PUSH(REG) \
26 cfi_adjust_cfa_offset (4); \
27 cfi_rel_offset (REG, 0)
29 # define CFI_POP(REG) \
30 cfi_adjust_cfa_offset (-4); \
33 # define PUSH(REG) pushl REG; CFI_PUSH (REG)
34 # define POP(REG) popl REG; CFI_POP (REG)
37 # define STRCPY __strcpy_sse2
44 # ifdef USE_AS_STRNCPY
46 # define ENTRANCE PUSH(%ebx); PUSH(%esi); PUSH(%edi)
47 # define RETURN POP(%edi); POP(%esi); POP(%ebx); ret; \
48 CFI_PUSH(%ebx); CFI_PUSH(%esi); CFI_PUSH(%edi);
51 # define JMPTBL(I, B) I - B
53 /* Load an entry in a jump table into ECX and branch to it. TABLE is a
54 jump table with relative offsets.
55 INDEX is a register that contains the index into the jump table.
56 SCALE is the scale of INDEX. */
58 # define BRANCH_TO_JMPTBL_ENTRY(TABLE, INDEX, SCALE) \
59 /* We first load PC into ECX. */ \
61 /* Get the address of the jump table. */ \
62 addl $(TABLE - .), %ecx; \
63 /* Get the entry and convert the relative offset to the \
64 absolute address. */ \
65 addl (%ecx,INDEX,SCALE), %ecx; \
66 /* We loaded the jump table and adjusted ECX. Go. */ \
69 # define JMPTBL(I, B) I
71 /* Branch to an entry in a jump table. TABLE is a jump table with
72 absolute offsets. INDEX is a register that contains the index into the
73 jump table. SCALE is the scale of INDEX. */
75 # define BRANCH_TO_JMPTBL_ENTRY(TABLE, INDEX, SCALE) \
76 jmp *TABLE(,INDEX,SCALE)
89 # ifndef USE_AS_STPCPY
90 mov %edi, %eax /* save result */
93 jz L(SourceStringAlignmentZero)
103 # ifdef USE_AS_STPCPY
105 jbe L(CopyFrom1To16BytesTailCase2OrCase3)
108 jbe L(CopyFrom1To16BytesTailCase2OrCase3)
111 jnz L(CopyFrom1To16BytesTail)
113 pcmpeqb 16(%esi), %xmm0
115 # ifdef USE_AS_STPCPY
117 jbe L(CopyFrom1To32BytesCase2OrCase3)
120 jbe L(CopyFrom1To32BytesCase2OrCase3)
123 jnz L(CopyFrom1To32Bytes)
125 movdqu (%esi, %ecx), %xmm1 /* copy 16 bytes */
130 /* If source address alignment != destination address alignment */
134 movdqa (%esi, %ecx), %xmm1
135 movaps 16(%esi, %ecx), %xmm2
136 movdqu %xmm1, (%edi, %ecx)
141 jbe L(CopyFrom1To16BytesCase2OrCase3)
143 jnz L(CopyFrom1To16BytesUnalignedXmm2)
145 movaps 16(%esi, %ecx), %xmm3
146 movdqu %xmm2, (%edi, %ecx)
151 jbe L(CopyFrom1To16BytesCase2OrCase3)
153 jnz L(CopyFrom1To16BytesUnalignedXmm3)
155 movaps 16(%esi, %ecx), %xmm4
156 movdqu %xmm3, (%edi, %ecx)
161 jbe L(CopyFrom1To16BytesCase2OrCase3)
163 jnz L(CopyFrom1To16BytesUnalignedXmm4)
165 movaps 16(%esi, %ecx), %xmm1
166 movdqu %xmm4, (%edi, %ecx)
171 jbe L(CopyFrom1To16BytesCase2OrCase3)
173 jnz L(CopyFrom1To16BytesUnalignedXmm1)
175 movaps 16(%esi, %ecx), %xmm2
176 movdqu %xmm1, (%edi, %ecx)
181 jbe L(CopyFrom1To16BytesCase2OrCase3)
183 jnz L(CopyFrom1To16BytesUnalignedXmm2)
185 movaps 16(%esi, %ecx), %xmm3
186 movdqu %xmm2, (%edi, %ecx)
191 jbe L(CopyFrom1To16BytesCase2OrCase3)
193 jnz L(CopyFrom1To16BytesUnalignedXmm3)
195 movdqu %xmm3, (%edi, %ecx)
197 lea 16(%esi, %ecx), %esi
201 lea 128(%ebx, %edx), %ebx
206 movaps 16(%esi), %xmm5
207 movaps 32(%esi), %xmm3
209 movaps 48(%esi), %xmm7
216 jbe L(UnalignedLeaveCase2OrCase3)
218 jnz L(Unaligned64Leave)
219 L(Unaligned64Loop_start):
222 movdqu %xmm4, -64(%edi)
225 movdqu %xmm5, -48(%edi)
226 movaps 16(%esi), %xmm5
228 movaps 32(%esi), %xmm3
229 movdqu %xmm6, -32(%edi)
231 movdqu %xmm7, -16(%edi)
232 movaps 48(%esi), %xmm7
238 jbe L(UnalignedLeaveCase2OrCase3)
240 jz L(Unaligned64Loop_start)
249 jnz L(CopyFrom1To16BytesUnaligned_0)
251 jnz L(CopyFrom1To16BytesUnaligned_16)
258 jnz L(CopyFrom1To16BytesUnaligned_32)
262 movdqu %xmm5, 16(%edi)
263 movdqu %xmm6, 32(%edi)
264 # ifdef USE_AS_STPCPY
265 lea 48(%edi, %edx), %eax
267 movdqu %xmm7, 48(%edi)
270 lea 49(%edi, %edx), %edi
271 jmp L(StrncpyFillTailWithZero)
273 /* If source address alignment == destination address alignment */
275 L(SourceStringAlignmentZero):
280 # ifdef USE_AS_STPCPY
282 jbe L(CopyFrom1To16BytesTail1Case2OrCase3)
285 jbe L(CopyFrom1To16BytesTail1Case2OrCase3)
288 jnz L(CopyFrom1To16BytesTail1)
290 pcmpeqb 16(%esi), %xmm0
293 # ifdef USE_AS_STPCPY
295 jbe L(CopyFrom1To32Bytes1Case2OrCase3)
298 jbe L(CopyFrom1To32Bytes1Case2OrCase3)
301 jnz L(CopyFrom1To32Bytes1)
305 /*-----------------End of main part---------------------------*/
309 L(CopyFrom1To16BytesTail):
313 BRANCH_TO_JMPTBL_ENTRY (L(ExitTable), %edx, 4)
316 L(CopyFrom1To32Bytes1):
320 L(CopyFrom1To16BytesTail1):
322 BRANCH_TO_JMPTBL_ENTRY (L(ExitTable), %edx, 4)
325 L(CopyFrom1To32Bytes):
331 BRANCH_TO_JMPTBL_ENTRY (L(ExitTable), %edx, 4)
334 L(CopyFrom1To16BytesUnaligned_0):
336 # ifdef USE_AS_STPCPY
337 lea (%edi, %edx), %eax
342 lea 1(%edi, %edx), %edi
343 jmp L(StrncpyFillTailWithZero)
346 L(CopyFrom1To16BytesUnaligned_16):
349 # ifdef USE_AS_STPCPY
350 lea 16(%edi, %edx), %eax
352 movdqu %xmm5, 16(%edi)
355 lea 17(%edi, %edx), %edi
356 jmp L(StrncpyFillTailWithZero)
359 L(CopyFrom1To16BytesUnaligned_32):
362 movdqu %xmm5, 16(%edi)
363 # ifdef USE_AS_STPCPY
364 lea 32(%edi, %edx), %eax
366 movdqu %xmm6, 32(%edi)
369 lea 33(%edi, %edx), %edi
370 jmp L(StrncpyFillTailWithZero)
/* Exit stubs for the unaligned copy paths.  Each stub stores the 16
   bytes held in one XMM register to (%edi, %ecx) with an unaligned
   store and then jumps to the shared L(CopyFrom1To16BytesXmmExit)
   path.  The stubs differ only in which XMM register they store.  */
373 L(CopyFrom1To16BytesUnalignedXmm6):
374 movdqu %xmm6, (%edi, %ecx)
375 jmp L(CopyFrom1To16BytesXmmExit)
/* Same as above, storing %xmm5.  */
378 L(CopyFrom1To16BytesUnalignedXmm5):
379 movdqu %xmm5, (%edi, %ecx)
380 jmp L(CopyFrom1To16BytesXmmExit)
/* Same as above, storing %xmm4.  */
383 L(CopyFrom1To16BytesUnalignedXmm4):
384 movdqu %xmm4, (%edi, %ecx)
385 jmp L(CopyFrom1To16BytesXmmExit)
/* Same as above, storing %xmm3.  */
388 L(CopyFrom1To16BytesUnalignedXmm3):
389 movdqu %xmm3, (%edi, %ecx)
390 jmp L(CopyFrom1To16BytesXmmExit)
/* Same as above, storing %xmm1.  */
393 L(CopyFrom1To16BytesUnalignedXmm1):
394 movdqu %xmm1, (%edi, %ecx)
395 jmp L(CopyFrom1To16BytesXmmExit)
/* Dispatch through L(ExitTable), using %edx as the table index and 4
   as the index scale (the table entries are .int values).  */
398 L(CopyFrom1To16BytesExit):
399 BRANCH_TO_JMPTBL_ENTRY (L(ExitTable), %edx, 4)
404 L(CopyFrom1To16BytesCase2):
410 jb L(CopyFrom1To16BytesExit)
411 BRANCH_TO_JMPTBL_ENTRY (L(ExitStrncpyTable), %ebx, 4)
414 L(CopyFrom1To32BytesCase2):
421 jb L(CopyFrom1To16BytesExit)
422 BRANCH_TO_JMPTBL_ENTRY (L(ExitStrncpyTable), %ebx, 4)
424 L(CopyFrom1To16BytesTailCase2):
429 jb L(CopyFrom1To16BytesExit)
430 BRANCH_TO_JMPTBL_ENTRY (L(ExitStrncpyTable), %ebx, 4)
432 L(CopyFrom1To16BytesTail1Case2):
435 jb L(CopyFrom1To16BytesExit)
436 BRANCH_TO_JMPTBL_ENTRY (L(ExitStrncpyTable), %ebx, 4)
438 /* Case2 or Case3, Case3 */
441 L(CopyFrom1To16BytesCase2OrCase3):
443 jnz L(CopyFrom1To16BytesCase2)
444 L(CopyFrom1To16BytesCase3):
448 BRANCH_TO_JMPTBL_ENTRY (L(ExitStrncpyTable), %ebx, 4)
451 L(CopyFrom1To32BytesCase2OrCase3):
453 jnz L(CopyFrom1To32BytesCase2)
456 BRANCH_TO_JMPTBL_ENTRY (L(ExitStrncpyTable), %ebx, 4)
459 L(CopyFrom1To16BytesTailCase2OrCase3):
461 jnz L(CopyFrom1To16BytesTailCase2)
464 BRANCH_TO_JMPTBL_ENTRY (L(ExitStrncpyTable), %ebx, 4)
467 L(CopyFrom1To32Bytes1Case2OrCase3):
471 L(CopyFrom1To16BytesTail1Case2OrCase3):
473 jnz L(CopyFrom1To16BytesTail1Case2)
474 BRANCH_TO_JMPTBL_ENTRY (L(ExitStrncpyTable), %ebx, 4)
478 # ifdef USE_AS_STPCPY
486 # ifdef USE_AS_STPCPY
491 jnz L(StrncpyFillTailWithZero)
498 # ifdef USE_AS_STPCPY
503 jnz L(StrncpyFillTailWithZero)
511 # ifdef USE_AS_STPCPY
516 jnz L(StrncpyFillTailWithZero)
523 # ifdef USE_AS_STPCPY
528 jnz L(StrncpyFillTailWithZero)
536 # ifdef USE_AS_STPCPY
541 jnz L(StrncpyFillTailWithZero)
550 # ifdef USE_AS_STPCPY
555 jnz L(StrncpyFillTailWithZero)
564 # ifdef USE_AS_STPCPY
569 jnz L(StrncpyFillTailWithZero)
576 # ifdef USE_AS_STPCPY
581 jnz L(StrncpyFillTailWithZero)
589 # ifdef USE_AS_STPCPY
594 jnz L(StrncpyFillTailWithZero)
603 # ifdef USE_AS_STPCPY
608 jnz L(StrncpyFillTailWithZero)
617 # ifdef USE_AS_STPCPY
622 jnz L(StrncpyFillTailWithZero)
631 # ifdef USE_AS_STPCPY
636 jnz L(StrncpyFillTailWithZero)
642 movlpd 5(%esi), %xmm1
644 movlpd %xmm1, 5(%edi)
645 # ifdef USE_AS_STPCPY
650 jnz L(StrncpyFillTailWithZero)
656 movlpd 6(%esi), %xmm1
658 movlpd %xmm1, 6(%edi)
659 # ifdef USE_AS_STPCPY
664 jnz L(StrncpyFillTailWithZero)
670 movlpd 7(%esi), %xmm1
672 movlpd %xmm1, 7(%edi)
673 # ifdef USE_AS_STPCPY
678 jnz L(StrncpyFillTailWithZero)
685 # ifdef USE_AS_STPCPY
690 jnz L(StrncpyFillTailWithZero)
698 # ifdef USE_AS_STPCPY
703 jnz L(StrncpyFillTailWithZero)
712 # ifdef USE_AS_STPCPY
717 jnz L(StrncpyFillTailWithZero)
726 # ifdef USE_AS_STPCPY
731 jnz L(StrncpyFillTailWithZero)
740 # ifdef USE_AS_STPCPY
745 jnz L(StrncpyFillTailWithZero)
755 # ifdef USE_AS_STPCPY
760 jnz L(StrncpyFillTailWithZero)
766 movlpd 14(%esi), %xmm3
768 movlpd %xmm3, 14(%edi)
769 # ifdef USE_AS_STPCPY
774 jnz L(StrncpyFillTailWithZero)
780 movlpd 15(%esi), %xmm3
782 movlpd %xmm3, 15(%edi)
783 # ifdef USE_AS_STPCPY
788 jnz L(StrncpyFillTailWithZero)
794 movlpd 16(%esi), %xmm2
796 movlpd %xmm2, 16(%edi)
797 # ifdef USE_AS_STPCPY
802 jnz L(StrncpyFillTailWithZero)
808 movlpd 16(%esi), %xmm2
810 movlpd %xmm2, 16(%edi)
812 # ifdef USE_AS_STPCPY
817 jnz L(StrncpyFillTailWithZero)
823 movlpd 16(%esi), %xmm2
826 movlpd %xmm2, 16(%edi)
828 # ifdef USE_AS_STPCPY
833 jnz L(StrncpyFillTailWithZero)
839 movlpd 16(%esi), %xmm2
842 movlpd %xmm2, 16(%edi)
844 # ifdef USE_AS_STPCPY
849 jnz L(StrncpyFillTailWithZero)
855 movlpd 16(%esi), %xmm2
858 movlpd %xmm2, 16(%edi)
860 # ifdef USE_AS_STPCPY
865 jnz L(StrncpyFillTailWithZero)
871 movdqu 13(%esi), %xmm2
873 movdqu %xmm2, 13(%edi)
874 # ifdef USE_AS_STPCPY
879 jnz L(StrncpyFillTailWithZero)
885 movdqu 14(%esi), %xmm2
887 movdqu %xmm2, 14(%edi)
888 # ifdef USE_AS_STPCPY
893 jnz L(StrncpyFillTailWithZero)
900 movdqu 15(%esi), %xmm2
902 movdqu %xmm2, 15(%edi)
903 # ifdef USE_AS_STPCPY
908 jnz L(StrncpyFillTailWithZero)
914 movdqu 16(%esi), %xmm2
916 movdqu %xmm2, 16(%edi)
917 # ifdef USE_AS_STPCPY
922 jnz L(StrncpyFillTailWithZero)
929 # ifdef USE_AS_STPCPY
938 # ifdef USE_AS_STPCPY
949 # ifdef USE_AS_STPCPY
958 # ifdef USE_AS_STPCPY
969 # ifdef USE_AS_STPCPY
980 # ifdef USE_AS_STPCPY
991 # ifdef USE_AS_STPCPY
1000 # ifdef USE_AS_STPCPY
1007 movlpd (%esi), %xmm0
1009 movlpd %xmm0, (%edi)
1011 # ifdef USE_AS_STPCPY
1018 movlpd (%esi), %xmm0
1020 movlpd %xmm0, (%edi)
1022 # ifdef USE_AS_STPCPY
1029 movlpd (%esi), %xmm0
1031 movlpd %xmm0, (%edi)
1033 # ifdef USE_AS_STPCPY
1040 movlpd (%esi), %xmm0
1042 movlpd %xmm0, (%edi)
1044 # ifdef USE_AS_STPCPY
1051 movlpd (%esi), %xmm0
1052 movlpd 5(%esi), %xmm1
1053 movlpd %xmm0, (%edi)
1054 movlpd %xmm1, 5(%edi)
1055 # ifdef USE_AS_STPCPY
1062 movlpd (%esi), %xmm0
1063 movlpd 6(%esi), %xmm1
1064 movlpd %xmm0, (%edi)
1065 movlpd %xmm1, 6(%edi)
1066 # ifdef USE_AS_STPCPY
1073 movlpd (%esi), %xmm0
1074 movlpd 7(%esi), %xmm1
1075 movlpd %xmm0, (%edi)
1076 movlpd %xmm1, 7(%edi)
1077 # ifdef USE_AS_STPCPY
1084 movdqu (%esi), %xmm0
1085 movdqu %xmm0, (%edi)
1086 # ifdef USE_AS_STPCPY
1093 movdqu (%esi), %xmm0
1095 movdqu %xmm0, (%edi)
1097 # ifdef USE_AS_STPCPY
1104 movdqu (%esi), %xmm0
1106 movdqu %xmm0, (%edi)
1108 # ifdef USE_AS_STPCPY
1115 movdqu (%esi), %xmm0
1117 movdqu %xmm0, (%edi)
1119 # ifdef USE_AS_STPCPY
1126 movdqu (%esi), %xmm0
1128 movdqu %xmm0, (%edi)
1130 # ifdef USE_AS_STPCPY
1137 movdqu (%esi), %xmm0
1140 movdqu %xmm0, (%edi)
1143 # ifdef USE_AS_STPCPY
1150 movdqu (%esi), %xmm0
1151 movlpd 14(%esi), %xmm3
1152 movdqu %xmm0, (%edi)
1153 movlpd %xmm3, 14(%edi)
1154 # ifdef USE_AS_STPCPY
1161 movdqu (%esi), %xmm0
1162 movlpd 15(%esi), %xmm3
1163 movdqu %xmm0, (%edi)
1164 movlpd %xmm3, 15(%edi)
1165 # ifdef USE_AS_STPCPY
1172 movdqu (%esi), %xmm0
1173 movlpd 16(%esi), %xmm2
1174 movdqu %xmm0, (%edi)
1175 movlpd %xmm2, 16(%edi)
1176 # ifdef USE_AS_STPCPY
1183 movdqu (%esi), %xmm0
1184 movlpd 16(%esi), %xmm2
1186 movdqu %xmm0, (%edi)
1187 movlpd %xmm2, 16(%edi)
1189 # ifdef USE_AS_STPCPY
1196 movdqu (%esi), %xmm0
1197 movlpd 16(%esi), %xmm2
1199 movdqu %xmm0, (%edi)
1200 movlpd %xmm2, 16(%edi)
1202 # ifdef USE_AS_STPCPY
1209 movdqu (%esi), %xmm0
1210 movlpd 16(%esi), %xmm2
1212 movdqu %xmm0, (%edi)
1213 movlpd %xmm2, 16(%edi)
1215 # ifdef USE_AS_STPCPY
1222 movdqu (%esi), %xmm0
1223 movlpd 16(%esi), %xmm2
1225 movdqu %xmm0, (%edi)
1226 movlpd %xmm2, 16(%edi)
1228 # ifdef USE_AS_STPCPY
1235 movdqu (%esi), %xmm0
1236 movdqu 13(%esi), %xmm2
1237 movdqu %xmm0, (%edi)
1238 movdqu %xmm2, 13(%edi)
1239 # ifdef USE_AS_STPCPY
1246 movdqu (%esi), %xmm0
1247 movdqu 14(%esi), %xmm2
1248 movdqu %xmm0, (%edi)
1249 movdqu %xmm2, 14(%edi)
1250 # ifdef USE_AS_STPCPY
1257 movdqu (%esi), %xmm0
1258 movdqu 15(%esi), %xmm2
1259 movdqu %xmm0, (%edi)
1260 movdqu %xmm2, 15(%edi)
1261 # ifdef USE_AS_STPCPY
1268 movdqu (%esi), %xmm0
1269 movdqu 16(%esi), %xmm2
1270 movdqu %xmm0, (%edi)
1271 movdqu %xmm2, 16(%edi)
1272 # ifdef USE_AS_STPCPY
1279 movdqu (%esi), %xmm0
1280 movdqu 16(%esi), %xmm2
1282 movdqu %xmm0, (%edi)
1283 movdqu %xmm2, 16(%edi)
1325 movlpd %xmm0, -1(%edi)
1330 movlpd %xmm0, (%edi)
1335 movlpd %xmm0, (%edi)
1341 movlpd %xmm0, (%edi)
1347 movlpd %xmm0, (%edi)
1353 movlpd %xmm0, (%edi)
1359 movlpd %xmm0, (%edi)
1360 movlpd %xmm0, 5(%edi)
1365 movlpd %xmm0, (%edi)
1366 movlpd %xmm0, 6(%edi)
1371 movdqu %xmm0, -1(%edi)
1376 movdqu %xmm0, (%edi)
1380 L(CopyFrom1To16BytesUnalignedXmm2):
1381 movdqu %xmm2, (%edi, %ecx)
1384 L(CopyFrom1To16BytesXmmExit):
1388 # ifdef USE_AS_STPCPY
1389 lea (%edi, %edx), %eax
1392 lea 1(%edi, %edx), %edi
1395 L(StrncpyFillTailWithZero):
1399 jbe L(StrncpyFillExit)
1401 movdqu %xmm0, (%edi)
1409 jb L(StrncpyFillLess64)
1411 L(StrncpyFillLoopMovdqa):
1412 movdqa %xmm0, (%edi)
1413 movdqa %xmm0, 16(%edi)
1414 movdqa %xmm0, 32(%edi)
1415 movdqa %xmm0, 48(%edi)
1418 jae L(StrncpyFillLoopMovdqa)
1420 L(StrncpyFillLess64):
1422 jl L(StrncpyFillLess32)
1423 movdqa %xmm0, (%edi)
1424 movdqa %xmm0, 16(%edi)
1427 jl L(StrncpyFillExit)
1428 movdqa %xmm0, (%edi)
1430 BRANCH_TO_JMPTBL_ENTRY (L(FillTable), %ebx, 4)
1432 L(StrncpyFillLess32):
1434 jl L(StrncpyFillExit)
1435 movdqa %xmm0, (%edi)
1437 BRANCH_TO_JMPTBL_ENTRY (L(FillTable), %ebx, 4)
1441 BRANCH_TO_JMPTBL_ENTRY (L(FillTable), %ebx, 4)
1444 L(UnalignedLeaveCase2OrCase3):
1446 jnz L(Unaligned64LeaveCase2)
1447 L(Unaligned64LeaveCase3):
1451 jl L(CopyFrom1To16BytesCase3)
1452 movdqu %xmm4, (%edi)
1454 jb L(CopyFrom1To16BytesCase3)
1455 movdqu %xmm5, 16(%edi)
1457 jb L(CopyFrom1To16BytesCase3)
1458 movdqu %xmm6, 32(%edi)
1460 jb L(CopyFrom1To16BytesCase3)
1461 movdqu %xmm7, 48(%edi)
1462 # ifdef USE_AS_STPCPY
1468 L(Unaligned64LeaveCase2):
1470 pcmpeqb %xmm4, %xmm0
1471 pmovmskb %xmm0, %edx
1473 jle L(CopyFrom1To16BytesCase2OrCase3)
1475 jnz L(CopyFrom1To16BytesUnalignedXmm4)
1477 pcmpeqb %xmm5, %xmm0
1478 pmovmskb %xmm0, %edx
1479 movdqu %xmm4, (%edi)
1482 jbe L(CopyFrom1To16BytesCase2OrCase3)
1484 jnz L(CopyFrom1To16BytesUnalignedXmm5)
1486 pcmpeqb %xmm6, %xmm0
1487 pmovmskb %xmm0, %edx
1488 movdqu %xmm5, 16(%edi)
1491 jbe L(CopyFrom1To16BytesCase2OrCase3)
1493 jnz L(CopyFrom1To16BytesUnalignedXmm6)
1495 pcmpeqb %xmm7, %xmm0
1496 pmovmskb %xmm0, %edx
1497 movdqu %xmm6, 32(%edi)
1498 lea 16(%edi, %ecx), %edi
1499 lea 16(%esi, %ecx), %esi
1502 jb L(CopyFrom1To16BytesExit)
1503 BRANCH_TO_JMPTBL_ENTRY (L(ExitStrncpyTable), %ebx, 4)
/* Entries of the L(ExitTable) jump table: L(Exit1) through L(Exit32),
   in ascending order.  Callers dispatch here with
   BRANCH_TO_JMPTBL_ENTRY (L(ExitTable), %edx, 4).  Each entry is
   either an offset relative to L(ExitTable) or the plain label
   address, depending on which JMPTBL definition is active.
   NOTE(review): the table label itself is not visible in this chunk;
   if it immediately precedes the first entry, index 0 selects
   L(Exit1) -- confirm.  */
1515 .int JMPTBL(L(Exit1), L(ExitTable))
1516 .int JMPTBL(L(Exit2), L(ExitTable))
1517 .int JMPTBL(L(Exit3), L(ExitTable))
1518 .int JMPTBL(L(Exit4), L(ExitTable))
1519 .int JMPTBL(L(Exit5), L(ExitTable))
1520 .int JMPTBL(L(Exit6), L(ExitTable))
1521 .int JMPTBL(L(Exit7), L(ExitTable))
1522 .int JMPTBL(L(Exit8), L(ExitTable))
1523 .int JMPTBL(L(Exit9), L(ExitTable))
1524 .int JMPTBL(L(Exit10), L(ExitTable))
1525 .int JMPTBL(L(Exit11), L(ExitTable))
1526 .int JMPTBL(L(Exit12), L(ExitTable))
1527 .int JMPTBL(L(Exit13), L(ExitTable))
1528 .int JMPTBL(L(Exit14), L(ExitTable))
1529 .int JMPTBL(L(Exit15), L(ExitTable))
1530 .int JMPTBL(L(Exit16), L(ExitTable))
1531 .int JMPTBL(L(Exit17), L(ExitTable))
1532 .int JMPTBL(L(Exit18), L(ExitTable))
1533 .int JMPTBL(L(Exit19), L(ExitTable))
1534 .int JMPTBL(L(Exit20), L(ExitTable))
1535 .int JMPTBL(L(Exit21), L(ExitTable))
1536 .int JMPTBL(L(Exit22), L(ExitTable))
1537 .int JMPTBL(L(Exit23), L(ExitTable))
1538 .int JMPTBL(L(Exit24), L(ExitTable))
1539 .int JMPTBL(L(Exit25), L(ExitTable))
1540 .int JMPTBL(L(Exit26), L(ExitTable))
1541 .int JMPTBL(L(Exit27), L(ExitTable))
1542 .int JMPTBL(L(Exit28), L(ExitTable))
1543 .int JMPTBL(L(Exit29), L(ExitTable))
1544 .int JMPTBL(L(Exit30), L(ExitTable))
1545 .int JMPTBL(L(Exit31), L(ExitTable))
1546 .int JMPTBL(L(Exit32), L(ExitTable))
/* Jump table used by the length-limited exits.  Callers dispatch here
   with BRANCH_TO_JMPTBL_ENTRY (L(ExitStrncpyTable), %ebx, 4), so %ebx
   is the entry index.  Index 0 selects L(Exit0); index N selects
   L(StrncpyExitN) for N = 1..33.  Each entry is either an offset
   relative to L(ExitStrncpyTable) or the plain label address,
   depending on which JMPTBL definition is active.  */
1548 L(ExitStrncpyTable):
1549 .int JMPTBL(L(Exit0), L(ExitStrncpyTable))
1550 .int JMPTBL(L(StrncpyExit1), L(ExitStrncpyTable))
1551 .int JMPTBL(L(StrncpyExit2), L(ExitStrncpyTable))
1552 .int JMPTBL(L(StrncpyExit3), L(ExitStrncpyTable))
1553 .int JMPTBL(L(StrncpyExit4), L(ExitStrncpyTable))
1554 .int JMPTBL(L(StrncpyExit5), L(ExitStrncpyTable))
1555 .int JMPTBL(L(StrncpyExit6), L(ExitStrncpyTable))
1556 .int JMPTBL(L(StrncpyExit7), L(ExitStrncpyTable))
1557 .int JMPTBL(L(StrncpyExit8), L(ExitStrncpyTable))
1558 .int JMPTBL(L(StrncpyExit9), L(ExitStrncpyTable))
1559 .int JMPTBL(L(StrncpyExit10), L(ExitStrncpyTable))
1560 .int JMPTBL(L(StrncpyExit11), L(ExitStrncpyTable))
1561 .int JMPTBL(L(StrncpyExit12), L(ExitStrncpyTable))
1562 .int JMPTBL(L(StrncpyExit13), L(ExitStrncpyTable))
1563 .int JMPTBL(L(StrncpyExit14), L(ExitStrncpyTable))
1564 .int JMPTBL(L(StrncpyExit15), L(ExitStrncpyTable))
1565 .int JMPTBL(L(StrncpyExit16), L(ExitStrncpyTable))
1566 .int JMPTBL(L(StrncpyExit17), L(ExitStrncpyTable))
1567 .int JMPTBL(L(StrncpyExit18), L(ExitStrncpyTable))
1568 .int JMPTBL(L(StrncpyExit19), L(ExitStrncpyTable))
1569 .int JMPTBL(L(StrncpyExit20), L(ExitStrncpyTable))
1570 .int JMPTBL(L(StrncpyExit21), L(ExitStrncpyTable))
1571 .int JMPTBL(L(StrncpyExit22), L(ExitStrncpyTable))
1572 .int JMPTBL(L(StrncpyExit23), L(ExitStrncpyTable))
1573 .int JMPTBL(L(StrncpyExit24), L(ExitStrncpyTable))
1574 .int JMPTBL(L(StrncpyExit25), L(ExitStrncpyTable))
1575 .int JMPTBL(L(StrncpyExit26), L(ExitStrncpyTable))
1576 .int JMPTBL(L(StrncpyExit27), L(ExitStrncpyTable))
1577 .int JMPTBL(L(StrncpyExit28), L(ExitStrncpyTable))
1578 .int JMPTBL(L(StrncpyExit29), L(ExitStrncpyTable))
1579 .int JMPTBL(L(StrncpyExit30), L(ExitStrncpyTable))
1580 .int JMPTBL(L(StrncpyExit31), L(ExitStrncpyTable))
1581 .int JMPTBL(L(StrncpyExit32), L(ExitStrncpyTable))
1582 .int JMPTBL(L(StrncpyExit33), L(ExitStrncpyTable))
/* Entries of the L(FillTable) jump table: L(Fill0) through L(Fill16),
   in ascending order.  The zero-fill code dispatches here with
   BRANCH_TO_JMPTBL_ENTRY (L(FillTable), %ebx, 4).  Each entry is
   either an offset relative to L(FillTable) or the plain label
   address, depending on which JMPTBL definition is active.
   NOTE(review): the table label itself is not visible in this chunk;
   if it immediately precedes the first entry, index N selects
   L(FillN) -- confirm.  */
1586 .int JMPTBL(L(Fill0), L(FillTable))
1587 .int JMPTBL(L(Fill1), L(FillTable))
1588 .int JMPTBL(L(Fill2), L(FillTable))
1589 .int JMPTBL(L(Fill3), L(FillTable))
1590 .int JMPTBL(L(Fill4), L(FillTable))
1591 .int JMPTBL(L(Fill5), L(FillTable))
1592 .int JMPTBL(L(Fill6), L(FillTable))
1593 .int JMPTBL(L(Fill7), L(FillTable))
1594 .int JMPTBL(L(Fill8), L(FillTable))
1595 .int JMPTBL(L(Fill9), L(FillTable))
1596 .int JMPTBL(L(Fill10), L(FillTable))
1597 .int JMPTBL(L(Fill11), L(FillTable))
1598 .int JMPTBL(L(Fill12), L(FillTable))
1599 .int JMPTBL(L(Fill13), L(FillTable))
1600 .int JMPTBL(L(Fill14), L(FillTable))
1601 .int JMPTBL(L(Fill15), L(FillTable))
1602 .int JMPTBL(L(Fill16), L(FillTable))
/* RETURN pops %edi and returns, then emits CFI_PUSH (%edi) after the
   ret -- presumably so the unwind annotations again describe %edi as
   pushed for whatever code is assembled after this macro (confirm).
   RETURN1 is a bare ret with no register restore.  */
1606 # define RETURN POP (%edi); ret; CFI_PUSH (%edi)
1607 # define RETURN1 ret
1612 mov STR1(%esp), %edx
1613 mov STR2(%esp), %ecx
1655 movdqu (%ecx), %xmm1
1656 movdqu %xmm1, (%edx)
1657 pcmpeqb (%ebx), %xmm0
1658 pmovmskb %xmm0, %eax
1661 jnz L(CopyFrom1To16Bytes)
1671 movdqa (%ecx), %xmm1
1672 movaps 16(%ecx), %xmm2
1673 movdqu %xmm1, (%edx)
1674 pcmpeqb %xmm2, %xmm0
1675 pmovmskb %xmm0, %eax
1678 jnz L(CopyFrom1To16Bytes)
1680 movaps 16(%ecx, %ebx), %xmm3
1681 movdqu %xmm2, (%edx, %ebx)
1682 pcmpeqb %xmm3, %xmm0
1683 pmovmskb %xmm0, %eax
1686 jnz L(CopyFrom1To16Bytes)
1688 movaps 16(%ecx, %ebx), %xmm4
1689 movdqu %xmm3, (%edx, %ebx)
1690 pcmpeqb %xmm4, %xmm0
1691 pmovmskb %xmm0, %eax
1694 jnz L(CopyFrom1To16Bytes)
1696 movaps 16(%ecx, %ebx), %xmm1
1697 movdqu %xmm4, (%edx, %ebx)
1698 pcmpeqb %xmm1, %xmm0
1699 pmovmskb %xmm0, %eax
1702 jnz L(CopyFrom1To16Bytes)
1704 movaps 16(%ecx, %ebx), %xmm2
1705 movdqu %xmm1, (%edx, %ebx)
1706 pcmpeqb %xmm2, %xmm0
1707 pmovmskb %xmm0, %eax
1710 jnz L(CopyFrom1To16Bytes)
1712 movaps 16(%ecx, %ebx), %xmm3
1713 movdqu %xmm2, (%edx, %ebx)
1714 pcmpeqb %xmm3, %xmm0
1715 pmovmskb %xmm0, %eax
1718 jnz L(CopyFrom1To16Bytes)
1720 movdqu %xmm3, (%edx, %ebx)
1722 lea 16(%ecx, %ebx), %ecx
1728 movaps (%ecx), %xmm2
1730 movaps 16(%ecx), %xmm5
1731 movaps 32(%ecx), %xmm3
1733 movaps 48(%ecx), %xmm7
1739 pcmpeqb %xmm0, %xmm3
1740 pmovmskb %xmm3, %eax
1742 jnz L(Aligned64Leave)
1743 L(Aligned64Loop_start):
1744 movdqu %xmm4, -64(%edx)
1745 movaps (%ecx), %xmm2
1747 movdqu %xmm5, -48(%edx)
1748 movaps 16(%ecx), %xmm5
1750 movaps 32(%ecx), %xmm3
1751 movdqu %xmm6, -32(%edx)
1753 movdqu %xmm7, -16(%edx)
1754 movaps 48(%ecx), %xmm7
1757 pcmpeqb %xmm3, %xmm0
1758 pmovmskb %xmm0, %eax
1762 jz L(Aligned64Loop_start)
1766 pcmpeqb %xmm4, %xmm0
1767 pmovmskb %xmm0, %eax
1769 jnz L(CopyFrom1To16Bytes)
1771 pcmpeqb %xmm5, %xmm0
1772 pmovmskb %xmm0, %eax
1773 movdqu %xmm4, -64(%edx)
1776 jnz L(CopyFrom1To16Bytes)
1778 pcmpeqb %xmm6, %xmm0
1779 pmovmskb %xmm0, %eax
1780 movdqu %xmm5, -48(%edx)
1783 jnz L(CopyFrom1To16Bytes)
1785 movdqu %xmm6, -32(%edx)
1786 pcmpeqb %xmm7, %xmm0
1787 pmovmskb %xmm0, %eax
1790 /*-----------------End of main part---------------------------*/
1793 L(CopyFrom1To16Bytes):
1819 # ifdef USE_AS_STPCPY
1843 movlpd (%ecx), %xmm0
1844 movlpd %xmm0, (%edx)
1845 movlpd 8(%ecx), %xmm0
1846 movlpd %xmm0, 8(%edx)
1847 # ifdef USE_AS_STPCPY
1858 # ifdef USE_AS_STPCPY
1869 # ifdef USE_AS_STPCPY
1882 # ifdef USE_AS_STPCPY
1893 # ifdef USE_AS_STPCPY
1906 # ifdef USE_AS_STPCPY
1919 # ifdef USE_AS_STPCPY
1932 # ifdef USE_AS_STPCPY
1947 # ifdef USE_AS_STPCPY
1962 # ifdef USE_AS_STPCPY
1977 # ifdef USE_AS_STPCPY
1992 # ifdef USE_AS_STPCPY
2001 movlpd (%ecx), %xmm0
2002 movlpd %xmm0, (%edx)
2003 movlpd 5(%ecx), %xmm0
2004 movlpd %xmm0, 5(%edx)
2005 # ifdef USE_AS_STPCPY
2014 movlpd (%ecx), %xmm0
2015 movlpd %xmm0, (%edx)
2016 movlpd 6(%ecx), %xmm0
2017 movlpd %xmm0, 6(%edx)
2018 # ifdef USE_AS_STPCPY
2027 movlpd (%ecx), %xmm0
2028 movlpd %xmm0, (%edx)
2029 movlpd 7(%ecx), %xmm0
2030 movlpd %xmm0, 7(%edx)
2031 # ifdef USE_AS_STPCPY
2051 # ifdef USE_AS_STPCPY
2064 # ifdef USE_AS_STPCPY
2075 # ifdef USE_AS_STPCPY
2088 # ifdef USE_AS_STPCPY
2101 # ifdef USE_AS_STPCPY
2114 # ifdef USE_AS_STPCPY
2127 # ifdef USE_AS_STPCPY
2142 # ifdef USE_AS_STPCPY
2157 # ifdef USE_AS_STPCPY
2172 # ifdef USE_AS_STPCPY
2187 # ifdef USE_AS_STPCPY
2196 movlpd (%ecx), %xmm0
2197 movlpd %xmm0, (%edx)
2198 movlpd 5(%ecx), %xmm0
2199 movlpd %xmm0, 5(%edx)
2200 # ifdef USE_AS_STPCPY
2209 movlpd (%ecx), %xmm0
2210 movlpd %xmm0, (%edx)
2211 movlpd 6(%ecx), %xmm0
2212 movlpd %xmm0, 6(%edx)
2213 # ifdef USE_AS_STPCPY
2222 movlpd (%ecx), %xmm0
2223 movlpd %xmm0, (%edx)
2224 movlpd 7(%ecx), %xmm0
2225 movlpd %xmm0, 7(%edx)
2226 # ifdef USE_AS_STPCPY
2235 movlpd (%ecx), %xmm0
2236 movlpd %xmm0, (%edx)
2237 movlpd 8(%ecx), %xmm0
2238 movlpd %xmm0, 8(%edx)
2239 # ifdef USE_AS_STPCPY