1 /* strcpy with SSE2 and unaligned load
2 Copyright (C) 2011-2018 Free Software Foundation, Inc.
3 Contributed by Intel Corporation.
4 This file is part of the GNU C Library.
6 The GNU C Library is free software; you can redistribute it and/or
7 modify it under the terms of the GNU Lesser General Public
8 License as published by the Free Software Foundation; either
9 version 2.1 of the License, or (at your option) any later version.
11 The GNU C Library is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 Lesser General Public License for more details.
16 You should have received a copy of the GNU Lesser General Public
17 License along with the GNU C Library; if not, see
18 <http://www.gnu.org/licenses/>. */
22 # ifndef USE_AS_STRCAT
26 # define STRCPY __strcpy_sse2_unaligned
/* JMPTBL (I, B): value stored in a jump-table slot.  It is the difference
   between target label I and table base label B, so the table holds
   offsets relative to its own start rather than absolute addresses.  */
31 # define JMPTBL(I, B) I - B
/* BRANCH_TO_JMPTBL_ENTRY (TABLE, INDEX, SCALE): indirect branch through a
   table built from JMPTBL entries.
     1. %r11 = address of TABLE (RIP-relative lea).
     2. %rcx = sign-extended 32-bit entry at TABLE + INDEX * SCALE.
     3. %rcx = %r11 + %rcx, turning the stored offset back into an address.
     4. Jump to *%rcx.
   Overwrites %r11 and %rcx.  INDEX is not range-checked here.
   NOTE(review): _CET_NOTRACK is defined outside this macro; presumably it
   emits the CET "notrack" prefix for the indirect jump -- confirm.  */
32 # define BRANCH_TO_JMPTBL_ENTRY(TABLE, INDEX, SCALE) \
33 lea TABLE(%rip), %r11; \
34 movslq (%r11, INDEX, SCALE), %rcx; \
35 lea (%r11, %rcx), %rcx; \
36 _CET_NOTRACK jmp *%rcx
38 # ifndef USE_AS_STRCAT
42 # ifdef USE_AS_STRNCPY
48 # ifndef USE_AS_STPCPY
49 mov %rdi, %rax /* save result */
56 jbe L(SourceStringAlignmentLess32)
67 # ifdef USE_AS_STRNCPY
68 # if defined USE_AS_STPCPY || defined USE_AS_STRCAT
77 jbe L(CopyFrom1To16BytesTailCase2OrCase3)
80 jnz L(CopyFrom1To16BytesTail)
82 pcmpeqb 16(%rsi), %xmm0
85 # ifdef USE_AS_STRNCPY
88 jbe L(CopyFrom1To32BytesCase2OrCase3)
91 jnz L(CopyFrom1To32Bytes)
93 movdqu (%rsi, %rcx), %xmm1 /* copy 16 bytes */
96 /* If source address alignment != destination address alignment */
100 # ifdef USE_AS_STRNCPY
106 movdqa (%rsi, %rcx), %xmm1
107 movaps 16(%rsi, %rcx), %xmm2
108 movdqu %xmm1, (%rdi, %rcx)
112 # ifdef USE_AS_STRNCPY
114 jbe L(CopyFrom1To16BytesCase2OrCase3)
117 # if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT
118 jnz L(CopyFrom1To16BytesUnalignedXmm2)
120 jnz L(CopyFrom1To16Bytes)
123 movaps 16(%rsi, %rcx), %xmm3
124 movdqu %xmm2, (%rdi, %rcx)
128 # ifdef USE_AS_STRNCPY
130 jbe L(CopyFrom1To16BytesCase2OrCase3)
133 # if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT
134 jnz L(CopyFrom1To16BytesUnalignedXmm3)
136 jnz L(CopyFrom1To16Bytes)
139 movaps 16(%rsi, %rcx), %xmm4
140 movdqu %xmm3, (%rdi, %rcx)
144 # ifdef USE_AS_STRNCPY
146 jbe L(CopyFrom1To16BytesCase2OrCase3)
149 # if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT
150 jnz L(CopyFrom1To16BytesUnalignedXmm4)
152 jnz L(CopyFrom1To16Bytes)
155 movaps 16(%rsi, %rcx), %xmm1
156 movdqu %xmm4, (%rdi, %rcx)
160 # ifdef USE_AS_STRNCPY
162 jbe L(CopyFrom1To16BytesCase2OrCase3)
165 # if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT
166 jnz L(CopyFrom1To16BytesUnalignedXmm1)
168 jnz L(CopyFrom1To16Bytes)
171 movaps 16(%rsi, %rcx), %xmm2
172 movdqu %xmm1, (%rdi, %rcx)
176 # ifdef USE_AS_STRNCPY
178 jbe L(CopyFrom1To16BytesCase2OrCase3)
181 # if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT
182 jnz L(CopyFrom1To16BytesUnalignedXmm2)
184 jnz L(CopyFrom1To16Bytes)
187 movaps 16(%rsi, %rcx), %xmm3
188 movdqu %xmm2, (%rdi, %rcx)
192 # ifdef USE_AS_STRNCPY
194 jbe L(CopyFrom1To16BytesCase2OrCase3)
197 # if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT
198 jnz L(CopyFrom1To16BytesUnalignedXmm3)
200 jnz L(CopyFrom1To16Bytes)
203 movdqu %xmm3, (%rdi, %rcx)
205 lea 16(%rsi, %rcx), %rsi
209 # ifdef USE_AS_STRNCPY
210 lea 128(%r8, %rdx), %r8
215 movaps 16(%rsi), %xmm5
216 movaps 32(%rsi), %xmm3
218 movaps 48(%rsi), %xmm7
224 # ifdef USE_AS_STRNCPY
226 jbe L(UnalignedLeaveCase2OrCase3)
229 jnz L(Unaligned64Leave)
231 L(Unaligned64Loop_start):
234 movdqu %xmm4, -64(%rdi)
237 movdqu %xmm5, -48(%rdi)
238 movaps 16(%rsi), %xmm5
240 movaps 32(%rsi), %xmm3
241 movdqu %xmm6, -32(%rdi)
243 movdqu %xmm7, -16(%rdi)
244 movaps 48(%rsi), %xmm7
249 # ifdef USE_AS_STRNCPY
251 jbe L(UnalignedLeaveCase2OrCase3)
254 jz L(Unaligned64Loop_start)
264 jnz L(CopyFrom1To16BytesUnaligned_0)
266 jnz L(CopyFrom1To16BytesUnaligned_16)
273 jnz L(CopyFrom1To16BytesUnaligned_32)
277 movdqu %xmm5, 16(%rdi)
278 movdqu %xmm6, 32(%rdi)
279 # if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT
280 # ifdef USE_AS_STPCPY
281 lea 48(%rdi, %rdx), %rax
283 movdqu %xmm7, 48(%rdi)
286 lea 49(%rdi, %rdx), %rdi
287 jmp L(StrncpyFillTailWithZero)
291 BRANCH_TO_JMPTBL_ENTRY (L(ExitTable), %rdx, 4)
294 /* If source address alignment == destination address alignment */
296 L(SourceStringAlignmentLess32):
299 movdqu 16(%rsi), %xmm2
303 # ifdef USE_AS_STRNCPY
304 # if defined USE_AS_STPCPY || defined USE_AS_STRCAT
309 jbe L(CopyFrom1To16BytesTail1Case2OrCase3)
312 jnz L(CopyFrom1To16BytesTail1)
318 # ifdef USE_AS_STRNCPY
319 # if defined USE_AS_STPCPY || defined USE_AS_STRCAT
324 jbe L(CopyFrom1To32Bytes1Case2OrCase3)
327 jnz L(CopyFrom1To32Bytes1)
333 /*------End of main part with loops---------------------*/
337 # if (!defined USE_AS_STRNCPY) || (defined USE_AS_STRCAT)
339 L(CopyFrom1To16Bytes):
343 BRANCH_TO_JMPTBL_ENTRY (L(ExitTable), %rdx, 4)
346 L(CopyFrom1To16BytesTail):
349 BRANCH_TO_JMPTBL_ENTRY (L(ExitTable), %rdx, 4)
352 L(CopyFrom1To32Bytes1):
355 # if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT
358 L(CopyFrom1To16BytesTail1):
360 BRANCH_TO_JMPTBL_ENTRY (L(ExitTable), %rdx, 4)
363 L(CopyFrom1To32Bytes):
368 BRANCH_TO_JMPTBL_ENTRY (L(ExitTable), %rdx, 4)
371 L(CopyFrom1To16BytesUnaligned_0):
373 # if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT
374 # ifdef USE_AS_STPCPY
375 lea (%rdi, %rdx), %rax
380 lea 1(%rdi, %rdx), %rdi
381 jmp L(StrncpyFillTailWithZero)
383 BRANCH_TO_JMPTBL_ENTRY (L(ExitTable), %rdx, 4)
387 L(CopyFrom1To16BytesUnaligned_16):
390 # if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT
391 # ifdef USE_AS_STPCPY
392 lea 16(%rdi, %rdx), %rax
394 movdqu %xmm5, 16(%rdi)
397 lea 17(%rdi, %rdx), %rdi
398 jmp L(StrncpyFillTailWithZero)
402 BRANCH_TO_JMPTBL_ENTRY (L(ExitTable), %rdx, 4)
406 L(CopyFrom1To16BytesUnaligned_32):
409 movdqu %xmm5, 16(%rdi)
410 # if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT
411 # ifdef USE_AS_STPCPY
412 lea 32(%rdi, %rdx), %rax
414 movdqu %xmm6, 32(%rdi)
417 lea 33(%rdi, %rdx), %rdi
418 jmp L(StrncpyFillTailWithZero)
422 BRANCH_TO_JMPTBL_ENTRY (L(ExitTable), %rdx, 4)
425 # ifdef USE_AS_STRNCPY
426 # ifndef USE_AS_STRCAT
/* Per-register exit stubs.  Each one writes the 16 bytes held in the
   register named by its label to (%rdi, %rcx) with an unaligned store,
   then jumps to the common tail L(CopyFrom1To16BytesXmmExit).  The stubs
   differ only in which xmm register is stored.  */
428 L(CopyFrom1To16BytesUnalignedXmm6):
429 movdqu %xmm6, (%rdi, %rcx)
430 jmp L(CopyFrom1To16BytesXmmExit)
/* Same as above, storing %xmm5.  */
433 L(CopyFrom1To16BytesUnalignedXmm5):
434 movdqu %xmm5, (%rdi, %rcx)
435 jmp L(CopyFrom1To16BytesXmmExit)
/* Same as above, storing %xmm4.  */
438 L(CopyFrom1To16BytesUnalignedXmm4):
439 movdqu %xmm4, (%rdi, %rcx)
440 jmp L(CopyFrom1To16BytesXmmExit)
/* Same as above, storing %xmm3.  */
443 L(CopyFrom1To16BytesUnalignedXmm3):
444 movdqu %xmm3, (%rdi, %rcx)
445 jmp L(CopyFrom1To16BytesXmmExit)
/* Same as above, storing %xmm1.  */
448 L(CopyFrom1To16BytesUnalignedXmm1):
449 movdqu %xmm1, (%rdi, %rcx)
450 jmp L(CopyFrom1To16BytesXmmExit)
454 L(CopyFrom1To16BytesExit):
455 BRANCH_TO_JMPTBL_ENTRY (L(ExitTable), %rdx, 4)
460 L(CopyFrom1To16BytesCase2):
466 jb L(CopyFrom1To16BytesExit)
467 BRANCH_TO_JMPTBL_ENTRY (L(ExitStrncpyTable), %r8, 4)
470 L(CopyFrom1To32BytesCase2):
476 jb L(CopyFrom1To16BytesExit)
477 BRANCH_TO_JMPTBL_ENTRY (L(ExitStrncpyTable), %r8, 4)
479 L(CopyFrom1To16BytesTailCase2):
483 jb L(CopyFrom1To16BytesExit)
484 BRANCH_TO_JMPTBL_ENTRY (L(ExitStrncpyTable), %r8, 4)
486 L(CopyFrom1To16BytesTail1Case2):
489 jb L(CopyFrom1To16BytesExit)
490 BRANCH_TO_JMPTBL_ENTRY (L(ExitStrncpyTable), %r8, 4)
492 /* Case2 or Case3, Case3 */
495 L(CopyFrom1To16BytesCase2OrCase3):
497 jnz L(CopyFrom1To16BytesCase2)
498 L(CopyFrom1To16BytesCase3):
502 BRANCH_TO_JMPTBL_ENTRY (L(ExitStrncpyTable), %r8, 4)
505 L(CopyFrom1To32BytesCase2OrCase3):
507 jnz L(CopyFrom1To32BytesCase2)
509 BRANCH_TO_JMPTBL_ENTRY (L(ExitStrncpyTable), %r8, 4)
512 L(CopyFrom1To16BytesTailCase2OrCase3):
514 jnz L(CopyFrom1To16BytesTailCase2)
516 BRANCH_TO_JMPTBL_ENTRY (L(ExitStrncpyTable), %r8, 4)
519 L(CopyFrom1To32Bytes1Case2OrCase3):
523 L(CopyFrom1To16BytesTail1Case2OrCase3):
525 jnz L(CopyFrom1To16BytesTail1Case2)
526 BRANCH_TO_JMPTBL_ENTRY (L(ExitStrncpyTable), %r8, 4)
530 /*------------End of labels for copying 1-16 bytes and 1-32 bytes----*/
535 # ifdef USE_AS_STPCPY
538 # if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT
541 jnz L(StrncpyFillTailWithZero)
549 # ifdef USE_AS_STPCPY
552 # if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT
555 jnz L(StrncpyFillTailWithZero)
564 # ifdef USE_AS_STPCPY
567 # if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT
570 jnz L(StrncpyFillTailWithZero)
578 # ifdef USE_AS_STPCPY
581 # if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT
584 jnz L(StrncpyFillTailWithZero)
593 # ifdef USE_AS_STPCPY
596 # if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT
599 jnz L(StrncpyFillTailWithZero)
609 # ifdef USE_AS_STPCPY
612 # if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT
615 jnz L(StrncpyFillTailWithZero)
625 # ifdef USE_AS_STPCPY
628 # if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT
631 jnz L(StrncpyFillTailWithZero)
639 # ifdef USE_AS_STPCPY
642 # if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT
645 jnz L(StrncpyFillTailWithZero)
654 # ifdef USE_AS_STPCPY
657 # if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT
660 jnz L(StrncpyFillTailWithZero)
670 # ifdef USE_AS_STPCPY
673 # if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT
676 jnz L(StrncpyFillTailWithZero)
686 # ifdef USE_AS_STPCPY
689 # if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT
692 jnz L(StrncpyFillTailWithZero)
702 # ifdef USE_AS_STPCPY
705 # if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT
708 jnz L(StrncpyFillTailWithZero)
718 # ifdef USE_AS_STPCPY
721 # if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT
724 jnz L(StrncpyFillTailWithZero)
734 # ifdef USE_AS_STPCPY
737 # if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT
740 jnz L(StrncpyFillTailWithZero)
750 # ifdef USE_AS_STPCPY
753 # if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT
756 jnz L(StrncpyFillTailWithZero)
764 # ifdef USE_AS_STPCPY
767 # if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT
770 jnz L(StrncpyFillTailWithZero)
779 # ifdef USE_AS_STPCPY
782 # if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT
785 jnz L(StrncpyFillTailWithZero)
795 # ifdef USE_AS_STPCPY
798 # if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT
801 jnz L(StrncpyFillTailWithZero)
811 # ifdef USE_AS_STPCPY
814 # if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT
817 jnz L(StrncpyFillTailWithZero)
827 # ifdef USE_AS_STPCPY
830 # if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT
833 jnz L(StrncpyFillTailWithZero)
844 # ifdef USE_AS_STPCPY
847 # if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT
850 jnz L(StrncpyFillTailWithZero)
860 # ifdef USE_AS_STPCPY
863 # if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT
866 jnz L(StrncpyFillTailWithZero)
876 # ifdef USE_AS_STPCPY
879 # if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT
882 jnz L(StrncpyFillTailWithZero)
892 # ifdef USE_AS_STPCPY
895 # if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT
898 jnz L(StrncpyFillTailWithZero)
909 # ifdef USE_AS_STPCPY
912 # if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT
915 jnz L(StrncpyFillTailWithZero)
927 # ifdef USE_AS_STPCPY
930 # if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT
933 jnz L(StrncpyFillTailWithZero)
945 # ifdef USE_AS_STPCPY
948 # if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT
951 jnz L(StrncpyFillTailWithZero)
963 # ifdef USE_AS_STPCPY
966 # if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT
969 jnz L(StrncpyFillTailWithZero)
976 movdqu 13(%rsi), %xmm2
978 movdqu %xmm2, 13(%rdi)
979 # ifdef USE_AS_STPCPY
982 # if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT
985 jnz L(StrncpyFillTailWithZero)
992 movdqu 14(%rsi), %xmm2
994 movdqu %xmm2, 14(%rdi)
995 # ifdef USE_AS_STPCPY
998 # if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT
1001 jnz L(StrncpyFillTailWithZero)
1007 movdqu (%rsi), %xmm0
1008 movdqu 15(%rsi), %xmm2
1009 movdqu %xmm0, (%rdi)
1010 movdqu %xmm2, 15(%rdi)
1011 # ifdef USE_AS_STPCPY
1014 # if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT
1017 jnz L(StrncpyFillTailWithZero)
1023 movdqu (%rsi), %xmm0
1024 movdqu 16(%rsi), %xmm2
1025 movdqu %xmm0, (%rdi)
1026 movdqu %xmm2, 16(%rdi)
1027 # ifdef USE_AS_STPCPY
1030 # if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT
1033 jnz L(StrncpyFillTailWithZero)
1037 # ifdef USE_AS_STRNCPY
1041 # ifdef USE_AS_STPCPY
1044 # ifdef USE_AS_STRCAT
1054 # ifdef USE_AS_STPCPY
1057 # ifdef USE_AS_STRCAT
1067 # ifdef USE_AS_STPCPY
1070 # ifdef USE_AS_STRCAT
1082 # ifdef USE_AS_STPCPY
1085 # ifdef USE_AS_STRCAT
1095 # ifdef USE_AS_STPCPY
1098 # ifdef USE_AS_STRCAT
1110 # ifdef USE_AS_STPCPY
1113 # ifdef USE_AS_STRCAT
1125 # ifdef USE_AS_STPCPY
1128 # ifdef USE_AS_STRCAT
1140 # ifdef USE_AS_STPCPY
1143 # ifdef USE_AS_STRCAT
1153 # ifdef USE_AS_STPCPY
1156 # ifdef USE_AS_STRCAT
1168 # ifdef USE_AS_STPCPY
1171 # ifdef USE_AS_STRCAT
1183 # ifdef USE_AS_STPCPY
1186 # ifdef USE_AS_STRCAT
1198 # ifdef USE_AS_STPCPY
1201 # ifdef USE_AS_STRCAT
1213 # ifdef USE_AS_STPCPY
1216 # ifdef USE_AS_STRCAT
1228 # ifdef USE_AS_STPCPY
1231 # ifdef USE_AS_STRCAT
1243 # ifdef USE_AS_STPCPY
1246 # ifdef USE_AS_STRCAT
1258 # ifdef USE_AS_STPCPY
1261 # ifdef USE_AS_STRCAT
1269 movdqu (%rsi), %xmm0
1270 movdqu %xmm0, (%rdi)
1271 # ifdef USE_AS_STPCPY
1274 # ifdef USE_AS_STRCAT
1282 movdqu (%rsi), %xmm0
1284 movdqu %xmm0, (%rdi)
1286 # ifdef USE_AS_STPCPY
1289 # ifdef USE_AS_STRCAT
1297 movdqu (%rsi), %xmm0
1299 movdqu %xmm0, (%rdi)
1301 # ifdef USE_AS_STPCPY
1304 # ifdef USE_AS_STRCAT
1312 movdqu (%rsi), %xmm0
1314 movdqu %xmm0, (%rdi)
1316 # ifdef USE_AS_STPCPY
1319 # ifdef USE_AS_STRCAT
1327 movdqu (%rsi), %xmm0
1329 movdqu %xmm0, (%rdi)
1331 # ifdef USE_AS_STPCPY
1334 # ifdef USE_AS_STRCAT
1342 movdqu (%rsi), %xmm0
1345 movdqu %xmm0, (%rdi)
1348 # ifdef USE_AS_STPCPY
1351 # ifdef USE_AS_STRCAT
1359 movdqu (%rsi), %xmm0
1361 movdqu %xmm0, (%rdi)
1363 # ifdef USE_AS_STPCPY
1366 # ifdef USE_AS_STRCAT
1374 movdqu (%rsi), %xmm0
1376 movdqu %xmm0, (%rdi)
1378 # ifdef USE_AS_STPCPY
1381 # ifdef USE_AS_STRCAT
1389 movdqu (%rsi), %xmm0
1391 movdqu %xmm0, (%rdi)
1393 # ifdef USE_AS_STPCPY
1396 # ifdef USE_AS_STRCAT
1404 movdqu (%rsi), %xmm0
1407 movdqu %xmm0, (%rdi)
1410 # ifdef USE_AS_STPCPY
1413 # ifdef USE_AS_STRCAT
1421 movdqu (%rsi), %xmm0
1424 movdqu %xmm0, (%rdi)
1427 # ifdef USE_AS_STPCPY
1430 # ifdef USE_AS_STRCAT
1438 movdqu (%rsi), %xmm0
1441 movdqu %xmm0, (%rdi)
1444 # ifdef USE_AS_STPCPY
1447 # ifdef USE_AS_STRCAT
1455 movdqu (%rsi), %xmm0
1458 movdqu %xmm0, (%rdi)
1461 # ifdef USE_AS_STPCPY
1464 # ifdef USE_AS_STRCAT
1472 movdqu (%rsi), %xmm0
1473 movdqu 13(%rsi), %xmm2
1474 movdqu %xmm0, (%rdi)
1475 movdqu %xmm2, 13(%rdi)
1476 # ifdef USE_AS_STPCPY
1479 # ifdef USE_AS_STRCAT
1487 movdqu (%rsi), %xmm0
1488 movdqu 14(%rsi), %xmm2
1489 movdqu %xmm0, (%rdi)
1490 movdqu %xmm2, 14(%rdi)
1491 # ifdef USE_AS_STPCPY
1494 # ifdef USE_AS_STRCAT
1502 movdqu (%rsi), %xmm0
1503 movdqu 15(%rsi), %xmm2
1504 movdqu %xmm0, (%rdi)
1505 movdqu %xmm2, 15(%rdi)
1506 # ifdef USE_AS_STPCPY
1509 # ifdef USE_AS_STRCAT
1517 movdqu (%rsi), %xmm0
1518 movdqu 16(%rsi), %xmm2
1519 movdqu %xmm0, (%rdi)
1520 movdqu %xmm2, 16(%rdi)
1521 # ifdef USE_AS_STPCPY
1524 # ifdef USE_AS_STRCAT
1532 movdqu (%rsi), %xmm0
1533 movdqu 16(%rsi), %xmm2
1535 movdqu %xmm0, (%rdi)
1536 movdqu %xmm2, 16(%rdi)
1538 # ifdef USE_AS_STRCAT
1544 # ifndef USE_AS_STRCAT
1630 movdqu %xmm0, -1(%rdi)
1635 movdqu %xmm0, (%rdi)
1639 L(CopyFrom1To16BytesUnalignedXmm2):
1640 movdqu %xmm2, (%rdi, %rcx)
1643 L(CopyFrom1To16BytesXmmExit):
1647 # ifdef USE_AS_STPCPY
1648 lea (%rdi, %rdx), %rax
1651 lea 1(%rdi, %rdx), %rdi
1654 L(StrncpyFillTailWithZero):
1658 jbe L(StrncpyFillExit)
1660 movdqu %xmm0, (%rdi)
1668 jb L(StrncpyFillLess64)
1670 L(StrncpyFillLoopMovdqa):
1671 movdqa %xmm0, (%rdi)
1672 movdqa %xmm0, 16(%rdi)
1673 movdqa %xmm0, 32(%rdi)
1674 movdqa %xmm0, 48(%rdi)
1677 jae L(StrncpyFillLoopMovdqa)
1679 L(StrncpyFillLess64):
1681 jl L(StrncpyFillLess32)
1682 movdqa %xmm0, (%rdi)
1683 movdqa %xmm0, 16(%rdi)
1686 jl L(StrncpyFillExit)
1687 movdqa %xmm0, (%rdi)
1689 BRANCH_TO_JMPTBL_ENTRY (L(FillTable), %r8, 4)
1691 L(StrncpyFillLess32):
1693 jl L(StrncpyFillExit)
1694 movdqa %xmm0, (%rdi)
1696 BRANCH_TO_JMPTBL_ENTRY (L(FillTable), %r8, 4)
1700 BRANCH_TO_JMPTBL_ENTRY (L(FillTable), %r8, 4)
1702 /* end of ifndef USE_AS_STRCAT */
1706 L(UnalignedLeaveCase2OrCase3):
1708 jnz L(Unaligned64LeaveCase2)
1709 L(Unaligned64LeaveCase3):
1713 jl L(CopyFrom1To16BytesCase3)
1714 movdqu %xmm4, (%rdi)
1716 jb L(CopyFrom1To16BytesCase3)
1717 movdqu %xmm5, 16(%rdi)
1719 jb L(CopyFrom1To16BytesCase3)
1720 movdqu %xmm6, 32(%rdi)
1722 jb L(CopyFrom1To16BytesCase3)
1723 movdqu %xmm7, 48(%rdi)
1724 # ifdef USE_AS_STPCPY
1727 # ifdef USE_AS_STRCAT
1734 L(Unaligned64LeaveCase2):
1736 pcmpeqb %xmm4, %xmm0
1737 pmovmskb %xmm0, %rdx
1739 jle L(CopyFrom1To16BytesCase2OrCase3)
1741 # ifndef USE_AS_STRCAT
1742 jnz L(CopyFrom1To16BytesUnalignedXmm4)
1744 jnz L(CopyFrom1To16Bytes)
1746 pcmpeqb %xmm5, %xmm0
1747 pmovmskb %xmm0, %rdx
1748 movdqu %xmm4, (%rdi)
1751 jbe L(CopyFrom1To16BytesCase2OrCase3)
1753 # ifndef USE_AS_STRCAT
1754 jnz L(CopyFrom1To16BytesUnalignedXmm5)
1756 jnz L(CopyFrom1To16Bytes)
1759 pcmpeqb %xmm6, %xmm0
1760 pmovmskb %xmm0, %rdx
1761 movdqu %xmm5, 16(%rdi)
1764 jbe L(CopyFrom1To16BytesCase2OrCase3)
1766 # ifndef USE_AS_STRCAT
1767 jnz L(CopyFrom1To16BytesUnalignedXmm6)
1769 jnz L(CopyFrom1To16Bytes)
1772 pcmpeqb %xmm7, %xmm0
1773 pmovmskb %xmm0, %rdx
1774 movdqu %xmm6, 32(%rdi)
1775 lea 16(%rdi, %rcx), %rdi
1776 lea 16(%rsi, %rcx), %rsi
1779 jb L(CopyFrom1To16BytesExit)
1780 BRANCH_TO_JMPTBL_ENTRY (L(ExitStrncpyTable), %r8, 4)
1784 # ifndef USE_AS_STRCAT
1791 # ifndef USE_AS_STRCAT
/* Exit jump-table entries: one 32-bit slot per target, each holding
   L(ExitN) - L(ExitTable), for N = 1 .. 32 in ascending order.  Dispatch
   sites use BRANCH_TO_JMPTBL_ENTRY (L(ExitTable), %rdx, 4).
   NOTE(review): presumably the L(Exit1) slot is entry 0, so an index of
   k selects L(Exit<k+1>) -- confirm that these entries start directly at
   L(ExitTable).  */
1799 .int JMPTBL(L(Exit1), L(ExitTable))
1800 .int JMPTBL(L(Exit2), L(ExitTable))
1801 .int JMPTBL(L(Exit3), L(ExitTable))
1802 .int JMPTBL(L(Exit4), L(ExitTable))
1803 .int JMPTBL(L(Exit5), L(ExitTable))
1804 .int JMPTBL(L(Exit6), L(ExitTable))
1805 .int JMPTBL(L(Exit7), L(ExitTable))
1806 .int JMPTBL(L(Exit8), L(ExitTable))
1807 .int JMPTBL(L(Exit9), L(ExitTable))
1808 .int JMPTBL(L(Exit10), L(ExitTable))
1809 .int JMPTBL(L(Exit11), L(ExitTable))
1810 .int JMPTBL(L(Exit12), L(ExitTable))
1811 .int JMPTBL(L(Exit13), L(ExitTable))
1812 .int JMPTBL(L(Exit14), L(ExitTable))
1813 .int JMPTBL(L(Exit15), L(ExitTable))
1814 .int JMPTBL(L(Exit16), L(ExitTable))
1815 .int JMPTBL(L(Exit17), L(ExitTable))
1816 .int JMPTBL(L(Exit18), L(ExitTable))
1817 .int JMPTBL(L(Exit19), L(ExitTable))
1818 .int JMPTBL(L(Exit20), L(ExitTable))
1819 .int JMPTBL(L(Exit21), L(ExitTable))
1820 .int JMPTBL(L(Exit22), L(ExitTable))
1821 .int JMPTBL(L(Exit23), L(ExitTable))
1822 .int JMPTBL(L(Exit24), L(ExitTable))
1823 .int JMPTBL(L(Exit25), L(ExitTable))
1824 .int JMPTBL(L(Exit26), L(ExitTable))
1825 .int JMPTBL(L(Exit27), L(ExitTable))
1826 .int JMPTBL(L(Exit28), L(ExitTable))
1827 .int JMPTBL(L(Exit29), L(ExitTable))
1828 .int JMPTBL(L(Exit30), L(ExitTable))
1829 .int JMPTBL(L(Exit31), L(ExitTable))
1830 .int JMPTBL(L(Exit32), L(ExitTable))
1831 # ifdef USE_AS_STRNCPY
/* Jump table for the length-limited exits.  It has 34 slots; slot N
   (N = 0 .. 33) holds L(StrncpyExitN) - L(ExitStrncpyTable).  Dispatch
   sites use BRANCH_TO_JMPTBL_ENTRY (L(ExitStrncpyTable), %r8, 4), so the
   value in %r8 selects L(StrncpyExit<%r8>).  */
1832 L(ExitStrncpyTable):
1833 .int JMPTBL(L(StrncpyExit0), L(ExitStrncpyTable))
1834 .int JMPTBL(L(StrncpyExit1), L(ExitStrncpyTable))
1835 .int JMPTBL(L(StrncpyExit2), L(ExitStrncpyTable))
1836 .int JMPTBL(L(StrncpyExit3), L(ExitStrncpyTable))
1837 .int JMPTBL(L(StrncpyExit4), L(ExitStrncpyTable))
1838 .int JMPTBL(L(StrncpyExit5), L(ExitStrncpyTable))
1839 .int JMPTBL(L(StrncpyExit6), L(ExitStrncpyTable))
1840 .int JMPTBL(L(StrncpyExit7), L(ExitStrncpyTable))
1841 .int JMPTBL(L(StrncpyExit8), L(ExitStrncpyTable))
1842 .int JMPTBL(L(StrncpyExit9), L(ExitStrncpyTable))
1843 .int JMPTBL(L(StrncpyExit10), L(ExitStrncpyTable))
1844 .int JMPTBL(L(StrncpyExit11), L(ExitStrncpyTable))
1845 .int JMPTBL(L(StrncpyExit12), L(ExitStrncpyTable))
1846 .int JMPTBL(L(StrncpyExit13), L(ExitStrncpyTable))
1847 .int JMPTBL(L(StrncpyExit14), L(ExitStrncpyTable))
1848 .int JMPTBL(L(StrncpyExit15), L(ExitStrncpyTable))
1849 .int JMPTBL(L(StrncpyExit16), L(ExitStrncpyTable))
1850 .int JMPTBL(L(StrncpyExit17), L(ExitStrncpyTable))
1851 .int JMPTBL(L(StrncpyExit18), L(ExitStrncpyTable))
1852 .int JMPTBL(L(StrncpyExit19), L(ExitStrncpyTable))
1853 .int JMPTBL(L(StrncpyExit20), L(ExitStrncpyTable))
1854 .int JMPTBL(L(StrncpyExit21), L(ExitStrncpyTable))
1855 .int JMPTBL(L(StrncpyExit22), L(ExitStrncpyTable))
1856 .int JMPTBL(L(StrncpyExit23), L(ExitStrncpyTable))
1857 .int JMPTBL(L(StrncpyExit24), L(ExitStrncpyTable))
1858 .int JMPTBL(L(StrncpyExit25), L(ExitStrncpyTable))
1859 .int JMPTBL(L(StrncpyExit26), L(ExitStrncpyTable))
1860 .int JMPTBL(L(StrncpyExit27), L(ExitStrncpyTable))
1861 .int JMPTBL(L(StrncpyExit28), L(ExitStrncpyTable))
1862 .int JMPTBL(L(StrncpyExit29), L(ExitStrncpyTable))
1863 .int JMPTBL(L(StrncpyExit30), L(ExitStrncpyTable))
1864 .int JMPTBL(L(StrncpyExit31), L(ExitStrncpyTable))
1865 .int JMPTBL(L(StrncpyExit32), L(ExitStrncpyTable))
1866 .int JMPTBL(L(StrncpyExit33), L(ExitStrncpyTable))
1867 # ifndef USE_AS_STRCAT
1870 .int JMPTBL(L(Fill0), L(FillTable))
1871 .int JMPTBL(L(Fill1), L(FillTable))
1872 .int JMPTBL(L(Fill2), L(FillTable))
1873 .int JMPTBL(L(Fill3), L(FillTable))
1874 .int JMPTBL(L(Fill4), L(FillTable))
1875 .int JMPTBL(L(Fill5), L(FillTable))
1876 .int JMPTBL(L(Fill6), L(FillTable))
1877 .int JMPTBL(L(Fill7), L(FillTable))
1878 .int JMPTBL(L(Fill8), L(FillTable))
1879 .int JMPTBL(L(Fill9), L(FillTable))
1880 .int JMPTBL(L(Fill10), L(FillTable))
1881 .int JMPTBL(L(Fill11), L(FillTable))
1882 .int JMPTBL(L(Fill12), L(FillTable))
1883 .int JMPTBL(L(Fill13), L(FillTable))
1884 .int JMPTBL(L(Fill14), L(FillTable))
1885 .int JMPTBL(L(Fill15), L(FillTable))
1886 .int JMPTBL(L(Fill16), L(FillTable))