2 Copyright (C) 2011 Free Software Foundation, Inc.
3 Contributed by Intel Corporation.
4 This file is part of the GNU C Library.
6 The GNU C Library is free software; you can redistribute it and/or
7 modify it under the terms of the GNU Lesser General Public
8 License as published by the Free Software Foundation; either
9 version 2.1 of the License, or (at your option) any later version.
11 The GNU C Library is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 Lesser General Public License for more details.
16 You should have received a copy of the GNU Lesser General Public
17 License along with the GNU C Library; if not, see
18 <http://www.gnu.org/licenses/>. */
22 # ifndef USE_AS_STRCAT
26 # define STRCPY __strcpy_ssse3
29 .section .text.ssse3,"ax",@progbits
33 # ifdef USE_AS_STRNCPY
37 # ifdef USE_AS_STRNCPY
41 jbe L(StrncpyExit8Bytes)
59 # ifdef USE_AS_STRNCPY
61 jb L(StrncpyExit15Bytes)
77 # ifdef USE_AS_STRNCPY
85 # ifdef USE_AS_STRNCPY
90 /* add 16 bytes rcx_offset to r8 */
103 /* convert byte mask in xmm0 to bit mask */
108 # ifdef USE_AS_STRNCPY
110 jbe L(CopyFrom1To16BytesCase2OrCase3)
113 jnz L(CopyFrom1To16Bytes)
120 # ifdef USE_AS_STRNCPY
135 /* case: rcx_offset == rdx_offset */
173 movaps 16(%rcx), %xmm2
178 # ifdef USE_AS_STRNCPY
180 jbe L(CopyFrom1To16BytesCase2OrCase3)
183 jnz L(CopyFrom1To16Bytes)
185 movaps 16(%rcx, %rsi), %xmm3
186 movaps %xmm2, (%rdx, %rsi)
190 # ifdef USE_AS_STRNCPY
192 jbe L(CopyFrom1To16BytesCase2OrCase3)
195 jnz L(CopyFrom1To16Bytes)
197 movaps 16(%rcx, %rsi), %xmm4
198 movaps %xmm3, (%rdx, %rsi)
202 # ifdef USE_AS_STRNCPY
204 jbe L(CopyFrom1To16BytesCase2OrCase3)
207 jnz L(CopyFrom1To16Bytes)
209 movaps 16(%rcx, %rsi), %xmm1
210 movaps %xmm4, (%rdx, %rsi)
214 # ifdef USE_AS_STRNCPY
216 jbe L(CopyFrom1To16BytesCase2OrCase3)
219 jnz L(CopyFrom1To16Bytes)
221 movaps 16(%rcx, %rsi), %xmm2
222 movaps %xmm1, (%rdx, %rsi)
226 # ifdef USE_AS_STRNCPY
228 jbe L(CopyFrom1To16BytesCase2OrCase3)
231 jnz L(CopyFrom1To16Bytes)
233 movaps 16(%rcx, %rsi), %xmm3
234 movaps %xmm2, (%rdx, %rsi)
238 # ifdef USE_AS_STRNCPY
240 jbe L(CopyFrom1To16BytesCase2OrCase3)
243 jnz L(CopyFrom1To16Bytes)
245 movaps %xmm3, (%rdx, %rsi)
247 lea 16(%rcx, %rsi), %rcx
251 # ifdef USE_AS_STRNCPY
252 lea 112(%r8, %rax), %r8
260 movaps 16(%rcx), %xmm5
261 movaps 32(%rcx), %xmm3
263 movaps 48(%rcx), %xmm7
271 # ifdef USE_AS_STRNCPY
273 jbe L(StrncpyLeaveCase2OrCase3)
276 jnz L(Aligned64Leave)
277 movaps %xmm4, -64(%rdx)
278 movaps %xmm5, -48(%rdx)
279 movaps %xmm6, -32(%rdx)
280 movaps %xmm7, -16(%rdx)
284 # ifdef USE_AS_STRNCPY
290 jnz L(CopyFrom1To16Bytes)
293 # ifdef USE_AS_STRNCPY
297 movaps %xmm4, -64(%rdx)
300 jnz L(CopyFrom1To16Bytes)
303 # ifdef USE_AS_STRNCPY
307 movaps %xmm5, -48(%rdx)
310 jnz L(CopyFrom1To16Bytes)
312 movaps %xmm6, -32(%rdx)
314 # ifdef USE_AS_STRNCPY
319 jmp L(CopyFrom1To16Bytes)
323 movaps -1(%rcx), %xmm1
324 movaps 15(%rcx), %xmm2
329 # ifdef USE_AS_STRNCPY
331 jbe L(StrncpyExit1Case2OrCase3)
336 palignr $1, %xmm1, %xmm2
338 movaps 31(%rcx), %xmm2
345 # ifdef USE_AS_STRNCPY
347 jbe L(StrncpyExit1Case2OrCase3)
352 palignr $1, %xmm3, %xmm2
354 movaps 31(%rcx), %xmm2
361 # ifdef USE_AS_STRNCPY
363 jbe L(StrncpyExit1Case2OrCase3)
368 palignr $1, %xmm1, %xmm2
370 movaps 31(%rcx), %xmm2
376 # ifdef USE_AS_STRNCPY
378 jbe L(StrncpyExit1Case2OrCase3)
383 palignr $1, %xmm3, %xmm2
393 # ifdef USE_AS_STRNCPY
396 movaps -1(%rcx), %xmm1
401 movaps 15(%rcx), %xmm2
402 movaps 31(%rcx), %xmm3
404 movaps 47(%rcx), %xmm4
406 movaps 63(%rcx), %xmm5
413 palignr $1, %xmm4, %xmm5
415 palignr $1, %xmm3, %xmm4
417 # ifdef USE_AS_STRNCPY
421 palignr $1, %xmm2, %xmm3
423 palignr $1, %xmm1, %xmm2
425 movaps %xmm5, 48(%rdx)
426 movaps %xmm4, 32(%rdx)
427 movaps %xmm3, 16(%rdx)
433 movdqu -1(%rcx), %xmm1
435 movdqu %xmm1, -1(%rdx)
436 jmp L(CopyFrom1To16Bytes)
440 movaps -2(%rcx), %xmm1
441 movaps 14(%rcx), %xmm2
446 # ifdef USE_AS_STRNCPY
448 jbe L(StrncpyExit2Case2OrCase3)
453 palignr $2, %xmm1, %xmm2
455 movaps 30(%rcx), %xmm2
462 # ifdef USE_AS_STRNCPY
464 jbe L(StrncpyExit2Case2OrCase3)
469 palignr $2, %xmm3, %xmm2
471 movaps 30(%rcx), %xmm2
478 # ifdef USE_AS_STRNCPY
480 jbe L(StrncpyExit2Case2OrCase3)
485 palignr $2, %xmm1, %xmm2
487 movaps 30(%rcx), %xmm2
493 # ifdef USE_AS_STRNCPY
495 jbe L(StrncpyExit2Case2OrCase3)
500 palignr $2, %xmm3, %xmm2
510 # ifdef USE_AS_STRNCPY
513 movaps -2(%rcx), %xmm1
518 movaps 14(%rcx), %xmm2
519 movaps 30(%rcx), %xmm3
521 movaps 46(%rcx), %xmm4
523 movaps 62(%rcx), %xmm5
530 palignr $2, %xmm4, %xmm5
532 palignr $2, %xmm3, %xmm4
534 # ifdef USE_AS_STRNCPY
538 palignr $2, %xmm2, %xmm3
540 palignr $2, %xmm1, %xmm2
542 movaps %xmm5, 48(%rdx)
543 movaps %xmm4, 32(%rdx)
544 movaps %xmm3, 16(%rdx)
550 movdqu -2(%rcx), %xmm1
552 movdqu %xmm1, -2(%rdx)
553 jmp L(CopyFrom1To16Bytes)
557 movaps -3(%rcx), %xmm1
558 movaps 13(%rcx), %xmm2
563 # ifdef USE_AS_STRNCPY
565 jbe L(StrncpyExit3Case2OrCase3)
570 palignr $3, %xmm1, %xmm2
572 movaps 29(%rcx), %xmm2
579 # ifdef USE_AS_STRNCPY
581 jbe L(StrncpyExit3Case2OrCase3)
586 palignr $3, %xmm3, %xmm2
588 movaps 29(%rcx), %xmm2
595 # ifdef USE_AS_STRNCPY
597 jbe L(StrncpyExit3Case2OrCase3)
602 palignr $3, %xmm1, %xmm2
604 movaps 29(%rcx), %xmm2
610 # ifdef USE_AS_STRNCPY
612 jbe L(StrncpyExit3Case2OrCase3)
617 palignr $3, %xmm3, %xmm2
627 # ifdef USE_AS_STRNCPY
630 movaps -3(%rcx), %xmm1
635 movaps 13(%rcx), %xmm2
636 movaps 29(%rcx), %xmm3
638 movaps 45(%rcx), %xmm4
640 movaps 61(%rcx), %xmm5
647 palignr $3, %xmm4, %xmm5
649 palignr $3, %xmm3, %xmm4
651 # ifdef USE_AS_STRNCPY
655 palignr $3, %xmm2, %xmm3
657 palignr $3, %xmm1, %xmm2
659 movaps %xmm5, 48(%rdx)
660 movaps %xmm4, 32(%rdx)
661 movaps %xmm3, 16(%rdx)
667 movdqu -3(%rcx), %xmm1
669 movdqu %xmm1, -3(%rdx)
670 jmp L(CopyFrom1To16Bytes)
674 movaps -4(%rcx), %xmm1
675 movaps 12(%rcx), %xmm2
680 # ifdef USE_AS_STRNCPY
682 jbe L(StrncpyExit4Case2OrCase3)
687 palignr $4, %xmm1, %xmm2
689 movaps 28(%rcx), %xmm2
696 # ifdef USE_AS_STRNCPY
698 jbe L(StrncpyExit4Case2OrCase3)
703 palignr $4, %xmm3, %xmm2
705 movaps 28(%rcx), %xmm2
712 # ifdef USE_AS_STRNCPY
714 jbe L(StrncpyExit4Case2OrCase3)
719 palignr $4, %xmm1, %xmm2
721 movaps 28(%rcx), %xmm2
727 # ifdef USE_AS_STRNCPY
729 jbe L(StrncpyExit4Case2OrCase3)
734 palignr $4, %xmm3, %xmm2
744 # ifdef USE_AS_STRNCPY
747 movaps -4(%rcx), %xmm1
752 movaps 12(%rcx), %xmm2
753 movaps 28(%rcx), %xmm3
755 movaps 44(%rcx), %xmm4
757 movaps 60(%rcx), %xmm5
764 palignr $4, %xmm4, %xmm5
766 palignr $4, %xmm3, %xmm4
768 # ifdef USE_AS_STRNCPY
772 palignr $4, %xmm2, %xmm3
774 palignr $4, %xmm1, %xmm2
776 movaps %xmm5, 48(%rdx)
777 movaps %xmm4, 32(%rdx)
778 movaps %xmm3, 16(%rdx)
784 movdqu -4(%rcx), %xmm1
786 movdqu %xmm1, -4(%rdx)
787 jmp L(CopyFrom1To16Bytes)
791 movaps -5(%rcx), %xmm1
792 movaps 11(%rcx), %xmm2
797 # ifdef USE_AS_STRNCPY
799 jbe L(StrncpyExit5Case2OrCase3)
804 palignr $5, %xmm1, %xmm2
806 movaps 27(%rcx), %xmm2
813 # ifdef USE_AS_STRNCPY
815 jbe L(StrncpyExit5Case2OrCase3)
820 palignr $5, %xmm3, %xmm2
822 movaps 27(%rcx), %xmm2
829 # ifdef USE_AS_STRNCPY
831 jbe L(StrncpyExit5Case2OrCase3)
836 palignr $5, %xmm1, %xmm2
838 movaps 27(%rcx), %xmm2
844 # ifdef USE_AS_STRNCPY
846 jbe L(StrncpyExit5Case2OrCase3)
851 palignr $5, %xmm3, %xmm2
861 # ifdef USE_AS_STRNCPY
864 movaps -5(%rcx), %xmm1
869 movaps 11(%rcx), %xmm2
870 movaps 27(%rcx), %xmm3
872 movaps 43(%rcx), %xmm4
874 movaps 59(%rcx), %xmm5
881 palignr $5, %xmm4, %xmm5
883 palignr $5, %xmm3, %xmm4
885 # ifdef USE_AS_STRNCPY
889 palignr $5, %xmm2, %xmm3
891 palignr $5, %xmm1, %xmm2
893 movaps %xmm5, 48(%rdx)
894 movaps %xmm4, 32(%rdx)
895 movaps %xmm3, 16(%rdx)
901 movdqu -5(%rcx), %xmm1
903 movdqu %xmm1, -5(%rdx)
904 jmp L(CopyFrom1To16Bytes)
908 movaps -6(%rcx), %xmm1
909 movaps 10(%rcx), %xmm2
914 # ifdef USE_AS_STRNCPY
916 jbe L(StrncpyExit6Case2OrCase3)
921 palignr $6, %xmm1, %xmm2
923 movaps 26(%rcx), %xmm2
930 # ifdef USE_AS_STRNCPY
932 jbe L(StrncpyExit6Case2OrCase3)
937 palignr $6, %xmm3, %xmm2
939 movaps 26(%rcx), %xmm2
946 # ifdef USE_AS_STRNCPY
948 jbe L(StrncpyExit6Case2OrCase3)
953 palignr $6, %xmm1, %xmm2
955 movaps 26(%rcx), %xmm2
961 # ifdef USE_AS_STRNCPY
963 jbe L(StrncpyExit6Case2OrCase3)
968 palignr $6, %xmm3, %xmm2
978 # ifdef USE_AS_STRNCPY
981 movaps -6(%rcx), %xmm1
986 movaps 10(%rcx), %xmm2
987 movaps 26(%rcx), %xmm3
989 movaps 42(%rcx), %xmm4
991 movaps 58(%rcx), %xmm5
998 palignr $6, %xmm4, %xmm5
1000 palignr $6, %xmm3, %xmm4
1002 # ifdef USE_AS_STRNCPY
1004 jbe L(StrncpyLeave6)
1006 palignr $6, %xmm2, %xmm3
1008 palignr $6, %xmm1, %xmm2
1010 movaps %xmm5, 48(%rdx)
1011 movaps %xmm4, 32(%rdx)
1012 movaps %xmm3, 16(%rdx)
1013 movaps %xmm2, (%rdx)
1015 jmp L(Shl6LoopStart)
1023 jmp L(CopyFrom1To16Bytes)
1027 movaps -7(%rcx), %xmm1
1028 movaps 9(%rcx), %xmm2
1030 pcmpeqb %xmm2, %xmm0
1031 pmovmskb %xmm0, %rax
1033 # ifdef USE_AS_STRNCPY
1035 jbe L(StrncpyExit7Case2OrCase3)
1040 palignr $7, %xmm1, %xmm2
1041 movaps %xmm2, (%rdx)
1042 movaps 25(%rcx), %xmm2
1044 pcmpeqb %xmm2, %xmm0
1046 pmovmskb %xmm0, %rax
1049 # ifdef USE_AS_STRNCPY
1051 jbe L(StrncpyExit7Case2OrCase3)
1056 palignr $7, %xmm3, %xmm2
1057 movaps %xmm2, (%rdx)
1058 movaps 25(%rcx), %xmm2
1060 pcmpeqb %xmm2, %xmm0
1062 pmovmskb %xmm0, %rax
1065 # ifdef USE_AS_STRNCPY
1067 jbe L(StrncpyExit7Case2OrCase3)
1072 palignr $7, %xmm1, %xmm2
1073 movaps %xmm2, (%rdx)
1074 movaps 25(%rcx), %xmm2
1076 pcmpeqb %xmm2, %xmm0
1078 pmovmskb %xmm0, %rax
1080 # ifdef USE_AS_STRNCPY
1082 jbe L(StrncpyExit7Case2OrCase3)
1087 palignr $7, %xmm3, %xmm2
1088 movaps %xmm2, (%rdx)
1097 # ifdef USE_AS_STRNCPY
1100 movaps -7(%rcx), %xmm1
1105 movaps 9(%rcx), %xmm2
1106 movaps 25(%rcx), %xmm3
1108 movaps 41(%rcx), %xmm4
1110 movaps 57(%rcx), %xmm5
1114 pcmpeqb %xmm0, %xmm7
1115 pmovmskb %xmm7, %rax
1117 palignr $7, %xmm4, %xmm5
1119 palignr $7, %xmm3, %xmm4
1121 # ifdef USE_AS_STRNCPY
1123 jbe L(StrncpyLeave7)
1125 palignr $7, %xmm2, %xmm3
1127 palignr $7, %xmm1, %xmm2
1129 movaps %xmm5, 48(%rdx)
1130 movaps %xmm4, 32(%rdx)
1131 movaps %xmm3, 16(%rdx)
1132 movaps %xmm2, (%rdx)
1134 jmp L(Shl7LoopStart)
1142 jmp L(CopyFrom1To16Bytes)
1146 movaps -8(%rcx), %xmm1
1147 movaps 8(%rcx), %xmm2
1149 pcmpeqb %xmm2, %xmm0
1150 pmovmskb %xmm0, %rax
1152 # ifdef USE_AS_STRNCPY
1154 jbe L(StrncpyExit8Case2OrCase3)
1159 palignr $8, %xmm1, %xmm2
1160 movaps %xmm2, (%rdx)
1161 movaps 24(%rcx), %xmm2
1163 pcmpeqb %xmm2, %xmm0
1165 pmovmskb %xmm0, %rax
1168 # ifdef USE_AS_STRNCPY
1170 jbe L(StrncpyExit8Case2OrCase3)
1175 palignr $8, %xmm3, %xmm2
1176 movaps %xmm2, (%rdx)
1177 movaps 24(%rcx), %xmm2
1179 pcmpeqb %xmm2, %xmm0
1181 pmovmskb %xmm0, %rax
1184 # ifdef USE_AS_STRNCPY
1186 jbe L(StrncpyExit8Case2OrCase3)
1191 palignr $8, %xmm1, %xmm2
1192 movaps %xmm2, (%rdx)
1193 movaps 24(%rcx), %xmm2
1195 pcmpeqb %xmm2, %xmm0
1197 pmovmskb %xmm0, %rax
1199 # ifdef USE_AS_STRNCPY
1201 jbe L(StrncpyExit8Case2OrCase3)
1206 palignr $8, %xmm3, %xmm2
1207 movaps %xmm2, (%rdx)
1216 # ifdef USE_AS_STRNCPY
1219 movaps -8(%rcx), %xmm1
1224 movaps 8(%rcx), %xmm2
1225 movaps 24(%rcx), %xmm3
1227 movaps 40(%rcx), %xmm4
1229 movaps 56(%rcx), %xmm5
1233 pcmpeqb %xmm0, %xmm7
1234 pmovmskb %xmm7, %rax
1236 palignr $8, %xmm4, %xmm5
1238 palignr $8, %xmm3, %xmm4
1240 # ifdef USE_AS_STRNCPY
1242 jbe L(StrncpyLeave8)
1244 palignr $8, %xmm2, %xmm3
1246 palignr $8, %xmm1, %xmm2
1248 movaps %xmm5, 48(%rdx)
1249 movaps %xmm4, 32(%rdx)
1250 movaps %xmm3, 16(%rdx)
1251 movaps %xmm2, (%rdx)
1253 jmp L(Shl8LoopStart)
1259 jmp L(CopyFrom1To16Bytes)
1263 movaps -9(%rcx), %xmm1
1264 movaps 7(%rcx), %xmm2
1266 pcmpeqb %xmm2, %xmm0
1267 pmovmskb %xmm0, %rax
1269 # ifdef USE_AS_STRNCPY
1271 jbe L(StrncpyExit9Case2OrCase3)
1276 palignr $9, %xmm1, %xmm2
1277 movaps %xmm2, (%rdx)
1278 movaps 23(%rcx), %xmm2
1280 pcmpeqb %xmm2, %xmm0
1282 pmovmskb %xmm0, %rax
1285 # ifdef USE_AS_STRNCPY
1287 jbe L(StrncpyExit9Case2OrCase3)
1292 palignr $9, %xmm3, %xmm2
1293 movaps %xmm2, (%rdx)
1294 movaps 23(%rcx), %xmm2
1296 pcmpeqb %xmm2, %xmm0
1298 pmovmskb %xmm0, %rax
1301 # ifdef USE_AS_STRNCPY
1303 jbe L(StrncpyExit9Case2OrCase3)
1308 palignr $9, %xmm1, %xmm2
1309 movaps %xmm2, (%rdx)
1310 movaps 23(%rcx), %xmm2
1312 pcmpeqb %xmm2, %xmm0
1314 pmovmskb %xmm0, %rax
1316 # ifdef USE_AS_STRNCPY
1318 jbe L(StrncpyExit9Case2OrCase3)
1323 palignr $9, %xmm3, %xmm2
1324 movaps %xmm2, (%rdx)
1333 # ifdef USE_AS_STRNCPY
1336 movaps -9(%rcx), %xmm1
1341 movaps 7(%rcx), %xmm2
1342 movaps 23(%rcx), %xmm3
1344 movaps 39(%rcx), %xmm4
1346 movaps 55(%rcx), %xmm5
1350 pcmpeqb %xmm0, %xmm7
1351 pmovmskb %xmm7, %rax
1353 palignr $9, %xmm4, %xmm5
1355 palignr $9, %xmm3, %xmm4
1357 # ifdef USE_AS_STRNCPY
1359 jbe L(StrncpyLeave9)
1361 palignr $9, %xmm2, %xmm3
1363 palignr $9, %xmm1, %xmm2
1365 movaps %xmm5, 48(%rdx)
1366 movaps %xmm4, 32(%rdx)
1367 movaps %xmm3, 16(%rdx)
1368 movaps %xmm2, (%rdx)
1370 jmp L(Shl9LoopStart)
1376 jmp L(CopyFrom1To16Bytes)
1380 movaps -10(%rcx), %xmm1
1381 movaps 6(%rcx), %xmm2
1383 pcmpeqb %xmm2, %xmm0
1384 pmovmskb %xmm0, %rax
1386 # ifdef USE_AS_STRNCPY
1388 jbe L(StrncpyExit10Case2OrCase3)
1391 jnz L(Shl10LoopExit)
1393 palignr $10, %xmm1, %xmm2
1394 movaps %xmm2, (%rdx)
1395 movaps 22(%rcx), %xmm2
1397 pcmpeqb %xmm2, %xmm0
1399 pmovmskb %xmm0, %rax
1402 # ifdef USE_AS_STRNCPY
1404 jbe L(StrncpyExit10Case2OrCase3)
1407 jnz L(Shl10LoopExit)
1409 palignr $10, %xmm3, %xmm2
1410 movaps %xmm2, (%rdx)
1411 movaps 22(%rcx), %xmm2
1413 pcmpeqb %xmm2, %xmm0
1415 pmovmskb %xmm0, %rax
1418 # ifdef USE_AS_STRNCPY
1420 jbe L(StrncpyExit10Case2OrCase3)
1423 jnz L(Shl10LoopExit)
1425 palignr $10, %xmm1, %xmm2
1426 movaps %xmm2, (%rdx)
1427 movaps 22(%rcx), %xmm2
1429 pcmpeqb %xmm2, %xmm0
1431 pmovmskb %xmm0, %rax
1433 # ifdef USE_AS_STRNCPY
1435 jbe L(StrncpyExit10Case2OrCase3)
1438 jnz L(Shl10LoopExit)
1440 palignr $10, %xmm3, %xmm2
1441 movaps %xmm2, (%rdx)
1450 # ifdef USE_AS_STRNCPY
1453 movaps -10(%rcx), %xmm1
1458 movaps 6(%rcx), %xmm2
1459 movaps 22(%rcx), %xmm3
1461 movaps 38(%rcx), %xmm4
1463 movaps 54(%rcx), %xmm5
1467 pcmpeqb %xmm0, %xmm7
1468 pmovmskb %xmm7, %rax
1470 palignr $10, %xmm4, %xmm5
1472 palignr $10, %xmm3, %xmm4
1474 # ifdef USE_AS_STRNCPY
1476 jbe L(StrncpyLeave10)
1478 palignr $10, %xmm2, %xmm3
1480 palignr $10, %xmm1, %xmm2
1482 movaps %xmm5, 48(%rdx)
1483 movaps %xmm4, 32(%rdx)
1484 movaps %xmm3, 16(%rdx)
1485 movaps %xmm2, (%rdx)
1487 jmp L(Shl10LoopStart)
1493 jmp L(CopyFrom1To16Bytes)
1497 movaps -11(%rcx), %xmm1
1498 movaps 5(%rcx), %xmm2
1500 pcmpeqb %xmm2, %xmm0
1501 pmovmskb %xmm0, %rax
1503 # ifdef USE_AS_STRNCPY
1505 jbe L(StrncpyExit11Case2OrCase3)
1508 jnz L(Shl11LoopExit)
1510 palignr $11, %xmm1, %xmm2
1511 movaps %xmm2, (%rdx)
1512 movaps 21(%rcx), %xmm2
1514 pcmpeqb %xmm2, %xmm0
1516 pmovmskb %xmm0, %rax
1519 # ifdef USE_AS_STRNCPY
1521 jbe L(StrncpyExit11Case2OrCase3)
1524 jnz L(Shl11LoopExit)
1526 palignr $11, %xmm3, %xmm2
1527 movaps %xmm2, (%rdx)
1528 movaps 21(%rcx), %xmm2
1530 pcmpeqb %xmm2, %xmm0
1532 pmovmskb %xmm0, %rax
1535 # ifdef USE_AS_STRNCPY
1537 jbe L(StrncpyExit11Case2OrCase3)
1540 jnz L(Shl11LoopExit)
1542 palignr $11, %xmm1, %xmm2
1543 movaps %xmm2, (%rdx)
1544 movaps 21(%rcx), %xmm2
1546 pcmpeqb %xmm2, %xmm0
1548 pmovmskb %xmm0, %rax
1550 # ifdef USE_AS_STRNCPY
1552 jbe L(StrncpyExit11Case2OrCase3)
1555 jnz L(Shl11LoopExit)
1557 palignr $11, %xmm3, %xmm2
1558 movaps %xmm2, (%rdx)
1567 # ifdef USE_AS_STRNCPY
1570 movaps -11(%rcx), %xmm1
1575 movaps 5(%rcx), %xmm2
1576 movaps 21(%rcx), %xmm3
1578 movaps 37(%rcx), %xmm4
1580 movaps 53(%rcx), %xmm5
1584 pcmpeqb %xmm0, %xmm7
1585 pmovmskb %xmm7, %rax
1587 palignr $11, %xmm4, %xmm5
1589 palignr $11, %xmm3, %xmm4
1591 # ifdef USE_AS_STRNCPY
1593 jbe L(StrncpyLeave11)
1595 palignr $11, %xmm2, %xmm3
1597 palignr $11, %xmm1, %xmm2
1599 movaps %xmm5, 48(%rdx)
1600 movaps %xmm4, 32(%rdx)
1601 movaps %xmm3, 16(%rdx)
1602 movaps %xmm2, (%rdx)
1604 jmp L(Shl11LoopStart)
1610 jmp L(CopyFrom1To16Bytes)
1614 movaps -12(%rcx), %xmm1
1615 movaps 4(%rcx), %xmm2
1617 pcmpeqb %xmm2, %xmm0
1618 pmovmskb %xmm0, %rax
1620 # ifdef USE_AS_STRNCPY
1622 jbe L(StrncpyExit12Case2OrCase3)
1625 jnz L(Shl12LoopExit)
1627 palignr $12, %xmm1, %xmm2
1628 movaps %xmm2, (%rdx)
1629 movaps 20(%rcx), %xmm2
1631 pcmpeqb %xmm2, %xmm0
1633 pmovmskb %xmm0, %rax
1636 # ifdef USE_AS_STRNCPY
1638 jbe L(StrncpyExit12Case2OrCase3)
1641 jnz L(Shl12LoopExit)
1643 palignr $12, %xmm3, %xmm2
1644 movaps %xmm2, (%rdx)
1645 movaps 20(%rcx), %xmm2
1647 pcmpeqb %xmm2, %xmm0
1649 pmovmskb %xmm0, %rax
1652 # ifdef USE_AS_STRNCPY
1654 jbe L(StrncpyExit12Case2OrCase3)
1657 jnz L(Shl12LoopExit)
1659 palignr $12, %xmm1, %xmm2
1660 movaps %xmm2, (%rdx)
1661 movaps 20(%rcx), %xmm2
1663 pcmpeqb %xmm2, %xmm0
1665 pmovmskb %xmm0, %rax
1667 # ifdef USE_AS_STRNCPY
1669 jbe L(StrncpyExit12Case2OrCase3)
1672 jnz L(Shl12LoopExit)
1674 palignr $12, %xmm3, %xmm2
1675 movaps %xmm2, (%rdx)
1684 # ifdef USE_AS_STRNCPY
1687 movaps -12(%rcx), %xmm1
1692 movaps 4(%rcx), %xmm2
1693 movaps 20(%rcx), %xmm3
1695 movaps 36(%rcx), %xmm4
1697 movaps 52(%rcx), %xmm5
1701 pcmpeqb %xmm0, %xmm7
1702 pmovmskb %xmm7, %rax
1704 palignr $12, %xmm4, %xmm5
1706 palignr $12, %xmm3, %xmm4
1708 # ifdef USE_AS_STRNCPY
1710 jbe L(StrncpyLeave12)
1712 palignr $12, %xmm2, %xmm3
1714 palignr $12, %xmm1, %xmm2
1716 movaps %xmm5, 48(%rdx)
1717 movaps %xmm4, 32(%rdx)
1718 movaps %xmm3, 16(%rdx)
1719 movaps %xmm2, (%rdx)
1721 jmp L(Shl12LoopStart)
1727 jmp L(CopyFrom1To16Bytes)
1731 movaps -13(%rcx), %xmm1
1732 movaps 3(%rcx), %xmm2
1734 pcmpeqb %xmm2, %xmm0
1735 pmovmskb %xmm0, %rax
1737 # ifdef USE_AS_STRNCPY
1739 jbe L(StrncpyExit13Case2OrCase3)
1742 jnz L(Shl13LoopExit)
1744 palignr $13, %xmm1, %xmm2
1745 movaps %xmm2, (%rdx)
1746 movaps 19(%rcx), %xmm2
1748 pcmpeqb %xmm2, %xmm0
1750 pmovmskb %xmm0, %rax
1753 # ifdef USE_AS_STRNCPY
1755 jbe L(StrncpyExit13Case2OrCase3)
1758 jnz L(Shl13LoopExit)
1760 palignr $13, %xmm3, %xmm2
1761 movaps %xmm2, (%rdx)
1762 movaps 19(%rcx), %xmm2
1764 pcmpeqb %xmm2, %xmm0
1766 pmovmskb %xmm0, %rax
1769 # ifdef USE_AS_STRNCPY
1771 jbe L(StrncpyExit13Case2OrCase3)
1774 jnz L(Shl13LoopExit)
1776 palignr $13, %xmm1, %xmm2
1777 movaps %xmm2, (%rdx)
1778 movaps 19(%rcx), %xmm2
1780 pcmpeqb %xmm2, %xmm0
1782 pmovmskb %xmm0, %rax
1784 # ifdef USE_AS_STRNCPY
1786 jbe L(StrncpyExit13Case2OrCase3)
1789 jnz L(Shl13LoopExit)
1791 palignr $13, %xmm3, %xmm2
1792 movaps %xmm2, (%rdx)
1801 # ifdef USE_AS_STRNCPY
1804 movaps -13(%rcx), %xmm1
1809 movaps 3(%rcx), %xmm2
1810 movaps 19(%rcx), %xmm3
1812 movaps 35(%rcx), %xmm4
1814 movaps 51(%rcx), %xmm5
1818 pcmpeqb %xmm0, %xmm7
1819 pmovmskb %xmm7, %rax
1821 palignr $13, %xmm4, %xmm5
1823 palignr $13, %xmm3, %xmm4
1825 # ifdef USE_AS_STRNCPY
1827 jbe L(StrncpyLeave13)
1829 palignr $13, %xmm2, %xmm3
1831 palignr $13, %xmm1, %xmm2
1833 movaps %xmm5, 48(%rdx)
1834 movaps %xmm4, 32(%rdx)
1835 movaps %xmm3, 16(%rdx)
1836 movaps %xmm2, (%rdx)
1838 jmp L(Shl13LoopStart)
1844 jmp L(CopyFrom1To16Bytes)
1848 movaps -14(%rcx), %xmm1
1849 movaps 2(%rcx), %xmm2
1851 pcmpeqb %xmm2, %xmm0
1852 pmovmskb %xmm0, %rax
1854 # ifdef USE_AS_STRNCPY
1856 jbe L(StrncpyExit14Case2OrCase3)
1859 jnz L(Shl14LoopExit)
1861 palignr $14, %xmm1, %xmm2
1862 movaps %xmm2, (%rdx)
1863 movaps 18(%rcx), %xmm2
1865 pcmpeqb %xmm2, %xmm0
1867 pmovmskb %xmm0, %rax
1870 # ifdef USE_AS_STRNCPY
1872 jbe L(StrncpyExit14Case2OrCase3)
1875 jnz L(Shl14LoopExit)
1877 palignr $14, %xmm3, %xmm2
1878 movaps %xmm2, (%rdx)
1879 movaps 18(%rcx), %xmm2
1881 pcmpeqb %xmm2, %xmm0
1883 pmovmskb %xmm0, %rax
1886 # ifdef USE_AS_STRNCPY
1888 jbe L(StrncpyExit14Case2OrCase3)
1891 jnz L(Shl14LoopExit)
1893 palignr $14, %xmm1, %xmm2
1894 movaps %xmm2, (%rdx)
1895 movaps 18(%rcx), %xmm2
1897 pcmpeqb %xmm2, %xmm0
1899 pmovmskb %xmm0, %rax
1901 # ifdef USE_AS_STRNCPY
1903 jbe L(StrncpyExit14Case2OrCase3)
1906 jnz L(Shl14LoopExit)
1908 palignr $14, %xmm3, %xmm2
1909 movaps %xmm2, (%rdx)
1918 # ifdef USE_AS_STRNCPY
1921 movaps -14(%rcx), %xmm1
1926 movaps 2(%rcx), %xmm2
1927 movaps 18(%rcx), %xmm3
1929 movaps 34(%rcx), %xmm4
1931 movaps 50(%rcx), %xmm5
1935 pcmpeqb %xmm0, %xmm7
1936 pmovmskb %xmm7, %rax
1938 palignr $14, %xmm4, %xmm5
1940 palignr $14, %xmm3, %xmm4
1942 # ifdef USE_AS_STRNCPY
1944 jbe L(StrncpyLeave14)
1946 palignr $14, %xmm2, %xmm3
1948 palignr $14, %xmm1, %xmm2
1950 movaps %xmm5, 48(%rdx)
1951 movaps %xmm4, 32(%rdx)
1952 movaps %xmm3, 16(%rdx)
1953 movaps %xmm2, (%rdx)
1955 jmp L(Shl14LoopStart)
1961 jmp L(CopyFrom1To16Bytes)
1965 movaps -15(%rcx), %xmm1
1966 movaps 1(%rcx), %xmm2
1968 pcmpeqb %xmm2, %xmm0
1969 pmovmskb %xmm0, %rax
1971 # ifdef USE_AS_STRNCPY
1973 jbe L(StrncpyExit15Case2OrCase3)
1976 jnz L(Shl15LoopExit)
1978 palignr $15, %xmm1, %xmm2
1979 movaps %xmm2, (%rdx)
1980 movaps 17(%rcx), %xmm2
1982 pcmpeqb %xmm2, %xmm0
1984 pmovmskb %xmm0, %rax
1987 # ifdef USE_AS_STRNCPY
1989 jbe L(StrncpyExit15Case2OrCase3)
1992 jnz L(Shl15LoopExit)
1994 palignr $15, %xmm3, %xmm2
1995 movaps %xmm2, (%rdx)
1996 movaps 17(%rcx), %xmm2
1998 pcmpeqb %xmm2, %xmm0
2000 pmovmskb %xmm0, %rax
2003 # ifdef USE_AS_STRNCPY
2005 jbe L(StrncpyExit15Case2OrCase3)
2008 jnz L(Shl15LoopExit)
2010 palignr $15, %xmm1, %xmm2
2011 movaps %xmm2, (%rdx)
2012 movaps 17(%rcx), %xmm2
2014 pcmpeqb %xmm2, %xmm0
2016 pmovmskb %xmm0, %rax
2018 # ifdef USE_AS_STRNCPY
2020 jbe L(StrncpyExit15Case2OrCase3)
2023 jnz L(Shl15LoopExit)
2025 palignr $15, %xmm3, %xmm2
2026 movaps %xmm2, (%rdx)
2035 # ifdef USE_AS_STRNCPY
2038 movaps -15(%rcx), %xmm1
2043 movaps 1(%rcx), %xmm2
2044 movaps 17(%rcx), %xmm3
2046 movaps 33(%rcx), %xmm4
2048 movaps 49(%rcx), %xmm5
2052 pcmpeqb %xmm0, %xmm7
2053 pmovmskb %xmm7, %rax
2055 palignr $15, %xmm4, %xmm5
2057 palignr $15, %xmm3, %xmm4
2059 # ifdef USE_AS_STRNCPY
2061 jbe L(StrncpyLeave15)
2063 palignr $15, %xmm2, %xmm3
2065 palignr $15, %xmm1, %xmm2
2067 movaps %xmm5, 48(%rdx)
2068 movaps %xmm4, 32(%rdx)
2069 movaps %xmm3, 16(%rdx)
2070 movaps %xmm2, (%rdx)
2072 jmp L(Shl15LoopStart)
2078 # ifdef USE_AS_STRCAT
2079 jmp L(CopyFrom1To16Bytes)
2082 # ifndef USE_AS_STRCAT
2085 L(CopyFrom1To16Bytes):
2086 # ifdef USE_AS_STRNCPY
2113 # ifdef USE_AS_STPCPY
2118 # ifdef USE_AS_STRNCPY
2121 jnz L(StrncpyFillTailWithZero1)
2122 # ifdef USE_AS_STPCPY
2152 # ifdef USE_AS_STPCPY
2157 # ifdef USE_AS_STRNCPY
2160 jnz L(StrncpyFillTailWithZero1)
2161 # ifdef USE_AS_STPCPY
2168 # ifdef USE_AS_STRNCPY
/* Case2 entry point.  Reached from the Case2OrCase3 dispatcher below
   and from the StrncpyExitN / Aligned64Leave stubs later in the file.
   Most of this routine's instructions are not visible in this
   excerpt.  */
2171 L(CopyFrom1To16BytesCase2):
/* %rsi = %rsi + %rdx.  lea is used, so the flags are left untouched.  */
2174 lea (%rsi, %rdx), %rsi
/* Dispatcher: callers arrive here through `jbe`.  Control goes to
   Case2 when ZF is clear.  The instruction that sets the flags for
   this jnz is not visible in this excerpt.
   NOTE(review): presumably falls through to Case3 otherwise --
   confirm against the full source.  */
2244 L(CopyFrom1To16BytesCase2OrCase3):
2246 jnz L(CopyFrom1To16BytesCase2)
/* Case3 entry point.  Target of the `jbe`/`jmp
   L(CopyFrom1To16BytesCase3)` sites elsewhere in the file.  The
   sub-labels below carry size-range comments; the compares that select
   between them are not visible in this excerpt.  */
2249 L(CopyFrom1To16BytesCase3):
2266 L(More8Case3): /* but less than 16 */
2274 L(More4Case3): /* but less than 8 */
2279 L(Less12Case3): /* but more than 8 */
2290 # ifdef USE_AS_STPCPY
2295 # ifdef USE_AS_STRNCPY
2298 jnz L(StrncpyFillTailWithZero1)
2299 # ifdef USE_AS_STPCPY
2310 # ifdef USE_AS_STPCPY
2315 # ifdef USE_AS_STRNCPY
2318 jnz L(StrncpyFillTailWithZero1)
2319 # ifdef USE_AS_STPCPY
2332 # ifdef USE_AS_STPCPY
2337 # ifdef USE_AS_STRNCPY
2340 jnz L(StrncpyFillTailWithZero1)
2341 # ifdef USE_AS_STPCPY
2352 # ifdef USE_AS_STPCPY
2357 # ifdef USE_AS_STRNCPY
2360 jnz L(StrncpyFillTailWithZero1)
2361 # ifdef USE_AS_STPCPY
2374 # ifdef USE_AS_STPCPY
2379 # ifdef USE_AS_STRNCPY
2382 jnz L(StrncpyFillTailWithZero1)
2383 # ifdef USE_AS_STPCPY
2396 # ifdef USE_AS_STPCPY
2401 # ifdef USE_AS_STRNCPY
2404 jnz L(StrncpyFillTailWithZero1)
2405 # ifdef USE_AS_STPCPY
2418 # ifdef USE_AS_STPCPY
2423 # ifdef USE_AS_STRNCPY
2426 jnz L(StrncpyFillTailWithZero1)
2427 # ifdef USE_AS_STPCPY
2440 # ifdef USE_AS_STPCPY
2445 # ifdef USE_AS_STRNCPY
2448 jnz L(StrncpyFillTailWithZero1)
2449 # ifdef USE_AS_STPCPY
2462 # ifdef USE_AS_STPCPY
2467 # ifdef USE_AS_STRNCPY
2470 jnz L(StrncpyFillTailWithZero1)
2471 # ifdef USE_AS_STPCPY
2484 # ifdef USE_AS_STPCPY
2489 # ifdef USE_AS_STRNCPY
2492 jnz L(StrncpyFillTailWithZero1)
2493 # ifdef USE_AS_STPCPY
2506 # ifdef USE_AS_STPCPY
2511 # ifdef USE_AS_STRNCPY
2514 jnz L(StrncpyFillTailWithZero1)
2515 # ifdef USE_AS_STPCPY
2528 # ifdef USE_AS_STPCPY
2533 # ifdef USE_AS_STRNCPY
2536 jnz L(StrncpyFillTailWithZero1)
2537 # ifdef USE_AS_STPCPY
2550 # ifdef USE_AS_STPCPY
2555 # ifdef USE_AS_STRNCPY
2558 jnz L(StrncpyFillTailWithZero1)
2559 # ifdef USE_AS_STPCPY
2572 # ifdef USE_AS_STPCPY
2577 # ifdef USE_AS_STRNCPY
2580 jnz L(StrncpyFillTailWithZero1)
2581 # ifdef USE_AS_STPCPY
2588 # ifdef USE_AS_STRNCPY
/* Tail-fill section.  Every store in this section writes %xmm0 to
   memory addressed through %rcx.
   NOTE(review): the label names suggest %xmm0 holds zero and that this
   pads the rest of the destination; the instruction that loads %xmm0
   and all pointer/length arithmetic are not visible in this excerpt --
   confirm against the full source.  */
2686 L(StrncpyFillExit1):
2688 L(FillFrom1To16Bytes):
2703 L(FillMore8): /* but less than 16 */
2711 L(FillMore4): /* but less than 8 */
2716 L(FillLess12): /* but more than 8 */
/* Entry point used by the `jnz L(StrncpyFillTailWithZero1)` sites in
   the copy-exit code.  */
2723 L(StrncpyFillTailWithZero1):
/* Unsigned "below or equal": leave immediately via the exit label.  */
2726 jbe L(StrncpyFillExit1)
/* Unsigned "below": skip the 64-byte loop.  */
2740 jb L(StrncpyFillLess64)
/* Main loop: four aligned 16-byte stores cover 64 bytes at (%rcx).
   movdqa faults on an address that is not 16-byte aligned, so %rcx
   must be aligned here.  */
2742 L(StrncpyFillLoopMovdqa):
2743 movdqa %xmm0, (%rcx)
2744 movdqa %xmm0, 16(%rcx)
2745 movdqa %xmm0, 32(%rcx)
2746 movdqa %xmm0, 48(%rcx)
/* Loop while the preceding (not shown) arithmetic leaves CF clear,
   i.e. unsigned "above or equal".  */
2749 jae L(StrncpyFillLoopMovdqa)
/* NOTE(review): from here on the branches use the signed condition
   `jl`, whereas the loop above uses unsigned `jae`/`jb`.  The
   flag-setting instructions are not visible in this excerpt; verify
   that the signed tests are intended.  */
2751 L(StrncpyFillLess64):
2753 jl L(StrncpyFillLess32)
/* Two aligned stores: 32 bytes at (%rcx).  */
2754 movdqa %xmm0, (%rcx)
2755 movdqa %xmm0, 16(%rcx)
2758 jl L(StrncpyFillExit1)
/* One more aligned 16-byte store, then finish in the 1..16-byte
   tail code.  */
2759 movdqa %xmm0, (%rcx)
2761 jmp L(FillFrom1To16Bytes)
2763 L(StrncpyFillLess32):
2765 jl L(StrncpyFillExit1)
/* Single aligned 16-byte store, then finish in the 1..16-byte
   tail code.  */
2766 movdqa %xmm0, (%rcx)
2768 jmp L(FillFrom1To16Bytes)
2776 L(StrncpyExit15Bytes):
2805 # ifdef USE_AS_STPCPY
2815 L(StrncpyExit8Bytes):
2846 # ifdef USE_AS_STPCPY
2858 # ifdef USE_AS_STRNCPY
/* Exit path taken from the aligned 64-byte loop via
   `jbe L(StrncpyLeaveCase2OrCase3)`.  On entry %xmm4..%xmm7 hold the
   four 16-byte chunks loaded by that loop, and the stores below use
   negative offsets from %rdx.  The instruction that sets ZF for the
   jnz is not visible in this excerpt.  */
2860 L(StrncpyLeaveCase2OrCase3):
2862 jnz L(Aligned64LeaveCase2)
/* Case3: store chunks one at a time with aligned stores, leaving for
   CopyFrom1To16BytesCase3 as soon as a `jbe` (unsigned below-or-equal)
   check fires.  The arithmetic feeding each jbe is not visible in this
   excerpt.  %xmm7 is not stored here.  */
2864 L(Aligned64LeaveCase3):
2867 jbe L(CopyFrom1To16BytesCase3)
2868 movaps %xmm4, -64(%rdx)
2871 jbe L(CopyFrom1To16BytesCase3)
2872 movaps %xmm5, -48(%rdx)
2875 jbe L(CopyFrom1To16BytesCase3)
2876 movaps %xmm6, -32(%rdx)
2879 jmp L(CopyFrom1To16BytesCase3)
/* Case2: for each chunk, pcmpeqb compares it bytewise against %xmm0
   (result left in %xmm0) and pmovmskb gathers the per-byte results
   into a bit mask in %rax.  Each chunk is stored only after the
   following chunk's compare has been issued.  */
2881 L(Aligned64LeaveCase2):
2882 pcmpeqb %xmm4, %xmm0
2883 pmovmskb %xmm0, %rax
/* NOTE(review): this first check uses signed `jle`, while the later
   ones use unsigned `jbe`.  The flag-setting instruction is not
   visible in this excerpt; verify the signed test is intended.  */
2885 jle L(CopyFrom1To16BytesCase2OrCase3)
2887 jnz L(CopyFrom1To16Bytes)
2889 pcmpeqb %xmm5, %xmm0
2890 pmovmskb %xmm0, %rax
/* Aligned store of the first chunk; movaps does not modify flags.  */
2891 movaps %xmm4, -64(%rdx)
2894 jbe L(CopyFrom1To16BytesCase2OrCase3)
2896 jnz L(CopyFrom1To16Bytes)
2898 pcmpeqb %xmm6, %xmm0
2899 pmovmskb %xmm0, %rax
2900 movaps %xmm5, -48(%rdx)
2903 jbe L(CopyFrom1To16BytesCase2OrCase3)
2905 jnz L(CopyFrom1To16Bytes)
2907 pcmpeqb %xmm7, %xmm0
2908 pmovmskb %xmm0, %rax
2909 movaps %xmm6, -32(%rdx)
/* The last chunk (%xmm7) is not stored here; control continues
   unconditionally in CopyFrom1To16BytesCase2.  */
2912 jmp L(CopyFrom1To16BytesCase2)
2913 /*--------------------------------------------------*/
/* Exit stubs, one per shift amount N = 1..15.  Each is the target of
   the `jbe L(StrncpyExitNCase2OrCase3)` checks in the corresponding
   palignr-$N copy path.  Every stub ends the same way: go to
   CopyFrom1To16BytesCase2 when ZF is clear, otherwise to
   CopyFrom1To16BytesCase3.  The instruction that sets ZF for each jnz
   is not visible in this excerpt.  */
/* N = 1: copy 16 bytes from -1(%rcx) to -1(%rdx) through %xmm0.
   movdqu is used because these addresses are offset by N from the
   addresses the copy path accesses with aligned movaps.  The stubs for
   N = 2..5 below do the same with their own offset.  */
2915 L(StrncpyExit1Case2OrCase3):
2916 movdqu -1(%rcx), %xmm0
2917 movdqu %xmm0, -1(%rdx)
2920 jnz L(CopyFrom1To16BytesCase2)
2921 jmp L(CopyFrom1To16BytesCase3)
2924 L(StrncpyExit2Case2OrCase3):
2925 movdqu -2(%rcx), %xmm0
2926 movdqu %xmm0, -2(%rdx)
2929 jnz L(CopyFrom1To16BytesCase2)
2930 jmp L(CopyFrom1To16BytesCase3)
2933 L(StrncpyExit3Case2OrCase3):
2934 movdqu -3(%rcx), %xmm0
2935 movdqu %xmm0, -3(%rdx)
2938 jnz L(CopyFrom1To16BytesCase2)
2939 jmp L(CopyFrom1To16BytesCase3)
2942 L(StrncpyExit4Case2OrCase3):
2943 movdqu -4(%rcx), %xmm0
2944 movdqu %xmm0, -4(%rdx)
2947 jnz L(CopyFrom1To16BytesCase2)
2948 jmp L(CopyFrom1To16BytesCase3)
2951 L(StrncpyExit5Case2OrCase3):
2952 movdqu -5(%rcx), %xmm0
2953 movdqu %xmm0, -5(%rdx)
2956 jnz L(CopyFrom1To16BytesCase2)
2957 jmp L(CopyFrom1To16BytesCase3)
/* N = 6..15: only the label and the final dispatch are visible in this
   excerpt; whatever copy instructions precede the jnz are not shown.  */
2960 L(StrncpyExit6Case2OrCase3):
2967 jnz L(CopyFrom1To16BytesCase2)
2968 jmp L(CopyFrom1To16BytesCase3)
2971 L(StrncpyExit7Case2OrCase3):
2978 jnz L(CopyFrom1To16BytesCase2)
2979 jmp L(CopyFrom1To16BytesCase3)
2982 L(StrncpyExit8Case2OrCase3):
2987 jnz L(CopyFrom1To16BytesCase2)
2988 jmp L(CopyFrom1To16BytesCase3)
2991 L(StrncpyExit9Case2OrCase3):
2996 jnz L(CopyFrom1To16BytesCase2)
2997 jmp L(CopyFrom1To16BytesCase3)
3000 L(StrncpyExit10Case2OrCase3):
3005 jnz L(CopyFrom1To16BytesCase2)
3006 jmp L(CopyFrom1To16BytesCase3)
3009 L(StrncpyExit11Case2OrCase3):
3014 jnz L(CopyFrom1To16BytesCase2)
3015 jmp L(CopyFrom1To16BytesCase3)
3018 L(StrncpyExit12Case2OrCase3):
3023 jnz L(CopyFrom1To16BytesCase2)
3024 jmp L(CopyFrom1To16BytesCase3)
3027 L(StrncpyExit13Case2OrCase3):
3032 jnz L(CopyFrom1To16BytesCase2)
3033 jmp L(CopyFrom1To16BytesCase3)
3036 L(StrncpyExit14Case2OrCase3):
3041 jnz L(CopyFrom1To16BytesCase2)
3042 jmp L(CopyFrom1To16BytesCase3)
3045 L(StrncpyExit15Case2OrCase3):
3050 jnz L(CopyFrom1To16BytesCase2)
3051 jmp L(CopyFrom1To16BytesCase3)
3058 palignr $1, %xmm1, %xmm2
3059 movaps %xmm2, (%rdx)
3060 movaps 31(%rcx), %xmm2
3064 palignr $1, %xmm3, %xmm2
3065 movaps %xmm2, 16(%rdx)
3069 movaps %xmm4, 32(%rdx)
3073 movaps %xmm5, 48(%rdx)
3078 lea 15(%rdx, %rsi), %rdx
3079 lea 15(%rcx, %rsi), %rcx
3085 jmp L(CopyFrom1To16BytesCase3)
3092 palignr $2, %xmm1, %xmm2
3093 movaps %xmm2, (%rdx)
3094 movaps 30(%rcx), %xmm2
3098 palignr $2, %xmm3, %xmm2
3099 movaps %xmm2, 16(%rdx)
3103 movaps %xmm4, 32(%rdx)
3107 movaps %xmm5, 48(%rdx)
3112 lea 14(%rdx, %rsi), %rdx
3113 lea 14(%rcx, %rsi), %rcx
3119 jmp L(CopyFrom1To16BytesCase3)
3126 palignr $3, %xmm1, %xmm2
3127 movaps %xmm2, (%rdx)
3128 movaps 29(%rcx), %xmm2
3132 palignr $3, %xmm3, %xmm2
3133 movaps %xmm2, 16(%rdx)
3137 movaps %xmm4, 32(%rdx)
3141 movaps %xmm5, 48(%rdx)
3146 lea 13(%rdx, %rsi), %rdx
3147 lea 13(%rcx, %rsi), %rcx
3153 jmp L(CopyFrom1To16BytesCase3)
3160 palignr $4, %xmm1, %xmm2
3161 movaps %xmm2, (%rdx)
3162 movaps 28(%rcx), %xmm2
3166 palignr $4, %xmm3, %xmm2
3167 movaps %xmm2, 16(%rdx)
3171 movaps %xmm4, 32(%rdx)
3175 movaps %xmm5, 48(%rdx)
3180 lea 12(%rdx, %rsi), %rdx
3181 lea 12(%rcx, %rsi), %rcx
3187 jmp L(CopyFrom1To16BytesCase3)
3194 palignr $5, %xmm1, %xmm2
3195 movaps %xmm2, (%rdx)
3196 movaps 27(%rcx), %xmm2
3200 palignr $5, %xmm3, %xmm2
3201 movaps %xmm2, 16(%rdx)
3205 movaps %xmm4, 32(%rdx)
3209 movaps %xmm5, 48(%rdx)
3214 lea 11(%rdx, %rsi), %rdx
3215 lea 11(%rcx, %rsi), %rcx
3221 jmp L(CopyFrom1To16BytesCase3)
3228 palignr $6, %xmm1, %xmm2
3229 movaps %xmm2, (%rdx)
3230 movaps 26(%rcx), %xmm2
3234 palignr $6, %xmm3, %xmm2
3235 movaps %xmm2, 16(%rdx)
3239 movaps %xmm4, 32(%rdx)
3243 movaps %xmm5, 48(%rdx)
3248 lea 10(%rdx, %rsi), %rdx
3249 lea 10(%rcx, %rsi), %rcx
3255 jmp L(CopyFrom1To16BytesCase3)
3262 palignr $7, %xmm1, %xmm2
3263 movaps %xmm2, (%rdx)
3264 movaps 25(%rcx), %xmm2
3268 palignr $7, %xmm3, %xmm2
3269 movaps %xmm2, 16(%rdx)
3273 movaps %xmm4, 32(%rdx)
3277 movaps %xmm5, 48(%rdx)
3282 lea 9(%rdx, %rsi), %rdx
3283 lea 9(%rcx, %rsi), %rcx
3289 jmp L(CopyFrom1To16BytesCase3)
3296 palignr $8, %xmm1, %xmm2
3297 movaps %xmm2, (%rdx)
3298 movaps 24(%rcx), %xmm2
3302 palignr $8, %xmm3, %xmm2
3303 movaps %xmm2, 16(%rdx)
3307 movaps %xmm4, 32(%rdx)
3311 movaps %xmm5, 48(%rdx)
3316 lea 8(%rdx, %rsi), %rdx
3317 lea 8(%rcx, %rsi), %rcx
3321 jmp L(CopyFrom1To16BytesCase3)
3328 palignr $9, %xmm1, %xmm2
3329 movaps %xmm2, (%rdx)
3330 movaps 23(%rcx), %xmm2
3334 palignr $9, %xmm3, %xmm2
3335 movaps %xmm2, 16(%rdx)
3339 movaps %xmm4, 32(%rdx)
3343 movaps %xmm5, 48(%rdx)
3348 lea 7(%rdx, %rsi), %rdx
3349 lea 7(%rcx, %rsi), %rcx
3353 jmp L(CopyFrom1To16BytesCase3)
3359 jle L(StrncpyExit10)
3360 palignr $10, %xmm1, %xmm2
3361 movaps %xmm2, (%rdx)
3362 movaps 22(%rcx), %xmm2
3365 jbe L(StrncpyExit10)
3366 palignr $10, %xmm3, %xmm2
3367 movaps %xmm2, 16(%rdx)
3370 jbe L(StrncpyExit10)
3371 movaps %xmm4, 32(%rdx)
3374 jbe L(StrncpyExit10)
3375 movaps %xmm5, 48(%rdx)
3380 lea 6(%rdx, %rsi), %rdx
3381 lea 6(%rcx, %rsi), %rcx
3385 jmp L(CopyFrom1To16BytesCase3)
3391 jle L(StrncpyExit11)
3392 palignr $11, %xmm1, %xmm2
3393 movaps %xmm2, (%rdx)
3394 movaps 21(%rcx), %xmm2
3397 jbe L(StrncpyExit11)
3398 palignr $11, %xmm3, %xmm2
3399 movaps %xmm2, 16(%rdx)
3402 jbe L(StrncpyExit11)
3403 movaps %xmm4, 32(%rdx)
3406 jbe L(StrncpyExit11)
3407 movaps %xmm5, 48(%rdx)
3412 lea 5(%rdx, %rsi), %rdx
3413 lea 5(%rcx, %rsi), %rcx
3417 jmp L(CopyFrom1To16BytesCase3)
3423 jle L(StrncpyExit12)
3424 palignr $12, %xmm1, %xmm2
3425 movaps %xmm2, (%rdx)
3426 movaps 20(%rcx), %xmm2
3429 jbe L(StrncpyExit12)
3430 palignr $12, %xmm3, %xmm2
3431 movaps %xmm2, 16(%rdx)
3434 jbe L(StrncpyExit12)
3435 movaps %xmm4, 32(%rdx)
3438 jbe L(StrncpyExit12)
3439 movaps %xmm5, 48(%rdx)
3444 lea 4(%rdx, %rsi), %rdx
3445 lea 4(%rcx, %rsi), %rcx
3449 jmp L(CopyFrom1To16BytesCase3)
3455 jle L(StrncpyExit13)
3456 palignr $13, %xmm1, %xmm2
3457 movaps %xmm2, (%rdx)
3458 movaps 19(%rcx), %xmm2
3461 jbe L(StrncpyExit13)
3462 palignr $13, %xmm3, %xmm2
3463 movaps %xmm2, 16(%rdx)
3466 jbe L(StrncpyExit13)
3467 movaps %xmm4, 32(%rdx)
3470 jbe L(StrncpyExit13)
3471 movaps %xmm5, 48(%rdx)
3476 lea 3(%rdx, %rsi), %rdx
3477 lea 3(%rcx, %rsi), %rcx
3481 jmp L(CopyFrom1To16BytesCase3)
3487 jle L(StrncpyExit14)
3488 palignr $14, %xmm1, %xmm2
3489 movaps %xmm2, (%rdx)
3490 movaps 18(%rcx), %xmm2
3493 jbe L(StrncpyExit14)
3494 palignr $14, %xmm3, %xmm2
3495 movaps %xmm2, 16(%rdx)
3498 jbe L(StrncpyExit14)
3499 movaps %xmm4, 32(%rdx)
3502 jbe L(StrncpyExit14)
3503 movaps %xmm5, 48(%rdx)
3508 lea 2(%rdx, %rsi), %rdx
3509 lea 2(%rcx, %rsi), %rcx
3513 jmp L(CopyFrom1To16BytesCase3)
3519 jle L(StrncpyExit15)
3520 palignr $15, %xmm1, %xmm2
3521 movaps %xmm2, (%rdx)
3522 movaps 17(%rcx), %xmm2
3525 jbe L(StrncpyExit15)
3526 palignr $15, %xmm3, %xmm2
3527 movaps %xmm2, 16(%rdx)
3530 jbe L(StrncpyExit15)
3531 movaps %xmm4, 32(%rdx)
3534 jbe L(StrncpyExit15)
3535 movaps %xmm5, 48(%rdx)
3540 lea 1(%rdx, %rsi), %rdx
3541 lea 1(%rcx, %rsi), %rcx
3545 jmp L(CopyFrom1To16BytesCase3)
3548 # ifndef USE_AS_STRCAT