2 Copyright (C) 2011 Free Software Foundation, Inc.
3 Contributed by Intel Corporation.
4 This file is part of the GNU C Library.
6 The GNU C Library is free software; you can redistribute it and/or
7 modify it under the terms of the GNU Lesser General Public
8 License as published by the Free Software Foundation; either
9 version 2.1 of the License, or (at your option) any later version.
11 The GNU C Library is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 Lesser General Public License for more details.
16 You should have received a copy of the GNU Lesser General Public
17 License along with the GNU C Library; if not, see
18 <http://www.gnu.org/licenses/>. */
23 # ifndef USE_AS_STRCAT
/* CFI bookkeeping for 4-byte stack pushes and pops.
   CFI_PUSH (REG) grows the CFA offset by 4 and records REG with
   cfi_rel_offset at offset 0; CFI_POP (REG) starts by shrinking the
   CFA offset by 4.  PUSH and POP pair the pushl/popl instruction with
   the matching annotation so the two are always emitted together.  */
26 # define CFI_PUSH(REG) \
27 cfi_adjust_cfa_offset (4); \
28 cfi_rel_offset (REG, 0)
30 # define CFI_POP(REG) \
31 cfi_adjust_cfa_offset (-4); \
34 # define PUSH(REG) pushl REG; CFI_PUSH (REG)
35 # define POP(REG) popl REG; CFI_POP (REG)
/* Symbol name given to this implementation.  */
38 # define STRCPY __strcpy_ssse3
41 # ifdef USE_AS_STRNCPY
/* The strncpy build pushes %ebx on entry, so RETURN and RETURN1 pop it
   again before ret; RETURN1 pops %edi first.  Each ret is followed by
   CFI_PUSH annotations that undo the CFI effect of the preceding POPs
   (presumably so unwind info stays correct for the code assembled
   after the ret -- TODO confirm).  */
43 # define ENTRANCE PUSH (%ebx)
44 # define RETURN POP (%ebx); ret; CFI_PUSH (%ebx);
45 # define RETURN1 POP (%edi); POP (%ebx); ret; CFI_PUSH (%ebx); CFI_PUSH (%edi)
/* RETURN1 variant that restores only %edi.
   NOTE(review): presumably the non-strncpy branch -- confirm.  */
50 # define RETURN1 POP (%edi); ret; CFI_PUSH (%edi)
/* Result variant 1: %eax = %edx + n for both macros.  */
54 # define SAVE_RESULT(n) lea n(%edx), %eax
55 # define SAVE_RESULT_TAIL(n) lea n(%edx), %eax
/* Result variant 2: n is ignored; SAVE_RESULT copies %edi to %eax and
   SAVE_RESULT_TAIL copies %edx to %eax.  */
57 # define SAVE_RESULT(n) movl %edi, %eax
58 # define SAVE_RESULT_TAIL(n) movl %edx, %eax
65 /* In this code the following instructions are used for copying:
70 movaps - 16 byte - requires 16 byte alignment
71 of source and destination addresses.
79 # ifdef USE_AS_STRNCPY
82 jbe L(StrncpyExit8Bytes)
100 # ifdef USE_AS_STRNCPY
102 jb L(StrncpyExit15Bytes)
118 # ifdef USE_AS_STRNCPY
129 # ifdef USE_AS_STRNCPY
134 /* add 16 bytes ecx_offset to ebx */
144 pcmpeqb (%esi), %xmm0
145 movlpd 8(%ecx), %xmm1
146 movlpd %xmm1, 8(%edx)
151 # ifdef USE_AS_STRNCPY
153 jbe L(CopyFrom1To16BytesCase2OrCase3)
156 jnz L(CopyFrom1To16Bytes)
163 # ifdef USE_AS_STRNCPY
178 /* case: ecx_offset == edx_offset */
216 movaps 16(%ecx), %xmm2
221 # ifdef USE_AS_STRNCPY
223 jbe L(CopyFrom1To16BytesCase2OrCase3)
226 jnz L(CopyFrom1To16Bytes)
228 movaps 16(%ecx, %esi), %xmm3
229 movaps %xmm2, (%edx, %esi)
233 # ifdef USE_AS_STRNCPY
235 jbe L(CopyFrom1To16BytesCase2OrCase3)
238 jnz L(CopyFrom1To16Bytes)
240 movaps 16(%ecx, %esi), %xmm4
241 movaps %xmm3, (%edx, %esi)
245 # ifdef USE_AS_STRNCPY
247 jbe L(CopyFrom1To16BytesCase2OrCase3)
250 jnz L(CopyFrom1To16Bytes)
252 movaps 16(%ecx, %esi), %xmm1
253 movaps %xmm4, (%edx, %esi)
257 # ifdef USE_AS_STRNCPY
259 jbe L(CopyFrom1To16BytesCase2OrCase3)
262 jnz L(CopyFrom1To16Bytes)
264 movaps 16(%ecx, %esi), %xmm2
265 movaps %xmm1, (%edx, %esi)
269 # ifdef USE_AS_STRNCPY
271 jbe L(CopyFrom1To16BytesCase2OrCase3)
274 jnz L(CopyFrom1To16Bytes)
276 movaps 16(%ecx, %esi), %xmm3
277 movaps %xmm2, (%edx, %esi)
281 # ifdef USE_AS_STRNCPY
283 jbe L(CopyFrom1To16BytesCase2OrCase3)
286 jnz L(CopyFrom1To16Bytes)
288 movaps %xmm3, (%edx, %esi)
290 lea 16(%ecx, %esi), %ecx
294 # ifdef USE_AS_STRNCPY
295 lea 112(%ebx, %eax), %ebx
301 movaps 32(%ecx), %xmm3
303 movaps 16(%ecx), %xmm5
305 movaps 48(%ecx), %xmm7
313 # ifdef USE_AS_STRNCPY
315 jbe L(StrncpyLeaveCase2OrCase3)
318 jnz L(Aligned64Leave)
319 movaps %xmm4, -64(%edx)
320 movaps %xmm5, -48(%edx)
321 movaps %xmm6, -32(%edx)
322 movaps %xmm7, -16(%edx)
326 # ifdef USE_AS_STRNCPY
332 jnz L(CopyFrom1To16Bytes)
335 # ifdef USE_AS_STRNCPY
339 movaps %xmm4, -64(%edx)
342 jnz L(CopyFrom1To16Bytes)
345 # ifdef USE_AS_STRNCPY
349 movaps %xmm5, -48(%edx)
352 jnz L(CopyFrom1To16Bytes)
354 movaps %xmm6, -32(%edx)
356 # ifdef USE_AS_STRNCPY
361 jmp L(CopyFrom1To16Bytes)
365 movaps -1(%ecx), %xmm1
366 movaps 15(%ecx), %xmm2
371 # ifdef USE_AS_STRNCPY
373 jbe L(StrncpyExit1Case2OrCase3)
378 palignr $1, %xmm1, %xmm2
381 movaps 31(%ecx), %xmm2
388 # ifdef USE_AS_STRNCPY
390 jbe L(StrncpyExit1Case2OrCase3)
395 palignr $1, %xmm1, %xmm2
397 movaps 31(%ecx), %xmm2
405 # ifdef USE_AS_STRNCPY
407 jbe L(StrncpyExit1Case2OrCase3)
412 palignr $1, %xmm1, %xmm2
414 movaps 31(%ecx), %xmm2
420 # ifdef USE_AS_STRNCPY
422 jbe L(StrncpyExit1Case2OrCase3)
427 palignr $1, %xmm3, %xmm2
437 # ifdef USE_AS_STRNCPY
440 movaps -1(%ecx), %xmm1
443 movaps 15(%ecx), %xmm2
444 movaps 31(%ecx), %xmm3
446 movaps 47(%ecx), %xmm4
448 movaps 63(%ecx), %xmm5
455 palignr $1, %xmm4, %xmm5
457 palignr $1, %xmm3, %xmm4
459 # ifdef USE_AS_STRNCPY
463 palignr $1, %xmm2, %xmm3
465 palignr $1, %xmm1, %xmm2
467 movaps %xmm5, 48(%edx)
468 movaps %xmm4, 32(%edx)
469 movaps %xmm3, 16(%edx)
477 movlpd 7(%ecx), %xmm0
478 movlpd %xmm0, 7(%edx)
480 jmp L(CopyFrom1To16Bytes)
484 movaps -2(%ecx), %xmm1
485 movaps 14(%ecx), %xmm2
490 # ifdef USE_AS_STRNCPY
492 jbe L(StrncpyExit2Case2OrCase3)
497 palignr $2, %xmm1, %xmm2
500 movaps 30(%ecx), %xmm2
507 # ifdef USE_AS_STRNCPY
509 jbe L(StrncpyExit2Case2OrCase3)
514 palignr $2, %xmm1, %xmm2
516 movaps 30(%ecx), %xmm2
524 # ifdef USE_AS_STRNCPY
526 jbe L(StrncpyExit2Case2OrCase3)
531 palignr $2, %xmm1, %xmm2
533 movaps 30(%ecx), %xmm2
539 # ifdef USE_AS_STRNCPY
541 jbe L(StrncpyExit2Case2OrCase3)
546 palignr $2, %xmm3, %xmm2
556 # ifdef USE_AS_STRNCPY
559 movaps -2(%ecx), %xmm1
562 movaps 14(%ecx), %xmm2
563 movaps 30(%ecx), %xmm3
565 movaps 46(%ecx), %xmm4
567 movaps 62(%ecx), %xmm5
574 palignr $2, %xmm4, %xmm5
576 palignr $2, %xmm3, %xmm4
578 # ifdef USE_AS_STRNCPY
582 palignr $2, %xmm2, %xmm3
584 palignr $2, %xmm1, %xmm2
586 movaps %xmm5, 48(%edx)
587 movaps %xmm4, 32(%edx)
588 movaps %xmm3, 16(%edx)
595 movlpd 6(%ecx), %xmm1
597 movlpd %xmm1, 6(%edx)
599 jmp L(CopyFrom1To16Bytes)
603 movaps -3(%ecx), %xmm1
604 movaps 13(%ecx), %xmm2
609 # ifdef USE_AS_STRNCPY
611 jbe L(StrncpyExit3Case2OrCase3)
616 palignr $3, %xmm1, %xmm2
619 movaps 29(%ecx), %xmm2
626 # ifdef USE_AS_STRNCPY
628 jbe L(StrncpyExit3Case2OrCase3)
633 palignr $3, %xmm1, %xmm2
635 movaps 29(%ecx), %xmm2
643 # ifdef USE_AS_STRNCPY
645 jbe L(StrncpyExit3Case2OrCase3)
650 palignr $3, %xmm1, %xmm2
652 movaps 29(%ecx), %xmm2
658 # ifdef USE_AS_STRNCPY
660 jbe L(StrncpyExit3Case2OrCase3)
665 palignr $3, %xmm3, %xmm2
675 # ifdef USE_AS_STRNCPY
678 movaps -3(%ecx), %xmm1
681 movaps 13(%ecx), %xmm2
682 movaps 29(%ecx), %xmm3
684 movaps 45(%ecx), %xmm4
686 movaps 61(%ecx), %xmm5
693 palignr $3, %xmm4, %xmm5
695 palignr $3, %xmm3, %xmm4
697 # ifdef USE_AS_STRNCPY
701 palignr $3, %xmm2, %xmm3
703 palignr $3, %xmm1, %xmm2
705 movaps %xmm5, 48(%edx)
706 movaps %xmm4, 32(%edx)
707 movaps %xmm3, 16(%edx)
714 movlpd 5(%ecx), %xmm1
716 movlpd %xmm1, 5(%edx)
718 jmp L(CopyFrom1To16Bytes)
722 movaps -4(%ecx), %xmm1
723 movaps 12(%ecx), %xmm2
728 # ifdef USE_AS_STRNCPY
730 jbe L(StrncpyExit4Case2OrCase3)
735 palignr $4, %xmm1, %xmm2
738 movaps 28(%ecx), %xmm2
745 # ifdef USE_AS_STRNCPY
747 jbe L(StrncpyExit4Case2OrCase3)
752 palignr $4, %xmm1, %xmm2
754 movaps 28(%ecx), %xmm2
762 # ifdef USE_AS_STRNCPY
764 jbe L(StrncpyExit4Case2OrCase3)
769 palignr $4, %xmm1, %xmm2
771 movaps 28(%ecx), %xmm2
777 # ifdef USE_AS_STRNCPY
779 jbe L(StrncpyExit4Case2OrCase3)
784 palignr $4, %xmm3, %xmm2
794 # ifdef USE_AS_STRNCPY
797 movaps -4(%ecx), %xmm1
800 movaps 12(%ecx), %xmm2
801 movaps 28(%ecx), %xmm3
803 movaps 44(%ecx), %xmm4
805 movaps 60(%ecx), %xmm5
812 palignr $4, %xmm4, %xmm5
814 palignr $4, %xmm3, %xmm4
816 # ifdef USE_AS_STRNCPY
820 palignr $4, %xmm2, %xmm3
822 palignr $4, %xmm1, %xmm2
824 movaps %xmm5, 48(%edx)
825 movaps %xmm4, 32(%edx)
826 movaps %xmm3, 16(%edx)
837 jmp L(CopyFrom1To16Bytes)
841 movaps -5(%ecx), %xmm1
842 movaps 11(%ecx), %xmm2
847 # ifdef USE_AS_STRNCPY
849 jbe L(StrncpyExit5Case2OrCase3)
854 palignr $5, %xmm1, %xmm2
857 movaps 27(%ecx), %xmm2
864 # ifdef USE_AS_STRNCPY
866 jbe L(StrncpyExit5Case2OrCase3)
871 palignr $5, %xmm1, %xmm2
873 movaps 27(%ecx), %xmm2
881 # ifdef USE_AS_STRNCPY
883 jbe L(StrncpyExit5Case2OrCase3)
888 palignr $5, %xmm1, %xmm2
890 movaps 27(%ecx), %xmm2
896 # ifdef USE_AS_STRNCPY
898 jbe L(StrncpyExit5Case2OrCase3)
903 palignr $5, %xmm3, %xmm2
913 # ifdef USE_AS_STRNCPY
916 movaps -5(%ecx), %xmm1
919 movaps 11(%ecx), %xmm2
920 movaps 27(%ecx), %xmm3
922 movaps 43(%ecx), %xmm4
924 movaps 59(%ecx), %xmm5
931 palignr $5, %xmm4, %xmm5
933 palignr $5, %xmm3, %xmm4
935 # ifdef USE_AS_STRNCPY
939 palignr $5, %xmm2, %xmm3
941 palignr $5, %xmm1, %xmm2
943 movaps %xmm5, 48(%edx)
944 movaps %xmm4, 32(%edx)
945 movaps %xmm3, 16(%edx)
956 jmp L(CopyFrom1To16Bytes)
960 movaps -6(%ecx), %xmm1
961 movaps 10(%ecx), %xmm2
966 # ifdef USE_AS_STRNCPY
968 jbe L(StrncpyExit6Case2OrCase3)
973 palignr $6, %xmm1, %xmm2
976 movaps 26(%ecx), %xmm2
983 # ifdef USE_AS_STRNCPY
985 jbe L(StrncpyExit6Case2OrCase3)
990 palignr $6, %xmm1, %xmm2
992 movaps 26(%ecx), %xmm2
1000 # ifdef USE_AS_STRNCPY
1002 jbe L(StrncpyExit6Case2OrCase3)
1007 palignr $6, %xmm1, %xmm2
1008 movaps %xmm2, (%edx)
1009 movaps 26(%ecx), %xmm2
1011 pcmpeqb %xmm2, %xmm0
1013 pmovmskb %xmm0, %eax
1015 # ifdef USE_AS_STRNCPY
1017 jbe L(StrncpyExit6Case2OrCase3)
1022 palignr $6, %xmm3, %xmm2
1023 movaps %xmm2, (%edx)
1032 # ifdef USE_AS_STRNCPY
1035 movaps -6(%ecx), %xmm1
1038 movaps 10(%ecx), %xmm2
1039 movaps 26(%ecx), %xmm3
1041 movaps 42(%ecx), %xmm4
1043 movaps 58(%ecx), %xmm5
1047 pcmpeqb %xmm0, %xmm7
1048 pmovmskb %xmm7, %eax
1050 palignr $6, %xmm4, %xmm5
1052 palignr $6, %xmm3, %xmm4
1054 # ifdef USE_AS_STRNCPY
1056 jbe L(StrncpyLeave6)
1058 palignr $6, %xmm2, %xmm3
1060 palignr $6, %xmm1, %xmm2
1062 movaps %xmm5, 48(%edx)
1063 movaps %xmm4, 32(%edx)
1064 movaps %xmm3, 16(%edx)
1065 movaps %xmm2, (%edx)
1067 jmp L(Shl6LoopStart)
1070 movlpd (%ecx), %xmm0
1072 movlpd %xmm0, (%edx)
1075 jmp L(CopyFrom1To16Bytes)
1079 movaps -7(%ecx), %xmm1
1080 movaps 9(%ecx), %xmm2
1082 pcmpeqb %xmm2, %xmm0
1083 pmovmskb %xmm0, %eax
1085 # ifdef USE_AS_STRNCPY
1087 jbe L(StrncpyExit7Case2OrCase3)
1092 palignr $7, %xmm1, %xmm2
1094 movaps %xmm2, (%edx)
1095 movaps 25(%ecx), %xmm2
1097 pcmpeqb %xmm2, %xmm0
1099 pmovmskb %xmm0, %eax
1102 # ifdef USE_AS_STRNCPY
1104 jbe L(StrncpyExit7Case2OrCase3)
1109 palignr $7, %xmm1, %xmm2
1110 movaps %xmm2, (%edx)
1111 movaps 25(%ecx), %xmm2
1114 pcmpeqb %xmm2, %xmm0
1116 pmovmskb %xmm0, %eax
1119 # ifdef USE_AS_STRNCPY
1121 jbe L(StrncpyExit7Case2OrCase3)
1126 palignr $7, %xmm1, %xmm2
1127 movaps %xmm2, (%edx)
1128 movaps 25(%ecx), %xmm2
1130 pcmpeqb %xmm2, %xmm0
1132 pmovmskb %xmm0, %eax
1134 # ifdef USE_AS_STRNCPY
1136 jbe L(StrncpyExit7Case2OrCase3)
1141 palignr $7, %xmm3, %xmm2
1142 movaps %xmm2, (%edx)
1151 # ifdef USE_AS_STRNCPY
1154 movaps -7(%ecx), %xmm1
1157 movaps 9(%ecx), %xmm2
1158 movaps 25(%ecx), %xmm3
1160 movaps 41(%ecx), %xmm4
1162 movaps 57(%ecx), %xmm5
1166 pcmpeqb %xmm0, %xmm7
1167 pmovmskb %xmm7, %eax
1169 palignr $7, %xmm4, %xmm5
1171 palignr $7, %xmm3, %xmm4
1173 # ifdef USE_AS_STRNCPY
1175 jbe L(StrncpyLeave7)
1177 palignr $7, %xmm2, %xmm3
1179 palignr $7, %xmm1, %xmm2
1181 movaps %xmm5, 48(%edx)
1182 movaps %xmm4, 32(%edx)
1183 movaps %xmm3, 16(%edx)
1184 movaps %xmm2, (%edx)
1186 jmp L(Shl7LoopStart)
1189 movlpd (%ecx), %xmm0
1191 movlpd %xmm0, (%edx)
1194 jmp L(CopyFrom1To16Bytes)
1198 movaps -8(%ecx), %xmm1
1199 movaps 8(%ecx), %xmm2
1201 pcmpeqb %xmm2, %xmm0
1202 pmovmskb %xmm0, %eax
1204 # ifdef USE_AS_STRNCPY
1206 jbe L(StrncpyExit8Case2OrCase3)
1211 palignr $8, %xmm1, %xmm2
1213 movaps %xmm2, (%edx)
1214 movaps 24(%ecx), %xmm2
1216 pcmpeqb %xmm2, %xmm0
1218 pmovmskb %xmm0, %eax
1221 # ifdef USE_AS_STRNCPY
1223 jbe L(StrncpyExit8Case2OrCase3)
1228 palignr $8, %xmm1, %xmm2
1229 movaps %xmm2, (%edx)
1230 movaps 24(%ecx), %xmm2
1233 pcmpeqb %xmm2, %xmm0
1235 pmovmskb %xmm0, %eax
1238 # ifdef USE_AS_STRNCPY
1240 jbe L(StrncpyExit8Case2OrCase3)
1245 palignr $8, %xmm1, %xmm2
1246 movaps %xmm2, (%edx)
1247 movaps 24(%ecx), %xmm2
1249 pcmpeqb %xmm2, %xmm0
1251 pmovmskb %xmm0, %eax
1253 # ifdef USE_AS_STRNCPY
1255 jbe L(StrncpyExit8Case2OrCase3)
1260 palignr $8, %xmm3, %xmm2
1261 movaps %xmm2, (%edx)
1270 # ifdef USE_AS_STRNCPY
1273 movaps -8(%ecx), %xmm1
1276 movaps 8(%ecx), %xmm2
1277 movaps 24(%ecx), %xmm3
1279 movaps 40(%ecx), %xmm4
1281 movaps 56(%ecx), %xmm5
1285 pcmpeqb %xmm0, %xmm7
1286 pmovmskb %xmm7, %eax
1288 palignr $8, %xmm4, %xmm5
1290 palignr $8, %xmm3, %xmm4
1292 # ifdef USE_AS_STRNCPY
1294 jbe L(StrncpyLeave8)
1296 palignr $8, %xmm2, %xmm3
1298 palignr $8, %xmm1, %xmm2
1300 movaps %xmm5, 48(%edx)
1301 movaps %xmm4, 32(%edx)
1302 movaps %xmm3, 16(%edx)
1303 movaps %xmm2, (%edx)
1305 jmp L(Shl8LoopStart)
1308 movlpd (%ecx), %xmm0
1309 movlpd %xmm0, (%edx)
1311 jmp L(CopyFrom1To16Bytes)
1315 movaps -9(%ecx), %xmm1
1316 movaps 7(%ecx), %xmm2
1318 pcmpeqb %xmm2, %xmm0
1319 pmovmskb %xmm0, %eax
1321 # ifdef USE_AS_STRNCPY
1323 jbe L(StrncpyExit9Case2OrCase3)
1328 palignr $9, %xmm1, %xmm2
1330 movaps %xmm2, (%edx)
1331 movaps 23(%ecx), %xmm2
1333 pcmpeqb %xmm2, %xmm0
1335 pmovmskb %xmm0, %eax
1338 # ifdef USE_AS_STRNCPY
1340 jbe L(StrncpyExit9Case2OrCase3)
1345 palignr $9, %xmm1, %xmm2
1346 movaps %xmm2, (%edx)
1347 movaps 23(%ecx), %xmm2
1350 pcmpeqb %xmm2, %xmm0
1352 pmovmskb %xmm0, %eax
1355 # ifdef USE_AS_STRNCPY
1357 jbe L(StrncpyExit9Case2OrCase3)
1362 palignr $9, %xmm1, %xmm2
1363 movaps %xmm2, (%edx)
1364 movaps 23(%ecx), %xmm2
1366 pcmpeqb %xmm2, %xmm0
1368 pmovmskb %xmm0, %eax
1370 # ifdef USE_AS_STRNCPY
1372 jbe L(StrncpyExit9Case2OrCase3)
1377 palignr $9, %xmm3, %xmm2
1378 movaps %xmm2, (%edx)
1387 # ifdef USE_AS_STRNCPY
1390 movaps -9(%ecx), %xmm1
1393 movaps 7(%ecx), %xmm2
1394 movaps 23(%ecx), %xmm3
1396 movaps 39(%ecx), %xmm4
1398 movaps 55(%ecx), %xmm5
1402 pcmpeqb %xmm0, %xmm7
1403 pmovmskb %xmm7, %eax
1405 palignr $9, %xmm4, %xmm5
1407 palignr $9, %xmm3, %xmm4
1409 # ifdef USE_AS_STRNCPY
1411 jbe L(StrncpyLeave9)
1413 palignr $9, %xmm2, %xmm3
1415 palignr $9, %xmm1, %xmm2
1417 movaps %xmm5, 48(%edx)
1418 movaps %xmm4, 32(%edx)
1419 movaps %xmm3, 16(%edx)
1420 movaps %xmm2, (%edx)
1422 jmp L(Shl9LoopStart)
1425 movlpd -1(%ecx), %xmm0
1426 movlpd %xmm0, -1(%edx)
1428 jmp L(CopyFrom1To16Bytes)
1432 movaps -10(%ecx), %xmm1
1433 movaps 6(%ecx), %xmm2
1435 pcmpeqb %xmm2, %xmm0
1436 pmovmskb %xmm0, %eax
1438 # ifdef USE_AS_STRNCPY
1440 jbe L(StrncpyExit10Case2OrCase3)
1443 jnz L(Shl10LoopExit)
1445 palignr $10, %xmm1, %xmm2
1447 movaps %xmm2, (%edx)
1448 movaps 22(%ecx), %xmm2
1450 pcmpeqb %xmm2, %xmm0
1452 pmovmskb %xmm0, %eax
1455 # ifdef USE_AS_STRNCPY
1457 jbe L(StrncpyExit10Case2OrCase3)
1460 jnz L(Shl10LoopExit)
1462 palignr $10, %xmm1, %xmm2
1463 movaps %xmm2, (%edx)
1464 movaps 22(%ecx), %xmm2
1467 pcmpeqb %xmm2, %xmm0
1469 pmovmskb %xmm0, %eax
1472 # ifdef USE_AS_STRNCPY
1474 jbe L(StrncpyExit10Case2OrCase3)
1477 jnz L(Shl10LoopExit)
1479 palignr $10, %xmm1, %xmm2
1480 movaps %xmm2, (%edx)
1481 movaps 22(%ecx), %xmm2
1483 pcmpeqb %xmm2, %xmm0
1485 pmovmskb %xmm0, %eax
1487 # ifdef USE_AS_STRNCPY
1489 jbe L(StrncpyExit10Case2OrCase3)
1492 jnz L(Shl10LoopExit)
1494 palignr $10, %xmm3, %xmm2
1495 movaps %xmm2, (%edx)
1504 # ifdef USE_AS_STRNCPY
1507 movaps -10(%ecx), %xmm1
1510 movaps 6(%ecx), %xmm2
1511 movaps 22(%ecx), %xmm3
1513 movaps 38(%ecx), %xmm4
1515 movaps 54(%ecx), %xmm5
1519 pcmpeqb %xmm0, %xmm7
1520 pmovmskb %xmm7, %eax
1522 palignr $10, %xmm4, %xmm5
1524 palignr $10, %xmm3, %xmm4
1526 # ifdef USE_AS_STRNCPY
1528 jbe L(StrncpyLeave10)
1530 palignr $10, %xmm2, %xmm3
1532 palignr $10, %xmm1, %xmm2
1534 movaps %xmm5, 48(%edx)
1535 movaps %xmm4, 32(%edx)
1536 movaps %xmm3, 16(%edx)
1537 movaps %xmm2, (%edx)
1539 jmp L(Shl10LoopStart)
1542 movlpd -2(%ecx), %xmm0
1543 movlpd %xmm0, -2(%edx)
1545 jmp L(CopyFrom1To16Bytes)
1549 movaps -11(%ecx), %xmm1
1550 movaps 5(%ecx), %xmm2
1552 pcmpeqb %xmm2, %xmm0
1553 pmovmskb %xmm0, %eax
1555 # ifdef USE_AS_STRNCPY
1557 jbe L(StrncpyExit11Case2OrCase3)
1560 jnz L(Shl11LoopExit)
1562 palignr $11, %xmm1, %xmm2
1564 movaps %xmm2, (%edx)
1565 movaps 21(%ecx), %xmm2
1567 pcmpeqb %xmm2, %xmm0
1569 pmovmskb %xmm0, %eax
1572 # ifdef USE_AS_STRNCPY
1574 jbe L(StrncpyExit11Case2OrCase3)
1577 jnz L(Shl11LoopExit)
1579 palignr $11, %xmm1, %xmm2
1580 movaps %xmm2, (%edx)
1581 movaps 21(%ecx), %xmm2
1584 pcmpeqb %xmm2, %xmm0
1586 pmovmskb %xmm0, %eax
1589 # ifdef USE_AS_STRNCPY
1591 jbe L(StrncpyExit11Case2OrCase3)
1594 jnz L(Shl11LoopExit)
1596 palignr $11, %xmm1, %xmm2
1597 movaps %xmm2, (%edx)
1598 movaps 21(%ecx), %xmm2
1600 pcmpeqb %xmm2, %xmm0
1602 pmovmskb %xmm0, %eax
1604 # ifdef USE_AS_STRNCPY
1606 jbe L(StrncpyExit11Case2OrCase3)
1609 jnz L(Shl11LoopExit)
1611 palignr $11, %xmm3, %xmm2
1612 movaps %xmm2, (%edx)
1621 # ifdef USE_AS_STRNCPY
1624 movaps -11(%ecx), %xmm1
1627 movaps 5(%ecx), %xmm2
1628 movaps 21(%ecx), %xmm3
1630 movaps 37(%ecx), %xmm4
1632 movaps 53(%ecx), %xmm5
1636 pcmpeqb %xmm0, %xmm7
1637 pmovmskb %xmm7, %eax
1639 palignr $11, %xmm4, %xmm5
1641 palignr $11, %xmm3, %xmm4
1643 # ifdef USE_AS_STRNCPY
1645 jbe L(StrncpyLeave11)
1647 palignr $11, %xmm2, %xmm3
1649 palignr $11, %xmm1, %xmm2
1651 movaps %xmm5, 48(%edx)
1652 movaps %xmm4, 32(%edx)
1653 movaps %xmm3, 16(%edx)
1654 movaps %xmm2, (%edx)
1656 jmp L(Shl11LoopStart)
1659 movlpd -3(%ecx), %xmm0
1660 movlpd %xmm0, -3(%edx)
1662 jmp L(CopyFrom1To16Bytes)
1666 movaps -12(%ecx), %xmm1
1667 movaps 4(%ecx), %xmm2
1669 pcmpeqb %xmm2, %xmm0
1670 pmovmskb %xmm0, %eax
1672 # ifdef USE_AS_STRNCPY
1674 jbe L(StrncpyExit12Case2OrCase3)
1677 jnz L(Shl12LoopExit)
1679 palignr $12, %xmm1, %xmm2
1681 movaps %xmm2, (%edx)
1682 movaps 20(%ecx), %xmm2
1684 pcmpeqb %xmm2, %xmm0
1686 pmovmskb %xmm0, %eax
1689 # ifdef USE_AS_STRNCPY
1691 jbe L(StrncpyExit12Case2OrCase3)
1694 jnz L(Shl12LoopExit)
1696 palignr $12, %xmm1, %xmm2
1697 movaps %xmm2, (%edx)
1698 movaps 20(%ecx), %xmm2
1701 pcmpeqb %xmm2, %xmm0
1703 pmovmskb %xmm0, %eax
1706 # ifdef USE_AS_STRNCPY
1708 jbe L(StrncpyExit12Case2OrCase3)
1711 jnz L(Shl12LoopExit)
1713 palignr $12, %xmm1, %xmm2
1714 movaps %xmm2, (%edx)
1715 movaps 20(%ecx), %xmm2
1717 pcmpeqb %xmm2, %xmm0
1719 pmovmskb %xmm0, %eax
1721 # ifdef USE_AS_STRNCPY
1723 jbe L(StrncpyExit12Case2OrCase3)
1726 jnz L(Shl12LoopExit)
1728 palignr $12, %xmm3, %xmm2
1729 movaps %xmm2, (%edx)
1738 # ifdef USE_AS_STRNCPY
1741 movaps -12(%ecx), %xmm1
1744 movaps 4(%ecx), %xmm2
1745 movaps 20(%ecx), %xmm3
1747 movaps 36(%ecx), %xmm4
1749 movaps 52(%ecx), %xmm5
1753 pcmpeqb %xmm0, %xmm7
1754 pmovmskb %xmm7, %eax
1756 palignr $12, %xmm4, %xmm5
1758 palignr $12, %xmm3, %xmm4
1760 # ifdef USE_AS_STRNCPY
1762 jbe L(StrncpyLeave12)
1764 palignr $12, %xmm2, %xmm3
1766 palignr $12, %xmm1, %xmm2
1768 movaps %xmm5, 48(%edx)
1769 movaps %xmm4, 32(%edx)
1770 movaps %xmm3, 16(%edx)
1771 movaps %xmm2, (%edx)
1773 jmp L(Shl12LoopStart)
1779 jmp L(CopyFrom1To16Bytes)
1783 movaps -13(%ecx), %xmm1
1784 movaps 3(%ecx), %xmm2
1786 pcmpeqb %xmm2, %xmm0
1787 pmovmskb %xmm0, %eax
1789 # ifdef USE_AS_STRNCPY
1791 jbe L(StrncpyExit13Case2OrCase3)
1794 jnz L(Shl13LoopExit)
1796 palignr $13, %xmm1, %xmm2
1798 movaps %xmm2, (%edx)
1799 movaps 19(%ecx), %xmm2
1801 pcmpeqb %xmm2, %xmm0
1803 pmovmskb %xmm0, %eax
1806 # ifdef USE_AS_STRNCPY
1808 jbe L(StrncpyExit13Case2OrCase3)
1811 jnz L(Shl13LoopExit)
1813 palignr $13, %xmm1, %xmm2
1814 movaps %xmm2, (%edx)
1815 movaps 19(%ecx), %xmm2
1818 pcmpeqb %xmm2, %xmm0
1820 pmovmskb %xmm0, %eax
1823 # ifdef USE_AS_STRNCPY
1825 jbe L(StrncpyExit13Case2OrCase3)
1828 jnz L(Shl13LoopExit)
1830 palignr $13, %xmm1, %xmm2
1831 movaps %xmm2, (%edx)
1832 movaps 19(%ecx), %xmm2
1834 pcmpeqb %xmm2, %xmm0
1836 pmovmskb %xmm0, %eax
1838 # ifdef USE_AS_STRNCPY
1840 jbe L(StrncpyExit13Case2OrCase3)
1843 jnz L(Shl13LoopExit)
1845 palignr $13, %xmm3, %xmm2
1846 movaps %xmm2, (%edx)
1855 # ifdef USE_AS_STRNCPY
1858 movaps -13(%ecx), %xmm1
1861 movaps 3(%ecx), %xmm2
1862 movaps 19(%ecx), %xmm3
1864 movaps 35(%ecx), %xmm4
1866 movaps 51(%ecx), %xmm5
1870 pcmpeqb %xmm0, %xmm7
1871 pmovmskb %xmm7, %eax
1873 palignr $13, %xmm4, %xmm5
1875 palignr $13, %xmm3, %xmm4
1877 # ifdef USE_AS_STRNCPY
1879 jbe L(StrncpyLeave13)
1881 palignr $13, %xmm2, %xmm3
1883 palignr $13, %xmm1, %xmm2
1885 movaps %xmm5, 48(%edx)
1886 movaps %xmm4, 32(%edx)
1887 movaps %xmm3, 16(%edx)
1888 movaps %xmm2, (%edx)
1890 jmp L(Shl13LoopStart)
1896 jmp L(CopyFrom1To16Bytes)
1900 movaps -14(%ecx), %xmm1
1901 movaps 2(%ecx), %xmm2
1903 pcmpeqb %xmm2, %xmm0
1904 pmovmskb %xmm0, %eax
1906 # ifdef USE_AS_STRNCPY
1908 jbe L(StrncpyExit14Case2OrCase3)
1911 jnz L(Shl14LoopExit)
1913 palignr $14, %xmm1, %xmm2
1915 movaps %xmm2, (%edx)
1916 movaps 18(%ecx), %xmm2
1918 pcmpeqb %xmm2, %xmm0
1920 pmovmskb %xmm0, %eax
1923 # ifdef USE_AS_STRNCPY
1925 jbe L(StrncpyExit14Case2OrCase3)
1928 jnz L(Shl14LoopExit)
1930 palignr $14, %xmm1, %xmm2
1931 movaps %xmm2, (%edx)
1932 movaps 18(%ecx), %xmm2
1935 pcmpeqb %xmm2, %xmm0
1937 pmovmskb %xmm0, %eax
1940 # ifdef USE_AS_STRNCPY
1942 jbe L(StrncpyExit14Case2OrCase3)
1945 jnz L(Shl14LoopExit)
1947 palignr $14, %xmm1, %xmm2
1948 movaps %xmm2, (%edx)
1949 movaps 18(%ecx), %xmm2
1951 pcmpeqb %xmm2, %xmm0
1953 pmovmskb %xmm0, %eax
1955 # ifdef USE_AS_STRNCPY
1957 jbe L(StrncpyExit14Case2OrCase3)
1960 jnz L(Shl14LoopExit)
1962 palignr $14, %xmm3, %xmm2
1963 movaps %xmm2, (%edx)
1972 # ifdef USE_AS_STRNCPY
1975 movaps -14(%ecx), %xmm1
1978 movaps 2(%ecx), %xmm2
1979 movaps 18(%ecx), %xmm3
1981 movaps 34(%ecx), %xmm4
1983 movaps 50(%ecx), %xmm5
1987 pcmpeqb %xmm0, %xmm7
1988 pmovmskb %xmm7, %eax
1990 palignr $14, %xmm4, %xmm5
1992 palignr $14, %xmm3, %xmm4
1994 # ifdef USE_AS_STRNCPY
1996 jbe L(StrncpyLeave14)
1998 palignr $14, %xmm2, %xmm3
2000 palignr $14, %xmm1, %xmm2
2002 movaps %xmm5, 48(%edx)
2003 movaps %xmm4, 32(%edx)
2004 movaps %xmm3, 16(%edx)
2005 movaps %xmm2, (%edx)
2007 jmp L(Shl14LoopStart)
2013 jmp L(CopyFrom1To16Bytes)
2017 movaps -15(%ecx), %xmm1
2018 movaps 1(%ecx), %xmm2
2020 pcmpeqb %xmm2, %xmm0
2021 pmovmskb %xmm0, %eax
2023 # ifdef USE_AS_STRNCPY
2025 jbe L(StrncpyExit15Case2OrCase3)
2028 jnz L(Shl15LoopExit)
2030 palignr $15, %xmm1, %xmm2
2032 movaps %xmm2, (%edx)
2033 movaps 17(%ecx), %xmm2
2035 pcmpeqb %xmm2, %xmm0
2037 pmovmskb %xmm0, %eax
2040 # ifdef USE_AS_STRNCPY
2042 jbe L(StrncpyExit15Case2OrCase3)
2045 jnz L(Shl15LoopExit)
2047 palignr $15, %xmm1, %xmm2
2048 movaps %xmm2, (%edx)
2049 movaps 17(%ecx), %xmm2
2052 pcmpeqb %xmm2, %xmm0
2054 pmovmskb %xmm0, %eax
2057 # ifdef USE_AS_STRNCPY
2059 jbe L(StrncpyExit15Case2OrCase3)
2062 jnz L(Shl15LoopExit)
2064 palignr $15, %xmm1, %xmm2
2065 movaps %xmm2, (%edx)
2066 movaps 17(%ecx), %xmm2
2068 pcmpeqb %xmm2, %xmm0
2070 pmovmskb %xmm0, %eax
2072 # ifdef USE_AS_STRNCPY
2074 jbe L(StrncpyExit15Case2OrCase3)
2077 jnz L(Shl15LoopExit)
2079 palignr $15, %xmm3, %xmm2
2080 movaps %xmm2, (%edx)
2089 # ifdef USE_AS_STRNCPY
2092 movaps -15(%ecx), %xmm1
2095 movaps 1(%ecx), %xmm2
2096 movaps 17(%ecx), %xmm3
2098 movaps 33(%ecx), %xmm4
2100 movaps 49(%ecx), %xmm5
2104 pcmpeqb %xmm0, %xmm7
2105 pmovmskb %xmm7, %eax
2107 palignr $15, %xmm4, %xmm5
2109 palignr $15, %xmm3, %xmm4
2111 # ifdef USE_AS_STRNCPY
2113 jbe L(StrncpyLeave15)
2115 palignr $15, %xmm2, %xmm3
2117 palignr $15, %xmm1, %xmm2
2119 movaps %xmm5, 48(%edx)
2120 movaps %xmm4, 32(%edx)
2121 movaps %xmm3, 16(%edx)
2122 movaps %xmm2, (%edx)
2124 jmp L(Shl15LoopStart)
2130 # ifdef USE_AS_STRCAT
2131 jmp L(CopyFrom1To16Bytes)
2135 # ifndef USE_AS_STRCAT
2138 L(CopyFrom1To16Bytes):
2139 # ifdef USE_AS_STRNCPY
2149 L(CopyFrom1To16BytesLess8):
2166 # ifdef USE_AS_STRNCPY
2169 jnz L(StrncpyFillTailWithZero1)
2170 # ifdef USE_AS_STPCPY
2188 movlpd (%ecx), %xmm0
2189 movlpd %xmm0, (%edx)
2191 # ifdef USE_AS_STRNCPY
2194 jnz L(StrncpyFillTailWithZero1)
2195 # ifdef USE_AS_STPCPY
2217 movlpd (%ecx), %xmm0
2219 movlpd %xmm0, (%edx)
2222 # ifdef USE_AS_STRNCPY
2225 jnz L(StrncpyFillTailWithZero1)
2226 # ifdef USE_AS_STPCPY
2244 movdqu (%ecx), %xmm0
2245 movdqu %xmm0, (%edx)
2247 # ifdef USE_AS_STRNCPY
2250 jnz L(StrncpyFillTailWithZero1)
2251 # ifdef USE_AS_STPCPY
2258 # ifdef USE_AS_STRNCPY
2263 L(CopyFrom1To16BytesCase2):
2274 ja L(CopyFrom1To16BytesLess8)
2309 jbe L(CopyFrom1To16BytesLess8Case3)
2344 L(CopyFrom1To16BytesCase2OrCase3):
2346 jnz L(CopyFrom1To16BytesCase2)
2349 L(CopyFrom1To16BytesCase3):
2357 ja L(ExitHigh8Case3)
2359 L(CopyFrom1To16BytesLess8Case3):
2361 ja L(ExitHigh4Case3)
2382 movlpd (%ecx), %xmm0
2383 movlpd %xmm0, (%edx)
2390 ja L(ExitHigh12Case3)
2398 movlpd (%ecx), %xmm0
2400 movlpd %xmm0, (%edx)
2413 movlpd (%ecx), %xmm0
2414 movlpd 8(%ecx), %xmm1
2415 movlpd %xmm0, (%edx)
2416 movlpd %xmm1, 8(%edx)
2427 # ifdef USE_AS_STRNCPY
2430 jnz L(StrncpyFillTailWithZero1)
2431 # ifdef USE_AS_STPCPY
2443 # ifdef USE_AS_STRNCPY
2446 jnz L(StrncpyFillTailWithZero1)
2447 # ifdef USE_AS_STPCPY
2461 # ifdef USE_AS_STRNCPY
2464 jnz L(StrncpyFillTailWithZero1)
2465 # ifdef USE_AS_STPCPY
2479 # ifdef USE_AS_STRNCPY
2482 jnz L(StrncpyFillTailWithZero1)
2483 # ifdef USE_AS_STPCPY
2497 # ifdef USE_AS_STRNCPY
2500 jnz L(StrncpyFillTailWithZero1)
2501 # ifdef USE_AS_STPCPY
2515 # ifdef USE_AS_STRNCPY
2518 jnz L(StrncpyFillTailWithZero1)
2519 # ifdef USE_AS_STPCPY
2528 movlpd (%ecx), %xmm0
2530 movlpd %xmm0, (%edx)
2533 # ifdef USE_AS_STRNCPY
2536 jnz L(StrncpyFillTailWithZero1)
2537 # ifdef USE_AS_STPCPY
2546 movlpd (%ecx), %xmm0
2548 movlpd %xmm0, (%edx)
2551 # ifdef USE_AS_STRNCPY
2554 jnz L(StrncpyFillTailWithZero1)
2555 # ifdef USE_AS_STPCPY
2564 movlpd (%ecx), %xmm0
2566 movlpd %xmm0, (%edx)
2569 # ifdef USE_AS_STRNCPY
2572 jnz L(StrncpyFillTailWithZero1)
2573 # ifdef USE_AS_STPCPY
2582 movlpd (%ecx), %xmm0
2583 movlpd 5(%ecx), %xmm1
2584 movlpd %xmm0, (%edx)
2585 movlpd %xmm1, 5(%edx)
2587 # ifdef USE_AS_STRNCPY
2590 jnz L(StrncpyFillTailWithZero1)
2591 # ifdef USE_AS_STPCPY
2600 movlpd (%ecx), %xmm0
2601 movlpd 6(%ecx), %xmm1
2602 movlpd %xmm0, (%edx)
2603 movlpd %xmm1, 6(%edx)
2605 # ifdef USE_AS_STRNCPY
2608 jnz L(StrncpyFillTailWithZero1)
2609 # ifdef USE_AS_STPCPY
2618 movlpd (%ecx), %xmm0
2619 movlpd 7(%ecx), %xmm1
2620 movlpd %xmm0, (%edx)
2621 movlpd %xmm1, 7(%edx)
2623 # ifdef USE_AS_STRNCPY
2626 jnz L(StrncpyFillTailWithZero1)
2627 # ifdef USE_AS_STPCPY
2636 # ifdef USE_AS_STRNCPY
2682 movlpd %xmm0, (%ecx)
2687 movlpd %xmm0, (%ecx)
2693 movlpd %xmm0, (%ecx)
2699 movlpd %xmm0, (%ecx)
2705 movlpd %xmm0, (%ecx)
2711 movlpd %xmm0, (%ecx)
2712 movlpd %xmm0, 5(%ecx)
2717 movlpd %xmm0, (%ecx)
2718 movlpd %xmm0, 6(%ecx)
2723 movlpd %xmm0, (%ecx)
2724 movlpd %xmm0, 7(%ecx)
2729 movlpd %xmm0, (%ecx)
2730 movlpd %xmm0, 8(%ecx)
2734 L(StrncpyFillExit1):
2736 L(FillFrom1To16Bytes):
2751 L(FillMore8): /* but less than 16 */
2759 L(FillMore4): /* but less than 8 */
2764 L(FillLess12): /* but more than 8 */
/* Fill the area starting at (%ecx) with the contents of %xmm0.
   NOTE(review): judging by the label names %xmm0 is expected to be
   all zero here -- confirm against the code that sets it up.  */
2773 L(StrncpyFillTailWithZero1):
2775 L(StrncpyFillTailWithZero):
2779 jbe L(StrncpyFillExit1)
/* Two 8-byte movlpd stores cover the first 16 bytes; movlpd has no
   alignment requirement.  */
2781 movlpd %xmm0, (%ecx)
2782 movlpd %xmm0, 8(%ecx)
2792 jb L(StrncpyFillLess64)
/* Main loop: four movdqa stores write 64 bytes per iteration.  movdqa
   requires (%ecx) to be 16-byte aligned.  */
2794 L(StrncpyFillLoopMovdqa):
2795 movdqa %xmm0, (%ecx)
2796 movdqa %xmm0, 16(%ecx)
2797 movdqa %xmm0, 32(%ecx)
2798 movdqa %xmm0, 48(%ecx)
2801 jae L(StrncpyFillLoopMovdqa)
/* Remainder below 64 bytes: store 32 bytes, then possibly one more
   16-byte chunk, and finish in L(FillFrom1To16Bytes).  */
2803 L(StrncpyFillLess64):
2805 jl L(StrncpyFillLess32)
2806 movdqa %xmm0, (%ecx)
2807 movdqa %xmm0, 16(%ecx)
2810 jl L(StrncpyFillExit1)
2811 movdqa %xmm0, (%ecx)
2813 jmp L(FillFrom1To16Bytes)
/* Remainder below 32 bytes: at most one 16-byte store before the
   final partial fill.  */
2815 L(StrncpyFillLess32):
2817 jl L(StrncpyFillExit1)
2818 movdqa %xmm0, (%ecx)
2820 jmp L(FillFrom1To16Bytes)
2827 SAVE_RESULT_TAIL (0)
2828 # ifdef USE_AS_STRNCPY
2831 jnz L(StrncpyFillTailWithZero)
2832 # ifdef USE_AS_STPCPY
2843 SAVE_RESULT_TAIL (1)
2844 # ifdef USE_AS_STRNCPY
2847 jnz L(StrncpyFillTailWithZero)
2848 # ifdef USE_AS_STPCPY
2861 SAVE_RESULT_TAIL (2)
2862 # ifdef USE_AS_STRNCPY
2865 jnz L(StrncpyFillTailWithZero)
2866 # ifdef USE_AS_STPCPY
2877 SAVE_RESULT_TAIL (3)
2878 # ifdef USE_AS_STRNCPY
2881 jnz L(StrncpyFillTailWithZero)
2882 # ifdef USE_AS_STPCPY
2895 SAVE_RESULT_TAIL (4)
2896 # ifdef USE_AS_STRNCPY
2899 jnz L(StrncpyFillTailWithZero)
2900 # ifdef USE_AS_STPCPY
2913 SAVE_RESULT_TAIL (5)
2914 # ifdef USE_AS_STRNCPY
2917 jnz L(StrncpyFillTailWithZero)
2918 # ifdef USE_AS_STPCPY
2931 SAVE_RESULT_TAIL (6)
2932 # ifdef USE_AS_STRNCPY
2935 jnz L(StrncpyFillTailWithZero)
2936 # ifdef USE_AS_STPCPY
2945 movlpd (%ecx), %xmm0
2946 movlpd %xmm0, (%edx)
2947 SAVE_RESULT_TAIL (7)
2948 # ifdef USE_AS_STRNCPY
2951 jnz L(StrncpyFillTailWithZero)
2957 movlpd (%ecx), %xmm0
2959 movlpd %xmm0, (%edx)
2961 SAVE_RESULT_TAIL (8)
2962 # ifdef USE_AS_STRNCPY
2965 jnz L(StrncpyFillTailWithZero)
2966 # ifdef USE_AS_STPCPY
2975 movlpd (%ecx), %xmm0
2977 movlpd %xmm0, (%edx)
2979 SAVE_RESULT_TAIL (9)
2980 # ifdef USE_AS_STRNCPY
2983 jnz L(StrncpyFillTailWithZero)
2984 # ifdef USE_AS_STPCPY
2993 movlpd (%ecx), %xmm0
2995 movlpd %xmm0, (%edx)
2997 SAVE_RESULT_TAIL (10)
2998 # ifdef USE_AS_STRNCPY
3001 jnz L(StrncpyFillTailWithZero)
3002 # ifdef USE_AS_STPCPY
3011 movlpd (%ecx), %xmm0
3013 movlpd %xmm0, (%edx)
3015 SAVE_RESULT_TAIL (11)
3016 # ifdef USE_AS_STRNCPY
3019 jnz L(StrncpyFillTailWithZero)
3020 # ifdef USE_AS_STPCPY
3029 movlpd (%ecx), %xmm0
3030 movlpd 5(%ecx), %xmm1
3031 movlpd %xmm0, (%edx)
3032 movlpd %xmm1, 5(%edx)
3033 SAVE_RESULT_TAIL (12)
3034 # ifdef USE_AS_STRNCPY
3037 jnz L(StrncpyFillTailWithZero)
3038 # ifdef USE_AS_STPCPY
3047 movlpd (%ecx), %xmm0
3048 movlpd 6(%ecx), %xmm1
3049 movlpd %xmm0, (%edx)
3050 movlpd %xmm1, 6(%edx)
3051 SAVE_RESULT_TAIL (13)
3052 # ifdef USE_AS_STRNCPY
3055 jnz L(StrncpyFillTailWithZero)
3056 # ifdef USE_AS_STPCPY
3065 movlpd (%ecx), %xmm0
3066 movlpd 7(%ecx), %xmm1
3067 movlpd %xmm0, (%edx)
3068 movlpd %xmm1, 7(%edx)
3069 SAVE_RESULT_TAIL (14)
3070 # ifdef USE_AS_STRNCPY
3073 jnz L(StrncpyFillTailWithZero)
3079 movdqu (%ecx), %xmm0
3080 movdqu %xmm0, (%edx)
3081 SAVE_RESULT_TAIL (15)
3082 # ifdef USE_AS_STRNCPY
3085 jnz L(StrncpyFillTailWithZero)
3086 # ifdef USE_AS_STPCPY
3094 # ifdef USE_AS_STRNCPY
3095 # ifndef USE_AS_STRCAT
/* Leave path for the 64-byte loop when the length limit is reached.
   Branches to L(Aligned64LeaveCase2) when ZF is clear, otherwise falls
   through to L(Aligned64LeaveCase3).  */
3100 L(StrncpyLeaveCase2OrCase3):
3102 jnz L(Aligned64LeaveCase2)
/* Case 3: store the pending 16-byte chunks %xmm4, %xmm5, %xmm6 to
   -64/-48/-32(%edx) one at a time, leaving for
   L(CopyFrom1To16BytesCase3) as soon as a branch condition is met.
   The first check uses the signed jle, the later ones the unsigned
   jbe.  */
3104 L(Aligned64LeaveCase3):
3106 jle L(CopyFrom1To16BytesCase3)
3107 movaps %xmm4, -64(%edx)
3110 jbe L(CopyFrom1To16BytesCase3)
3111 movaps %xmm5, -48(%edx)
3114 jbe L(CopyFrom1To16BytesCase3)
3115 movaps %xmm6, -32(%edx)
3118 jmp L(CopyFrom1To16BytesCase3)
/* Case 2: like case 3, but before each store the next chunk is
   compared bytewise with %xmm0 (pcmpeqb) and the resulting byte mask
   is extracted into %eax (pmovmskb).
   NOTE(review): presumably this is the NUL-byte search and %xmm0 is
   zero on entry -- confirm.  */
3120 L(Aligned64LeaveCase2):
3121 pcmpeqb %xmm4, %xmm0
3122 pmovmskb %xmm0, %eax
3124 jle L(CopyFrom1To16BytesCase2OrCase3)
3126 jnz L(CopyFrom1To16Bytes)
3128 pcmpeqb %xmm5, %xmm0
3129 pmovmskb %xmm0, %eax
/* The previous chunk is stored only after the next one was examined.  */
3130 movaps %xmm4, -64(%edx)
3133 jbe L(CopyFrom1To16BytesCase2OrCase3)
3135 jnz L(CopyFrom1To16Bytes)
3137 pcmpeqb %xmm6, %xmm0
3138 pmovmskb %xmm0, %eax
3139 movaps %xmm5, -48(%edx)
3142 jbe L(CopyFrom1To16BytesCase2OrCase3)
3144 jnz L(CopyFrom1To16Bytes)
3146 pcmpeqb %xmm7, %xmm0
3147 pmovmskb %xmm0, %eax
3148 movaps %xmm6, -32(%edx)
3151 jmp L(CopyFrom1To16BytesCase2)
3153 /*--------------------------------------------------*/
3155 L(StrncpyExit1Case2OrCase3):
3156 movlpd (%ecx), %xmm0
3157 movlpd 7(%ecx), %xmm1
3158 movlpd %xmm0, (%edx)
3159 movlpd %xmm1, 7(%edx)
3162 jnz L(CopyFrom1To16BytesCase2)
3163 jmp L(CopyFrom1To16BytesCase3)
3166 L(StrncpyExit2Case2OrCase3):
3167 movlpd (%ecx), %xmm0
3168 movlpd 6(%ecx), %xmm1
3169 movlpd %xmm0, (%edx)
3170 movlpd %xmm1, 6(%edx)
3173 jnz L(CopyFrom1To16BytesCase2)
3174 jmp L(CopyFrom1To16BytesCase3)
3177 L(StrncpyExit3Case2OrCase3):
3178 movlpd (%ecx), %xmm0
3179 movlpd 5(%ecx), %xmm1
3180 movlpd %xmm0, (%edx)
3181 movlpd %xmm1, 5(%edx)
3184 jnz L(CopyFrom1To16BytesCase2)
3185 jmp L(CopyFrom1To16BytesCase3)
3188 L(StrncpyExit4Case2OrCase3):
3189 movlpd (%ecx), %xmm0
3191 movlpd %xmm0, (%edx)
3195 jnz L(CopyFrom1To16BytesCase2)
3196 jmp L(CopyFrom1To16BytesCase3)
3199 L(StrncpyExit5Case2OrCase3):
3200 movlpd (%ecx), %xmm0
3202 movlpd %xmm0, (%edx)
3206 jnz L(CopyFrom1To16BytesCase2)
3207 jmp L(CopyFrom1To16BytesCase3)
3210 L(StrncpyExit6Case2OrCase3):
3211 movlpd (%ecx), %xmm0
3213 movlpd %xmm0, (%edx)
3217 jnz L(CopyFrom1To16BytesCase2)
3218 jmp L(CopyFrom1To16BytesCase3)
/* StrncpyExit7Case2OrCase3: move 8 bytes from (%ecx) to (%edx) through
   %xmm0, then dispatch to the Case2 tail when ZF is clear and to the
   Case3 tail otherwise.
   NOTE(review): the jnz presumably tests the NUL-byte mask in %eax --
   confirm.  */
3221 L(StrncpyExit7Case2OrCase3):
3222 movlpd (%ecx), %xmm0
3224 movlpd %xmm0, (%edx)
3228 jnz L(CopyFrom1To16BytesCase2)
3229 jmp L(CopyFrom1To16BytesCase3)
/* StrncpyExit8Case2OrCase3: move 8 bytes from (%ecx) to (%edx) through
   %xmm0, then dispatch to the Case2 tail when ZF is clear and to the
   Case3 tail otherwise.
   NOTE(review): the jnz presumably tests the NUL-byte mask in %eax --
   confirm.  */
3232 L(StrncpyExit8Case2OrCase3):
3233 movlpd (%ecx), %xmm0
3234 movlpd %xmm0, (%edx)
3237 jnz L(CopyFrom1To16BytesCase2)
3238 jmp L(CopyFrom1To16BytesCase3)
/* StrncpyExit9Case2OrCase3: move 8 bytes from (%ecx) to (%edx) through
   %xmm0, then dispatch to the Case2 tail when ZF is clear and to the
   Case3 tail otherwise.
   NOTE(review): the jnz presumably tests the NUL-byte mask in %eax --
   confirm.  */
3241 L(StrncpyExit9Case2OrCase3):
3242 movlpd (%ecx), %xmm0
3243 movlpd %xmm0, (%edx)
3246 jnz L(CopyFrom1To16BytesCase2)
3247 jmp L(CopyFrom1To16BytesCase3)
/* StrncpyExit10Case2OrCase3: move the 8 bytes starting one byte before
   %ecx to the matching position one byte before %edx, then dispatch to
   the Case2 tail when ZF is clear and to the Case3 tail otherwise.
   NOTE(review): reading/writing at offset -1 presumably relies on that
   byte having been copied already -- confirm.  The jnz presumably tests
   the NUL-byte mask in %eax -- confirm.  */
3250 L(StrncpyExit10Case2OrCase3):
3251 movlpd -1(%ecx), %xmm0
3252 movlpd %xmm0, -1(%edx)
3255 jnz L(CopyFrom1To16BytesCase2)
3256 jmp L(CopyFrom1To16BytesCase3)
/* StrncpyExit11Case2OrCase3: move the 8 bytes starting two bytes before
   %ecx to the matching position two bytes before %edx, then dispatch to
   the Case2 tail when ZF is clear and to the Case3 tail otherwise.
   NOTE(review): reading/writing at offset -2 presumably relies on those
   bytes having been copied already -- confirm.  The jnz presumably tests
   the NUL-byte mask in %eax -- confirm.  */
3259 L(StrncpyExit11Case2OrCase3):
3260 movlpd -2(%ecx), %xmm0
3261 movlpd %xmm0, -2(%edx)
3264 jnz L(CopyFrom1To16BytesCase2)
3265 jmp L(CopyFrom1To16BytesCase3)
/* StrncpyExit12Case2OrCase3: dispatch to the Case2 tail when ZF is clear
   and to the Case3 tail otherwise.
   NOTE(review): the jnz presumably tests the NUL-byte mask in %eax --
   confirm.  */
3268 L(StrncpyExit12Case2OrCase3):
3273 jnz L(CopyFrom1To16BytesCase2)
3274 jmp L(CopyFrom1To16BytesCase3)
/* StrncpyExit13Case2OrCase3: dispatch to the Case2 tail when ZF is clear
   and to the Case3 tail otherwise.
   NOTE(review): the jnz presumably tests the NUL-byte mask in %eax --
   confirm.  */
3277 L(StrncpyExit13Case2OrCase3):
3282 jnz L(CopyFrom1To16BytesCase2)
3283 jmp L(CopyFrom1To16BytesCase3)
/* StrncpyExit14Case2OrCase3: dispatch to the Case2 tail when ZF is clear
   and to the Case3 tail otherwise.
   NOTE(review): the jnz presumably tests the NUL-byte mask in %eax --
   confirm.  */
3286 L(StrncpyExit14Case2OrCase3):
3291 jnz L(CopyFrom1To16BytesCase2)
3292 jmp L(CopyFrom1To16BytesCase3)
/* StrncpyExit15Case2OrCase3: dispatch to the Case2 tail when ZF is clear
   and to the Case3 tail otherwise.
   NOTE(review): the jnz presumably tests the NUL-byte mask in %eax --
   confirm.  */
3295 L(StrncpyExit15Case2OrCase3):
3300 jnz L(CopyFrom1To16BytesCase2)
3301 jmp L(CopyFrom1To16BytesCase3)
/* Leave path for a source pointer that is 1 byte past a 16-byte boundary:
   the aligned load from 31(%ecx) requires (%ecx + 31) to be a multiple
   of 16.  Stores up to four 16-byte chunks at 0/16/32/48(%edx), then
   copies the 16 bytes that end at the advanced pointers.
   NOTE(review): presumably entered from the shift-by-1 copy loop with
   %xmm1..%xmm5 holding loaded source data -- confirm.  */
/* %xmm2 = low 16 bytes of (%xmm2:%xmm1) shifted right by 1 byte.  */
3307 palignr $1, %xmm1, %xmm2
3308 movaps %xmm2, (%edx)
3309 movaps 31(%ecx), %xmm2
3313 palignr $1, %xmm3, %xmm2
3314 movaps %xmm2, 16(%edx)
3318 movaps %xmm4, 32(%edx)
3322 movaps %xmm5, 48(%edx)
/* Advance both pointers by %esi + 15; lea does not modify the flags.  */
3326 lea 15(%edx, %esi), %edx
3327 lea 15(%ecx, %esi), %ecx
/* Unaligned 16-byte copy of the bytes ending at the new pointers.  */
3328 movdqu -16(%ecx), %xmm0
3330 movdqu %xmm0, -16(%edx)
3331 jmp L(CopyFrom1To16BytesCase3)
/* Leave path for a source pointer that is 2 bytes past a 16-byte boundary:
   the aligned load from 30(%ecx) requires (%ecx + 30) to be a multiple
   of 16.  Stores up to four 16-byte chunks at 0/16/32/48(%edx), then
   copies the 16 bytes that end at the advanced pointers.
   NOTE(review): presumably entered from the shift-by-2 copy loop with
   %xmm1..%xmm5 holding loaded source data -- confirm.  */
/* %xmm2 = low 16 bytes of (%xmm2:%xmm1) shifted right by 2 bytes.  */
3337 palignr $2, %xmm1, %xmm2
3338 movaps %xmm2, (%edx)
3339 movaps 30(%ecx), %xmm2
3343 palignr $2, %xmm3, %xmm2
3344 movaps %xmm2, 16(%edx)
3348 movaps %xmm4, 32(%edx)
3352 movaps %xmm5, 48(%edx)
/* Advance both pointers by %esi + 14; lea does not modify the flags.  */
3356 lea 14(%edx, %esi), %edx
3357 lea 14(%ecx, %esi), %ecx
/* Unaligned 16-byte copy of the bytes ending at the new pointers.  */
3358 movdqu -16(%ecx), %xmm0
3360 movdqu %xmm0, -16(%edx)
3361 jmp L(CopyFrom1To16BytesCase3)
/* Leave path for a source pointer that is 3 bytes past a 16-byte boundary:
   the aligned load from 29(%ecx) requires (%ecx + 29) to be a multiple
   of 16.  Stores up to four 16-byte chunks at 0/16/32/48(%edx), then
   copies the 16 bytes that end at the advanced pointers.
   NOTE(review): presumably entered from the shift-by-3 copy loop with
   %xmm1..%xmm5 holding loaded source data -- confirm.  */
/* %xmm2 = low 16 bytes of (%xmm2:%xmm1) shifted right by 3 bytes.  */
3367 palignr $3, %xmm1, %xmm2
3368 movaps %xmm2, (%edx)
3369 movaps 29(%ecx), %xmm2
3373 palignr $3, %xmm3, %xmm2
3374 movaps %xmm2, 16(%edx)
3378 movaps %xmm4, 32(%edx)
3382 movaps %xmm5, 48(%edx)
/* Advance both pointers by %esi + 13; lea does not modify the flags.  */
3386 lea 13(%edx, %esi), %edx
3387 lea 13(%ecx, %esi), %ecx
/* Unaligned 16-byte copy of the bytes ending at the new pointers.  */
3388 movdqu -16(%ecx), %xmm0
3390 movdqu %xmm0, -16(%edx)
3391 jmp L(CopyFrom1To16BytesCase3)
/* Leave path for a source pointer that is 4 bytes past a 16-byte boundary:
   the aligned load from 28(%ecx) requires (%ecx + 28) to be a multiple
   of 16.  Stores up to four 16-byte chunks at 0/16/32/48(%edx), then
   copies 8 bytes located 12 bytes before the advanced pointers.
   NOTE(review): presumably entered from the shift-by-4 copy loop with
   %xmm1..%xmm5 holding loaded source data -- confirm.  */
/* %xmm2 = low 16 bytes of (%xmm2:%xmm1) shifted right by 4 bytes.  */
3397 palignr $4, %xmm1, %xmm2
3398 movaps %xmm2, (%edx)
3399 movaps 28(%ecx), %xmm2
3403 palignr $4, %xmm3, %xmm2
3404 movaps %xmm2, 16(%edx)
3408 movaps %xmm4, 32(%edx)
3412 movaps %xmm5, 48(%edx)
/* Advance both pointers by %esi + 12; lea does not modify the flags.  */
3416 lea 12(%edx, %esi), %edx
3417 lea 12(%ecx, %esi), %ecx
/* 8-byte copy of the bytes at offsets -12..-5 from the new pointers.  */
3418 movlpd -12(%ecx), %xmm0
3420 movlpd %xmm0, -12(%edx)
3423 jmp L(CopyFrom1To16BytesCase3)
/* Leave path for a source pointer that is 5 bytes past a 16-byte boundary:
   the aligned load from 27(%ecx) requires (%ecx + 27) to be a multiple
   of 16.  Stores up to four 16-byte chunks at 0/16/32/48(%edx), then
   copies 8 bytes located 11 bytes before the advanced pointers.
   NOTE(review): presumably entered from the shift-by-5 copy loop with
   %xmm1..%xmm5 holding loaded source data -- confirm.  */
/* %xmm2 = low 16 bytes of (%xmm2:%xmm1) shifted right by 5 bytes.  */
3429 palignr $5, %xmm1, %xmm2
3430 movaps %xmm2, (%edx)
3431 movaps 27(%ecx), %xmm2
3435 palignr $5, %xmm3, %xmm2
3436 movaps %xmm2, 16(%edx)
3440 movaps %xmm4, 32(%edx)
3444 movaps %xmm5, 48(%edx)
/* Advance both pointers by %esi + 11; lea does not modify the flags.  */
3448 lea 11(%edx, %esi), %edx
3449 lea 11(%ecx, %esi), %ecx
/* 8-byte copy of the bytes at offsets -11..-4 from the new pointers.  */
3450 movlpd -11(%ecx), %xmm0
3452 movlpd %xmm0, -11(%edx)
3455 jmp L(CopyFrom1To16BytesCase3)
/* Leave path for a source pointer that is 6 bytes past a 16-byte boundary:
   the aligned load from 26(%ecx) requires (%ecx + 26) to be a multiple
   of 16.  Stores up to four 16-byte chunks at 0/16/32/48(%edx), then
   copies 8 bytes located 10 bytes before the advanced pointers.
   NOTE(review): presumably entered from the shift-by-6 copy loop with
   %xmm1..%xmm5 holding loaded source data -- confirm.  */
/* %xmm2 = low 16 bytes of (%xmm2:%xmm1) shifted right by 6 bytes.  */
3461 palignr $6, %xmm1, %xmm2
3462 movaps %xmm2, (%edx)
3463 movaps 26(%ecx), %xmm2
3467 palignr $6, %xmm3, %xmm2
3468 movaps %xmm2, 16(%edx)
3472 movaps %xmm4, 32(%edx)
3476 movaps %xmm5, 48(%edx)
/* Advance both pointers by %esi + 10; lea does not modify the flags.  */
3480 lea 10(%edx, %esi), %edx
3481 lea 10(%ecx, %esi), %ecx
/* 8-byte copy of the bytes at offsets -10..-3 from the new pointers.  */
3483 movlpd -10(%ecx), %xmm0
3485 movlpd %xmm0, -10(%edx)
3488 jmp L(CopyFrom1To16BytesCase3)
/* Leave path for a source pointer that is 7 bytes past a 16-byte boundary:
   the aligned load from 25(%ecx) requires (%ecx + 25) to be a multiple
   of 16.  Stores up to four 16-byte chunks at 0/16/32/48(%edx), then
   copies 8 bytes located 9 bytes before the advanced pointers.
   NOTE(review): presumably entered from the shift-by-7 copy loop with
   %xmm1..%xmm5 holding loaded source data -- confirm.  */
/* %xmm2 = low 16 bytes of (%xmm2:%xmm1) shifted right by 7 bytes.  */
3494 palignr $7, %xmm1, %xmm2
3495 movaps %xmm2, (%edx)
3496 movaps 25(%ecx), %xmm2
3500 palignr $7, %xmm3, %xmm2
3501 movaps %xmm2, 16(%edx)
3505 movaps %xmm4, 32(%edx)
3509 movaps %xmm5, 48(%edx)
/* Advance both pointers by %esi + 9; lea does not modify the flags.  */
3513 lea 9(%edx, %esi), %edx
3514 lea 9(%ecx, %esi), %ecx
/* 8-byte copy of the bytes at offsets -9..-2 from the new pointers.  */
3516 movlpd -9(%ecx), %xmm0
3518 movlpd %xmm0, -9(%edx)
3521 jmp L(CopyFrom1To16BytesCase3)
/* Leave path for a source pointer that is 8 bytes past a 16-byte boundary:
   the aligned load from 24(%ecx) requires (%ecx + 24) to be a multiple
   of 16.  Stores up to four 16-byte chunks at 0/16/32/48(%edx), then
   copies the 8 bytes that end at the advanced pointers.
   NOTE(review): presumably entered from the shift-by-8 copy loop with
   %xmm1..%xmm5 holding loaded source data -- confirm.  */
/* %xmm2 = low 16 bytes of (%xmm2:%xmm1) shifted right by 8 bytes.  */
3527 palignr $8, %xmm1, %xmm2
3528 movaps %xmm2, (%edx)
3529 movaps 24(%ecx), %xmm2
3533 palignr $8, %xmm3, %xmm2
3534 movaps %xmm2, 16(%edx)
3538 movaps %xmm4, 32(%edx)
3542 movaps %xmm5, 48(%edx)
/* Advance both pointers by %esi + 8; lea does not modify the flags.  */
3546 lea 8(%edx, %esi), %edx
3547 lea 8(%ecx, %esi), %ecx
/* 8-byte copy of the bytes ending at the new pointers.  */
3548 movlpd -8(%ecx), %xmm0
3549 movlpd %xmm0, -8(%edx)
3551 jmp L(CopyFrom1To16BytesCase3)
/* Leave path for a source pointer that is 9 bytes past a 16-byte boundary:
   the aligned load from 23(%ecx) requires (%ecx + 23) to be a multiple
   of 16.  Stores up to four 16-byte chunks at 0/16/32/48(%edx), then
   copies the 8 bytes that end at the advanced pointers.
   NOTE(review): presumably entered from the shift-by-9 copy loop with
   %xmm1..%xmm5 holding loaded source data -- confirm.  */
/* %xmm2 = low 16 bytes of (%xmm2:%xmm1) shifted right by 9 bytes.  */
3557 palignr $9, %xmm1, %xmm2
3558 movaps %xmm2, (%edx)
3559 movaps 23(%ecx), %xmm2
3563 palignr $9, %xmm3, %xmm2
3564 movaps %xmm2, 16(%edx)
3568 movaps %xmm4, 32(%edx)
3572 movaps %xmm5, 48(%edx)
/* Advance both pointers by %esi + 7; lea does not modify the flags.  */
3576 lea 7(%edx, %esi), %edx
3577 lea 7(%ecx, %esi), %ecx
/* 8-byte copy of the bytes ending at the new pointers.  */
3579 movlpd -8(%ecx), %xmm0
3580 movlpd %xmm0, -8(%edx)
3582 jmp L(CopyFrom1To16BytesCase3)
/* Leave path for a source pointer that is 10 bytes past a 16-byte boundary:
   the aligned load from 22(%ecx) requires (%ecx + 22) to be a multiple
   of 16.  Stores up to four 16-byte chunks at 0/16/32/48(%edx), with an
   early exit to L(StrncpyExit10) before each store, then copies the
   8 bytes that end at the advanced pointers.
   NOTE(review): the jle/jbe exits presumably fire when the remaining
   length is exhausted; the first check is signed (jle) and the later
   ones unsigned (jbe) -- confirm this is intentional.  */
3587 jle L(StrncpyExit10)
/* %xmm2 = low 16 bytes of (%xmm2:%xmm1) shifted right by 10 bytes.  */
3588 palignr $10, %xmm1, %xmm2
3589 movaps %xmm2, (%edx)
3590 movaps 22(%ecx), %xmm2
3593 jbe L(StrncpyExit10)
3594 palignr $10, %xmm3, %xmm2
3595 movaps %xmm2, 16(%edx)
3598 jbe L(StrncpyExit10)
3599 movaps %xmm4, 32(%edx)
3602 jbe L(StrncpyExit10)
3603 movaps %xmm5, 48(%edx)
/* Advance both pointers by %esi + 6; lea does not modify the flags.  */
3607 lea 6(%edx, %esi), %edx
3608 lea 6(%ecx, %esi), %ecx
/* 8-byte copy of the bytes ending at the new pointers.  */
3610 movlpd -8(%ecx), %xmm0
3611 movlpd %xmm0, -8(%edx)
3613 jmp L(CopyFrom1To16BytesCase3)
/* Leave path for a source pointer that is 11 bytes past a 16-byte boundary:
   the aligned load from 21(%ecx) requires (%ecx + 21) to be a multiple
   of 16.  Stores up to four 16-byte chunks at 0/16/32/48(%edx), with an
   early exit to L(StrncpyExit11) before each store, then advances both
   pointers and continues in the Case3 tail.
   NOTE(review): the jle/jbe exits presumably fire when the remaining
   length is exhausted; the first check is signed (jle) and the later
   ones unsigned (jbe) -- confirm this is intentional.  */
3618 jle L(StrncpyExit11)
/* %xmm2 = low 16 bytes of (%xmm2:%xmm1) shifted right by 11 bytes.  */
3619 palignr $11, %xmm1, %xmm2
3620 movaps %xmm2, (%edx)
3621 movaps 21(%ecx), %xmm2
3624 jbe L(StrncpyExit11)
3625 palignr $11, %xmm3, %xmm2
3626 movaps %xmm2, 16(%edx)
3629 jbe L(StrncpyExit11)
3630 movaps %xmm4, 32(%edx)
3633 jbe L(StrncpyExit11)
3634 movaps %xmm5, 48(%edx)
/* Advance both pointers by %esi + 5; lea does not modify the flags.  */
3638 lea 5(%edx, %esi), %edx
3639 lea 5(%ecx, %esi), %ecx
3645 jmp L(CopyFrom1To16BytesCase3)
/* Leave path for a source pointer that is 12 bytes past a 16-byte boundary:
   the aligned load from 20(%ecx) requires (%ecx + 20) to be a multiple
   of 16.  Stores up to four 16-byte chunks at 0/16/32/48(%edx), with an
   early exit to L(StrncpyExit12) before each store, then advances both
   pointers and continues in the Case3 tail.
   NOTE(review): the jle/jbe exits presumably fire when the remaining
   length is exhausted; the first check is signed (jle) and the later
   ones unsigned (jbe) -- confirm this is intentional.  */
3650 jle L(StrncpyExit12)
/* %xmm2 = low 16 bytes of (%xmm2:%xmm1) shifted right by 12 bytes.  */
3651 palignr $12, %xmm1, %xmm2
3652 movaps %xmm2, (%edx)
3653 movaps 20(%ecx), %xmm2
3656 jbe L(StrncpyExit12)
3657 palignr $12, %xmm3, %xmm2
3658 movaps %xmm2, 16(%edx)
3661 jbe L(StrncpyExit12)
3662 movaps %xmm4, 32(%edx)
3665 jbe L(StrncpyExit12)
3666 movaps %xmm5, 48(%edx)
/* Advance both pointers by %esi + 4; lea does not modify the flags.  */
3670 lea 4(%edx, %esi), %edx
3671 lea 4(%ecx, %esi), %ecx
3675 jmp L(CopyFrom1To16BytesCase3)
/* Leave path for a source pointer that is 13 bytes past a 16-byte boundary:
   the aligned load from 19(%ecx) requires (%ecx + 19) to be a multiple
   of 16.  Stores up to four 16-byte chunks at 0/16/32/48(%edx), with an
   early exit to L(StrncpyExit13) before each store, then advances both
   pointers and continues in the Case3 tail.
   NOTE(review): the jle/jbe exits presumably fire when the remaining
   length is exhausted; the first check is signed (jle) and the later
   ones unsigned (jbe) -- confirm this is intentional.  */
3680 jle L(StrncpyExit13)
/* %xmm2 = low 16 bytes of (%xmm2:%xmm1) shifted right by 13 bytes.  */
3681 palignr $13, %xmm1, %xmm2
3682 movaps %xmm2, (%edx)
3683 movaps 19(%ecx), %xmm2
3686 jbe L(StrncpyExit13)
3687 palignr $13, %xmm3, %xmm2
3688 movaps %xmm2, 16(%edx)
3691 jbe L(StrncpyExit13)
3692 movaps %xmm4, 32(%edx)
3695 jbe L(StrncpyExit13)
3696 movaps %xmm5, 48(%edx)
/* Advance both pointers by %esi + 3; lea does not modify the flags.  */
3700 lea 3(%edx, %esi), %edx
3701 lea 3(%ecx, %esi), %ecx
3706 jmp L(CopyFrom1To16BytesCase3)
/* Leave path for a source pointer that is 14 bytes past a 16-byte boundary:
   the aligned load from 18(%ecx) requires (%ecx + 18) to be a multiple
   of 16.  Stores up to four 16-byte chunks at 0/16/32/48(%edx), with an
   early exit to L(StrncpyExit14) before each store, then advances both
   pointers and continues in the Case3 tail.
   NOTE(review): the jle/jbe exits presumably fire when the remaining
   length is exhausted; the first check is signed (jle) and the later
   ones unsigned (jbe) -- confirm this is intentional.  */
3711 jle L(StrncpyExit14)
/* %xmm2 = low 16 bytes of (%xmm2:%xmm1) shifted right by 14 bytes.  */
3712 palignr $14, %xmm1, %xmm2
3713 movaps %xmm2, (%edx)
3714 movaps 18(%ecx), %xmm2
3717 jbe L(StrncpyExit14)
3718 palignr $14, %xmm3, %xmm2
3719 movaps %xmm2, 16(%edx)
3722 jbe L(StrncpyExit14)
3723 movaps %xmm4, 32(%edx)
3726 jbe L(StrncpyExit14)
3727 movaps %xmm5, 48(%edx)
/* Advance both pointers by %esi + 2; lea does not modify the flags.  */
3731 lea 2(%edx, %esi), %edx
3732 lea 2(%ecx, %esi), %ecx
3736 jmp L(CopyFrom1To16BytesCase3)
/* Leave path for a source pointer that is 15 bytes past a 16-byte boundary:
   the aligned load from 17(%ecx) requires (%ecx + 17) to be a multiple
   of 16.  Stores up to four 16-byte chunks at 0/16/32/48(%edx), with an
   early exit to L(StrncpyExit15) before each store, then advances both
   pointers and continues in the Case3 tail.
   NOTE(review): the jle/jbe exits presumably fire when the remaining
   length is exhausted; the first check is signed (jle) and the later
   ones unsigned (jbe) -- confirm this is intentional.  */
3741 jle L(StrncpyExit15)
/* %xmm2 = low 16 bytes of (%xmm2:%xmm1) shifted right by 15 bytes.  */
3742 palignr $15, %xmm1, %xmm2
3743 movaps %xmm2, (%edx)
3744 movaps 17(%ecx), %xmm2
3747 jbe L(StrncpyExit15)
3748 palignr $15, %xmm3, %xmm2
3749 movaps %xmm2, 16(%edx)
3752 jbe L(StrncpyExit15)
3753 movaps %xmm4, 32(%edx)
3756 jbe L(StrncpyExit15)
3757 movaps %xmm5, 48(%edx)
/* Advance both pointers by %esi + 1; lea does not modify the flags.  */
3761 lea 1(%edx, %esi), %edx
3762 lea 1(%ecx, %esi), %ecx
3766 jmp L(CopyFrom1To16BytesCase3)
3769 # ifndef USE_AS_STRCAT
3770 # ifdef USE_AS_STRNCPY
/* StrncpyExit15Bytes: short-length strncpy exit.  Branches to
   L(StrncpyExit12Bytes) on a below-or-equal (unsigned) result; otherwise
   copies 15 bytes from (%ecx) to (%edx) with two overlapping 8-byte
   moves (offsets 0 and 7).  The return value is then prepared
   differently depending on USE_AS_STPCPY.
   NOTE(review): the jbe presumably follows a comparison of the remaining
   length against 12 -- confirm.  */
3780 L(StrncpyExit15Bytes):
3782 jbe L(StrncpyExit12Bytes)
3799 movlpd (%ecx), %xmm0
3800 movlpd 7(%ecx), %xmm1
3801 movlpd %xmm0, (%edx)
3802 movlpd %xmm1, 7(%edx)
3803 # ifdef USE_AS_STPCPY
/* StrncpyExit12Bytes: short-length strncpy exit.  Moves 8 bytes from
   (%ecx) to (%edx) through %xmm0 and sets the return value with
   SAVE_RESULT_TAIL (11), which expands to either "lea 11(%edx), %eax"
   or "movl %edx, %eax" depending on which definition of the macro is
   active.  Further return handling depends on USE_AS_STPCPY.  */
3813 L(StrncpyExit12Bytes):
3826 movlpd (%ecx), %xmm0
3828 movlpd %xmm0, (%edx)
3830 SAVE_RESULT_TAIL (11)
3831 # ifdef USE_AS_STPCPY
/* StrncpyExit8Bytes: short-length strncpy exit.  Branches to
   L(StrncpyExit4Bytes) on a below-or-equal (unsigned) result; otherwise
   moves 8 bytes from (%ecx) to (%edx) through %xmm0.  The return value
   is then prepared differently depending on USE_AS_STPCPY.
   NOTE(review): the jbe presumably follows a comparison of the remaining
   length against 4 -- confirm.  */
3838 L(StrncpyExit8Bytes):
3840 jbe L(StrncpyExit4Bytes)
3862 movlpd (%ecx), %xmm0
3863 movlpd %xmm0, (%edx)
3864 # ifdef USE_AS_STPCPY
3874 L(StrncpyExit4Bytes):
3891 SAVE_RESULT_TAIL (3)
3892 # ifdef USE_AS_STPCPY