2 Copyright (C) 2009 Free Software Foundation, Inc.
3 Contributed by Intel Corporation.
4 This file is part of the GNU C Library.
6 The GNU C Library is free software; you can redistribute it and/or
7 modify it under the terms of the GNU Lesser General Public
8 License as published by the Free Software Foundation; either
9 version 2.1 of the License, or (at your option) any later version.
11 The GNU C Library is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 Lesser General Public License for more details.
16 You should have received a copy of the GNU Lesser General Public
17 License along with the GNU C Library; if not, write to the Free
18 Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
22 #include <ifunc-defines.h>
/* Pick the symbol names for the variant being assembled.  The macros below
   map STRCPY_SSSE3, STRCPY_SSE2 and __GI_STRCPY onto the strcpy, stpcpy,
   strncpy or stpncpy names.  The #else/#endif lines that pair these
   conditionals are not visible in this excerpt.  */
24 #if !defined (USE_AS_STPCPY) && !defined (USE_AS_STRNCPY)
26 # define STRCPY strcpy
31 # ifdef USE_AS_STRNCPY
32 # define STRCPY_SSSE3 __stpncpy_ssse3
33 # define STRCPY_SSE2 __stpncpy_sse2
34 # define __GI_STRCPY __GI_stpncpy
36 # define STRCPY_SSSE3 __stpcpy_ssse3
37 # define STRCPY_SSE2 __stpcpy_sse2
38 # define __GI_STRCPY __GI_stpcpy
39 # define __GI___STRCPY __GI___stpcpy
42 # ifdef USE_AS_STRNCPY
43 # define STRCPY_SSSE3 __strncpy_ssse3
44 # define STRCPY_SSE2 __strncpy_sse2
45 # define __GI_STRCPY __GI_strncpy
47 # define STRCPY_SSSE3 __strcpy_ssse3
48 # define STRCPY_SSE2 __strcpy_sse2
49 # define __GI_STRCPY __GI_strcpy
57 /* Define multiple versions only for the definition in libc. */
/* IFUNC resolver for STRCPY: it leaves the address of the chosen
   implementation in %rax.  The conditional branches that sit between the
   visible instructions are omitted from this excerpt (the embedded line
   numbers skip).  */
61 .type STRCPY, @gnu_indirect_function
62 cmpl $0, __cpu_features+KIND_OFFSET(%rip) /* kind == 0: presumably __cpu_features has not been initialized yet */
64 call __init_cpu_features /* presumably populates __cpu_features */
65 1: leaq STRCPY_SSE2(%rip), %rax /* default answer: the SSE2 version */
66 testl $(1<<9), __cpu_features+CPUID_OFFSET+COMMON_CPUID_INDEX_1*CPUID_SIZE+CPUID_ECX_OFFSET(%rip) /* bit 9 of CPUID leaf 1 ECX is the SSSE3 feature flag */
68 /* Avoid SSSE3 strcpy on Atom since it is slow. */
69 cmpl $1, __cpu_features+KIND_OFFSET(%rip) /* kind 1: presumably the Intel vendor value -- confirm against the cpu-features header */
71 cmpl $6, __cpu_features+FAMILY_OFFSET(%rip) /* family 6 ... */
73 cmpl $28, __cpu_features+MODEL_OFFSET(%rip) /* ... model 28: together these identify the Atom mentioned above */
75 2: leaq STRCPY_SSSE3(%rip), %rax /* replace the default with the SSSE3 version */
79 .section .text.ssse3,"ax",@progbits
85 * This implementation uses SSE to copy up to 16 bytes at a time.
89 jz LABEL(strncpy_exitz)
95 and $0xfffffffffffffff0, %rsi /* round rsi down to a 16-byte boundary */
/* Head probe: look for a NUL in the first aligned 16-byte source chunk and,
   further down, in the following one.  Instructions between the visible
   lines are omitted from this excerpt, so the setup of %cl, %r8, %r9 and
   %r10 is not shown here.  */
97 mov %rdi, %rax /* rax = original destination pointer (the return value) */
100 pxor %xmm0, %xmm0 /* xmm0 = 0, the pattern to compare against */
101 pcmpeqb (%rsi), %xmm0 /* each byte of xmm0 becomes 0xff where the source byte at (%rsi) is NUL */
102 pmovmskb %xmm0, %edx /* edx = one bit per byte of that comparison result */
103 shr %cl, %edx /* drop the mask bits for bytes before the real start of the string (cl presumably holds the source misalignment) */
104 test %edx, %edx /* edx must be 0 if there is no null char from rsi+%rcx */
105 jnz LABEL(less16bytes)
107 #ifdef USE_AS_STRNCPY
108 lea -16(%r8,%rcx), %r11 /* r11 = r8 + rcx - 16 (r8 is presumably the strncpy count) */
110 jle LABEL(less16bytes) /* if r8 + rcx <= 16, branch to less16bytes. */
117 jz LABEL(ashr_0) /* ecx must be 0 if offset of rsi and rdi is 16 byte align*/
119 neg %r10 /* store the rest in rsi aligned 16 bytes for unaligned_exit*/
121 pxor %xmm0, %xmm0 /* clear %xmm0, may be polluted by unaligned operation*/
122 pcmpeqb 16(%rsi), %xmm0 /* compare the next 16 bytes, at 16(%rsi), against zero to find a null char */
125 jnz LABEL(less32bytes)
127 * at least 16 byte available to fill destination rdi
129 #ifdef USE_AS_STRNCPY
131 jbe LABEL(less32bytes_strncpy_truncation) /* strncpy builds only; the compare feeding this branch is not visible here */
133 mov (%rsi, %r9), %rdx /* load 8 bytes from (%rsi,%r9); the matching store is not visible here */
135 mov 8(%rsi, %r9), %rdx /* load the following 8 bytes; the matching store is not visible here */
139 * so far destination rdi may be aligned by 16, re-calculate rsi to jump
141 * rcx is offset of rsi
142 * rax is offset of rdi
145 and $0xfffffffffffffff0, %rdi /* force rdi 16 byte align */
/* Realign both pointers and select a copy routine through unaligned_table,
   indexed by the value left in ecx.  Instructions between the visible lines
   are omitted from this excerpt.  */
146 mov %rax, %rdx /* rax holds the original rdi */
147 xor %rdi, %rdx /* original rdi XOR aligned rdi leaves its low 4 bits, i.e. equal to and $15, %rdx */
148 #ifdef USE_AS_STRNCPY
152 add $16, %rdi /* next 16 bytes for rdi */
155 lea 16(%r9, %rsi), %rsi /*re-calculate rsi by (16 - rdx)+ rcx */
156 mov %esi, %ecx /* copy the low 32 bits of rsi; masked to its low 4 bits below */
157 and $0xfffffffffffffff0, %rsi /* force rsi 16 byte align */
159 and $15, %ecx /* ecx must be 0 if rdx is equal to rcx*/
165 lea LABEL(unaligned_table)(%rip), %r11 /* r11 = base address of the jump table */
166 movslq (%r11, %rcx,4), %rcx /* rcx = sign-extended 32-bit table entry number rcx */
167 lea (%r11, %rcx), %rcx /* rcx = table base + entry = handler address; the jump itself is not visible here */
171 * The following cases will be handled by ashr_0 & ashr_0_start
172 * rcx(offset of rsi) rax(offset of rdi) relative offset corresponding case
174 * n(1~15) n(1~15) 0 ashr_0_start
179 #ifdef USE_AS_STRNCPY
/* Copy path for the relative-offset-0 case: source and destination are both
   accessed with aligned movdqa.  Four copy/compare rounds indexed by rcx are
   visible before the branch back to ashr_0_loop, whose label line is not
   visible.  The lines omitted from this excerpt presumably hold the pointer,
   offset and count updates, so the visible order alone does not show which
   chunk each compare inspects.  */
181 jbe LABEL(strncpy_truncation_aligned) /* strncpy builds only; the compare feeding this branch is not visible here */
183 movdqa (%rsi), %xmm1 /* fetch first 16 bytes from rsi */
184 movdqa %xmm1, (%rdi) /* store first 16 bytes into rdi */
187 pcmpeqb (%rsi), %xmm0 /* compare 16 bytes in (%rsi) and %xmm0 for equality, try to find null char */
188 pmovmskb %xmm0, %edx /* move each byte mask of %xmm0 to edx*/
190 test %edx, %edx /* edx must be 0 if there is no null char in rsi*/
191 jnz LABEL(aligned_16bytes)
194 #ifdef USE_AS_STRNCPY
196 jbe LABEL(strncpy_truncation_aligned)
198 movdqa (%rsi, %rcx), %xmm1 /* aligned load of 16 source bytes at offset rcx */
199 movdqa %xmm1, (%rdi, %rcx) /* aligned store at the same offset in the destination */
201 pcmpeqb (%rsi, %rcx), %xmm0 /* mark bytes of the chunk at (%rsi,%rcx) that equal the xmm0 bytes */
204 jnz LABEL(aligned_exit) /* presumably taken when a NUL byte was detected */
206 #ifdef USE_AS_STRNCPY
208 jbe LABEL(strncpy_truncation_aligned)
210 movdqa (%rsi, %rcx), %xmm1
211 movdqa %xmm1, (%rdi, %rcx)
213 pcmpeqb (%rsi, %rcx), %xmm0
216 jnz LABEL(aligned_exit)
218 #ifdef USE_AS_STRNCPY
220 jbe LABEL(strncpy_truncation_aligned)
222 movdqa (%rsi, %rcx), %xmm1
223 movdqa %xmm1, (%rdi, %rcx)
225 pcmpeqb (%rsi, %rcx), %xmm0
228 jnz LABEL(aligned_exit)
230 #ifdef USE_AS_STRNCPY
232 jbe LABEL(strncpy_truncation_aligned)
234 movdqa (%rsi, %rcx), %xmm1
235 movdqa %xmm1, (%rdi, %rcx)
237 pcmpeqb (%rsi, %rcx), %xmm0
240 jz LABEL(ashr_0_loop) /* loop again; the flag-setting test is not visible here */
242 jmp LABEL(aligned_exit)
246 * The following cases will be handled by ashr_15
247 * rcx(offset of rsi) rax(offset of rdi) relative offset corresponding case
248 * n(15) n - 15 15((16 - (n -15) + n)%16 ashr_15
250 * Based on above operation , start from (%r9 + rsi) to the left of this cache bank, there is no null byte
254 xor %ecx, %ecx /* rcx = 0: byte offset used to index both (%rsi) and (%rdi) below */
255 #ifdef USE_AS_STRNCPY
257 jbe LABEL(unaligned_exit) /* strncpy builds only; the compare feeding this branch is not visible here */
/* Copy loop for a relative source/destination offset of 15 bytes.  Two
   load/palignr/store rounds are visible per pass before the jump back to
   this label.  The lines omitted from this excerpt (the embedded numbering
   skips) presumably hold the NUL test and the offset/count updates.  */
261 LABEL(ashr_15_use_ssse3):
262 movdqa 16(%rsi, %rcx), %xmm3 /* aligned load of the 16 source bytes at 16(%rsi,%rcx) */
266 jnz LABEL(unaligned_exit) /* presumably taken when a NUL byte was detected */
267 #ifdef USE_AS_STRNCPY
269 jbe LABEL(strncpy_truncation_unaligned)
272 palignr $15, (%rsi, %rcx), %xmm3 /* xmm3 = low 16 bytes of ((xmm3 : 16 bytes at (%rsi,%rcx)) >> 15 bytes) */
273 movdqa %xmm3, (%rdi, %rcx) /* aligned 16-byte store to the destination */
276 #ifdef USE_AS_STRNCPY
278 jbe LABEL(unaligned_exit)
281 movdqa 16(%rsi, %rcx), %xmm3 /* second round: same sequence as above */
285 jnz LABEL(unaligned_exit)
286 #ifdef USE_AS_STRNCPY
288 jbe LABEL(strncpy_truncation_unaligned)
291 palignr $15, (%rsi, %rcx), %xmm3
292 movdqa %xmm3, (%rdi, %rcx)
295 #ifdef USE_AS_STRNCPY
297 jbe LABEL(unaligned_exit)
299 jmp LABEL(ashr_15_use_ssse3)
302 * The following cases will be handled by ashr_14
303 * rcx(offset of rsi) rax(offset of rdi) relative offset corresponding case
304 * n(14~15) n - 14 14((16 - (n -14) + n)%16 ashr_14
306 * Based on above operation , start from (%r9 + rsi) to the left of this cache bank, there is no null byte
310 xor %ecx, %ecx /* rcx = 0: byte offset used to index both (%rsi) and (%rdi) below */
311 #ifdef USE_AS_STRNCPY
313 jbe LABEL(unaligned_exit) /* strncpy builds only; the compare feeding this branch is not visible here */
/* Copy loop for a relative source/destination offset of 14 bytes.  Two
   load/palignr/store rounds are visible per pass before the jump back to
   this label.  The lines omitted from this excerpt (the embedded numbering
   skips) presumably hold the NUL test and the offset/count updates.  */
317 LABEL(ashr_14_use_ssse3):
318 movdqa 16(%rsi, %rcx), %xmm3 /* aligned load of the 16 source bytes at 16(%rsi,%rcx) */
322 jnz LABEL(unaligned_exit) /* presumably taken when a NUL byte was detected */
323 #ifdef USE_AS_STRNCPY
325 jbe LABEL(strncpy_truncation_unaligned)
328 palignr $14, (%rsi, %rcx), %xmm3 /* xmm3 = low 16 bytes of ((xmm3 : 16 bytes at (%rsi,%rcx)) >> 14 bytes) */
329 movdqa %xmm3, (%rdi, %rcx) /* aligned 16-byte store to the destination */
332 #ifdef USE_AS_STRNCPY
334 jbe LABEL(unaligned_exit)
337 movdqa 16(%rsi, %rcx), %xmm3 /* second round: same sequence as above */
341 jnz LABEL(unaligned_exit)
342 #ifdef USE_AS_STRNCPY
344 jbe LABEL(strncpy_truncation_unaligned)
347 palignr $14, (%rsi, %rcx), %xmm3
348 movdqa %xmm3, (%rdi, %rcx)
351 #ifdef USE_AS_STRNCPY
353 jbe LABEL(unaligned_exit)
355 jmp LABEL(ashr_14_use_ssse3)
358 * The following cases will be handled by ashr_13
359 * rcx(offset of rsi) rax(offset of rdi) relative offset corresponding case
360 * n(13~15) n - 13 13((16 - (n -13) + n)%16 ashr_13
362 * Based on above operation , start from (%r9 + rsi) to the left of this cache bank, there is no null byte
366 xor %ecx, %ecx /* rcx = 0: byte offset used to index both (%rsi) and (%rdi) below */
367 #ifdef USE_AS_STRNCPY
369 jbe LABEL(unaligned_exit) /* strncpy builds only; the compare feeding this branch is not visible here */
/* Copy loop for a relative source/destination offset of 13 bytes.  Two
   load/palignr/store rounds are visible per pass before the jump back to
   this label.  The lines omitted from this excerpt (the embedded numbering
   skips) presumably hold the NUL test and the offset/count updates.  */
373 LABEL(ashr_13_use_ssse3):
374 movdqa 16(%rsi, %rcx), %xmm3 /* aligned load of the 16 source bytes at 16(%rsi,%rcx) */
378 jnz LABEL(unaligned_exit) /* presumably taken when a NUL byte was detected */
379 #ifdef USE_AS_STRNCPY
381 jbe LABEL(strncpy_truncation_unaligned)
384 palignr $13, (%rsi, %rcx), %xmm3 /* xmm3 = low 16 bytes of ((xmm3 : 16 bytes at (%rsi,%rcx)) >> 13 bytes) */
385 movdqa %xmm3, (%rdi, %rcx) /* aligned 16-byte store to the destination */
388 #ifdef USE_AS_STRNCPY
390 jbe LABEL(unaligned_exit)
393 movdqa 16(%rsi, %rcx), %xmm3 /* second round: same sequence as above */
397 jnz LABEL(unaligned_exit)
398 #ifdef USE_AS_STRNCPY
400 jbe LABEL(strncpy_truncation_unaligned)
403 palignr $13, (%rsi, %rcx), %xmm3
404 movdqa %xmm3, (%rdi, %rcx)
407 #ifdef USE_AS_STRNCPY
409 jbe LABEL(unaligned_exit)
411 jmp LABEL(ashr_13_use_ssse3)
414 * The following cases will be handled by ashr_12
415 * rcx(offset of rsi) rax(offset of rdi) relative offset corresponding case
416 * n(12~15) n - 12 12((16 - (n -12) + n)%16 ashr_12
418 * Based on above operation , start from (%r9 + rsi) to the left of this cache bank, there is no null byte
422 xor %ecx, %ecx /* rcx = 0: byte offset used to index both (%rsi) and (%rdi) below */
423 #ifdef USE_AS_STRNCPY
425 jbe LABEL(unaligned_exit) /* strncpy builds only; the compare feeding this branch is not visible here */
/* Copy loop for a relative source/destination offset of 12 bytes.  Two
   load/palignr/store rounds are visible per pass before the jump back to
   this label.  The lines omitted from this excerpt (the embedded numbering
   skips) presumably hold the NUL test and the offset/count updates.  */
429 LABEL(ashr_12_use_ssse3):
430 movdqa 16(%rsi, %rcx), %xmm3 /* aligned load of the 16 source bytes at 16(%rsi,%rcx) */
434 jnz LABEL(unaligned_exit) /* presumably taken when a NUL byte was detected */
435 #ifdef USE_AS_STRNCPY
437 jbe LABEL(strncpy_truncation_unaligned)
440 palignr $12, (%rsi, %rcx), %xmm3 /* xmm3 = low 16 bytes of ((xmm3 : 16 bytes at (%rsi,%rcx)) >> 12 bytes) */
441 movdqa %xmm3, (%rdi, %rcx) /* aligned 16-byte store to the destination */
444 #ifdef USE_AS_STRNCPY
446 jbe LABEL(unaligned_exit)
449 movdqa 16(%rsi, %rcx), %xmm3 /* second round: same sequence as above */
453 jnz LABEL(unaligned_exit)
454 #ifdef USE_AS_STRNCPY
456 jbe LABEL(strncpy_truncation_unaligned)
459 palignr $12, (%rsi, %rcx), %xmm3
460 movdqa %xmm3, (%rdi, %rcx)
463 #ifdef USE_AS_STRNCPY
465 jbe LABEL(unaligned_exit)
467 jmp LABEL(ashr_12_use_ssse3)
470 * The following cases will be handled by ashr_11
471 * rcx(offset of rsi) rax(offset of rdi) relative offset corresponding case
472 * n(11~15) n - 11 11((16 - (n -11) + n)%16 ashr_11
474 * Based on above operation , start from (%r9 + rsi) to the left of this cache bank, there is no null byte
478 xor %ecx, %ecx /* rcx = 0: byte offset used to index both (%rsi) and (%rdi) below */
479 #ifdef USE_AS_STRNCPY
481 jbe LABEL(unaligned_exit) /* strncpy builds only; the compare feeding this branch is not visible here */
/* Copy loop for a relative source/destination offset of 11 bytes.  Two
   load/palignr/store rounds are visible per pass before the jump back to
   this label.  The lines omitted from this excerpt (the embedded numbering
   skips) presumably hold the NUL test and the offset/count updates.  */
485 LABEL(ashr_11_use_ssse3):
486 movdqa 16(%rsi, %rcx), %xmm3 /* aligned load of the 16 source bytes at 16(%rsi,%rcx) */
490 jnz LABEL(unaligned_exit) /* presumably taken when a NUL byte was detected */
491 #ifdef USE_AS_STRNCPY
493 jbe LABEL(strncpy_truncation_unaligned)
496 palignr $11, (%rsi, %rcx), %xmm3 /* xmm3 = low 16 bytes of ((xmm3 : 16 bytes at (%rsi,%rcx)) >> 11 bytes) */
497 movdqa %xmm3, (%rdi, %rcx) /* aligned 16-byte store to the destination */
500 #ifdef USE_AS_STRNCPY
502 jbe LABEL(unaligned_exit)
505 movdqa 16(%rsi, %rcx), %xmm3 /* second round: same sequence as above */
509 jnz LABEL(unaligned_exit)
510 #ifdef USE_AS_STRNCPY
512 jbe LABEL(strncpy_truncation_unaligned)
515 palignr $11, (%rsi, %rcx), %xmm3
516 movdqa %xmm3, (%rdi, %rcx)
519 #ifdef USE_AS_STRNCPY
521 jbe LABEL(unaligned_exit)
523 jmp LABEL(ashr_11_use_ssse3)
526 * The following cases will be handled by ashr_10
527 * rcx(offset of rsi) rax(offset of rdi) relative offset corresponding case
528 * n(10~15) n - 10 10((16 - (n -10) + n)%16 ashr_10
530 * Based on above operation , start from (%r9 + rsi) to the left of this cache bank, there is no null byte
534 xor %ecx, %ecx /* rcx = 0: byte offset used to index both (%rsi) and (%rdi) below */
535 #ifdef USE_AS_STRNCPY
537 jbe LABEL(unaligned_exit) /* strncpy builds only; the compare feeding this branch is not visible here */
/* Copy loop for a relative source/destination offset of 10 bytes.  Two
   load/palignr/store rounds are visible per pass before the jump back to
   this label.  The lines omitted from this excerpt (the embedded numbering
   skips) presumably hold the NUL test and the offset/count updates.  */
541 LABEL(ashr_10_use_ssse3):
542 movdqa 16(%rsi, %rcx), %xmm3 /* aligned load of the 16 source bytes at 16(%rsi,%rcx) */
546 jnz LABEL(unaligned_exit) /* presumably taken when a NUL byte was detected */
547 #ifdef USE_AS_STRNCPY
549 jbe LABEL(strncpy_truncation_unaligned)
552 palignr $10, (%rsi, %rcx), %xmm3 /* xmm3 = low 16 bytes of ((xmm3 : 16 bytes at (%rsi,%rcx)) >> 10 bytes) */
553 movdqa %xmm3, (%rdi, %rcx) /* aligned 16-byte store to the destination */
556 #ifdef USE_AS_STRNCPY
558 jbe LABEL(unaligned_exit)
561 movdqa 16(%rsi, %rcx), %xmm3 /* second round: same sequence as above */
565 jnz LABEL(unaligned_exit)
566 #ifdef USE_AS_STRNCPY
568 jbe LABEL(strncpy_truncation_unaligned)
571 palignr $10, (%rsi, %rcx), %xmm3
572 movdqa %xmm3, (%rdi, %rcx)
575 #ifdef USE_AS_STRNCPY
577 jbe LABEL(unaligned_exit)
579 jmp LABEL(ashr_10_use_ssse3)
582 * The following cases will be handled by ashr_9
583 * rcx(offset of rsi) rax(offset of rdi) relative offset corresponding case
584 * n(9~15) n - 9 9((16 - (n -9) + n)%16 ashr_9
586 * Based on above operation , start from (%r9 + rsi) to the left of this cache bank, there is no null byte
590 xor %ecx, %ecx /* rcx = 0: byte offset used to index both (%rsi) and (%rdi) below */
591 #ifdef USE_AS_STRNCPY
593 jbe LABEL(unaligned_exit) /* strncpy builds only; the compare feeding this branch is not visible here */
/* Copy loop for a relative source/destination offset of 9 bytes.  Two
   load/palignr/store rounds are visible per pass before the jump back to
   this label.  The lines omitted from this excerpt (the embedded numbering
   skips) presumably hold the NUL test and the offset/count updates.  */
597 LABEL(ashr_9_use_ssse3):
598 movdqa 16(%rsi, %rcx), %xmm3 /* aligned load of the 16 source bytes at 16(%rsi,%rcx) */
602 jnz LABEL(unaligned_exit) /* presumably taken when a NUL byte was detected */
603 #ifdef USE_AS_STRNCPY
605 jbe LABEL(strncpy_truncation_unaligned)
608 palignr $9, (%rsi, %rcx), %xmm3 /* xmm3 = low 16 bytes of ((xmm3 : 16 bytes at (%rsi,%rcx)) >> 9 bytes) */
609 movdqa %xmm3, (%rdi, %rcx) /* aligned 16-byte store to the destination */
612 #ifdef USE_AS_STRNCPY
614 jbe LABEL(unaligned_exit)
617 movdqa 16(%rsi, %rcx), %xmm3 /* second round: same sequence as above */
621 jnz LABEL(unaligned_exit)
622 #ifdef USE_AS_STRNCPY
624 jbe LABEL(strncpy_truncation_unaligned)
627 palignr $9, (%rsi, %rcx), %xmm3
628 movdqa %xmm3, (%rdi, %rcx)
631 #ifdef USE_AS_STRNCPY
633 jbe LABEL(unaligned_exit)
635 jmp LABEL(ashr_9_use_ssse3)
638 * The following cases will be handled by ashr_8
639 * rcx(offset of rsi) rax(offset of rdi) relative offset corresponding case
640 * n(8~15) n - 8 8((16 - (n -8) + n)%16 ashr_8
642 * Based on above operation , start from (%r9 + rsi) to the left of this cache bank, there is no null byte
646 xor %ecx, %ecx /* rcx = 0: byte offset used to index both (%rsi) and (%rdi) below */
647 #ifdef USE_AS_STRNCPY
649 jbe LABEL(unaligned_exit) /* strncpy builds only; the compare feeding this branch is not visible here */
/* Copy loop for a relative source/destination offset of 8 bytes.  Two
   load/palignr/store rounds are visible per pass before the jump back to
   this label.  The lines omitted from this excerpt (the embedded numbering
   skips) presumably hold the NUL test and the offset/count updates.  */
653 LABEL(ashr_8_use_ssse3):
654 movdqa 16(%rsi, %rcx), %xmm3 /* aligned load of the 16 source bytes at 16(%rsi,%rcx) */
658 jnz LABEL(unaligned_exit) /* presumably taken when a NUL byte was detected */
659 #ifdef USE_AS_STRNCPY
661 jbe LABEL(strncpy_truncation_unaligned)
664 palignr $8, (%rsi, %rcx), %xmm3 /* xmm3 = low 16 bytes of ((xmm3 : 16 bytes at (%rsi,%rcx)) >> 8 bytes) */
665 movdqa %xmm3, (%rdi, %rcx) /* aligned 16-byte store to the destination */
668 #ifdef USE_AS_STRNCPY
670 jbe LABEL(unaligned_exit)
673 movdqa 16(%rsi, %rcx), %xmm3 /* second round: same sequence as above */
677 jnz LABEL(unaligned_exit)
678 #ifdef USE_AS_STRNCPY
680 jbe LABEL(strncpy_truncation_unaligned)
683 palignr $8, (%rsi, %rcx), %xmm3
684 movdqa %xmm3, (%rdi, %rcx)
687 #ifdef USE_AS_STRNCPY
689 jbe LABEL(unaligned_exit)
691 jmp LABEL(ashr_8_use_ssse3)
694 * The following cases will be handled by ashr_7
695 * rcx(offset of rsi) rax(offset of rdi) relative offset corresponding case
696 * n(7~15) n - 7 7((16 - (n -7) + n)%16 ashr_7
698 * Based on above operation , start from (%r9 + rsi) to the left of this cache bank, there is no null byte
702 xor %ecx, %ecx /* rcx = 0: byte offset used to index both (%rsi) and (%rdi) below */
703 #ifdef USE_AS_STRNCPY
705 jbe LABEL(unaligned_exit) /* strncpy builds only; the compare feeding this branch is not visible here */
/* Copy loop for a relative source/destination offset of 7 bytes.  Two
   load/palignr/store rounds are visible per pass before the jump back to
   this label.  The lines omitted from this excerpt (the embedded numbering
   skips) presumably hold the NUL test and the offset/count updates.  */
709 LABEL(ashr_7_use_ssse3):
710 movdqa 16(%rsi, %rcx), %xmm3 /* aligned load of the 16 source bytes at 16(%rsi,%rcx) */
714 jnz LABEL(unaligned_exit) /* presumably taken when a NUL byte was detected */
715 #ifdef USE_AS_STRNCPY
717 jbe LABEL(strncpy_truncation_unaligned)
720 palignr $7, (%rsi, %rcx), %xmm3 /* xmm3 = low 16 bytes of ((xmm3 : 16 bytes at (%rsi,%rcx)) >> 7 bytes) */
721 movdqa %xmm3, (%rdi, %rcx) /* aligned 16-byte store to the destination */
724 #ifdef USE_AS_STRNCPY
726 jbe LABEL(unaligned_exit)
729 movdqa 16(%rsi, %rcx), %xmm3 /* second round: same sequence as above */
733 jnz LABEL(unaligned_exit)
734 #ifdef USE_AS_STRNCPY
736 jbe LABEL(strncpy_truncation_unaligned)
739 palignr $7, (%rsi, %rcx), %xmm3
740 movdqa %xmm3, (%rdi, %rcx)
743 #ifdef USE_AS_STRNCPY
745 jbe LABEL(unaligned_exit)
747 jmp LABEL(ashr_7_use_ssse3)
750 * The following cases will be handled by ashr_6
751 * rcx(offset of rsi) rax(offset of rdi) relative offset corresponding case
752 * n(6~15) n - 6 6((16 - (n -6) + n)%16 ashr_6
754 * Based on above operation , start from (%r9 + rsi) to the left of this cache bank, there is no null byte
758 xor %ecx, %ecx /* rcx = 0: byte offset used to index both (%rsi) and (%rdi) below */
759 #ifdef USE_AS_STRNCPY
761 jbe LABEL(unaligned_exit) /* strncpy builds only; the compare feeding this branch is not visible here */
/* Copy loop for a relative source/destination offset of 6 bytes.  Two
   load/palignr/store rounds are visible per pass before the jump back to
   this label.  The lines omitted from this excerpt (the embedded numbering
   skips) presumably hold the NUL test and the offset/count updates.  */
765 LABEL(ashr_6_use_ssse3):
766 movdqa 16(%rsi, %rcx), %xmm3 /* aligned load of the 16 source bytes at 16(%rsi,%rcx) */
770 jnz LABEL(unaligned_exit) /* presumably taken when a NUL byte was detected */
771 #ifdef USE_AS_STRNCPY
773 jbe LABEL(strncpy_truncation_unaligned)
776 palignr $6, (%rsi, %rcx), %xmm3 /* xmm3 = low 16 bytes of ((xmm3 : 16 bytes at (%rsi,%rcx)) >> 6 bytes) */
777 movdqa %xmm3, (%rdi, %rcx) /* aligned 16-byte store to the destination */
780 #ifdef USE_AS_STRNCPY
782 jbe LABEL(unaligned_exit)
785 movdqa 16(%rsi, %rcx), %xmm3 /* second round: same sequence as above */
789 jnz LABEL(unaligned_exit)
790 #ifdef USE_AS_STRNCPY
792 jbe LABEL(strncpy_truncation_unaligned)
795 palignr $6, (%rsi, %rcx), %xmm3
796 movdqa %xmm3, (%rdi, %rcx)
799 #ifdef USE_AS_STRNCPY
801 jbe LABEL(unaligned_exit)
803 jmp LABEL(ashr_6_use_ssse3)
806 * The following cases will be handled by ashr_5
807 * rcx(offset of rsi) rax(offset of rdi) relative offset corresponding case
808 * n(5~15) n - 5 5((16 - (n -5) + n)%16 ashr_5
810 * Based on above operation , start from (%r9 + rsi) to the left of this cache bank, there is no null byte
814 xor %ecx, %ecx /* rcx = 0: byte offset used to index both (%rsi) and (%rdi) below */
815 #ifdef USE_AS_STRNCPY
817 jbe LABEL(unaligned_exit) /* strncpy builds only; the compare feeding this branch is not visible here */
/* Copy loop for a relative source/destination offset of 5 bytes.  Two
   load/palignr/store rounds are visible per pass before the jump back to
   this label.  The lines omitted from this excerpt (the embedded numbering
   skips) presumably hold the NUL test and the offset/count updates.  */
821 LABEL(ashr_5_use_ssse3):
822 movdqa 16(%rsi, %rcx), %xmm3 /* aligned load of the 16 source bytes at 16(%rsi,%rcx) */
826 jnz LABEL(unaligned_exit) /* presumably taken when a NUL byte was detected */
827 #ifdef USE_AS_STRNCPY
829 jbe LABEL(strncpy_truncation_unaligned)
832 palignr $5, (%rsi, %rcx), %xmm3 /* xmm3 = low 16 bytes of ((xmm3 : 16 bytes at (%rsi,%rcx)) >> 5 bytes) */
833 movdqa %xmm3, (%rdi, %rcx) /* aligned 16-byte store to the destination */
836 #ifdef USE_AS_STRNCPY
838 jbe LABEL(unaligned_exit)
841 movdqa 16(%rsi, %rcx), %xmm3 /* second round: same sequence as above */
845 jnz LABEL(unaligned_exit)
846 #ifdef USE_AS_STRNCPY
848 jbe LABEL(strncpy_truncation_unaligned)
851 palignr $5, (%rsi, %rcx), %xmm3
852 movdqa %xmm3, (%rdi, %rcx)
855 #ifdef USE_AS_STRNCPY
857 jbe LABEL(unaligned_exit)
859 jmp LABEL(ashr_5_use_ssse3)
863 * The following cases will be handled by ashr_4
864 * rcx(offset of rsi) rax(offset of rdi) relative offset corresponding case
865 * n(4~15) n - 4 4((16 - (n -4) + n)%16 ashr_4
867 * Based on above operation , start from (%r9 + rsi) to the left of this cache bank, there is no null byte
871 xor %ecx, %ecx /* rcx = 0: byte offset used to index both (%rsi) and (%rdi) below */
872 #ifdef USE_AS_STRNCPY
874 jbe LABEL(unaligned_exit) /* strncpy builds only; the compare feeding this branch is not visible here */
/* Copy loop for a relative source/destination offset of 4 bytes.  Two
   load/palignr/store rounds are visible per pass before the jump back to
   this label.  The lines omitted from this excerpt (the embedded numbering
   skips) presumably hold the NUL test and the offset/count updates.  */
878 LABEL(ashr_4_use_ssse3):
879 movdqa 16(%rsi, %rcx), %xmm3 /* aligned load of the 16 source bytes at 16(%rsi,%rcx) */
883 jnz LABEL(unaligned_exit) /* presumably taken when a NUL byte was detected */
884 #ifdef USE_AS_STRNCPY
886 jbe LABEL(strncpy_truncation_unaligned)
889 palignr $4, (%rsi, %rcx), %xmm3 /* xmm3 = low 16 bytes of ((xmm3 : 16 bytes at (%rsi,%rcx)) >> 4 bytes) */
890 movdqa %xmm3, (%rdi, %rcx) /* aligned 16-byte store to the destination */
893 #ifdef USE_AS_STRNCPY
895 jbe LABEL(unaligned_exit)
898 movdqa 16(%rsi, %rcx), %xmm3 /* second round: same sequence as above */
902 jnz LABEL(unaligned_exit)
903 #ifdef USE_AS_STRNCPY
905 jbe LABEL(strncpy_truncation_unaligned)
908 palignr $4, (%rsi, %rcx), %xmm3
909 movdqa %xmm3, (%rdi, %rcx)
912 #ifdef USE_AS_STRNCPY
914 jbe LABEL(unaligned_exit)
916 jmp LABEL(ashr_4_use_ssse3)
920 * The following cases will be handled by ashr_3
921 * rcx(offset of rsi) rax(offset of rdi) relative offset corresponding case
922 * n(3~15) n - 3 3((16 - (n -3) + n)%16 ashr_3
924 * Based on above operation , start from (%r9 + rsi) to the left of this cache bank, there is no null byte
928 xor %ecx, %ecx /* rcx = 0: byte offset used to index both (%rsi) and (%rdi) below */
929 #ifdef USE_AS_STRNCPY
931 jbe LABEL(unaligned_exit) /* strncpy builds only; the compare feeding this branch is not visible here */
/* Copy loop for a relative source/destination offset of 3 bytes.  Two
   load/palignr/store rounds are visible per pass before the jump back to
   this label.  The lines omitted from this excerpt (the embedded numbering
   skips) presumably hold the NUL test and the offset/count updates.  */
935 LABEL(ashr_3_use_ssse3):
936 movdqa 16(%rsi, %rcx), %xmm3 /* aligned load of the 16 source bytes at 16(%rsi,%rcx) */
940 jnz LABEL(unaligned_exit) /* presumably taken when a NUL byte was detected */
941 #ifdef USE_AS_STRNCPY
943 jbe LABEL(strncpy_truncation_unaligned)
946 palignr $3, (%rsi, %rcx), %xmm3 /* xmm3 = low 16 bytes of ((xmm3 : 16 bytes at (%rsi,%rcx)) >> 3 bytes) */
947 movdqa %xmm3, (%rdi, %rcx) /* aligned 16-byte store to the destination */
950 #ifdef USE_AS_STRNCPY
952 jbe LABEL(unaligned_exit)
955 movdqa 16(%rsi, %rcx), %xmm3 /* second round: same sequence as above */
959 jnz LABEL(unaligned_exit)
960 #ifdef USE_AS_STRNCPY
962 jbe LABEL(strncpy_truncation_unaligned)
965 palignr $3, (%rsi, %rcx), %xmm3
966 movdqa %xmm3, (%rdi, %rcx)
969 #ifdef USE_AS_STRNCPY
971 jbe LABEL(unaligned_exit)
973 jmp LABEL(ashr_3_use_ssse3)
977 * The following cases will be handled by ashr_2
978 * rcx(offset of rsi) rax(offset of rdi) relative offset corresponding case
979 * n(2~15) n - 2 2((16 - (n -2) + n)%16 ashr_2
981 * Based on above operation , start from (%r9 + rsi) to the left of this cache bank, there is no null byte
985 xor %ecx, %ecx /* rcx = 0: byte offset used to index both (%rsi) and (%rdi) below */
986 #ifdef USE_AS_STRNCPY
988 jbe LABEL(unaligned_exit) /* strncpy builds only; the compare feeding this branch is not visible here */
/* Copy loop for a relative source/destination offset of 2 bytes.  Two
   load/palignr/store rounds are visible per pass before the jump back to
   this label.  The lines omitted from this excerpt (the embedded numbering
   skips) presumably hold the flag-setting test and the offset/count
   updates.  */
992 LABEL(ashr_2_use_ssse3):
993 movdqa 16(%rsi, %rcx), %xmm3 /* aligned load of the 16 source bytes at 16(%rsi,%rcx) */
997 jnz LABEL(unaligned_exit) /* presumably taken when a NUL byte was detected */
998 #ifdef USE_AS_STRNCPY
1000 jbe LABEL(strncpy_truncation_unaligned)
1003 palignr $2, (%rsi, %rcx), %xmm3 /* xmm3 = low 16 bytes of ((xmm3 : 16 bytes at (%rsi,%rcx)) >> 2 bytes) */
1004 movdqa %xmm3, (%rdi, %rcx) /* aligned 16-byte store to the destination */
1007 #ifdef USE_AS_STRNCPY
1009 jbe LABEL(unaligned_exit)
1012 movdqa 16(%rsi, %rcx), %xmm3 /* second round: same sequence as above */
1013 pcmpeqb %xmm3, %xmm0 /* xmm0 bytes become 0xff where they equal the xmm3 byte (a NUL test if xmm0 was zero on entry) */
1014 pmovmskb %xmm0, %edx /* edx = one bit per byte of the comparison result */
1016 jnz LABEL(unaligned_exit)
1017 #ifdef USE_AS_STRNCPY
1019 jbe LABEL(strncpy_truncation_unaligned)
1022 palignr $2, (%rsi, %rcx), %xmm3
1023 movdqa %xmm3, (%rdi, %rcx)
1026 #ifdef USE_AS_STRNCPY
1028 jbe LABEL(unaligned_exit)
1030 jmp LABEL(ashr_2_use_ssse3)
1034 * The following cases will be handled by ashr_1
1035 * rcx(offset of rsi) rax(offset of rdi) relative offset corresponding case
1036 * n(1~15) n - 1 1 ((16 - (n -1) + n)%16 ashr_1
1038 * Based on above operation , start from (%r9 + rsi) to the left of this cache bank, there is no null byte
1042 xor %ecx, %ecx /* rcx = 0: byte offset used to index both (%rsi) and (%rdi) below */
1043 #ifdef USE_AS_STRNCPY
1045 jbe LABEL(unaligned_exit) /* strncpy builds only; the compare feeding this branch is not visible here */
/* Copy loop for a relative source/destination offset of 1 byte.  Two
   load/compare/palignr/store rounds are visible per pass before the jump
   back to this label.  The lines omitted from this excerpt (the embedded
   numbering skips) presumably hold the flag-setting test and the
   offset/count updates.  */
1049 LABEL(ashr_1_use_ssse3):
1050 movdqa 16(%rsi, %rcx), %xmm3 /* aligned load of the 16 source bytes at 16(%rsi,%rcx) */
1051 pcmpeqb %xmm3, %xmm0 /* xmm0 bytes become 0xff where they equal the xmm3 byte (a NUL test if xmm0 was zero on entry) */
1052 pmovmskb %xmm0, %edx /* edx = one bit per byte of the comparison result */
1054 jnz LABEL(unaligned_exit) /* presumably taken when a NUL byte was detected */
1055 #ifdef USE_AS_STRNCPY
1057 jbe LABEL(strncpy_truncation_unaligned)
1060 palignr $1, (%rsi, %rcx), %xmm3 /* xmm3 = low 16 bytes of ((xmm3 : 16 bytes at (%rsi,%rcx)) >> 1 byte) */
1061 movdqa %xmm3, (%rdi, %rcx) /* aligned 16-byte store to the destination */
1063 #ifdef USE_AS_STRNCPY
1065 jbe LABEL(unaligned_exit)
1068 movdqa 16(%rsi, %rcx), %xmm3 /* second round: same sequence as above */
1069 pcmpeqb %xmm3, %xmm0
1070 pmovmskb %xmm0, %edx
1072 jnz LABEL(unaligned_exit)
1073 #ifdef USE_AS_STRNCPY
1075 jbe LABEL(strncpy_truncation_unaligned)
1077 palignr $1, (%rsi, %rcx), %xmm3
1078 movdqa %xmm3, (%rdi, %rcx)
1081 #ifdef USE_AS_STRNCPY
1083 jbe LABEL(unaligned_exit)
1085 jmp LABEL(ashr_1_use_ssse3)
/* Common exit paths of the copy loops.  No branch is visible between
   unaligned_exit, aligned_exit and aligned_16bytes, so they appear to fall
   through into one another; lines omitted from this excerpt may change
   that.  The sequence ends by turning the NUL-position mask in rdx into a
   tail_table handler address.  */
1090 LABEL(unaligned_exit):
1091 add %r9, %rsi /* r9 stores original offset of rsi*/
1094 shl %cl, %edx /* after shl, calculate the exact number to be filled*/
1097 LABEL(aligned_exit):
1098 add %rcx, %rdi /* advance rdi by the offset accumulated in rcx */
1100 add %rcx, %rsi /* advance rsi by the same offset */
1101 LABEL(aligned_16bytes):
1102 #ifdef USE_AS_STRNCPY
1107 ja LABEL(strncpy_tail) /* strncpy builds only; the compare feeding this branch is not visible here */
1109 LABEL(strncpy_tail):
1111 bsf %rdx, %rcx /* rcx = index of the lowest set bit in rdx */
1112 lea LABEL(tail_table)(%rip), %r11 /* r11 = base address of the tail jump table */
1113 movslq (%r11, %rcx,4), %rcx /* rcx = sign-extended 32-bit table entry number rcx */
1114 lea (%r11, %rcx), %rcx /* rcx = table base + entry = handler address; the jump itself is not visible here */
1117 #ifdef USE_AS_STRNCPY
/* strncpy-only truncation targets, reached by the jbe branches of the copy
   loops.  The instructions that follow each label, including whatever
   computes the table index in rcx, are omitted from this excerpt.  The
   visible part dispatches through tail_table.  */
1119 LABEL(less32bytes_strncpy_truncation):
1121 LABEL(strncpy_truncation_unaligned):
1123 LABEL(strncpy_truncation_aligned):
1128 lea LABEL(tail_table)(%rip), %r11 /* r11 = base address of the tail jump table */
1129 movslq (%r11, %rcx,4), %rcx /* rcx = sign-extended 32-bit table entry number rcx */
1130 lea (%r11, %rcx), %rcx /* rcx = table base + entry = handler address; the jump itself is not visible here */
1133 LABEL(strncpy_exitz):
1138 #ifdef USE_AS_STRNCPY
/* strncpy-only fill code, entered from the jnz branches that target
   strncpy_fill_tail.  Most of its instructions are omitted from this
   excerpt.  The visible part is a byte store inside a loop on
   strncpy_fill_less_7.  */
1140 LABEL(strncpy_fill_tail):
1147 jz LABEL(strncpy_fill_less_8) /* the test feeding this branch is not visible here */
1150 LABEL(strncpy_fill_less_8):
1153 jz LABEL(strncpy_fill_return) /* presumably nothing left to fill -- confirm against the full source */
1154 LABEL(strncpy_fill_less_7):
1156 mov %al, (%rdi, %rcx) /* store the byte in al at rdi + rcx (al is presumably zero here -- confirm) */
1157 jnz LABEL(strncpy_fill_less_7) /* repeat; the counter update that sets the flags is not visible here */
1158 LABEL(strncpy_fill_return):
1159 #ifdef USE_AS_STPCPY
1170 #ifdef USE_AS_STPCPY
1173 #ifdef USE_AS_STRNCPY
1176 jnz LABEL(strncpy_fill_tail)
1177 #ifdef USE_AS_STPCPY
1187 #ifdef USE_AS_STPCPY
1190 #ifdef USE_AS_STRNCPY
1193 jnz LABEL(strncpy_fill_tail)
1194 #ifdef USE_AS_STPCPY
1206 #ifdef USE_AS_STPCPY
1209 #ifdef USE_AS_STRNCPY
1212 jnz LABEL(strncpy_fill_tail)
1213 #ifdef USE_AS_STPCPY
1223 #ifdef USE_AS_STPCPY
1226 #ifdef USE_AS_STRNCPY
1229 jnz LABEL(strncpy_fill_tail)
1230 #ifdef USE_AS_STPCPY
1242 #ifdef USE_AS_STPCPY
1245 #ifdef USE_AS_STRNCPY
1248 jnz LABEL(strncpy_fill_tail)
1249 #ifdef USE_AS_STPCPY
1261 #ifdef USE_AS_STPCPY
1264 #ifdef USE_AS_STRNCPY
1267 jnz LABEL(strncpy_fill_tail)
1268 #ifdef USE_AS_STPCPY
1280 #ifdef USE_AS_STPCPY
1283 #ifdef USE_AS_STRNCPY
1286 jnz LABEL(strncpy_fill_tail)
1287 #ifdef USE_AS_STPCPY
1298 #ifdef USE_AS_STPCPY
1301 #ifdef USE_AS_STRNCPY
1304 jnz LABEL(strncpy_fill_tail)
1305 #ifdef USE_AS_STPCPY
1319 #ifdef USE_AS_STPCPY
1322 #ifdef USE_AS_STRNCPY
1325 jnz LABEL(strncpy_fill_tail)
1326 #ifdef USE_AS_STPCPY
1339 #ifdef USE_AS_STPCPY
1342 #ifdef USE_AS_STRNCPY
1345 jnz LABEL(strncpy_fill_tail)
1346 #ifdef USE_AS_STPCPY
1359 #ifdef USE_AS_STPCPY
1362 #ifdef USE_AS_STRNCPY
1365 jnz LABEL(strncpy_fill_tail)
1366 #ifdef USE_AS_STPCPY
1378 #ifdef USE_AS_STPCPY
1381 #ifdef USE_AS_STRNCPY
1384 jnz LABEL(strncpy_fill_tail)
1385 #ifdef USE_AS_STPCPY
1397 #ifdef USE_AS_STPCPY
1400 #ifdef USE_AS_STRNCPY
1403 jnz LABEL(strncpy_fill_tail)
1404 #ifdef USE_AS_STPCPY
1417 #ifdef USE_AS_STPCPY
1420 #ifdef USE_AS_STRNCPY
1423 jnz LABEL(strncpy_fill_tail)
1424 #ifdef USE_AS_STPCPY
1437 #ifdef USE_AS_STPCPY
1440 #ifdef USE_AS_STRNCPY
1443 jnz LABEL(strncpy_fill_tail)
1444 #ifdef USE_AS_STPCPY
1456 #ifdef USE_AS_STPCPY
1459 #ifdef USE_AS_STRNCPY
1462 jnz LABEL(strncpy_fill_tail)
1463 #ifdef USE_AS_STPCPY
1479 #ifdef USE_AS_STPCPY
1482 #ifdef USE_AS_STRNCPY
1485 jnz LABEL(strncpy_fill_tail)
1486 #ifdef USE_AS_STPCPY
1500 #ifdef USE_AS_STPCPY
1503 #ifdef USE_AS_STRNCPY
1506 jnz LABEL(strncpy_fill_tail)
1507 #ifdef USE_AS_STPCPY
1522 #ifdef USE_AS_STPCPY
1525 #ifdef USE_AS_STRNCPY
1528 jnz LABEL(strncpy_fill_tail)
1529 #ifdef USE_AS_STPCPY
1544 #ifdef USE_AS_STPCPY
1547 #ifdef USE_AS_STRNCPY
1550 jnz LABEL(strncpy_fill_tail)
1551 #ifdef USE_AS_STPCPY
1565 #ifdef USE_AS_STPCPY
1568 #ifdef USE_AS_STRNCPY
1571 jnz LABEL(strncpy_fill_tail)
1572 #ifdef USE_AS_STPCPY
1586 #ifdef USE_AS_STPCPY
1589 #ifdef USE_AS_STRNCPY
1592 jnz LABEL(strncpy_fill_tail)
1593 #ifdef USE_AS_STPCPY
1608 #ifdef USE_AS_STPCPY
1611 #ifdef USE_AS_STRNCPY
1614 jnz LABEL(strncpy_fill_tail)
1615 #ifdef USE_AS_STPCPY
1630 #ifdef USE_AS_STPCPY
1633 #ifdef USE_AS_STRNCPY
1636 jnz LABEL(strncpy_fill_tail)
1637 #ifdef USE_AS_STPCPY
1655 #ifdef USE_AS_STPCPY
1658 #ifdef USE_AS_STRNCPY
1661 jnz LABEL(strncpy_fill_tail)
1662 #ifdef USE_AS_STPCPY
1679 #ifdef USE_AS_STPCPY
1682 #ifdef USE_AS_STRNCPY
1685 jnz LABEL(strncpy_fill_tail)
1686 #ifdef USE_AS_STPCPY
1703 #ifdef USE_AS_STPCPY
1706 #ifdef USE_AS_STRNCPY
1709 jnz LABEL(strncpy_fill_tail)
1710 #ifdef USE_AS_STPCPY
1727 #ifdef USE_AS_STPCPY
1730 #ifdef USE_AS_STRNCPY
1733 jnz LABEL(strncpy_fill_tail)
1734 #ifdef USE_AS_STPCPY
1750 #ifdef USE_AS_STPCPY
1753 #ifdef USE_AS_STRNCPY
1756 jnz LABEL(strncpy_fill_tail)
1757 #ifdef USE_AS_STPCPY
1775 #ifdef USE_AS_STPCPY
1778 #ifdef USE_AS_STRNCPY
1781 jnz LABEL(strncpy_fill_tail)
1782 #ifdef USE_AS_STPCPY
1801 #ifdef USE_AS_STPCPY
1804 #ifdef USE_AS_STRNCPY
1807 jnz LABEL(strncpy_fill_tail)
1808 #ifdef USE_AS_STPCPY
1825 #ifdef USE_AS_STPCPY
1828 #ifdef USE_AS_STRNCPY
1831 jnz LABEL(strncpy_fill_tail)
1832 #ifdef USE_AS_STPCPY
1839 .size STRCPY_SSSE3, .-STRCPY_SSSE3
1842 .section .rodata.ssse3,"a",@progbits
/* Jump table of 32 entries: entry N holds the 32-bit offset of tail_N
   relative to tail_table.  The code that uses it loads an entry with
   movslq (%r11, %rcx,4) and adds the table base back to obtain the handler
   address.  The tail_table label line itself is not visible in this
   excerpt.  */
1844 .int LABEL(tail_0) - LABEL(tail_table)
1845 .int LABEL(tail_1) - LABEL(tail_table)
1846 .int LABEL(tail_2) - LABEL(tail_table)
1847 .int LABEL(tail_3) - LABEL(tail_table)
1848 .int LABEL(tail_4) - LABEL(tail_table)
1849 .int LABEL(tail_5) - LABEL(tail_table)
1850 .int LABEL(tail_6) - LABEL(tail_table)
1851 .int LABEL(tail_7) - LABEL(tail_table)
1852 .int LABEL(tail_8) - LABEL(tail_table)
1853 .int LABEL(tail_9) - LABEL(tail_table)
1854 .int LABEL(tail_10) - LABEL(tail_table)
1855 .int LABEL(tail_11) - LABEL(tail_table)
1856 .int LABEL(tail_12) - LABEL(tail_table)
1857 .int LABEL(tail_13) - LABEL(tail_table)
1858 .int LABEL(tail_14) - LABEL(tail_table)
1859 .int LABEL(tail_15) - LABEL(tail_table)
1860 .int LABEL(tail_16) - LABEL(tail_table)
1861 .int LABEL(tail_17) - LABEL(tail_table)
1862 .int LABEL(tail_18) - LABEL(tail_table)
1863 .int LABEL(tail_19) - LABEL(tail_table)
1864 .int LABEL(tail_20) - LABEL(tail_table)
1865 .int LABEL(tail_21) - LABEL(tail_table)
1866 .int LABEL(tail_22) - LABEL(tail_table)
1867 .int LABEL(tail_23) - LABEL(tail_table)
1868 .int LABEL(tail_24) - LABEL(tail_table)
1869 .int LABEL(tail_25) - LABEL(tail_table)
1870 .int LABEL(tail_26) - LABEL(tail_table)
1871 .int LABEL(tail_27) - LABEL(tail_table)
1872 .int LABEL(tail_28) - LABEL(tail_table)
1873 .int LABEL(tail_29) - LABEL(tail_table)
1874 .int LABEL(tail_30) - LABEL(tail_table)
1875 .int LABEL(tail_31) - LABEL(tail_table)
/* Jump table of 16 entries: entry N holds the 32-bit offset of ashr_N
   relative to unaligned_table.  The dispatch code indexes it with the value
   in ecx after `and $15, %ecx', loads the entry with movslq and adds the
   table base back to obtain the handler address.  */
1878 LABEL(unaligned_table):
1879 .int LABEL(ashr_0) - LABEL(unaligned_table)
1880 .int LABEL(ashr_1) - LABEL(unaligned_table)
1881 .int LABEL(ashr_2) - LABEL(unaligned_table)
1882 .int LABEL(ashr_3) - LABEL(unaligned_table)
1883 .int LABEL(ashr_4) - LABEL(unaligned_table)
1884 .int LABEL(ashr_5) - LABEL(unaligned_table)
1885 .int LABEL(ashr_6) - LABEL(unaligned_table)
1886 .int LABEL(ashr_7) - LABEL(unaligned_table)
1887 .int LABEL(ashr_8) - LABEL(unaligned_table)
1888 .int LABEL(ashr_9) - LABEL(unaligned_table)
1889 .int LABEL(ashr_10) - LABEL(unaligned_table)
1890 .int LABEL(ashr_11) - LABEL(unaligned_table)
1891 .int LABEL(ashr_12) - LABEL(unaligned_table)
1892 .int LABEL(ashr_13) - LABEL(unaligned_table)
1893 .int LABEL(ashr_14) - LABEL(unaligned_table)
1894 .int LABEL(ashr_15) - LABEL(unaligned_table)
/* Redefine ENTRY and END so that the function they wrap is emitted under
   the name STRCPY_SSE2; the macro argument is not used in the visible
   lines.  The continuation of ENTRY after the cfi_startproc line is not
   visible in this excerpt.  */
1897 # define ENTRY(name) \
1898 .type STRCPY_SSE2, @function; \
1899 STRCPY_SSE2: cfi_startproc; \
1902 # define END(name) \
1903 cfi_endproc; .size STRCPY_SSE2, .-STRCPY_SSE2
1904 # undef libc_hidden_builtin_def
1905 /* It doesn't make sense to send libc-internal strcpy calls through a PLT.
1906 The speedup we get from using SSSE3 instruction is likely eaten away
1907 by the indirect call in the PLT. */
1908 # define libc_hidden_builtin_def(name) \
1909 .globl __GI_STRCPY; __GI_STRCPY = STRCPY_SSE2
/* Same direct aliasing to STRCPY_SSE2 for the __GI___STRCPY name.  */
1910 # undef libc_hidden_def
1911 # define libc_hidden_def(name) \
1912 .globl __GI___STRCPY; __GI___STRCPY = STRCPY_SSE2
/* Builds without USE_AS_STRNCPY include the generic ../strcpy.S here.
   Presumably that file is written with ENTRY/END and therefore assembles as
   STRCPY_SSE2 -- confirm against that file.  The closing #endif is not
   visible in this excerpt.  */
1915 #ifndef USE_AS_STRNCPY
1916 #include "../strcpy.S"