2 Copyright (C) 2011-2024 Free Software Foundation, Inc.
3 This file is part of the GNU C Library.
5 The GNU C Library is free software; you can redistribute it and/or
6 modify it under the terms of the GNU Lesser General Public
7 License as published by the Free Software Foundation; either
8 version 2.1 of the License, or (at your option) any later version.
10 The GNU C Library is distributed in the hope that it will be useful,
11 but WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 Lesser General Public License for more details.
15 You should have received a copy of the GNU Lesser General Public
16 License along with the GNU C Library; if not, see
17 <https://www.gnu.org/licenses/>. */
19 #include <isa-level.h>
21 /* MINIMUM_X86_ISA_LEVEL <= 4 because there are no V3/V4
22 implementations, so we need this to build for ISA V3/V4 builds. */
/* Everything that follows is assembled only when the preprocessor
   expression ISA_SHOULD_BUILD (4) evaluates to true.  */
24 #if ISA_SHOULD_BUILD (4)
/* WCSCPY is the symbol name used for this implementation; here it
   expands to __wcscpy_ssse3.  */
27 # define WCSCPY __wcscpy_ssse3
/* Emit the code into its own ELF section, .text.ssse3, flagged
   allocatable ("a") and executable ("x"), of type @progbits.  */
32 .section .text.ssse3,"ax",@progbits
62 jnz L(CopyFrom1To16Bytes)
73 /* case: rcx_offset == rdx_offset */
85 movaps 16(%rcx), %xmm2
92 jnz L(CopyFrom1To16Bytes)
94 movaps 16(%rcx, %rsi), %xmm3
95 movaps %xmm2, (%rdx, %rsi)
101 jnz L(CopyFrom1To16Bytes)
103 movaps 16(%rcx, %rsi), %xmm4
104 movaps %xmm3, (%rdx, %rsi)
110 jnz L(CopyFrom1To16Bytes)
112 movaps 16(%rcx, %rsi), %xmm1
113 movaps %xmm4, (%rdx, %rsi)
119 jnz L(CopyFrom1To16Bytes)
121 movaps 16(%rcx, %rsi), %xmm2
122 movaps %xmm1, (%rdx, %rsi)
128 jnz L(CopyFrom1To16Bytes)
130 movaps 16(%rcx, %rsi), %xmm3
131 movaps %xmm2, (%rdx, %rsi)
137 jnz L(CopyFrom1To16Bytes)
139 movaps %xmm3, (%rdx, %rsi)
141 lea 16(%rcx, %rsi), %rcx
152 movaps 16(%rcx), %xmm5
153 movaps 32(%rcx), %xmm3
155 movaps 48(%rcx), %xmm7
164 jnz L(Aligned64Leave)
165 movaps %xmm4, -64(%rdx)
166 movaps %xmm5, -48(%rdx)
167 movaps %xmm6, -32(%rdx)
168 movaps %xmm7, -16(%rdx)
175 jnz L(CopyFrom1To16Bytes)
180 movaps %xmm4, -64(%rdx)
183 jnz L(CopyFrom1To16Bytes)
188 movaps %xmm5, -48(%rdx)
191 jnz L(CopyFrom1To16Bytes)
193 movaps %xmm6, -32(%rdx)
199 jnz L(CopyFrom1To16Bytes)
202 movaps %xmm7, -16(%rdx)
207 movaps -4(%rcx), %xmm1
208 movaps 12(%rcx), %xmm2
217 palignr $4, %xmm1, %xmm2
219 movaps 28(%rcx), %xmm2
230 palignr $4, %xmm3, %xmm2
232 movaps 28(%rcx), %xmm2
243 palignr $4, %xmm1, %xmm2
245 movaps 28(%rcx), %xmm2
255 palignr $4, %xmm3, %xmm2
266 movaps -4(%rcx), %xmm1
270 movaps 12(%rcx), %xmm2
271 movaps 28(%rcx), %xmm3
273 movaps 44(%rcx), %xmm4
275 movaps 60(%rcx), %xmm5
282 palignr $4, %xmm4, %xmm5
283 palignr $4, %xmm3, %xmm4
287 palignr $4, %xmm2, %xmm3
289 palignr $4, %xmm1, %xmm2
291 movaps %xmm5, 48(%rdx)
292 movaps %xmm4, 32(%rdx)
293 movaps %xmm3, 16(%rdx)
299 movdqu -4(%rcx), %xmm1
301 movdqu %xmm1, -4(%rdx)
302 jmp L(CopyFrom1To16Bytes)
306 movaps -8(%rcx), %xmm1
307 movaps 8(%rcx), %xmm2
316 palignr $8, %xmm1, %xmm2
318 movaps 24(%rcx), %xmm2
329 palignr $8, %xmm3, %xmm2
331 movaps 24(%rcx), %xmm2
342 palignr $8, %xmm1, %xmm2
344 movaps 24(%rcx), %xmm2
354 palignr $8, %xmm3, %xmm2
365 movaps -8(%rcx), %xmm1
369 movaps 8(%rcx), %xmm2
370 movaps 24(%rcx), %xmm3
372 movaps 40(%rcx), %xmm4
374 movaps 56(%rcx), %xmm5
381 palignr $8, %xmm4, %xmm5
382 palignr $8, %xmm3, %xmm4
386 palignr $8, %xmm2, %xmm3
388 palignr $8, %xmm1, %xmm2
390 movaps %xmm5, 48(%rdx)
391 movaps %xmm4, 32(%rdx)
392 movaps %xmm3, 16(%rdx)
401 jmp L(CopyFrom1To16Bytes)
405 movaps -12(%rcx), %xmm1
406 movaps 4(%rcx), %xmm2
415 palignr $12, %xmm1, %xmm2
417 movaps 20(%rcx), %xmm2
428 palignr $12, %xmm3, %xmm2
430 movaps 20(%rcx), %xmm2
441 palignr $12, %xmm1, %xmm2
443 movaps 20(%rcx), %xmm2
453 palignr $12, %xmm3, %xmm2
464 movaps -12(%rcx), %xmm1
468 movaps 4(%rcx), %xmm2
469 movaps 20(%rcx), %xmm3
471 movaps 36(%rcx), %xmm4
473 movaps 52(%rcx), %xmm5
480 palignr $12, %xmm4, %xmm5
481 palignr $12, %xmm3, %xmm4
484 palignr $12, %xmm2, %xmm3
486 palignr $12, %xmm1, %xmm2
488 movaps %xmm5, 48(%rdx)
489 movaps %xmm4, 32(%rdx)
490 movaps %xmm3, 16(%rdx)
493 jmp L(Shl12LoopStart)
499 jmp L(CopyFrom1To16Bytes)
502 L(CopyFrom1To16Bytes):