2 Copyright (C) 2011-2024 Free Software Foundation, Inc.
3 This file is part of the GNU C Library.
5 The GNU C Library is free software; you can redistribute it and/or
6 modify it under the terms of the GNU Lesser General Public
7 License as published by the Free Software Foundation; either
8 version 2.1 of the License, or (at your option) any later version.
10 The GNU C Library is distributed in the hope that it will be useful,
11 but WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 Lesser General Public License for more details.
15 You should have received a copy of the GNU Lesser General Public
16 License along with the GNU C Library; if not, see
17 <https://www.gnu.org/licenses/>. */
/* Unwind annotation paired with a 4-byte push: grow the CFA offset by 4,
   then record REG as saved at relative offset 0.  The order matters
   because the relative offset is taken against the adjusted CFA.  */
# define CFI_PUSH(REG) cfi_adjust_cfa_offset (4); cfi_rel_offset (REG, 0)
/* Unwind annotation paired with a 4-byte pop: shrink the CFA offset by 4
   and mark REG as restored, undoing the cfi_rel_offset issued by
   CFI_PUSH.  The last line deliberately has no trailing backslash so the
   definition ends here and cannot absorb the directive that follows.  */
# define CFI_POP(REG) \
	cfi_adjust_cfa_offset (-4); \
	cfi_restore (REG)
/* Push REG on the stack and emit the matching unwind annotation.  */
# define PUSH(REG) \
	pushl REG; \
	CFI_PUSH (REG)
/* Pop REG from the stack and emit the matching unwind annotation.  */
# define POP(REG) \
	popl REG; \
	CFI_POP (REG)
/* Exit sequence: pop %edi and return.  The CFI_PUSH placed after the ret
   emits no instruction; it re-declares %edi as pushed so the unwind
   annotations for code assembled after this point match the state before
   the POP.  */
# define RETURN \
	POP (%edi); \
	ret; \
	CFI_PUSH (%edi)
40 ENTRY (__wcscpy_ssse3)
69 jnz L(CopyFrom1To16Bytes)
90 movaps 16(%ecx), %xmm2
97 jnz L(CopyFrom1To16Bytes)
99 movaps 16(%ecx, %esi), %xmm3
100 movaps %xmm2, (%edx, %esi)
106 jnz L(CopyFrom1To16Bytes)
108 movaps 16(%ecx, %esi), %xmm4
109 movaps %xmm3, (%edx, %esi)
115 jnz L(CopyFrom1To16Bytes)
117 movaps 16(%ecx, %esi), %xmm1
118 movaps %xmm4, (%edx, %esi)
124 jnz L(CopyFrom1To16Bytes)
126 movaps 16(%ecx, %esi), %xmm2
127 movaps %xmm1, (%edx, %esi)
133 jnz L(CopyFrom1To16Bytes)
135 movaps 16(%ecx, %esi), %xmm3
136 movaps %xmm2, (%edx, %esi)
142 jnz L(CopyFrom1To16Bytes)
144 movaps %xmm3, (%edx, %esi)
146 lea 16(%ecx, %esi), %ecx
155 movaps 32(%ecx), %xmm3
157 movaps 16(%ecx), %xmm5
159 movaps 48(%ecx), %xmm7
169 jnz L(Aligned64Leave)
170 movaps %xmm4, -64(%edx)
171 movaps %xmm5, -48(%edx)
172 movaps %xmm6, -32(%edx)
173 movaps %xmm7, -16(%edx)
180 jnz L(CopyFrom1To16Bytes)
184 movaps %xmm4, -64(%edx)
187 jnz L(CopyFrom1To16Bytes)
191 movaps %xmm5, -48(%edx)
194 jnz L(CopyFrom1To16Bytes)
196 movaps %xmm6, -32(%edx)
201 jnz L(CopyFrom1To16Bytes)
204 movaps %xmm7, -16(%edx)
209 movaps -4(%ecx), %xmm1
210 movaps 12(%ecx), %xmm2
219 palignr $4, %xmm1, %xmm2
221 movaps 28(%ecx), %xmm2
232 palignr $4, %xmm3, %xmm2
234 movaps 28(%ecx), %xmm2
245 palignr $4, %xmm1, %xmm2
247 movaps 28(%ecx), %xmm2
257 palignr $4, %xmm3, %xmm2
268 movaps -4(%ecx), %xmm1
271 movaps 12(%ecx), %xmm2
272 movaps 28(%ecx), %xmm3
274 movaps 44(%ecx), %xmm4
276 movaps 60(%ecx), %xmm5
283 palignr $4, %xmm4, %xmm5
285 palignr $4, %xmm3, %xmm4
288 palignr $4, %xmm2, %xmm3
290 palignr $4, %xmm1, %xmm2
292 movaps %xmm5, 48(%edx)
293 movaps %xmm4, 32(%edx)
294 movaps %xmm3, 16(%edx)
320 movaps -8(%ecx), %xmm1
321 movaps 8(%ecx), %xmm2
330 palignr $8, %xmm1, %xmm2
332 movaps 24(%ecx), %xmm2
343 palignr $8, %xmm3, %xmm2
345 movaps 24(%ecx), %xmm2
356 palignr $8, %xmm1, %xmm2
358 movaps 24(%ecx), %xmm2
368 palignr $8, %xmm3, %xmm2
379 movaps -8(%ecx), %xmm1
382 movaps 8(%ecx), %xmm2
383 movaps 24(%ecx), %xmm3
385 movaps 40(%ecx), %xmm4
387 movaps 56(%ecx), %xmm5
394 palignr $8, %xmm4, %xmm5
396 palignr $8, %xmm3, %xmm4
399 palignr $8, %xmm2, %xmm3
401 palignr $8, %xmm1, %xmm2
403 movaps %xmm5, 48(%edx)
404 movaps %xmm4, 32(%edx)
405 movaps %xmm3, 16(%edx)
429 movaps -12(%ecx), %xmm1
430 movaps 4(%ecx), %xmm2
439 palignr $12, %xmm1, %xmm2
441 movaps 20(%ecx), %xmm2
452 palignr $12, %xmm3, %xmm2
454 movaps 20(%ecx), %xmm2
465 palignr $12, %xmm1, %xmm2
467 movaps 20(%ecx), %xmm2
477 palignr $12, %xmm3, %xmm2
488 movaps -12(%ecx), %xmm1
491 movaps 4(%ecx), %xmm2
492 movaps 20(%ecx), %xmm3
494 movaps 36(%ecx), %xmm4
496 movaps 52(%ecx), %xmm5
503 palignr $12, %xmm4, %xmm5
505 palignr $12, %xmm3, %xmm4
508 palignr $12, %xmm2, %xmm3
510 palignr $12, %xmm1, %xmm2
512 movaps %xmm5, 48(%edx)
513 movaps %xmm4, 32(%edx)
514 movaps %xmm3, 16(%edx)
517 jmp L(Shl12LoopStart)
525 L(CopyFrom1To16Bytes):