/* Copyright (C) 2012-2016 Free Software Foundation, Inc.

   This file is part of the GNU C Library.

   The GNU C Library is free software; you can redistribute it and/or
   modify it under the terms of the GNU Lesser General Public
   License as published by the Free Software Foundation; either
   version 2.1 of the License, or (at your option) any later version.

   The GNU C Library is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   Lesser General Public License for more details.

   You should have received a copy of the GNU Lesser General Public
   License along with the GNU C Library.  If not, see
   <http://www.gnu.org/licenses/>.  */
/* Assumptions: ARMv8-a, AArch64, unaligned accesses permitted.  */
/* NOTE(review): this is a non-contiguous excerpt of the original file.
   The stray number at the start of each line is the original file's line
   number, left over from extraction; the jumps between them show that many
   instructions, labels (e.g. L(set96), L(no_zva)) and branches are missing
   from this view.  Code below is left byte-identical; comments only.
   Register names (dstin, dstend, dst, count, valw, tmp1/tmp1w, tmp2/tmp2w,
   zva_len/zva_lenw, q0) are presumably #define'd to x/w/v registers earlier
   in the file — not visible here; confirm against the full source.  */

/* void *memset (void *dstin, int val, size_t count)
   Entry, per AAPCS64: x0 = dstin, w1 = val, x2 = count.
   Strategy (as far as visible): duplicate the byte across q0, then use
   overlapping stores from both the start and the end of the buffer so no
   size needs an exact tail loop; large sizes use DC ZVA when profitable.  */
40 ENTRY_ALIGN (__memset, 6)

/* dstend = one-past-the-end pointer; stores relative to dstend with
   negative offsets cover the tail without a separate remainder loop.  */
46	add	dstend, dstin, count

54	/* Set 0..15 bytes.  */
/* Small sizes: a forward store from the start plus a backward store from
   dstend overlap in the middle, covering the whole range branch-free.
   (Intermediate size checks/branches elided in this excerpt.)  */
62	str	valw, [dstend, -4]
67	strh	valw, [dstend, -2]

70	/* Set 17..96 bytes.  */
/* Bit 6 of count distinguishes the 64..96-byte path from 17..63.  */
73	tbnz	count, 6, L(set96)

81	/* Set 64..96 bytes.  Write 64 bytes from the start and
82	   32 bytes from the end.  */
/* The two 32-byte stp pairs from dstin and the pair at dstend-32 overlap
   for counts < 96 — intentional, stores are idempotent.  */
85	stp	q0, q0, [dstin, 32]
86	stp	q0, q0, [dstend, -32]

/* Large non-ZVA path: 64-bytes-per-iteration store loop.
   count is deliberately biased so the loop condition is a simple subs;
   the "16 too large" / "- (64 + 16)" arithmetic compensates for the
   alignment adjustment applied to dst (not visible in this excerpt).  */
99	sub	count, dstend, dst	/* Count is 16 too large.  */
101	sub	count, count, 64 + 16	/* Adjust count and bias for loop.  */
102	1:	stp	q0, q0, [dst], 64
103	stp	q0, q0, [dst, -32]
105	subs	count, count, 64
/* Loop tail: the final (up to) 64 bytes are written from dstend with
   negative offsets, overlapping the last loop iteration.  */
107	2:	stp	q0, q0, [dstend, -64]
108	stp	q0, q0, [dstend, -32]

/* DC ZVA dispatch: tmp1w presumably holds DCZID_EL0; bit 4 is the DZP
   (prohibit) flag, and the low 4 bits encode log2(zva size / 4) —
   TODO confirm against the elided mrs instruction.  */
114	tbnz	tmp1w, 4, L(no_zva)
116	cmp	tmp1w, 4	/* ZVA size is 64 bytes.  */

/* --- ZVA-size == 64 path --- */
119	/* Write the first and last 64 byte aligned block using stp rather
120	   than using DC ZVA.  This is faster on some cores.
/* NOTE(review): the closing "*/" of the comment above is on an elided
   line in the original.  */
124	stp	q0, q0, [dst, 32]
126	stp	q0, q0, [dst, 64]
127	stp	q0, q0, [dst, 96]
128	sub	count, dstend, dst	/* Count is now 128 too large.  */
129	sub	count, count, 128+64+64	/* Adjust count and bias for loop.  */
/* Inner loop (dc zva instruction itself elided); 64 bytes per iteration.  */
134	subs	count, count, 64
/* Trailer: last 128-byte region written with plain stores, again
   overlapping from dst and from dstend.  */
137	stp	q0, q0, [dst, 32]
138	stp	q0, q0, [dstend, -64]
139	stp	q0, q0, [dstend, -32]

/* --- ZVA-size == 128 path --- */
144	cmp	tmp1w, 5	/* ZVA size is 128 bytes.  */
148	stp	q0, q0, [dst, 32]
149	stp	q0, q0, [dst, 64]
150	stp	q0, q0, [dst, 96]
152	sub	count, dstend, dst	/* Count is now 128 too large.  */
153	sub	count, count, 128+128	/* Adjust count and bias for loop.  */
157	subs	count, count, 128
/* Trailer covers the final 128 bytes from dstend.  */
159	stp	q0, q0, [dstend, -128]
160	stp	q0, q0, [dstend, -96]
161	stp	q0, q0, [dstend, -64]
162	stp	q0, q0, [dstend, -32]

/* --- Generic ZVA path: compute the block size from DCZID_EL0 ---
   zva_len = tmp2w << tmp1w, presumably 4 << log2-field = bytes;
   confirm tmp2w's value on the elided mov.  */
167	lsl	zva_lenw, tmp2w, tmp1w
168	add	tmp1, zva_len, 64	/* Max alignment bytes written.  */

/* Align dst up to a zva_len boundary, writing the pre-alignment gap
   with the 64-byte stp loop below.  */
173	add	tmp1, dst, zva_len
175	subs	count, tmp1, dst	/* Actual alignment bytes to write.  */
176	bic	tmp1, tmp1, tmp2	/* Aligned dc zva start address.  */
178	1:	stp	q0, q0, [dst], 64
179	stp	q0, q0, [dst, -32]
180	subs	count, count, 64

/* Main dc zva loop over whole zva_len blocks (dc zva itself elided);
   leftover < zva_len bytes fall through to the trailer at label 4.  */
183	sub	count, dstend, tmp1	/* Remaining bytes to write.  */
184	subs	count, count, zva_len
187	add	dst, dst, zva_len
188	subs	count, count, zva_len
190	4:	add	count, count, zva_len
/* Export the implementation under the public name "memset" as a weak
   alias, and give libc-internal callers a hidden (non-PLT) definition.
   (Stray leading numbers are original-file line numbers from extraction.)  */
194 weak_alias (__memset, memset)
195 libc_hidden_builtin_def (memset)