1 /* Copyright (C) 2012-2016 Free Software Foundation, Inc.
3 This file is part of the GNU C Library.
5 The GNU C Library is free software; you can redistribute it and/or
6 modify it under the terms of the GNU Lesser General Public
7 License as published by the Free Software Foundation; either
8 version 2.1 of the License, or (at your option) any later version.
10 The GNU C Library is distributed in the hope that it will be useful,
11 but WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 Lesser General Public License for more details.
15 You should have received a copy of the GNU Lesser General Public
16 License along with the GNU C Library. If not, see
17 <http://www.gnu.org/licenses/>. */
28 /* By default we assume that the DC instruction can be used to zero
29 data blocks more efficiently. In some circumstances this might be
30 unsafe, for example in an asymmetric multiprocessor environment with
31 different DC clear lengths (neither the upper nor lower lengths are
32 safe to use). The feature can be disabled by defining DONT_USE_DC.
34 If code may be run in a virtualized environment, then define
35 MAYBE_VIRT. This will cause the code to cache the system register
36 values rather than re-reading them each call. */
/* __memset entry point.
   NOTE(review): the register aliases used below (dstin, dst, count, A_l,
   A_lw, tmp1-tmp3, zva_len, zva_len_x, zva_bits_x) and most local labels
   are defined outside the lines visible here; the roles given in these
   comments are inferred from how each name is used -- confirm against
   the definitions.  The second ENTRY_ALIGN argument is presumably a
   log2 alignment (2^6 = 64 bytes) -- confirm against the macro. */
54 ENTRY_ALIGN (__memset, 6)
/* Work on a copy of the pointer so that dstin itself is never modified. */
56 mov dst, dstin /* Preserve return value. */
/* Replicate the fill pattern: each ORR merges the register with a copy
   of itself shifted left by 8, 16 and then 32 bits, so a value confined
   to bits 0-7 ends up repeated in all eight bytes of A_l.  Assumes A_lw
   is the 32-bit view of A_l and holds only the fill byte when this
   sequence starts -- TODO confirm. */
61 orr A_lw, A_lw, A_lw, lsl #8
62 orr A_lw, A_lw, A_lw, lsl #16
63 orr A_l, A_l, A_l, lsl #32
/* tmp1 = count & 0x30 (bits 4-5 of count); ANDS also sets the flags. */
71 ands tmp1, count, #0x30
/* Each STP of A_l, A_l writes 16 bytes.  The negative offsets address
   the 48 bytes immediately below dst, so dst presumably points just
   past the region being filled at this point -- confirm. */
77 stp A_l, A_l, [dst, #-48]
79 stp A_l, A_l, [dst, #-32]
81 stp A_l, A_l, [dst, #-16]
86 stp A_l, A_l, [dst, #-16] /* Repeat some/all of last store. */
90 /* Set up to 15 bytes. Does not assume earlier memory
106 /* Critical loop. Start at a new cache line boundary. Assuming
107 * 64 bytes per line, this ensures the entire loop is in one line. */
113 /* Bring DST to 128-bit (16-byte) alignment. We know that there's
114 * more than that to set, so we simply store 16 bytes and advance by
115 * the amount required to reach alignment. */
/* count -= tmp2.  tmp2 is presumably the distance from dst to the next
   16-byte boundary, computed by code not shown here -- confirm. */
116 sub count, count, tmp2
119 /* There may be less than 63 bytes to go now. */
/* The four stores below use offsets #16..#64, so moving dst back by 16
   makes them cover the 64 bytes starting at the unbiased dst. */
123 sub dst, dst, #16 /* Pre-bias. */
/* count -= 64, ahead of the 64-byte store group that follows. */
124 sub count, count, #64
/* Store 64 bytes as four 16-byte pairs.  The last STP uses pre-index
   writeback ("!"), which also advances dst by 64. */
126 stp A_l, A_l, [dst, #16]
127 stp A_l, A_l, [dst, #32]
128 stp A_l, A_l, [dst, #48]
129 stp A_l, A_l, [dst, #64]!
/* count -= 64 and set the flags; presumably tested by the loop-back
   branch, which is not among the lines shown here. */
130 subs count, count, #64
138 /* For zeroing memory, check to see if we can use the ZVA feature to
139 * zero entire 'cache' lines. */
/* Signed "less than or equal" on the flags from a comparison that is
   not shown here: take the L(tail_maybe_tiny) path. */
143 b.le L(tail_maybe_tiny)
/* count -= tmp2 (same alignment adjustment as on the non-zero path,
   assuming tmp2 again holds the bytes consumed reaching alignment). */
147 sub count, count, tmp2
153 /* For zeroing small amounts of memory, it's not worth setting up
154 * the line-clear code. */
158 /* For efficiency when virtualized, we cache the ZVA capability. */
/* Load the cached word stored at L(cache_clear): ADRP forms the address
   of the 4 KiB page containing the symbol, and the :lo12: operand
   supplies the offset within that page. */
159 adrp tmp2, L(cache_clear)
160 ldr zva_len, [tmp2, #:lo12:L(cache_clear)]
/* Bit 31 of the cached word set: go to L(not_short).  Presumably this
   is the "ZVA not available" marker recorded below -- confirm. */
161 tbnz zva_len, #31, L(not_short)
/* Any other non-zero value is used as the block length by
   L(zero_by_line); zero falls through (presumably "not probed yet"). */
162 cbnz zva_len, L(zero_by_line)
165 /* ZVA not available. Remember this for next time. */
/* Write zva_len back to the L(cache_clear) word; the marker value it
   holds here is set by code not shown. */
167 str zva_len, [tmp2, #:lo12:L(cache_clear)]
/* zva_len = tmp3w << (tmp1w & 15).
   NOTE(review): tmp1 presumably holds the DCZID_EL0 value, whose low
   four bits encode log2 of the ZVA block size in words, and tmp3w
   presumably holds the word size in bytes (4); neither the register
   read nor the constant is visible here -- confirm. */
171 and zva_len, tmp1w, #15 /* Safety: other bits reserved. */
172 lsl zva_len, tmp3w, zva_len
/* Record the computed block length in the L(cache_clear) word. */
173 str zva_len, [tmp2, #:lo12:L(cache_clear)]
/* Bit 4 of tmp1 set: skip the line-clear path.  Presumably this is the
   DCZID_EL0 "DC ZVA prohibited" bit -- confirm. */
176 tbnz tmp1, #4, L(not_short)
/* Same block-length computation as above but without the store to the
   cache word; presumably the other arm of a conditional-compilation
   block whose directives are not visible here -- confirm. */
178 and zva_len, tmp1w, #15 /* Safety: other bits reserved. */
179 lsl zva_len, tmp3w, zva_len
183 /* Compute how far we need to go to become suitably aligned. We're
184 * already at quad-word alignment. */
186 b.lt L(not_short) /* Not enough to reach alignment. */
/* zva_bits_x = zva_len_x - 1; a low-bits mask provided zva_len_x is a
   power of two (it is produced by a left shift above). */
187 sub zva_bits_x, zva_len_x, #1
/* tmp2 &= mask: keep only the part of tmp2 below one block length and
   set Z when that part is zero. */
189 ands tmp2, tmp2, zva_bits_x
190 b.eq 1f /* Already aligned. */
191 /* Not aligned, check that there's enough to zero after alignment. */
/* tmp1 = count - tmp2: bytes that would remain once aligned. */
192 sub tmp1, count, tmp2
/* If the GE condition holds on entry, compare tmp1 with zva_len_x;
   otherwise force NZCV to 0b1000 (N set, V clear), which a following
   signed test reads as "less than". */
194 ccmp tmp1, zva_len_x, #8, ge /* NZCV=0b1000 */
196 /* We know that there's at least 64 bytes to zero and that it's safe
197 * to overrun by 64 bytes. */
/* Three 16-byte stores at dst+16, dst+32 and dst+48. */
201 stp A_l, A_l, [dst, #16]
202 stp A_l, A_l, [dst, #32]
204 stp A_l, A_l, [dst, #48]
207 /* We've overrun a bit, so adjust dst downwards. */
/* count -= zva_len_x, ahead of the per-block stepping below. */
210 sub count, count, zva_len_x
/* Step one block: advance dst by zva_len_x, subtract the same amount
   from count and set the flags. */
213 add dst, dst, zva_len_x
214 subs count, count, zva_len_x
/* count &= zva_len_x - 1: keep only the remainder smaller than one
   block; Z is set when nothing is left. */
216 ands count, count, zva_bits_x
/* Non-zero remainder: finish it via L(tail_maybe_long). */
217 b.ne L(tail_maybe_long)
225 #endif /* DONT_USE_DC */
/* Publish the implementation under its public name: memset is declared
   as a weak alias of __memset, and the hidden-builtin definition is
   emitted for memset.
   NOTE(review): both macros are defined outside this file; the
   behaviour described is assumed from their names and arguments --
   confirm against their definitions. */
228 weak_alias (__memset, memset)
229 libc_hidden_builtin_def (memset)