1 /* Copyright (C) 2012-2014 Free Software Foundation, Inc.
3 This file is part of the GNU C Library.
5 The GNU C Library is free software; you can redistribute it and/or
6 modify it under the terms of the GNU Lesser General Public
7 License as published by the Free Software Foundation; either
8 version 2.1 of the License, or (at your option) any later version.
10 The GNU C Library is distributed in the hope that it will be useful,
11 but WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 Lesser General Public License for more details.
15 You should have received a copy of the GNU Lesser General Public
16 License along with the GNU C Library. If not, see
17 <http://www.gnu.org/licenses/>. */
#if HAVE_IFUNC && !defined (IS_IN_rtld)
/* Rather than decode dczid_el0 every time, checking for zva disabled and
unpacking the line size, do this once in the indirect function and choose
an appropriate entry point which encodes these values as constants. */
/* IFUNC resolver fragment.  NOTE(review): the entry label and several
   instructions between the visible lines are elided in this excerpt;
   presumably x1 holds the dczid_el0 value and x0 the address of the
   offset table below -- TODO confirm against the full file.  */
51 .type memset, %gnu_indirect_function
/* dczid_el0 bit 4 set means DC ZVA is prohibited.  */
55 tst x1, #16 /* test for zva disabled */
58 csel x1, xzr, x1, ne /* squash index to 0 if so */
/* Fetch the sign-extended 32-bit table entry for index x1.  */
59 ldrsw x2, [x0, x1, lsl #2]
62 .size memset, .-memset
/* Offset table indexed by the 4-bit ZVA line-size field (0-15).  Each
   entry is a 32-bit offset, relative to label 1, of the variant for a
   ZVA block size of 4 << index bytes (index 4 -> 64, 5 -> 128, ...).
   Indices 0-3 (block sizes below 64 bytes) fall back to the no-ZVA
   implementation.  */
65 1: .long memset_nozva - 1b // 0
66 .long memset_nozva - 1b // 1
67 .long memset_nozva - 1b // 2
68 .long memset_nozva - 1b // 3
69 .long memset_zva_64 - 1b // 4
70 .long memset_zva_128 - 1b // 5
71 .long memset_zva_256 - 1b // 6
72 .long memset_zva_512 - 1b // 7
73 .long memset_zva_1024 - 1b // 8
74 .long memset_zva_2048 - 1b // 9
75 .long memset_zva_4096 - 1b // 10
76 .long memset_zva_8192 - 1b // 11
77 .long memset_zva_16384 - 1b // 12
78 .long memset_zva_32768 - 1b // 13
79 .long memset_zva_65536 - 1b // 14
80 .long memset_zva_131072 - 1b // 15
83 /* The 64 byte zva size is too small, and needs unrolling for efficiency. */
86 .type memset_zva_64, %function
/* NOTE(review): the entry label, the ZVA check, the q16 value setup and
   the loop branches between the visible lines are elided in this
   excerpt; the comments below describe only what is visible.  */
94 add dstend, dstin, count
/* Head: unaligned stores to cover the bytes up to the first 64-byte
   boundary before the (elided) DC ZVA loop takes over.  */
97 str q16, [dstin] /* first 16 aligned 1. */
101 stp q16, q16, [tmp2, #16] /* first 64 aligned 16. */
104 stp q16, q16, [tmp2, #48]
105 sub count, dstend, dst /* recompute for misalign */
/* Pre-biasing by the loop stride lets the (elided) loop test with a
   single subs per iteration.  */
108 sub count, count, #128 /* pre-bias */
112 subs count, count, #128
118 adds count, count, #128 /* undo pre-bias */
122 .size memset_zva_64, . - memset_zva_64
124 /* For larger zva sizes, a simple loop ought to suffice. */
125 /* ??? Needs performance testing, when such hardware becomes available. */
/* Macro-generated variant for a ZVA block size of \len bytes.
   NOTE(review): the .macro header, entry label and loop body are not
   visible in this excerpt.  */
129 .type memset_zva_\len, %function
137 add dstend, dstin, count
143 .size memset_zva_\len, . - memset_zva_\len
/* Second macro-generated variant for ZVA size \len.  The conditional
   compare below folds the "value is zero" test into an earlier (elided)
   size comparison, since DC ZVA can only store zero bytes.
   NOTE(review): the .macro header and surrounding code are not visible
   in this excerpt.  */
148 .type memset_zva_\len, %function
153 ccmp valw, #0, #0, hs /* hs ? cmp val,0 : !z */
156 add dstend, dstin, count
160 .size memset_zva_\len, . - memset_zva_\len
177 /* Without IFUNC, we must load the zva data from the dczid register. */
180 .type memset, %function
/* NOTE(review): the entry label, the mrs read of dczid_el0 and several
   other instructions are elided in this excerpt.  */
187 add dstend, dstin, count
/* Decode dczid_el0: bit 4 = ZVA prohibited, bits 0-3 = log2(words) of
   the ZVA block size.  */
193 tst tmp1w, #16 /* dc disabled? */
194 and tmp1w, tmp1w, #15
196 ccmp tmp1w, #4, #0, eq /* eq ? cmp len,64 : !c */
/* zva_len = base << field, giving the block size in bytes.  */
197 lsl zva_len, zva_len, tmp1w
199 ccmp count, zva_len_x, #0, hs /* hs ? cmp count,len : !c */
/* Any failed condition in the chain above routes to the generic path.  */
201 b.lo L(ge_256) /* disabled || len<64 || count<len */
203 /* Fall through into memset_zva_n. */
204 .size memset, . - memset
205 #endif /* HAVE_IFUNC */
207 /* Main part of the zva path. On arrival here, we've already checked for
208 minimum size and that VAL is zero. Also, we've set up zva_len and mask. */
210 .type memset_zva_n, %function
/* NOTE(review): the entry label, the dc zva instruction itself and the
   loop branches are elided in this excerpt.  */
212 stp q16, q16, [dstin] /* first 32 aligned 1. */
214 sub zva_mask, zva_len, #1
215 sub count, count, zva_len_x /* pre-bias */
/* tmp1w = distance to the next ZVA-aligned boundary; nonzero means the
   head must be filled with ordinary stores first.  */
217 ands tmp1w, tmp1w, zva_mask
/* Main loop: one ZVA block per iteration (dc zva elided from view).  */
222 subs count, count, zva_len_x
223 add dst, dst, zva_len_x
226 adds count, count, zva_len_x /* undo pre-bias */
/* Head alignment: store 64 bytes per iteration from a 32-byte aligned
   base until the ZVA boundary is reached.  */
231 3: and tmp2, dstin, #-32
232 sub count, count, tmp1 /* account for misalign */
236 4: stp q16, q16, [tmp2, #32]
237 subs tmp1w, tmp1w, #64
238 stp q16, q16, [tmp2, #64]!
243 .size memset_zva_n, . - memset_zva_n
245 /* The non-zva path. */
248 .type memset_nozva, %function
/* NOTE(review): the entry label, the q16 value broadcast and the branch
   instructions between the visible lines are elided in this excerpt.  */
254 add dstend, dstin, count
257 /* Small data -- original count is less than 256 bytes. */
/* Overlapping stores from both ends cover the buffer without a loop;
   each stp writes 32 bytes.  */
262 stp q16, q16, [dstin]
266 stp q16, q16, [dstin, #0x20]
267 tbz count, #7, L(le_127)
269 stp q16, q16, [dstin, #0x40]
270 stp q16, q16, [dstin, #0x60]
271 stp q16, q16, [dstend, #-0x80]
272 stp q16, q16, [dstend, #-0x60]
274 stp q16, q16, [dstend, #-0x40]
276 stp q16, q16, [dstend, #-0x20]
281 and dst, dstin, #-32 /* align the pointer / pre-bias. */
282 stp q16, q16, [dstin] /* first 32 align 1 */
283 sub count, dstend, dst /* begin misalign recompute */
284 sub count, count, #32+128 /* finish recompute + pre-bias */
/* Main loop: 128 bytes per iteration from the 32-byte aligned, -32
   biased pointer.  */
288 stp q16, q16, [dst, #0x20]
289 stp q16, q16, [dst, #0x40]
290 subs count, count, #128
291 stp q16, q16, [dst, #0x60]
292 stp q16, q16, [dst, #0x80]!
295 adds count, count, #128 /* undo pre-bias */
299 /* Tail of the zva loop. Less than ZVA bytes, but possibly lots
300 more than 128. Note that dst is aligned but unbiased. */
302 subs count, count, #128 /* pre-bias */
303 sub dst, dst, #32 /* pre-bias */
306 /* Tail of the stp loop; less than 128 bytes left.
307 Note that dst is still aligned and biased by -32. */
/* Final 128 bytes, stored backward from the end; overlaps the loop's
   last stores rather than branching on the exact remainder.  */
309 stp q16, q16, [dstend, #-0x80]
310 stp q16, q16, [dstend, #-0x60]
311 stp q16, q16, [dstend, #-0x40]
312 stp q16, q16, [dstend, #-0x20]
/* Sub-32-byte tail: test each size bit of count and store a matching
   NEON sub-register from the end (q=16, d=8, s=4, h=2 bytes).  */
316 tbz count, #4, L(le_15)
318 str q16, [dstend, #-0x10]
321 tbz count, #3, L(le_7)
323 str d16, [dstend, #-8]
326 tbz count, #2, L(le_3)
328 str s16, [dstend, #-4]
331 tbz count, #1, L(le_1)
332 str h16, [dstend, #-2]
334 tbz count, #0, L(le_0)
339 .size memset_nozva, . - memset_nozva
/* Export __memset as an alias and give memset a hidden, directly-bound
   definition for intra-libc calls.  */
342 strong_alias (memset, __memset)
343 libc_hidden_builtin_def (memset)