1 /* memset.S: optimised assembly memset
3 * Copyright (C) 2003, 2004 Red Hat, Inc. All Rights Reserved.
4 * Written by David Howells (dhowells@redhat.com)
6 * This library is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU Library General Public
8 * License as published by the Free Software Foundation; either
9 * version 2 of the License, or (at your option) any later version.
11 * This library is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * Library General Public License for more details.
16 * You should have received a copy of the GNU Library General Public
17 * License along with this library; if not, see
18 * <http://www.gnu.org/licenses/>.
26 ###############################################################################
28 # void *memset(void *p, char ch, size_t count)
30 # - NOTE: must not use any stack; exception detection performs a function return
31 # to the caller's fixup routine, aborting the remainder of the set.
32 # GR4, GR7, GR8, and GR11 must be managed.
34 ###############################################################################
# memset body: fills memory starting at the address in gr8 with the byte in
# gr9, for the count in gr10. Every store below is predicated on cc7 (",cc7,#1").
#
# Register roles, as far as the visible lines show:
#   gr8  - p on entry; copied to gr4 and not written again below
#   gr9  - fill byte ch
#   gr10 - count on entry; copied to gr5
#   gr4  - current store address
#   gr5  - bytes remaining
#   gr6  - amount subtracted from gr5 / added to gr4 after a store
#   gr7  - index register of the update-form stores; also subtracted from gr5
#          in the 8/4/2-byte remnant steps
#   gr12 - fill pattern being widened; gr13 receives a copy of gr12
#
# NOTE(review): the embedded line numbering skips values, and no visible line
# writes gr6, gr7 or cc7, defines the memset label or a loop label, or returns.
# Those instructions appear to be missing from this listing - confirm against
# the complete file before relying on the comments below.
# NOTE(review): a ".p" suffix presumably marks an instruction issued together
# with the following one (FR-V packing) - do not reorder lines casually.
36 .type memset,@function ; declare memset as a function symbol
38 orcc.p gr10,gr0,gr5,icc3 ; GR5 = count (from gr10); ICC3 reflects the count
40 or.p gr8,gr0,gr4 ; GR4 = address (from gr8)
43 # conditionally write a byte to 2b-align the address
45 andicc gr4,#1,gr0,icc0 ; ICC0 = flags of (address & 1); result discarded into gr0
47 cstb.p gr9,@(gr4,gr0) ,cc7,#1 ; if cc7: store the fill byte at GR4
48 csubcc gr5,gr6,gr5 ,cc7,#1 ; if cc7: GR5 -= GR6; also set ICC3
49 cadd.p gr4,gr6,gr4 ,cc7,#1 ; if cc7: GR4 += GR6
52 # conditionally write a halfword (csth) to 4b-align the address
53 andicc.p gr4,#2,gr0,icc0 ; ICC0 = flags of (address & 2)
54 subicc gr5,#2,gr0,icc1 ; ICC1 = flags of (remaining - 2); result discarded
57 slli.p gr9,#8,gr12 ; GR12 = ch << 8: need to double up the pattern (merge with gr9 not visible here)
62 csth.p gr12,@(gr4,gr0) ,cc7,#1 ; if cc7: store a halfword from GR12 at GR4
63 csubcc gr5,gr6,gr5 ,cc7,#1 ; if cc7: GR5 -= GR6; also set ICC3
64 cadd.p gr4,gr6,gr4 ,cc7,#1 ; if cc7: GR4 += GR6
67 # conditionally write a word (cst) to 8b-align the address
68 andicc.p gr4,#4,gr0,icc0 ; ICC0 = flags of (address & 4)
69 subicc gr5,#4,gr0,icc1 ; ICC1 = flags of (remaining - 4); result discarded
72 slli.p gr12,#16,gr13 ; GR13 = GR12 << 16: need to quadruple-up the pattern (merge into gr12 not visible here)
77 cst.p gr12,@(gr4,gr0) ,cc7,#1 ; if cc7: store a word from GR12 at GR4
78 csubcc gr5,gr6,gr5 ,cc7,#1 ; if cc7: GR5 -= GR6; also set ICC3
79 cadd.p gr4,gr6,gr4 ,cc7,#1 ; if cc7: GR4 += GR6
82 or.p gr12,gr12,gr13 ; GR13 = GR12: need to octuple-up the pattern (presumably cstdu stores the gr12/gr13 pair - confirm)
84 # the address is now 8b-aligned - loop around writing 64b chunks
86 subi.p gr4,#8,gr4 ; GR4 -= 8: store with update index does weird stuff (presumably the update form adds GR7 to GR4 before storing - confirm)
89 subicc gr5,#64,gr0,icc0 ; ICC0 = flags of (remaining - 64); result discarded
91 cstdu gr12,@(gr4,gr7) ,cc7,#1 ; if cc7: doubleword store with update, 1 of 8
92 cstdu gr12,@(gr4,gr7) ,cc7,#1 ; 2 of 8
93 cstdu gr12,@(gr4,gr7) ,cc7,#1 ; 3 of 8
94 cstdu gr12,@(gr4,gr7) ,cc7,#1 ; 4 of 8
95 cstdu gr12,@(gr4,gr7) ,cc7,#1 ; 5 of 8
96 cstdu.p gr12,@(gr4,gr7) ,cc7,#1 ; 6 of 8
97 csubcc gr5,gr6,gr5 ,cc7,#1 ; if cc7: GR5 -= GR6 (chunk size); also set ICC3
98 cstdu.p gr12,@(gr4,gr7) ,cc7,#1 ; 7 of 8
99 subicc gr5,#64,gr0,icc0 ; ICC0 = flags of (remaining - 64) after the decrement; loop-back branch not visible here
100 cstdu.p gr12,@(gr4,gr7) ,cc7,#1 ; 8 of 8
104 # now do 32-byte remnant
105 subicc.p gr5,#32,gr0,icc0 ; ICC0 = flags of (remaining - 32)
108 cstdu.p gr12,@(gr4,gr7) ,cc7,#1 ; if cc7: doubleword store with update, 1 of 4
109 csubcc gr5,gr6,gr5 ,cc7,#1 ; if cc7: GR5 -= GR6; also set ICC3
110 cstdu.p gr12,@(gr4,gr7) ,cc7,#1 ; 2 of 4
112 cstdu.p gr12,@(gr4,gr7) ,cc7,#1 ; 3 of 4
113 subicc gr5,#16,gr0,icc0 ; ICC0 = flags of (remaining - 16), ahead of the 16-byte step
114 cstdu.p gr12,@(gr4,gr7) ,cc7,#1 ; 4 of 4
117 # now do 16-byte remnant
119 cstdu.p gr12,@(gr4,gr7) ,cc7,#1 ; if cc7: doubleword store with update, 1 of 2
120 csubcc gr5,gr6,gr5 ,cc7,#1 ; if cc7: GR5 -= GR6; also set ICC3
121 cstdu.p gr12,@(gr4,gr7) ,cc7,#1 ; 2 of 2
124 # now do 8-byte remnant
125 subicc gr5,#8,gr0,icc1 ; ICC1 = flags of (remaining - 8)
127 cstdu.p gr12,@(gr4,gr7) ,cc7,#1 ; if cc7: one doubleword store with update
128 csubcc gr5,gr7,gr5 ,cc7,#1 ; if cc7: GR5 -= GR7 (the store index, not GR6); also set ICC3
132 # now do 4-byte remnant
133 subicc gr5,#4,gr0,icc0 ; ICC0 = flags of (remaining - 4)
136 cstu.p gr12,@(gr4,gr7) ,cc7,#1 ; if cc7: word store with update
137 csubcc gr5,gr7,gr5 ,cc7,#1 ; if cc7: GR5 -= GR7; also set ICC3
138 subicc.p gr5,#2,gr0,icc1 ; ICC1 = flags of (remaining - 2), ahead of the 2-byte step
141 # now do 2-byte remnant
145 csthu.p gr12,@(gr4,gr7) ,cc7,#1 ; if cc7: halfword store with update
146 csubcc gr5,gr7,gr5 ,cc7,#1 ; if cc7: GR5 -= GR7; also set ICC3
147 subicc.p gr5,#1,gr0,icc0 ; ICC0 = flags of (remaining - 1), ahead of the 1-byte step
150 # now do 1-byte remnant
154 cstb.p gr12,@(gr4,gr0) ,cc7,#1 ; if cc7: final byte store at GR4; index is gr0, no update form
156 .size memset, .-memset ; symbol size = distance from the memset symbol to this point
158 libc_hidden_def(memset) ; macro defined outside this listing