linuxthreads: Fix up pthread.h for XPG7.
[uclibc-ng.git] / libc / string / frv / memset.S
blob17013672e5166284f62227ff15b575a82119db41
1 /* memset.S: optimised assembly memset
2  *
3  * Copyright (C) 2003, 2004 Red Hat, Inc. All Rights Reserved.
4  * Written by David Howells (dhowells@redhat.com)
5  *
6  *  This library is free software; you can redistribute it and/or
7  *  modify it under the terms of the GNU Library General Public
8  *  License as published by the Free Software Foundation; either
9  *  version 2 of the License, or (at your option) any later version.
10  *
11  *  This library is distributed in the hope that it will be useful,
12  *  but WITHOUT ANY WARRANTY; without even the implied warranty of
13  *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
14  *  Library General Public License for more details.
15  *
16  *  You should have received a copy of the GNU Library General Public
17  *  License along with this library; if not, see
18  *  <http://www.gnu.org/licenses/>.
19  */
21 #include <features.h>
23         .text
24         .p2align        4
26 ###############################################################################
28 # void *memset(void *p, char ch, size_t count)
   #
   # Fills count bytes starting at p with the low 8 bits of ch.
   #
   # Register use in this routine:
   #   gr8  - p on entry; never written below, so it is unchanged at return
   #   gr9  - ch on entry, masked to 8 bits
   #   gr10 - count on entry
   #   gr4  - running store address
   #   gr5  - bytes still to write
   #   gr6  - size of the chunk being subtracted from gr5
   #   gr7  - index/step for the update-form stores (8, then 4, 2, 0)
   #   gr12 - fill pattern, widened from 1 to 2 to 4 bytes
   #   gr13 - scratch while widening, then a copy of gr12
   #   icc0, icc1, icc3, cc5, cc7 - condition state for the predicated stores
   #
   # Stores and count updates are predicated on cc7 instead of being branched
   # around; the only branches are the returns and the 64-byte loop back-edge.
   # A ".p" suffix packs an instruction with the one that follows it, so the
   # order of the instructions below matters and must not be shuffled.
   #
30 # - NOTE: must not use any stack. exception detection performs function return
31 #         to caller's fixup routine, aborting the remainder of the set
32 #         GR4, GR7, GR8, and GR11 must be managed
   #         NOTE(review): GR11 is not referenced anywhere in this routine and
   #         no fixup code is visible here - confirm whether this note still
   #         applies to this library.
34 ###############################################################################
35         .globl          memset
36         .type           memset,@function
37 memset:
           # load the working registers and return straight away if count is 0
38         orcc.p          gr10,gr0,gr5,icc3               ; GR5 = count
39         andi            gr9,#0xff,gr9
40         or.p            gr8,gr0,gr4                     ; GR4 = address
41         beqlr           icc3,#0
43         # conditionally write a byte to 2b-align the address
           # cc7 = address is odd.  When the byte is written the count and the
           # address are stepped by GR6 (1), and the routine returns if that
           # byte was the whole request.
44         setlos.p        #1,gr6
45         andicc          gr4,#1,gr0,icc0
46         ckne            icc0,cc7
47         cstb.p          gr9,@(gr4,gr0)          ,cc7,#1
48         csubcc          gr5,gr6,gr5             ,cc7,#1 ; also set ICC3
49         cadd.p          gr4,gr6,gr4             ,cc7,#1
50         beqlr           icc3,#0
53         # conditionally write a halfword (2 bytes) to 4b-align the address
           # cc7 = (address & 2) != 0, cc5 = no borrow from GR5 - 2 (at least
           # 2 bytes left); the two are ANDed so the store only runs when both
           # hold.  GR12 is built unconditionally as ch repeated twice.
54         andicc.p        gr4,#2,gr0,icc0
55         subicc          gr5,#2,gr0,icc1
56         setlos.p        #2,gr6
57         ckne            icc0,cc7
58         slli.p          gr9,#8,gr12                     ; need to double up the pattern
59         cknc            icc1,cc5
60         or.p            gr9,gr12,gr12
61         andcr           cc7,cc5,cc7
63         csth.p          gr12,@(gr4,gr0)         ,cc7,#1
64         csubcc          gr5,gr6,gr5             ,cc7,#1 ; also set ICC3
65         cadd.p          gr4,gr6,gr4             ,cc7,#1
66         beqlr           icc3,#0
68         # conditionally write a word (4 bytes) to 8b-align the address
           # Same scheme as above with 4 in place of 2: cc7 = (address & 4) != 0
           # AND at least 4 bytes left.  GR12 becomes ch repeated four times.
69         andicc.p        gr4,#4,gr0,icc0
70         subicc          gr5,#4,gr0,icc1
71         setlos.p        #4,gr6
72         ckne            icc0,cc7
73         slli.p          gr12,#16,gr13                   ; need to quadruple-up the pattern
74         cknc            icc1,cc5
75         or.p            gr13,gr12,gr12
76         andcr           cc7,cc5,cc7
78         cst.p           gr12,@(gr4,gr0)         ,cc7,#1
79         csubcc          gr5,gr6,gr5             ,cc7,#1 ; also set ICC3
80         cadd.p          gr4,gr6,gr4             ,cc7,#1
81         beqlr           icc3,#0
           # GR13 = GR12, so the GR12/GR13 pair carries ch eight times for the
           # 8-byte stores that follow
83         or.p            gr12,gr12,gr13                  ; need to octuple-up the pattern
85         # the address is now 8b-aligned - loop around writing 64b chunks
           # GR4 is biased down by 8 because every update-form store below
           # writes at GR4+GR7 and leaves that address in GR4.  Each pass does
           # eight 8-byte stores (64 bytes), all predicated on GR5 >= 64; the
           # test for the next pass is recomputed from the updated GR5 before
           # the branch back.
86         setlos          #8,gr7
87         subi.p          gr4,#8,gr4                      ; store with update index does weird stuff
88         setlos          #64,gr6
90         subicc          gr5,#64,gr0,icc0
91 0:      cknc            icc0,cc7
92         cstdu           gr12,@(gr4,gr7)         ,cc7,#1
93         cstdu           gr12,@(gr4,gr7)         ,cc7,#1
94         cstdu           gr12,@(gr4,gr7)         ,cc7,#1
95         cstdu           gr12,@(gr4,gr7)         ,cc7,#1
96         cstdu           gr12,@(gr4,gr7)         ,cc7,#1
97         cstdu.p         gr12,@(gr4,gr7)         ,cc7,#1
98         csubcc          gr5,gr6,gr5             ,cc7,#1 ; also set ICC3
99         cstdu.p         gr12,@(gr4,gr7)         ,cc7,#1
100         subicc          gr5,#64,gr0,icc0
101         cstdu.p         gr12,@(gr4,gr7)         ,cc7,#1
102         beqlr           icc3,#0
103         bnc             icc0,#2,0b
105         # now do 32-byte remnant
            # four 8-byte stores when GR5 >= 32; GR6 and icc0 for the 16-byte
            # step are prepared in between, after GR5 has been updated
106         subicc.p        gr5,#32,gr0,icc0
107         setlos          #32,gr6
108         cknc            icc0,cc7
109         cstdu.p         gr12,@(gr4,gr7)         ,cc7,#1
110         csubcc          gr5,gr6,gr5             ,cc7,#1 ; also set ICC3
111         cstdu.p         gr12,@(gr4,gr7)         ,cc7,#1
112         setlos          #16,gr6
113         cstdu.p         gr12,@(gr4,gr7)         ,cc7,#1
114         subicc          gr5,#16,gr0,icc0
115         cstdu.p         gr12,@(gr4,gr7)         ,cc7,#1
116         beqlr           icc3,#0
118         # now do 16-byte remnant
            # two 8-byte stores when GR5 >= 16 (icc0 and GR6 were set up above)
119         cknc            icc0,cc7
120         cstdu.p         gr12,@(gr4,gr7)         ,cc7,#1
121         csubcc          gr5,gr6,gr5             ,cc7,#1 ; also set ICC3
122         cstdu.p         gr12,@(gr4,gr7)         ,cc7,#1
123         beqlr           icc3,#0
125         # now do 8-byte remnant
            # one 8-byte store when GR5 >= 8; GR7 (still 8) doubles as the
            # amount subtracted, then is reloaded with 4 for the next step
126         subicc          gr5,#8,gr0,icc1
127         cknc            icc1,cc7
128         cstdu.p         gr12,@(gr4,gr7)         ,cc7,#1
129         csubcc          gr5,gr7,gr5             ,cc7,#1 ; also set ICC3
130         setlos.p        #4,gr7
131         beqlr           icc3,#0
133         # now do 4-byte remnant
            # GR4 trails the next free byte by 8 here; adding 4 makes it trail
            # by 4 so the update-form store with GR7 = 4 lands on that byte
134         subicc          gr5,#4,gr0,icc0
135         addi.p          gr4,#4,gr4
136         cknc            icc0,cc7
137         cstu.p          gr12,@(gr4,gr7)         ,cc7,#1
138         csubcc          gr5,gr7,gr5             ,cc7,#1 ; also set ICC3
139         subicc.p        gr5,#2,gr0,icc1
140         beqlr           icc3,#0
142         # now do 2-byte remnant
            # GR4 trails by 4; adding 2 makes it trail by 2 to match GR7 = 2
143         setlos          #2,gr7
144         addi.p          gr4,#2,gr4
145         cknc            icc1,cc7
146         csthu.p         gr12,@(gr4,gr7)         ,cc7,#1
147         csubcc          gr5,gr7,gr5             ,cc7,#1 ; also set ICC3
148         subicc.p        gr5,#1,gr0,icc0
149         beqlr           icc3,#0
151         # now do 1-byte remnant
            # GR4 trails by 2; adding 2 makes it the address of the last byte,
            # which is written with a plain GR0-indexed store before returning.
            # NOTE(review): GR7 is cleared but not read again in this routine -
            # presumably part of the "must be managed" contract in the header;
            # confirm before removing.
152         setlos          #0,gr7
153         addi.p          gr4,#2,gr4
154         cknc            icc0,cc7
155         cstb.p          gr12,@(gr4,gr0)         ,cc7,#1
156         bralr
157         .size           memset, .-memset
159 libc_hidden_def(memset)