1 /* $Id: VISmemset.S,v 1.8 1998/06/12 14:53:59 jj Exp $
2 * VISmemset.S: High speed memset operations utilizing the UltraSparc
3 * Visual Instruction Set.
5 * Copyright (C) 1997 David S. Miller (davem@caip.rutgers.edu)
6 * Copyright (C) 1996, 1997 Jakub Jelinek (jj@sunsite.mff.cuni.cz)
/*
 * SET_BLOCKS(base, offset, source) -- variant built from stx stores.
 *
 * Expands to four stx instructions that store the register `source` at
 * [base - offset - 0x18], [base - offset - 0x10], [base - offset - 0x08]
 * and [base - offset - 0x00].  At 8 bytes per stx this fills the 32
 * contiguous bytes from (base - offset - 0x18) through (base - offset + 0x07).
 *
 * Every store in the expansion is terminated by ';', so a call site needs
 * no separator of its own.
 *
 * NOTE(review): a second SET_BLOCKS definition built from stw follows this
 * one; presumably a preprocessor conditional outside this excerpt selects
 * between them -- confirm.
 */
12 #define SET_BLOCKS(base, offset, source) \
13 stx source, [base - offset - 0x18]; \
14 stx source, [base - offset - 0x10]; \
15 stx source, [base - offset - 0x08]; \
16 stx source, [base - offset - 0x00];
/*
 * SET_BLOCKS(base, offset, source) -- variant built from stw stores.
 *
 * Expands to eight stw instructions that store the register `source` at
 * 4-byte steps from [base - offset - 0x18] up to [base - offset + 0x04].
 * At 4 bytes per stw this fills the 32 contiguous bytes from
 * (base - offset - 0x18) through (base - offset + 0x07).
 *
 * Note that the last store uses a positive displacement (+ 0x04): it writes
 * the upper half of the final 8-byte slot that starts at (base - offset).
 *
 * Every store in the expansion is terminated by ';', so a call site needs
 * no separator of its own.
 *
 * NOTE(review): an stx-based SET_BLOCKS definition precedes this one;
 * presumably a preprocessor conditional outside this excerpt selects
 * between them -- confirm.
 */
18 #define SET_BLOCKS(base, offset, source) \
19 stw source, [base - offset - 0x18]; \
20 stw source, [base - offset - 0x14]; \
21 stw source, [base - offset - 0x10]; \
22 stw source, [base - offset - 0x0c]; \
23 stw source, [base - offset - 0x08]; \
24 stw source, [base - offset - 0x04]; \
25 stw source, [base - offset - 0x00]; \
26 stw source, [base - offset + 0x04];
30 /* So that the brz,a,pt in memset doesn't have to get through PLT, here we go... */
36 #include <asm/visasm.h>
/*
 * RETL expands to the single instruction `mov %g3, %o0`, which copies %g3
 * into %o0.
 *
 * NOTE(review): presumably used on the return path so that %o0 holds the
 * value kept in %g3 when the routine returns; neither the use sites nor the
 * code that loads %g3 are visible in this excerpt -- confirm.
 */
40 #define RETL mov %g3, %o0
43 /* Well, memset is a lot easier to get right than bcopy... */
53 brz,a,pt %o1, bzero_private
108 1: andcc %o5, 16, %g0
121 1: andcc %o5, 32, %g0
123 andncc %o2, 0x3f, %o3
146 18: wr %g0, ASI_BLK_P, %asi
147 membar #StoreStore | #LoadStore
161 stda %f0, [%o0 + 0x00] %asi
164 stda %f0, [%o0 + 0x40] %asi
165 stda %f0, [%o0 + 0x80] %asi
168 10: stda %f0, [%o0 + 0x00] %asi
169 stda %f0, [%o0 + 0x40] %asi
170 stda %f0, [%o0 + 0x80] %asi
171 stda %f0, [%o0 + 0xc0] %asi
172 11: subcc %o3, 256, %o3
180 wr %g0, FPRS_FEF, %fprs
183 membar #StoreLoad | #StoreStore
184 9: andcc %o2, 0x78, %g5
191 jmpl %o4 + %lo(13f), %g0
201 jmpl %o4 + (13f - 14b), %g0
204 12: SET_BLOCKS(%o0, 0x68, %o1)
205 SET_BLOCKS(%o0, 0x48, %o1)
206 SET_BLOCKS(%o0, 0x28, %o1)
207 SET_BLOCKS(%o0, 0x08, %o1)
236 andncc %o2, 0x3f, %o3