1 /* Optimized memset implementation for PowerPC.
2 Copyright (C) 1997, 1999, 2000 Free Software Foundation, Inc.
3 This file is part of the GNU C Library.
5 The GNU C Library is free software; you can redistribute it and/or
6 modify it under the terms of the GNU Library General Public License as
7 published by the Free Software Foundation; either version 2 of the
8 License, or (at your option) any later version.
10 The GNU C Library is distributed in the hope that it will be useful,
11 but WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 Library General Public License for more details.
15 You should have received a copy of the GNU Library General Public
16 License along with the GNU C Library; see the file COPYING.LIB. If not,
17 write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
18 Boston, MA 02111-1307, USA. */
23 /* __ptr_t [r3] memset (__ptr_t s [r3], int c [r4], size_t n [r5]);
26 The memset is done in three sizes: byte (8 bits), word (32 bits),
27 cache line (256 bits). There is a special case for setting cache lines
28 to 0, to take advantage of the dcbz instruction.
29 r6: current address we are storing at
30 r7: number of bytes we are setting now (when aligning) */
32 /* take care of case for size <= 4 */
37 /* align to word boundary */
40 beq+ L(aligned) # 8th instruction from .align
48 L(g0): sth r4,-2(r6) # 16th instruction from .align
49 /* take care of case for size <= 31 */
54 /* align to cache line boundary... */
67 stw r4,-4(r8) # 32nd instruction from .align
71 L(a2): bf 29,L(caligned)
73 /* now aligned to a cache line. */
77 mtcrf 0x01,r5 # 40th instruction from .align
78 beq cr1,L(zloopstart) # special case for clearing memory using dcbz
81 beq L(medium) # we may not actually get to do a full line
85 bdz L(cloopdone) # 48th instruction from .align
92 nop # let 601 fetch last 4 instructions of loop
94 stw r4,-24(r6) # 56th instruction from .align
95 nop # let 601 fetch first 8 instructions of loop
103 stw r4,-16(r6) # 64th instruction from .align
111 b L(medium_tail2) # 72nd instruction from .align
115 /* Clear lines of memory in 128-byte chunks. */
132 L(z1): cmplwi cr5,r5,0
146 /* Memset of 4 bytes or less. */
161 /* Memset of 0-31 bytes. */
173 bge- cr1,L(medium_27t)
175 stw r4,-4(r6) # 8th instruction from .align
187 blt- cr1,L(medium_27f) # 16th instruction from .align