1 /***************************************************************************
3 * Open \______ \ ____ ____ | | _\_ |__ _______ ___
4 * Source | _// _ \_/ ___\| |/ /| __ \ / _ \ \/ /
5 * Jukebox | | ( <_> ) \___| < | \_\ ( <_> > < <
6 * Firmware |____|_ /\____/ \___ >__|_ \|___ /\____/__/\_ \
10 * Copyright (C) 2004 by Jens Arnold
12 * This program is free software; you can redistribute it and/or
13 * modify it under the terms of the GNU General Public License
14 * as published by the Free Software Foundation; either version 2
15 * of the License, or (at your option) any later version.
17 * This software is distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY
18 * KIND, either express or implied.
20 ****************************************************************************/
/* _memset - byte-fill routine for the SH1, emitted into the .icode section
 * (section flags "ax": allocatable and executable) and typed as a function.
 *
 * As far as the visible lines show: r4 is the start address (it is copied to
 * r1 and finally to r0 as the return value), r5 carries the fill value and
 * r6 carries the byte count on entry, turning into the running store
 * pointer once r4 has been added to it.
 *
 * NOTE(review): the numbering embedded in this listing skips values, the
 * block comment below has no terminator of its own, and the labels
 * .no_longs, .end_b1, .loop_b1, .loop_odd, .end_b2, .end and _memset are
 * referenced but never defined in the visible lines.  Lines therefore appear
 * to be missing here - verify every note below against the complete file. */
23 .section .icode,"ax",@progbits
27 .type _memset,@function
29 /* Fills a memory region with specified byte value
30 * This version is optimized for speed
38 * r0 - start address (like ANSI version)
42 * r1 - start address +11 for main loop
44 * r5 - data (spread to all 4 bytes when using long stores)
45 * r6 - current address (runs down from end to start)
47 * The instruction order below is devised in a way to utilize the pipelining
48 * of the SH1 to the max. The routine fills memory from end to start in
49 * order to utilize the auto-decrementing store instructions.
54 and #3,r0 /* r0 = (4 - align_offset) % 4 */
/* NOTE(review): whatever loads r0 ahead of the mask above is not visible in
 * this listing - TODO confirm.
 *
 * Entry test: r6 still holds the byte count at the compare; it only becomes
 * the end address on the add that follows.  cmp/hs sets T when r6 >= r0
 * (unsigned).  The add does not touch T, so the bf still acts on the result
 * of the compare. */
56 cmp/hs r0,r6 /* at least one aligned longword to fill? */
57 add r4,r6 /* r6 = end_address */
58 bf .no_longs /* no, jump directly to byte loop */
/* Replicate the fill byte across the whole longword.  extu.b keeps only the
 * low byte of r5.  Each OR merges in whatever r0 holds at that point;
 * presumably r0 receives a shifted/swapped copy of r5 on lines not shown
 * here - TODO confirm. */
60 extu.b r5,r5 /* start: spread data to all 4 bytes */
62 or r0,r5 /* data now in 2 lower bytes of r5 */
64 or r0,r5 /* data now in all 4 bytes of r5 */
/* NOTE(review): r0 is used as the store pointer from here on, so it
 * presumably received the end address from r6 on a line not shown - TODO
 * confirm.  tst #3,r0 sets T when both low address bits are zero. */
67 tst #3,r0 /* r0 already long aligned? */
68 bt .end_b1 /* yes: skip loop */
/* Each pass pre-decrements r0 and stores one byte, repeating while r0 is not
 * yet a multiple of 4.  Only the low byte of r5 is written by mov.b. */
70 /* leading byte loop: sets 0..3 bytes */
72 mov.b r5,@-r0 /* store byte */
73 tst #3,r0 /* r0 long aligned? */
74 bf .loop_b1 /* runs r0 down until long aligned */
/* Hand the now long-aligned address back to r6, the pointer used by the
 * long-store loop below. */
76 mov r0,r6 /* r6 = last long bound */
77 nop /* keep alignment */
/* r1 becomes the lower bound that the main loop compares r6 against.
 * NOTE(review): the tst below examines bit 2 of r0, but how r0 is prepared
 * for that even/odd decision is not visible in this listing - TODO confirm. */
80 mov r4,r1 /* r1 = start_address... */
81 add #11,r1 /* ... + 11, combined for rounding and offset */
83 tst #4,r0 /* bit 2 tells whether an even or odd number of */
84 bf .loop_odd /* longwords to set */
/* Two pre-decrementing long stores per pass.  cmp/hi sets T when r6 > r1
 * (unsigned).  NOTE(review): the branch that closes this loop and the
 * .loop_odd entry point are presumably on lines not shown - TODO confirm. */
86 /* main loop: set 2 longs per pass */
88 mov.l r5,@-r6 /* store first long */
90 cmp/hi r1,r6 /* runs r6 down to first long bound */
91 mov.l r5,@-r6 /* store second long */
/* Remaining bytes between the start address (r4) and r6 are filled one at a
 * time.  cmp/hi sets T while r6 > r4 (unsigned). */
95 cmp/hi r4,r6 /* any bytes left? */
96 bf .end_b2 /* no: skip loop */
/* NOTE(review): the branch that repeats this loop is not visible in this
 * listing - TODO confirm. */
98 /* trailing byte loop */
100 mov.b r5,@-r6 /* store byte */
101 cmp/hi r4,r6 /* runs r6 down to the start address */
/* NOTE(review): no return instruction is visible; this mov presumably sits
 * in the delay slot of a return on a line not shown - TODO confirm. */
106 mov r4,r0 /* return start address */
/* Record the symbol size as the distance from _memset to the .end label. */
109 .size _memset,.end-_memset