1 /***************************************************************************
3 * Open \______ \ ____ ____ | | _\_ |__ _______ ___
4 * Source | _// _ \_/ ___\| |/ /| __ \ / _ \ \/ /
5 * Jukebox | | ( <_> ) \___| < | \_\ ( <_> > < <
6 * Firmware |____|_ /\____/ \___ >__|_ \|___ /\____/__/\_ \
10 * Copyright (C) 2010 Yoshihisa Uchida
12 * This program is free software; you can redistribute it and/or
13 * modify it under the terms of the GNU General Public License
14 * as published by the Free Software Foundation; either version 2
15 * of the License, or (at your option) any later version.
17 * This software is distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY
18 * KIND, either express or implied.
20 ****************************************************************************/
25 * The following is an assembler-optimised version of
26 * void hybrid_filter(fltst *fs, int *in)
@ hybrid_filter(fltst *fs, int *in) -- ARM implementation.
@
@ Byte offsets into *fs used by the instructions below:
@   +0   fs->index  (used as a byte offset into fs->dl / fs->dx)
@   +4   fs->error
@   +8   word loaded into lr together with index/error
@   +12  fs->shift
@   +20  fs->qm
@   +52  fs->dx
@   +148 fs->dl
@
@ NOTE(review): the embedded line numbers in this listing skip, so the
@ instructions between the ones shown here are not visible; the comments
@ below describe only what the visible instructions do.
30 .section .icode, "ax", %progbits
36 .type hybrid_filter, %function
39 @ input: r0 = fs, r1 = in
40 stmdb sp!, {r4 - r12, lr} @ push r4-r12 and lr
43 @ r2 pA := fs->dl + fs->index
44 @ r3 pM := fs->dx + fs->index
50 ldmia r0, {r5, r6, lr} @ r5 = fs->index, r6 = fs->error, lr = word at fs + 8
53 add r2, r0, #148 @ r2 = fs->dl
54 add r3, r0, #52 @ r3 = fs->dx
55 add r4, r0, #20 @ r4 = fs->qm
56 add r2, r2, r5 @ r2 = (unsigned char*)fs->dl + fs->index
57 add r3, r3, r5 @ r3 = (unsigned char*)fs->dx + fs->index
66 ldmia r4!, {r5, r6, r7, r8 } @ r5-r8 = four qm words, r4 += 16
67 ldmia r2!, {r9, r10, r11, r12} @ r9-r12 = four words at pA, r2 += 16
72 ldmia r4!, {r5, r6, r7, r8 } @ r5-r8 = next four qm words, r4 += 16
78 ldmia r4, {r5, r6, r7, r8 } @ load four qm words (no writeback)
79 ldmia r3!, {r9, r10, r11, r12} @ r9-r12 = four words at pM, r3 += 16
84 stmia r4!, {r5, r6, r7, r8 } @ update fs->qm[0], ..., fs->qm[3]
85 ldmia r2!, {r9, r10, r11, r12} @ r9-r12 = four words at pA, r2 += 16
90 ldmia r4, {r5, r6, r7, r8 } @ load four qm words (no writeback)
91 ldmia r3!, {r9, r10, r11, r12} @ r9-r12 = four words at pM, r3 += 16
96 stmia r4!, {r5, r6, r7, r8 } @ update fs->qm[4], ..., fs->qm[7]
102 ldmia r4, {r5, r6, r7, r8 } @ load four qm words (no writeback)
103 ldmia r3!, {r9, r10, r11, r12} @ r9-r12 = four words at pM, r3 += 16
108 stmia r4!, {r5, r6, r7, r8 } @ update fs->qm[0], ..., fs->qm[3]
109 ldmia r2!, {r9, r10, r11, r12} @ r9-r12 = four words at pA, r2 += 16
114 ldmia r4, {r5, r6, r7, r8 } @ load four qm words (no writeback)
115 ldmia r3!, {r9, r10, r11, r12} @ r9-r12 = four words at pM, r3 += 16
120 stmia r4!, {r5, r6, r7, r8 } @ update fs->qm[4], ..., fs->qm[7]
123 ldmia r2!, {r9, r10, r11, r12} @ r9-r12 = four words at pA, r2 += 16
130 @ *in += (sum >> fs->shift)      (lr holds sum)
133 ldr r5, [r1] @ r5 = *in
134 ldr r6, [r0, #12] @ r6 = fs->shift
135 add lr, r5, lr, asr r6 @ lr = *in + (sum >> fs->shift), arithmetic shift
136 str lr, [r1] @ *in += (sum >> fs->shift)
140 ldr r1, [r0] @ r1 = fs->index
142 ands r1, r1, #63 @ Z is set when the wrapped index is 0; the flags must stay untouched until the conditional instructions below
143 stmia r0, {r1, r5} @ fs->index = (fs->index + 4) & 63
144 @ fs->error = (original) *in
146 @ change *pM, *(pM-1), *(pM-2), *(pM-3)
147 @ r9 = *(pA-4), r5 = *(pM-3)
148 @ r10 = *(pA-3), r6 = *(pM-2)
149 @ r11 = *(pA-2), r7 = *(pM-1)
150 @ r12 = *(pA-1), r8 = *(pM-0)
154 orr r5, r4, r9, asr #30 @ r5 = r4 | (r9 >> 30); orr without 's' leaves the flags alone
155 orr r6, r4, r10, asr #30 @ r6 = r4 | (r10 >> 30)
156 orr r7, r4, r11, asr #30 @ r7 = r4 | (r11 >> 30)
157 orr r8, r4, r12, asr #30 @ r8 = r4 | (r12 >> 30)
162 @ change *(pA-1), *(pA-2), *(pA-3)
167 @ store to memory: *pA, *(pA-1), *(pA-2), *(pA-3), *pM, *(pM-1), *(pM-2), *(pM-3)
168 stmneda r2, {r10, r11, r12, lr} @ if index != 0: words at r2-12 .. r2 = r10, r11, r12, lr
169 stmneda r3, {r5, r6, r7, r8} @ if index != 0: words at r3-12 .. r3 = r5, r6, r7, r8
170 ldmpc cond=ne regs=r4-r12 @ hybrid_filter end (when fs->index != 0); ldmpc is a macro defined outside this listing
174 @ r9 = fs->dl[16 + 3]
175 @ r10 = fs->dl[16 + 4]
176 @ r11 = fs->dl[16 + 5]
177 @ r12 = fs->dl[16 + 6]
178 @ lr = fs->dl[16 + 7]
180 add r2, r0, #212 @ r2 = fs->dl + 16
181 ldmia r2, {r1, r3, r4} @ r1, r3, r4 = fs->dl[16], fs->dl[17], fs->dl[18]
182 sub r2, r2, #64 @ r2 = fs->dl
183 stmia r2, {r1, r3, r4, r9 - r12, lr} @ fs->dl[0..7] = r1, r3, r4, r9, r10, r11, r12, lr
186 @ r5 = fs->dx[16 + 4]
187 @ r6 = fs->dx[16 + 5]
188 @ r7 = fs->dx[16 + 6]
189 @ r8 = fs->dx[16 + 7]
191 add r9, r0, #116 @ r9 = fs->dx + 16
192 ldmia r9, {r1, r2, r3, r4} @ r1-r4 = fs->dx[16..19]
193 sub r9, r9, #64 @ r9 = fs->dx
195 ldmpc regs=r4-r12 @ hybrid_filter end (when fs->index == 0)
198 .size hybrid_filter, hybrid_filter_end - hybrid_filter