FS#11335 by me: make ARM assembly functions thumb-friendly
[kugel-rb.git] / apps / codecs / libtta / filter_arm.S
blobf3959b83ca46c7ce9893f298246b491b05a2bb6b
1 /***************************************************************************
2  *             __________               __   ___.
3  *   Open      \______   \ ____   ____ |  | _\_ |__   _______  ___
4  *   Source     |       _//  _ \_/ ___\|  |/ /| __ \ /  _ \  \/  /
5  *   Jukebox    |    |   (  <_> )  \___|    < | \_\ (  <_> > <  <
6  *   Firmware   |____|_  /\____/ \___  >__|_ \|___  /\____/__/\_ \
7  *                     \/            \/     \/    \/            \/
8  * $Id$
9  *
10  * Copyright (C) 2010 Yoshihisa Uchida
11  *
12  * This program is free software; you can redistribute it and/or
13  * modify it under the terms of the GNU General Public License
14  * as published by the Free Software Foundation; either version 2
15  * of the License, or (at your option) any later version.
16  *
17  * This software is distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY
18  * KIND, either express or implied.
19  *
20  ****************************************************************************/
22 #include "config.h"
24 /*
25  * The following is an assembler-optimised version of
26  * void hybrid_filter(fltst *fs, int *in)
27  */
    @ Code placement: the .icode section (allocatable, executable) when USE_IRAM
    @ is defined, the regular .text section otherwise.
29 #ifdef USE_IRAM
30     .section .icode, "ax", %progbits
31 #else
32     .text
33 #endif
34     .align
35     .global  hybrid_filter
36     .type    hybrid_filter, %function
    @ ---------------------------------------------------------------------
    @ hybrid_filter
    @
    @ In:   r0 = fs  (filter state), r1 = in (pointer to one 32-bit sample)
    @ Out:  none in registers; *in and the state behind fs are updated
    @ Regs: r4 - r12 and lr are pushed on entry and used as scratch;
    @       r1 - r3 are overwritten
    @
    @ Byte offsets into *fs used by this routine:
    @     +0 index, +4 error, +8 round, +12 shift, +20 qm, +52 dx, +148 dl
    @ fs->index is added directly to byte addresses and is advanced by 4 and
    @ masked with 63, i.e. it is handled as a byte offset of a word index
    @ that runs 0..15.
    @
    @ Outline:
    @   1. Depending on the sign of fs->error, leave qm[0..7] unchanged,
    @      subtract pM[0..7] from it, or add pM[0..7] to it.
    @   2. sum = fs->round + qm[0..7] (dot) pA[0..7].
    @   3. fs->error = *in;  *in += sum >> fs->shift.
    @   4. Derive four new dx words and four new dl words and store them.
    @   5. When the index wraps to 0, copy words 16..23 of dl and of dx
    @      down to words 0..7 instead.
    @ ---------------------------------------------------------------------
38 hybrid_filter:
39     @ input: r0 = fs, r1 = in
40     stmdb    sp!, {r4 - r12, lr}
42     @ get fs members
43     @ r2 pA  := fs->dl + fs->index
44     @ r3 pM  := fs->dx + fs->index
45     @ r4 pB  := fs->qm
46     @ r5 fs->index
47     @ r6 fs->error
48     @ lr sum := fs->round
50     ldmia    r0,  {r5, r6, lr}                   @ r5 = fs->index
51                                                  @ r6 = fs->error
52                                                  @ lr = fs->round
53     add      r2,  r0, #148                       @ r2 = fs->dl
54     add      r3,  r0, #52                        @ r3 = fs->dx
55     add      r4,  r0, #20                        @ r4 = fs->qm
56     add      r2,  r2, r5                         @ r2 = (unsigned char*)fs->dl + fs->index
57     add      r3,  r3, r5                         @ r3 = (unsigned char*)fs->dx + fs->index
    @ three-way dispatch on the sign of fs->error:
    @ negative -> .hf_negative, positive -> .hf_positive, zero falls through
59     cmp      r6,  #0
60     bmi      .hf_negative
61     bne      .hf_positive
63     @ case fs->error == 0
    @ qm is left unchanged. r3 is still advanced by 32 bytes (8 words) so
    @ that it ends up where the other two cases leave it after their two
    @ post-incrementing loads from r3.
65     add      r3,  r3, #32
66     ldmia    r4!, {r5, r6,  r7,  r8 }
67     ldmia    r2!, {r9, r10, r11, r12}
68     mla      lr,  r5, r9,  lr
69     mla      lr,  r6, r10, lr
70     mla      lr,  r7, r11, lr
71     mla      lr,  r8, r12, lr
    @ preload qm[4..7]; the second half of the sum is done at .hf2
72     ldmia    r4!, {r5, r6,  r7,  r8 }
73     b        .hf2
75 .hf_negative:
76     @ case fs->error < 0
    @ qm[i] -= pM[i] for i = 0..7, four words at a time; the updated
    @ qm values are the ones used in the multiply-accumulate
78     ldmia    r4,  {r5, r6,  r7,  r8 }
79     ldmia    r3!, {r9, r10, r11, r12}
80     sub      r5,  r5, r9
81     sub      r6,  r6, r10
82     sub      r7,  r7, r11
83     sub      r8,  r8, r12
84     stmia    r4!, {r5, r6,  r7,  r8 }            @ update fs->qm[0], ..., fs->qm[3]
85     ldmia    r2!, {r9, r10, r11, r12}
86     mla      lr,  r5, r9,  lr
87     mla      lr,  r6, r10, lr
88     mla      lr,  r7, r11, lr
89     mla      lr,  r8, r12, lr
90     ldmia    r4,  {r5, r6,  r7,  r8 }
91     ldmia    r3!, {r9, r10, r11, r12}
92     sub      r5,  r5, r9
93     sub      r6,  r6, r10
94     sub      r7,  r7, r11
95     sub      r8,  r8, r12
96     stmia    r4!, {r5, r6,  r7,  r8 }            @ update fs->qm[4], ..., fs->qm[7]
97     b .hf2
99 .hf_positive:
100     @ case fs->error > 0
    @ qm[i] += pM[i] for i = 0..7, same structure as the negative case;
    @ this case falls through into .hf2
102     ldmia    r4,  {r5, r6,  r7,  r8 }
103     ldmia    r3!, {r9, r10, r11, r12}
104     add      r5,  r5, r9
105     add      r6,  r6, r10
106     add      r7,  r7, r11
107     add      r8,  r8, r12
108     stmia    r4!, {r5, r6,  r7,  r8 }            @ update fs->qm[0], ..., fs->qm[3]
109     ldmia    r2!, {r9, r10, r11, r12}
110     mla      lr,  r5, r9,  lr
111     mla      lr,  r6, r10, lr
112     mla      lr,  r7, r11, lr
113     mla      lr,  r8, r12, lr
114     ldmia    r4,  {r5, r6,  r7,  r8 }
115     ldmia    r3!, {r9, r10, r11, r12}
116     add      r5,  r5, r9
117     add      r6,  r6, r10
118     add      r7,  r7, r11
119     add      r8,  r8, r12
120     stmia    r4!, {r5, r6,  r7,  r8 }            @ update fs->qm[4], ..., fs->qm[7]
122 .hf2:
    @ common tail of all three cases. On entry:
    @   r5 - r8 = qm[4..7], r2 = initial pA + 16 bytes, r3 = initial pM + 32 bytes
    @ second half of the sum: lr += qm[4..7] (dot) pA[4..7]
    @ after the load below r2 = initial pA + 32 bytes; the comments that
    @ follow call this advanced pointer pA (and the advanced r3 pM)
123     ldmia    r2!, {r9, r10, r11, r12}
124     mla      lr,  r5, r9,  lr
125     mla      lr,  r6, r10, lr
126     mla      lr,  r7, r11, lr
127     mla      lr,  r8, r12, lr
129     @ fs->error = *in;
130     @ *in += (sum >> fs->shift)
131     @ *pA = *in
133     ldr      r5,  [r1]                           @ r5 = *in
134     ldr      r6,  [r0, #12]                      @ r6 = fs->shift
    @ arithmetic (sign-preserving) right shift of the sum by fs->shift
135     add      lr,  r5, lr, asr r6
136     str      lr,  [r1]                           @ *in += (sum >> fs->shift)
138     @ update fs->index
    @ the index is re-read from memory because r5 has been reused since
    @ the initial load
140     ldr      r1,  [r0]                           @ r1 = fs->index
141     add      r1,  r1,  #4
142     ands     r1,  r1,  #63                       @ set Z flag (after this, CPSR must keep !!)
    @ Z is set exactly when the index wrapped to 0. None of the instructions
    @ between here and the conditional stmne/ldmpc below carries an S suffix,
    @ so the flags survive until they are tested.
143     stmia    r0,  {r1, r5}                       @ fs->index = (fs->index + 4) & 63
144                                                  @ fs->error = (original) *in
146     @ change *pM, *(pM-1), *(pM-2), *(pM-3)
147     @ r9  = *(pA-4), r5 = *(pM-3)
148     @ r10 = *(pA-3), r6 = *(pM-2)
149     @ r11 = *(pA-2), r7 = *(pM-1)
150     @ r12 = *(pA-1), r8 = *(pM-0)
151     @ lr  = *(pA-0)
    @ each new dx word is ((x >> 30) | 1) of the matching dl word, which
    @ gives +1 or -1 by sign for a 32-bit x; it is then scaled by
    @ 1 (r5), 2 (r6, r7) or 4 (r8)
153     mov      r4,  #1
154     orr      r5,  r4, r9,  asr #30
155     orr      r6,  r4, r10, asr #30
156     orr      r7,  r4, r11, asr #30
157     orr      r8,  r4, r12, asr #30
158     mov      r6,  r6, lsl #1
159     mov      r7,  r7, lsl #1
160     mov      r8,  r8, lsl #2
162     @ change *(pA-1), *(pA-2), *(pA-3)
    @ chained differences: each line uses the result of the line before it
    @   r12 = lr - r12,  r11 = (new r12) - r11,  r10 = (new r11) - r10
163     sub      r12, lr,  r12
164     sub      r11, r12, r11
165     sub      r10, r11, r10
167     @ set to the memory: *pA, *(pA-1), *(pA-2), *(pA-3), *pM, *(pM-1), *(pM-2), *(pM-3)
    @ executed only when the new index is non-zero (NE). Decrement-after
    @ store: the four registers land at [base-12], [base-8], [base-4], [base].
168     stmneda  r2,  {r10, r11, r12, lr}
169     stmneda  r3,  {r5,  r6,  r7,  r8}
    @ NOTE(review): ldmpc is not defined in this file; presumably it is an
    @ assembler macro from config.h that pops the listed registers plus the
    @ return address and returns, here only when NE - confirm.
170     ldmpc    cond=ne regs=r4-r12     @ hybrid_filter end (when fs->index != 0)
172 .hf_memshl:
    @ reached only by falling through, i.e. when the new index is 0. The
    @ conditional stores above were skipped, so the freshly computed words
    @ exist only in registers and are written straight to the low end of
    @ each buffer together with the older words reloaded from memory.
173     @ memshl (fs->dl)
174     @ r9  = fs->dl[16 + 3]
175     @ r10 = fs->dl[16 + 4]
176     @ r11 = fs->dl[16 + 5]
177     @ r12 = fs->dl[16 + 6]
178     @ lr  = fs->dl[16 + 7]
    @ 212 = 148 (dl) + 64 bytes (16 words)
180     add      r2,  r0,  #212                      @ r2 = fs->dl + 16
    @ reload dl[16], dl[17], dl[18]; the other five words are in r9 - r12, lr
181     ldmia    r2,  {r1, r3, r4}
182     sub      r2,  r2,  #64                       @ r2 = fs->dl
    @ store multiple writes registers in ascending register number, so
    @ this writes dl[0..7] = r1, r3, r4, r9, r10, r11, r12, lr
183     stmia    r2,  {r1, r3, r4, r9 - r12, lr}
185     @ memshl (fs->dx)
186     @ r5 = fs->dx[16 + 4]
187     @ r6 = fs->dx[16 + 5]
188     @ r7 = fs->dx[16 + 6]
189     @ r8 = fs->dx[16 + 7]
    @ 116 = 52 (dx) + 64 bytes (16 words); r9 is free to reuse as the base
    @ because its value was stored just above
191     add      r9,  r0,  #116                      @ r9 = fs->dx + 16
    @ reload dx[16..19]; dx[20..23] are the new words held in r5 - r8
192     ldmia    r9,  {r1, r2, r3, r4}
193     sub      r9,  r9,  #64                       @ r9 = fs->dx
194     stmia    r9,  {r1 - r8}
195     ldmpc    regs=r4-r12                         @ hybrid_filter end (when fs->index == 0)
    @ end marker used to compute the symbol size of hybrid_filter
197 hybrid_filter_end:
198     .size    hybrid_filter, hybrid_filter_end - hybrid_filter