Update Thai translation - FS #11474 by Phinitnun Chanasabaeng
[kugel-rb.git] / apps / codecs / libatrac / atrac3_arm.S
blob0dacff0b7c644c5354360da27c669b0a72fb8e1c
1 /***************************************************************************
2  *             __________               __   ___.
3  *   Open      \______   \ ____   ____ |  | _\_ |__   _______  ___
4  *   Source     |       _//  _ \_/ ___\|  |/ /| __ \ /  _ \  \/  /
5  *   Jukebox    |    |   (  <_> )  \___|    < | \_\ (  <_> > <  <
6  *   Firmware   |____|_  /\____/ \___  >__|_ \|___  /\____/__/\_ \
7  *                     \/            \/     \/    \/            \/
8  * $Id: 
9  *
10  * Copyright (C) 2009 by Andree Buschmann
11  *
12  * This program is free software; you can redistribute it and/or
13  * modify it under the terms of the GNU General Public License
14  * as published by the Free Software Foundation; either version 2
15  * of the License, or (at your option) any later version.
16  *
17  * This software is distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY
18  * KIND, either express or implied.
19  *
20  ****************************************************************************/
22 #include "config.h"
24     .section .text, "ax", %progbits
/****************************************************************************
 * void atrac3_iqmf_matrixing(int32_t *dest,
 *                            int32_t *inlo,
 *                            int32_t *inhi,
 *                            unsigned int count);
 *
 * Matrixing step within iqmf of atrac3 synthesis. Reference implementation:
 *
 * for(i=0; i<count; i+=2){
 *      dest[2*i+0] = inlo[i  ] + inhi[i  ];
 *      dest[2*i+1] = inlo[i  ] - inhi[i  ];
 *      dest[2*i+2] = inlo[i+1] + inhi[i+1];
 *      dest[2*i+3] = inlo[i+1] - inhi[i+1];
 * }
 *
 * The assembly consumes four inlo/inhi pairs and produces eight dest words
 * per pass, decrementing count by 4 each time. A count that is not a
 * multiple of 4 is therefore effectively rounded up to the next multiple of
 * 4, i.e. the routine reads and writes beyond the requested range.
 * A count of 0 returns without accessing dest, inlo or inhi.
 *
 * Note: r12 is a scratch register and does not need to be restored. lr is
 * used as a data register inside the loop; its entry value is saved on the
 * stack together with r4-r9.
 ****************************************************************************/
    .align  2
    .global atrac3_iqmf_matrixing
    .type   atrac3_iqmf_matrixing, %function

atrac3_iqmf_matrixing:
    /* r0 = dest  (post-incremented by 32 bytes per pass) */
    /* r1 = inlo  (post-incremented by 16 bytes per pass) */
    /* r2 = inhi  (post-incremented by 16 bytes per pass) */
    /* r3 = count (remaining input pairs)                 */
    stmfd   sp!, {r4-r9, lr}        /* save non-scratch registers */

    cmp     r3, #0                  /* nothing to do for count == 0; without */
    beq     .iqmf_matrixing_done    /* this the loop body would run once     */

.iqmf_matrixing_loop:
    ldmia   r1!, {r4, r6, r8, r12}  /* load inlo[0...3] */
    ldmia   r2!, {r5, r7, r9, lr}   /* load inhi[0...3] */

    /* Each difference is derived from the sum just computed:
     * (lo + hi) - 2*hi = lo - hi, so no extra register is needed. */
    add     r4,  r4,  r5            /* r4  = inlo[0] + inhi[0] */
    sub     r5,  r4,  r5, asl #1    /* r5  = inlo[0] - inhi[0] */
    add     r6,  r6,  r7            /* r6  = inlo[1] + inhi[1] */
    sub     r7,  r6,  r7, asl #1    /* r7  = inlo[1] - inhi[1] */
    add     r8,  r8,  r9            /* r8  = inlo[2] + inhi[2] */
    sub     r9,  r8,  r9, asl #1    /* r9  = inlo[2] - inhi[2] */
    add     r12, r12, lr            /* r12 = inlo[3] + inhi[3] */
    sub     lr,  r12, lr, asl #1    /* lr  = inlo[3] - inhi[3] */

    /* stmia stores in ascending register order, which yields the
     * sum/difference interleaving of the reference implementation. */
    stmia   r0!, {r4-r9, r12, lr}   /* store 8 results to dest */
    subs    r3, r3, #4              /* count -= 4 */
    bgt     .iqmf_matrixing_loop

.iqmf_matrixing_done:
    ldmpc   regs=r4-r9              /* restore registers and return (ldmpc
                                     * is a macro defined outside this file) */
.atrac3_iqmf_matrixing_end:
    .size   atrac3_iqmf_matrixing,.atrac3_iqmf_matrixing_end-atrac3_iqmf_matrixing
73     
/****************************************************************************
 * atrac3_iqmf_dewindowing(int32_t *out,
 *                         int32_t *in,
 *                         int32_t *win,
 *                         unsigned int nIn);
 *
 * Dewindowing step within iqmf of atrac3 synthesis. Reference implementation:
 *
 * for (j = nIn; j != 0; j--) {
 *          s1 = fixmul32(in[0], win[0]);
 *          s2 = fixmul32(in[1], win[1]);
 *          for (i = 2; i < 48; i += 2) {
 *              s1 += fixmul32(in[i  ], win[i  ]);
 *              s2 += fixmul32(in[i+1], win[i+1]);
 *          }
 *          out[0] = s2 << 1;
 *          out[1] = s1 << 1;
 *          in += 2;
 *          out += 2;
 *      }
 *
 * The assembly accumulates the full 64-bit products of all taps and only
 * then extracts bits 31..62 of each sum (hi << 1 | lo >> 31). The low-order
 * bits may therefore differ from the reference above if fixmul32 truncates
 * every product individually (fixmul32 is not defined in this file).
 * An nIn of 0 returns without accessing out, in or win.
 *
 * Note: r12 is a scratch register and does not need to be restored. lr is
 * used as a data register inside the loop; its entry value is saved on the
 * stack together with r4-r9.
 ****************************************************************************/
    .align  2
    .global atrac3_iqmf_dewindowing
    .type   atrac3_iqmf_dewindowing, %function

    /* Number of (even, odd) tap pairs per output pair: 24 pairs = 48 window
     * coefficients. The multiply-accumulate chain and both pointer rewinds
     * below are derived from this value. */
    .equ    IQMF_DEWIN_PAIRS, 24

atrac3_iqmf_dewindowing:
    /* r0 = out (post-incremented by 8 bytes per pass)      */
    /* r1 = in  (net advance of 8 bytes per pass)           */
    /* r2 = win (rewound to win[0] after every pass)        */
    /* r3 = nIn (remaining output pairs)                    */
    /* lr:r9  = s1 (low:high word of the 64-bit even sum)   */
    /* r12:r8 = s2 (low:high word of the 64-bit odd sum)    */
    stmfd   sp!, {r4-r9, lr}        /* save non-scratch registers */

    cmp     r3, #0                  /* nothing to do for nIn == 0; without */
    beq     .iqmf_dewindow_done     /* this the loop body would run once   */

.iqmf_dewindow_outer_loop:          /* one pass per output pair */
    /* First tap pair initialises both accumulators. */
    ldmia   r2!, {r4, r5}           /* load win[0..1] */
    ldmia   r1!, {r6, r7}           /* load in[0..1] */
    smull   lr,  r9, r4, r6         /* s1  = win[0] * in[0] */
    smull   r12, r8, r5, r7         /* s2  = win[1] * in[1] */

    /* Remaining tap pairs, fully unrolled at assembly time. */
    .rept   IQMF_DEWIN_PAIRS - 1
    ldmia   r2!, {r4, r5}           /* load win[i..i+1] */
    ldmia   r1!, {r6, r7}           /* load in[i..i+1] */
    smlal   lr,  r9, r4, r6         /* s1 += win[i  ] * in[i  ] */
    smlal   r12, r8, r5, r7         /* s2 += win[i+1] * in[i+1] */
    .endr

    /* Reduce each 64-bit sum to 32 bits: result = sum >> 31. */
    mov     lr,  lr,  lsr #31
    orr     r9,  lr,  r9, lsl #1    /* s1 = low>>31 | hi<<1 */
    mov     r12, r12, lsr #31
    orr     r8,  r12, r8, lsl #1    /* s2 = low>>31 | hi<<1 */

    stmia   r0!, {r8, r9}           /* store result out[0]=s2, out[1]=s1 */

    /* in was advanced by 48 entries; rewind 46 entries (184 bytes) so that
     * it ends up 2 entries ahead of where this pass started (in += 2). */
    sub     r1, r1, #((IQMF_DEWIN_PAIRS - 1) * 8)
    /* win was advanced by 48 entries; rewind all 192 bytes to win[0]. */
    sub     r2, r2, #(IQMF_DEWIN_PAIRS * 8)

    subs    r3, r3, #1              /* nIn -= 1 */
    bgt     .iqmf_dewindow_outer_loop

.iqmf_dewindow_done:
    ldmpc   regs=r4-r9              /* restore registers and return (ldmpc
                                     * is a macro defined outside this file) */

.atrac3_iqmf_dewindowing_end:
    .size   atrac3_iqmf_dewindowing,.atrac3_iqmf_dewindowing_end-atrac3_iqmf_dewindowing