Slight speedup for the APE filters. Most noticeable on coldfire (+3.5% for -c2000...
[kugel-rb.git] / apps / codecs / demac / libdemac / filter.c
blobbab830a8bd4f2ddd6806518db2c1a7fbc406333c
1 /*
3 libdemac - A Monkey's Audio decoder
5 $Id$
7 Copyright (C) Dave Chapman 2007
9 This program is free software; you can redistribute it and/or modify
10 it under the terms of the GNU General Public License as published by
11 the Free Software Foundation; either version 2 of the License, or
12 (at your option) any later version.
14 This program is distributed in the hope that it will be useful,
15 but WITHOUT ANY WARRANTY; without even the implied warranty of
16 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17 GNU General Public License for more details.
19 You should have received a copy of the GNU General Public License
20 along with this program; if not, write to the Free Software
21 Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110, USA
25 #include <string.h>
26 #include <inttypes.h>
28 #include "demac.h"
29 #include "filter.h"
30 #include "demac_config.h"
32 #if FILTER_BITS == 32
34 #if defined(CPU_ARM) && (ARM_ARCH == 4)
35 #include "vector_math32_armv4.h"
36 #else
37 #include "vector_math_generic.h"
38 #endif
40 #else /* FILTER_BITS == 16 */
42 #ifdef CPU_COLDFIRE
43 #include "vector_math16_cf.h"
44 #elif defined(CPU_ARM) && (ARM_ARCH >= 6)
45 #include "vector_math16_armv6.h"
46 #elif defined(CPU_ARM) && (ARM_ARCH >= 5)
47 /* Assume all our ARMv5 targets are ARMv5te(j) */
48 #include "vector_math16_armv5te.h"
49 #else
50 #include "vector_math_generic.h"
51 #endif
53 #endif /* FILTER_BITS */
55 struct filter_t {
56 filter_int* coeffs; /* ORDER entries */
58 /* We store all the filter delays in a single buffer */
59 filter_int* history_end;
61 filter_int* delay;
62 filter_int* adaptcoeffs;
64 int avg;
/* We name the functions according to the ORDER and FRACBITS
   pre-processor symbols and build multiple .o files from this .c file
   - this increases code-size but gives the compiler more scope for
   optimising the individual functions, as well as replacing a lot of
   variables with constants.

   For FRACBITS 13, 10 and 15 the name is chosen from FRACBITS alone;
   ORDER is only consulted when FRACBITS is 11.

   A FRACBITS/ORDER pair that matches none of the cases below is rejected
   at compile time - otherwise the entry points would silently be emitted
   under the literal names INIT_FILTER and APPLY_FILTER.  A FRACBITS that
   is not defined at all is not diagnosed here: FP_HALF expands to an
   expression using it, so the first use fails to compile anyway.
*/

#if FRACBITS == 11
  #if ORDER == 16
     #define INIT_FILTER   init_filter_16_11
     #define APPLY_FILTER apply_filter_16_11
  #elif ORDER == 64
     #define INIT_FILTER  init_filter_64_11
     #define APPLY_FILTER apply_filter_64_11
  #else
     #error "filter.c: FRACBITS == 11 requires ORDER == 16 or ORDER == 64"
  #endif
#elif FRACBITS == 13
  #define INIT_FILTER  init_filter_256_13
  #define APPLY_FILTER apply_filter_256_13
#elif FRACBITS == 10
  #define INIT_FILTER  init_filter_32_10
  #define APPLY_FILTER apply_filter_32_10
#elif FRACBITS == 15
  #define INIT_FILTER  init_filter_1280_15
  #define APPLY_FILTER apply_filter_1280_15
#elif defined(FRACBITS)
  #error "filter.c: unsupported FRACBITS (expected 10, 11, 13 or 15)"
#endif
/* Some macros to handle the fixed-point stuff */

/* Convert from (32-FRACBITS).FRACBITS fixed-point format to an
   integer (rounding to nearest).  The argument is parenthesised so that
   expressions with operators of lower precedence than '+' (e.g. a | b or
   c ? a : b) are rounded as a whole. */
#define FP_HALF  (1 << (FRACBITS - 1))   /* 0.5 in fixed-point format. */
#define FP_TO_INT(x) (((x) + FP_HALF) >> FRACBITS)  /* round(x) */

/* Clamp x to the signed 16-bit range.  The generic version evaluates x
   more than once, so only pass expressions without side effects.  When x
   does not fit, (x >> 31) ^ 0x7FFF yields 0x7FFF for non-negative x and,
   with an arithmetic right shift of a 32-bit int, -0x8000 for negative x. */
#if defined(CPU_ARM) && (ARM_ARCH >= 6)
#define SATURATE(x) ({int __res; asm("ssat %0, #16, %1" : "=r"(__res) : "r"(x)); __res; })
#else
#define SATURATE(x) (LIKELY((x) == (int16_t)(x)) ? (x) : (((x) >> 31) ^ 0x7FFF))
#endif
106 /* Apply the filter with state f to count entries in data[] */
108 static void ICODE_ATTR_DEMAC do_apply_filter_3980(struct filter_t* f,
109 int32_t* data, int count)
111 int res;
112 int absres;
114 #ifdef PREPARE_SCALARPRODUCT
115 PREPARE_SCALARPRODUCT
116 #endif
118 while(LIKELY(count--))
120 res = FP_TO_INT(scalarproduct(f->coeffs, f->delay - ORDER));
122 if (LIKELY(*data != 0)) {
123 if (*data < 0)
124 vector_add(f->coeffs, f->adaptcoeffs - ORDER);
125 else
126 vector_sub(f->coeffs, f->adaptcoeffs - ORDER);
129 res += *data;
131 *data++ = res;
133 /* Update the output history */
134 *f->delay++ = SATURATE(res);
136 /* Version 3.98 and later files */
138 /* Update the adaption coefficients */
139 absres = (res < 0 ? -res : res);
141 if (UNLIKELY(absres > 3 * f->avg))
142 *f->adaptcoeffs = ((res >> 25) & 64) - 32;
143 else if (3 * absres > 4 * f->avg)
144 *f->adaptcoeffs = ((res >> 26) & 32) - 16;
145 else if (LIKELY(absres > 0))
146 *f->adaptcoeffs = ((res >> 27) & 16) - 8;
147 else
148 *f->adaptcoeffs = 0;
150 f->avg += (absres - f->avg) / 16;
152 f->adaptcoeffs[-1] >>= 1;
153 f->adaptcoeffs[-2] >>= 1;
154 f->adaptcoeffs[-8] >>= 1;
156 f->adaptcoeffs++;
158 /* Have we filled the history buffer? */
159 if (UNLIKELY(f->delay == f->history_end)) {
160 memmove(f->coeffs + ORDER, f->delay - (ORDER*2),
161 (ORDER*2) * sizeof(filter_int));
162 f->adaptcoeffs = f->coeffs + ORDER*2;
163 f->delay = f->coeffs + ORDER*3;
168 static void ICODE_ATTR_DEMAC do_apply_filter_3970(struct filter_t* f,
169 int32_t* data, int count)
171 int res;
173 #ifdef PREPARE_SCALARPRODUCT
174 PREPARE_SCALARPRODUCT
175 #endif
177 while(LIKELY(count--))
179 res = FP_TO_INT(scalarproduct(f->coeffs, f->delay - ORDER));
181 if (LIKELY(*data != 0)) {
182 if (*data < 0)
183 vector_add(f->coeffs, f->adaptcoeffs - ORDER);
184 else
185 vector_sub(f->coeffs, f->adaptcoeffs - ORDER);
188 /* Convert res from (32-FRACBITS).FRACBITS fixed-point format to an
189 integer (rounding to nearest) and add the input value to
190 it */
191 res += *data;
193 *data++ = res;
195 /* Update the output history */
196 *f->delay++ = SATURATE(res);
198 /* Version ??? to < 3.98 files (untested) */
199 f->adaptcoeffs[0] = (res == 0) ? 0 : ((res >> 28) & 8) - 4;
200 f->adaptcoeffs[-4] >>= 1;
201 f->adaptcoeffs[-8] >>= 1;
203 f->adaptcoeffs++;
205 /* Have we filled the history buffer? */
206 if (UNLIKELY(f->delay == f->history_end)) {
207 memmove(f->coeffs + ORDER, f->delay - (ORDER*2),
208 (ORDER*2) * sizeof(filter_int));
209 f->adaptcoeffs = f->coeffs + ORDER*2;
210 f->delay = f->coeffs + ORDER*3;
215 static struct filter_t filter0 IBSS_ATTR;
216 static struct filter_t filter1 IBSS_ATTR;
218 static void do_init_filter(struct filter_t* f, filter_int* buf)
220 f->coeffs = buf;
221 f->history_end = buf + ORDER*3 + FILTER_HISTORY_SIZE;
223 /* Init pointers */
224 f->adaptcoeffs = f->coeffs + ORDER*2;
225 f->delay = f->coeffs + ORDER*3;
227 /* Zero coefficients and history buffer */
228 memset(f->coeffs, 0, ORDER*3 * sizeof(filter_int));
230 /* Zero the running average */
231 f->avg = 0;
234 void INIT_FILTER(filter_int* buf)
236 do_init_filter(&filter0, buf);
237 do_init_filter(&filter1, buf + ORDER*3 + FILTER_HISTORY_SIZE);
240 void ICODE_ATTR_DEMAC APPLY_FILTER(int fileversion, int32_t* data0,
241 int32_t* data1, int count)
243 if (fileversion >= 3980) {
244 do_apply_filter_3980(&filter0, data0, count);
245 if (data1 != NULL)
246 do_apply_filter_3980(&filter1, data1, count);
247 } else {
248 do_apply_filter_3970(&filter0, data0, count);
249 if (data1 != NULL)
250 do_apply_filter_3970(&filter1, data1, count);