3 libdemac - A Monkey's Audio decoder
7 Copyright (C) Dave Chapman 2007
9 This program is free software; you can redistribute it and/or modify
10 it under the terms of the GNU General Public License as published by
11 the Free Software Foundation; either version 2 of the License, or
12 (at your option) any later version.
14 This program is distributed in the hope that it will be useful,
15 but WITHOUT ANY WARRANTY; without even the implied warranty of
16 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17 GNU General Public License for more details.
19 You should have received a copy of the GNU General Public License
20 along with this program; if not, write to the Free Software
21 Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110, USA
30 #include "demac_config.h"
34 #if defined(CPU_ARM) && (ARM_ARCH == 4)
35 #include "vector_math32_armv4.h"
37 #include "vector_math_generic.h"
40 #else /* FILTER_BITS == 16 */
43 #include "vector_math16_cf.h"
44 #elif defined(CPU_ARM) && (ARM_ARCH >= 6)
45 #include "vector_math16_armv6.h"
46 #elif defined(CPU_ARM) && (ARM_ARCH >= 5)
47 /* Assume all our ARMv5 targets are ARMv5te(j) */
48 #include "vector_math16_armv5te.h"
49 #elif (defined(__i386__) || defined(__i486__)) && defined(__MMX__) \
50 || defined(__x86_64__)
51 #include "vector_math16_mmx.h"
53 #include "vector_math_generic.h"
56 #endif /* FILTER_BITS */
59 filter_int
* coeffs
; /* ORDER entries */
61 /* We store all the filter delays in a single buffer */
62 filter_int
* history_end
;
65 filter_int
* adaptcoeffs
;
/* We name the functions according to the ORDER and FRACBITS
   pre-processor symbols and build multiple .o files from this .c file
   - this increases code-size but gives the compiler more scope for
   optimising the individual functions, as well as replacing a lot of
   variables with constants.
*/

/* NOTE(review): the conditional directives around these definitions were
   missing from this listing; they are restored from the ORDER_FRACBITS
   suffix of each name. FRACBITS == 11 is the only value shared by two
   orders, so only that case needs to look at ORDER as well. */
#if FRACBITS == 11
  #if ORDER == 16
     #define INIT_FILTER  init_filter_16_11
     #define APPLY_FILTER apply_filter_16_11
  #elif ORDER == 64
     #define INIT_FILTER  init_filter_64_11
     #define APPLY_FILTER apply_filter_64_11
  #endif
#elif FRACBITS == 13
  #define INIT_FILTER  init_filter_256_13
  #define APPLY_FILTER apply_filter_256_13
#elif FRACBITS == 10
  #define INIT_FILTER  init_filter_32_10
  #define APPLY_FILTER apply_filter_32_10
#elif FRACBITS == 15
  #define INIT_FILTER  init_filter_1280_15
  #define APPLY_FILTER apply_filter_1280_15
#endif
/* Some macros to handle the fixed-point stuff */

/* Convert from (32-FRACBITS).FRACBITS fixed-point format to an
   integer (rounding to nearest). */
#define FP_HALF  (1 << (FRACBITS - 1))   /* 0.5 in fixed-point format. */
/* round(x). The argument is parenthesised so that expressions using
   operators that bind looser than '+' (e.g. a & b) expand as intended. */
#define FP_TO_INT(x) (((x) + FP_HALF) >> FRACBITS)
/* SATURATE(x): clamp x to the signed 16-bit range [-32768, 32767].

   NOTE(review): the #ifdef CPU_ARM / #if ARM_ARCH >= 6 scaffolding, the
   "asm volatile (" opener, the "moveq" line and the tail of the ARM < 6
   macro were missing from this listing. They are restored here; "moveq" is
   what hands back x unchanged when it already fits in 16 bits - confirm
   against the project's reference copy. */
#ifdef CPU_ARM
#if ARM_ARCH >= 6
#define SATURATE(x) ({int __res; asm("ssat %0, #16, %1" : "=r"(__res) : "r"(x)); __res; })
#else /* ARM_ARCH < 6 */
/* Keeping the asr #31 outside of the asm allows loads to be scheduled between
   it and the rest of the block on ARM9E, with the load's result latency filled
   by the other calculations. */
#define SATURATE(x) ({ \
    int __res = (x) >> 31; \
    asm volatile ( \
        "teq %0, %1, asr #15\n\t" \
        "moveq %0, %1\n\t" \
        "eorne %0, %0, #0xff\n\t" \
        "eorne %0, %0, #0x7f00" \
        : "+r" (__res) : "r" (x) : "cc" \
    ); \
    __res; \
})
#endif /* ARM_ARCH */
#else /* CPU != ARM */
/* In range: x itself. Otherwise the sign mask XOR 0x7FFF, i.e. 32767 for
   positive and -32768 for negative overflow. */
#define SATURATE(x) (LIKELY((x) == (int16_t)(x)) ? (x) : ((x) >> 31) ^ 0x7FFF)
#endif
126 /* Apply the filter with state f to count entries in data[] */
128 static void ICODE_ATTR_DEMAC
do_apply_filter_3980(struct filter_t
* f
,
129 int32_t* data
, int count
)
134 #ifdef PREPARE_SCALARPRODUCT
135 PREPARE_SCALARPRODUCT
138 while(LIKELY(count
--))
140 #ifdef FUSED_VECTOR_MATH
141 if (LIKELY(*data
!= 0)) {
143 res
= vector_sp_add(f
->coeffs
, f
->delay
- ORDER
,
144 f
->adaptcoeffs
- ORDER
);
146 res
= vector_sp_sub(f
->coeffs
, f
->delay
- ORDER
,
147 f
->adaptcoeffs
- ORDER
);
149 res
= scalarproduct(f
->coeffs
, f
->delay
- ORDER
);
151 res
= FP_TO_INT(res
);
153 res
= FP_TO_INT(scalarproduct(f
->coeffs
, f
->delay
- ORDER
));
155 if (LIKELY(*data
!= 0)) {
157 vector_add(f
->coeffs
, f
->adaptcoeffs
- ORDER
);
159 vector_sub(f
->coeffs
, f
->adaptcoeffs
- ORDER
);
167 /* Update the output history */
168 *f
->delay
++ = SATURATE(res
);
170 /* Version 3.98 and later files */
172 /* Update the adaption coefficients */
173 absres
= (res
< 0 ? -res
: res
);
175 if (UNLIKELY(absres
> 3 * f
->avg
))
176 *f
->adaptcoeffs
= ((res
>> 25) & 64) - 32;
177 else if (3 * absres
> 4 * f
->avg
)
178 *f
->adaptcoeffs
= ((res
>> 26) & 32) - 16;
179 else if (LIKELY(absres
> 0))
180 *f
->adaptcoeffs
= ((res
>> 27) & 16) - 8;
184 f
->avg
+= (absres
- f
->avg
) / 16;
186 f
->adaptcoeffs
[-1] >>= 1;
187 f
->adaptcoeffs
[-2] >>= 1;
188 f
->adaptcoeffs
[-8] >>= 1;
192 /* Have we filled the history buffer? */
193 if (UNLIKELY(f
->delay
== f
->history_end
)) {
194 memmove(f
->coeffs
+ ORDER
, f
->delay
- (ORDER
*2),
195 (ORDER
*2) * sizeof(filter_int
));
196 f
->adaptcoeffs
= f
->coeffs
+ ORDER
*2;
197 f
->delay
= f
->coeffs
+ ORDER
*3;
202 static void ICODE_ATTR_DEMAC
do_apply_filter_3970(struct filter_t
* f
,
203 int32_t* data
, int count
)
207 #ifdef PREPARE_SCALARPRODUCT
208 PREPARE_SCALARPRODUCT
211 while(LIKELY(count
--))
213 #ifdef FUSED_VECTOR_MATH
214 if (LIKELY(*data
!= 0)) {
216 res
= vector_sp_add(f
->coeffs
, f
->delay
- ORDER
,
217 f
->adaptcoeffs
- ORDER
);
219 res
= vector_sp_sub(f
->coeffs
, f
->delay
- ORDER
,
220 f
->adaptcoeffs
- ORDER
);
222 res
= scalarproduct(f
->coeffs
, f
->delay
- ORDER
);
224 res
= FP_TO_INT(res
);
226 res
= FP_TO_INT(scalarproduct(f
->coeffs
, f
->delay
- ORDER
));
228 if (LIKELY(*data
!= 0)) {
230 vector_add(f
->coeffs
, f
->adaptcoeffs
- ORDER
);
232 vector_sub(f
->coeffs
, f
->adaptcoeffs
- ORDER
);
236 /* Convert res from (32-FRACBITS).FRACBITS fixed-point format to an
237 integer (rounding to nearest) and add the input value to
243 /* Update the output history */
244 *f
->delay
++ = SATURATE(res
);
246 /* Version ??? to < 3.98 files (untested) */
247 f
->adaptcoeffs
[0] = (res
== 0) ? 0 : ((res
>> 28) & 8) - 4;
248 f
->adaptcoeffs
[-4] >>= 1;
249 f
->adaptcoeffs
[-8] >>= 1;
253 /* Have we filled the history buffer? */
254 if (UNLIKELY(f
->delay
== f
->history_end
)) {
255 memmove(f
->coeffs
+ ORDER
, f
->delay
- (ORDER
*2),
256 (ORDER
*2) * sizeof(filter_int
));
257 f
->adaptcoeffs
= f
->coeffs
+ ORDER
*2;
258 f
->delay
= f
->coeffs
+ ORDER
*3;
263 static struct filter_t filter
[2] IBSS_ATTR_DEMAC
;
265 static void do_init_filter(struct filter_t
* f
, filter_int
* buf
)
268 f
->history_end
= buf
+ ORDER
*3 + FILTER_HISTORY_SIZE
;
271 f
->adaptcoeffs
= f
->coeffs
+ ORDER
*2;
272 f
->delay
= f
->coeffs
+ ORDER
*3;
274 /* Zero coefficients and history buffer */
275 memset(f
->coeffs
, 0, ORDER
*3 * sizeof(filter_int
));
277 /* Zero the running average */
281 void INIT_FILTER(filter_int
* buf
)
283 do_init_filter(&filter
[0], buf
);
284 do_init_filter(&filter
[1], buf
+ ORDER
*3 + FILTER_HISTORY_SIZE
);
287 void ICODE_ATTR_DEMAC
APPLY_FILTER(int fileversion
, int channel
,
288 int32_t* data
, int count
)
290 if (fileversion
>= 3980)
291 do_apply_filter_3980(&filter
[channel
], data
, count
);
293 do_apply_filter_3970(&filter
[channel
], data
, count
);