apps/codecs/demac/libdemac/filter.c

   1 /*
   2
   3 libdemac - A Monkey's Audio decoder
   4
   5 $Id$
   6
   7 Copyright (C) Dave Chapman 2007
   8
   9 This program is free software; you can redistribute it and/or modify
  10 it under the terms of the GNU General Public License as published by
  11 the Free Software Foundation; either version 2 of the License, or
  12 (at your option) any later version.
  13
  14 This program is distributed in the hope that it will be useful,
  15 but WITHOUT ANY WARRANTY; without even the implied warranty of
  16 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  17 GNU General Public License for more details.
  18
  19 You should have received a copy of the GNU General Public License
  20 along with this program; if not, write to the Free Software
  21 Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110, USA
  22
  23 */
  24
  25 #include <string.h>
  26 #include <inttypes.h>
  27
  28 #include "demac.h"
  29 #include "filter.h"
  30 #include "demac_config.h"
  31
  32 #if FILTER_BITS == 32
  33
  34 #if defined(CPU_ARM) && (ARM_ARCH == 4)
  35 #include "vector_math32_armv4.h"
  36 #else
  37 #include "vector_math_generic.h"
  38 #endif
  39
  40 #else /* FILTER_BITS == 16 */
  41
  42 #ifdef CPU_COLDFIRE
  43 #include "vector_math16_cf.h"
  44 #elif defined(CPU_ARM) && (ARM_ARCH >= 6)
  45 #include "vector_math16_armv6.h"
  46 #elif defined(CPU_ARM) && (ARM_ARCH >= 5)
  47 /* Assume all our ARMv5 targets are ARMv5te(j) */
  48 #include "vector_math16_armv5te.h"
  49 #elif (defined(__i386__) || defined(__i486__))  && defined(__MMX__) \
  50     || defined(__x86_64__)
  51 #include "vector_math16_mmx.h"
  52 #else
  53 #include "vector_math_generic.h"
  54 #endif
  55
  56 #endif /* FILTER_BITS */
  57
  58 struct filter_t {
  59     filter_int* coeffs; /* ORDER entries */
  60
  61     /* We store all the filter delays in a single buffer */
  62     filter_int* history_end;
  63
  64     filter_int* delay;
  65     filter_int* adaptcoeffs;
  66
  67     int avg;
  68 };
  69
  70 /* We name the functions according to the ORDER and FRACBITS
  71    pre-processor symbols and build multiple .o files from this .c file
  72    - this increases code-size but gives the compiler more scope for
  73    optimising the individual functions, as well as replacing a lot of
  74    variables with constants.
  75 */
  76
  77 #if FRACBITS == 11
  78   #if ORDER == 16
  79      #define INIT_FILTER   init_filter_16_11
  80      #define APPLY_FILTER apply_filter_16_11
  81   #elif ORDER == 64
  82      #define INIT_FILTER  init_filter_64_11
  83      #define APPLY_FILTER apply_filter_64_11
  84   #endif
  85 #elif FRACBITS == 13
  86   #define INIT_FILTER  init_filter_256_13
  87   #define APPLY_FILTER apply_filter_256_13
  88 #elif FRACBITS == 10
  89   #define INIT_FILTER  init_filter_32_10
  90   #define APPLY_FILTER apply_filter_32_10
  91 #elif FRACBITS == 15
  92   #define INIT_FILTER  init_filter_1280_15
  93   #define APPLY_FILTER apply_filter_1280_15
  94 #endif
  95
  96 /* Some macros to handle the fixed-point stuff */
  97
  98 /* Convert from (32-FRACBITS).FRACBITS fixed-point format to an
  99    integer (rounding to nearest). */
 100 #define FP_HALF  (1 << (FRACBITS - 1))   /* 0.5 in fixed-point format. */
 101 #define FP_TO_INT(x) ((x + FP_HALF) >> FRACBITS)  /* round(x) */
 102
 103 #ifdef CPU_ARM
 104 #if ARM_ARCH >= 6
 105 #define SATURATE(x) ({int __res; asm("ssat %0, #16, %1" : "=r"(__res) : "r"(x)); __res; })
 106 #else /* ARM_ARCH < 6 */
 107 /* Keeping the asr #31 outside of the asm allows loads to be scheduled between
 108    it and the rest of the block on ARM9E, with the load's result latency filled
 109    by the other calculations. */
 110 #define SATURATE(x) ({ \
 111     int __res = (x) >> 31; \
 112     asm volatile ( \
 113         "teq %0, %1, asr #15\n\t" \
 114         "moveq %0, %1\n\t" \
 115         "eorne %0, %0, #0xff\n\t" \
 116         "eorne %0, %0, #0x7f00" \
 117         : "+r" (__res) : "r" (x) : "cc" \
 118     ); \
 119     __res; \
 120 })
 121 #endif /* ARM_ARCH */
 122 #else /* CPU_ARM */
 123 #define SATURATE(x) (LIKELY((x) == (int16_t)(x)) ? (x) : ((x) >> 31) ^ 0x7FFF)
 124 #endif
 125
 126 /* Apply the filter with state f to count entries in data[] */
 127
 128 static void ICODE_ATTR_DEMAC do_apply_filter_3980(struct filter_t* f,
 129                                                   int32_t* data, int count)
 130 {
 131     int res;
 132     int absres;
 133
 134 #ifdef PREPARE_SCALARPRODUCT
 135     PREPARE_SCALARPRODUCT
 136 #endif
 137
 138     while(LIKELY(count--))
 139     {
 140 #ifdef FUSED_VECTOR_MATH
 141         if (LIKELY(*data != 0)) {
 142             if (*data < 0)
 143                 res = vector_sp_add(f->coeffs, f->delay - ORDER,
 144                                     f->adaptcoeffs - ORDER);
 145             else
 146                 res = vector_sp_sub(f->coeffs, f->delay - ORDER,
 147                                     f->adaptcoeffs - ORDER);
 148         } else {
 149             res = scalarproduct(f->coeffs, f->delay - ORDER);
 150         }
 151         res = FP_TO_INT(res);
 152 #else
 153         res = FP_TO_INT(scalarproduct(f->coeffs, f->delay - ORDER));
 154
 155         if (LIKELY(*data != 0)) {
 156             if (*data < 0)
 157                 vector_add(f->coeffs, f->adaptcoeffs - ORDER);
 158             else
 159                 vector_sub(f->coeffs, f->adaptcoeffs - ORDER);
 160         }
 161 #endif
 162
 163         res += *data;
 164
 165         *data++ = res;
 166
 167         /* Update the output history */
 168         *f->delay++ = SATURATE(res);
 169
 170         /* Version 3.98 and later files */
 171
 172         /* Update the adaption coefficients */
 173         absres = (res < 0 ? -res : res);
 174
 175         if (UNLIKELY(absres > 3 * f->avg))
 176             *f->adaptcoeffs = ((res >> 25) & 64) - 32;
 177         else if (3 * absres > 4 * f->avg)
 178             *f->adaptcoeffs = ((res >> 26) & 32) - 16;
 179         else if (LIKELY(absres > 0))
 180             *f->adaptcoeffs = ((res >> 27) & 16) - 8;
 181         else
 182             *f->adaptcoeffs = 0;
 183
 184         f->avg += (absres - f->avg) / 16;
 185
 186         f->adaptcoeffs[-1] >>= 1;
 187         f->adaptcoeffs[-2] >>= 1;
 188         f->adaptcoeffs[-8] >>= 1;
 189
 190         f->adaptcoeffs++;
 191
 192         /* Have we filled the history buffer? */
 193         if (UNLIKELY(f->delay == f->history_end)) {
 194             memmove(f->coeffs + ORDER, f->delay - (ORDER*2),
 195                     (ORDER*2) * sizeof(filter_int));
 196             f->adaptcoeffs = f->coeffs + ORDER*2;
 197             f->delay = f->coeffs + ORDER*3;
 198         }
 199     }
 200 }
 201
 202 static void ICODE_ATTR_DEMAC do_apply_filter_3970(struct filter_t* f,
 203                                                   int32_t* data, int count)
 204 {
 205     int res;
 206
 207 #ifdef PREPARE_SCALARPRODUCT
 208     PREPARE_SCALARPRODUCT
 209 #endif
 210
 211     while(LIKELY(count--))
 212     {
 213 #ifdef FUSED_VECTOR_MATH
 214         if (LIKELY(*data != 0)) {
 215             if (*data < 0)
 216                 res = vector_sp_add(f->coeffs, f->delay - ORDER,
 217                                     f->adaptcoeffs - ORDER);
 218             else
 219                 res = vector_sp_sub(f->coeffs, f->delay - ORDER,
 220                                     f->adaptcoeffs - ORDER);
 221         } else {
 222             res = scalarproduct(f->coeffs, f->delay - ORDER);
 223         }
 224         res = FP_TO_INT(res);
 225 #else
 226         res = FP_TO_INT(scalarproduct(f->coeffs, f->delay - ORDER));
 227
 228         if (LIKELY(*data != 0)) {
 229             if (*data < 0)
 230                 vector_add(f->coeffs, f->adaptcoeffs - ORDER);
 231             else
 232                 vector_sub(f->coeffs, f->adaptcoeffs - ORDER);
 233         }
 234 #endif
 235
 236         /* Convert res from (32-FRACBITS).FRACBITS fixed-point format to an
 237            integer (rounding to nearest) and add the input value to
 238            it */
 239         res += *data;
 240
 241         *data++ = res;
 242
 243         /* Update the output history */
 244         *f->delay++ = SATURATE(res);
 245
 246         /* Version ??? to < 3.98 files (untested) */
 247         f->adaptcoeffs[0] = (res == 0) ? 0 : ((res >> 28) & 8) - 4;
 248         f->adaptcoeffs[-4] >>= 1;
 249         f->adaptcoeffs[-8] >>= 1;
 250
 251         f->adaptcoeffs++;
 252
 253         /* Have we filled the history buffer? */
 254         if (UNLIKELY(f->delay == f->history_end)) {
 255             memmove(f->coeffs + ORDER, f->delay - (ORDER*2),
 256                     (ORDER*2) * sizeof(filter_int));
 257             f->adaptcoeffs = f->coeffs + ORDER*2;
 258             f->delay = f->coeffs + ORDER*3;
 259         }
 260     }
 261 }
 262
 263 static struct filter_t filter[2] IBSS_ATTR_DEMAC;
 264
 265 static void do_init_filter(struct filter_t* f, filter_int* buf)
 266 {
 267     f->coeffs = buf;
 268     f->history_end = buf + ORDER*3 + FILTER_HISTORY_SIZE;
 269
 270     /* Init pointers */
 271     f->adaptcoeffs = f->coeffs + ORDER*2;
 272     f->delay = f->coeffs + ORDER*3;
 273
 274     /* Zero coefficients and history buffer */
 275     memset(f->coeffs, 0, ORDER*3 * sizeof(filter_int));
 276
 277     /* Zero the running average */
 278     f->avg = 0;
 279 }
 280
 281 void INIT_FILTER(filter_int* buf)
 282 {
 283     do_init_filter(&filter[0], buf);
 284     do_init_filter(&filter[1], buf + ORDER*3 + FILTER_HISTORY_SIZE);
 285 }
 286
 287 void ICODE_ATTR_DEMAC APPLY_FILTER(int fileversion, int channel,
 288                                    int32_t* data, int count)
 289 {
 290     if (fileversion >= 3980)
 291         do_apply_filter_3980(&filter[channel], data, count);
 292     else
 293         do_apply_filter_3970(&filter[channel], data, count);
 294 }