apps/eq_arm.S

   1 /***************************************************************************
   2  *             __________               __   ___.
   3  *   Open      \______   \ ____   ____ |  | _\_ |__   _______  ___
   4  *   Source     |       _//  _ \_/ ___\|  |/ /| __ \ /  _ \  \/  /
   5  *   Jukebox    |    |   (  <_> )  \___|    < | \_\ (  <_> > <  <
   6  *   Firmware   |____|_  /\____/ \___  >__|_ \|___  /\____/__/\_ \
   7  *                     \/            \/     \/    \/            \/
   8  * $Id$
   9  *
  10  * Copyright (C) 2006 Thom Johansen
  11  *
  12  * All files in this archive are subject to the GNU General Public License.
  13  * See the file COPYING in the source tree root for full license agreement.
  14  *
  15  * This software is distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY
  16  * KIND, either express or implied.
  17  *
  18  ****************************************************************************/
  19
  20     .text
  21     .global eq_filter
  22 eq_filter:
  23     ldr r12, [sp]             @ get shift parameter
  24     stmdb sp!, { r0-r11, lr } @ save all params and clobbered regs
  25     ldmia r1!, { r4-r8 }      @ load coefs
  26     mov r10, r1               @ loop prelude expects filter struct addr in r10
  27
  28 .filterloop:
  29     ldr r9, [sp]            @ get pointer to this channels data
  30     add r0, r9, #4
  31     str r0, [sp]            @ save back pointer to next channels data
  32     ldr r9, [r9]            @ r9 = x[]
  33     ldr r14, [sp, #8]       @ r14 = numsamples
  34     ldmia r10, { r0-r3 }    @ load history, r10 should be filter struct addr
  35     str r10, [sp, #4]       @ save it for loop end
  36 .loop:
  37     /* r0-r3 = history, r4-r8 = coefs, r9 = x[], r10..r11 = accumulator,
  38        r12 = shift amount, r14 = number of samples.
  39        See eq_cf.S for explanation of what this loop does. Primary difference
  40        is the reordering of the equation we do here, which is done for register
  41        reuse reasons, we're pretty short on regs.
  42      */
  43     smull r10, r11, r6, r1  @ acc = b2*x[i - 2]
  44     mov r1, r0              @ fix input history
  45     smlal r10, r11, r5, r0  @ acc += b1*x[i - 1]
  46     ldr r0, [r9]            @ load input and fix history in same operation
  47     smlal r10, r11, r4, r0  @ acc += b0*x[i]
  48     smlal r10, r11, r7, r2  @ acc += a1*y[i - 1]
  49     smlal r10, r11, r8, r3  @ acc += a2*y[i - 2]
  50     mov r3, r2              @ fix output history
  51     mov r2, r11, lsl r12    @ get result
  52     @ TODO: arm makes it easy to mix in lower bits from r10 for extended
  53     @ precision here, but we don't have enough regs to save the shift factor
  54     @ we would need (32 - r12).
  55     str r2, [r9], #4        @ save result
  56     subs r14, r14, #1       @ are we done with this channel?
  57     bne .loop
  58
  59     ldr r10, [sp, #4]       @ load filter struct pointer
  60     stmia r10!, { r0-r3 }   @ save back history
  61     ldr r11, [sp, #12]      @ load number of channels
  62     subs r11, r11, #1       @ all channels processed?
  63     strne r11, [sp, #12]
  64     bne .filterloop
  65
  66     add sp, sp, #16         @ compensate for temp storage
  67     ldmia sp!, { r4-r11, pc }