apps/dsp_arm.S

   1 /***************************************************************************
   2  *             __________               __   ___.
   3  *   Open      \______   \ ____   ____ |  | _\_ |__   _______  ___
   4  *   Source     |       _//  _ \_/ ___\|  |/ /| __ \ /  _ \  \/  /
   5  *   Jukebox    |    |   (  <_> )  \___|    < | \_\ (  <_> > <  <
   6  *   Firmware   |____|_  /\____/ \___  >__|_ \|___  /\____/__/\_ \
   7  *                     \/            \/     \/    \/            \/
   8  * $Id$
   9  *
  10  * Copyright (C) 2006-2007 Thom Johansen
  11  *
  12  * This program is free software; you can redistribute it and/or
  13  * modify it under the terms of the GNU General Public License
  14  * as published by the Free Software Foundation; either version 2
  15  * of the License, or (at your option) any later version.
  16  *
  17  * This software is distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY
  18  * KIND, either express or implied.
  19  *
  20  ****************************************************************************/
  21  #include "config.h"
  22
   23 /****************************************************************************
   24  *  void channels_process_sound_chan_mono(int count, int32_t *buf[])
   25  *
       *  In-place mono downmix: each sample pair of buf[0] (left) and buf[1]
       *  (right) is replaced by (L >> 1) + (R >> 1), written to both buffers.
       *
   26  *  NOTE: The main loop processes two samples per iteration. A trailing odd
   27  *        sample (including count == 1) is handled at .mono_singlesample.
   28  *        count == 0 is not checked: the loop body would still run once.
   29  */
   30     .section .icode, "ax", %progbits
   31     .align  2
   32     .global channels_process_sound_chan_mono
   33     .type   channels_process_sound_chan_mono, %function
   34 channels_process_sound_chan_mono:
   35     @ input: r0 = count, r1 = buf
   36     stmfd   sp!, { r4, lr }            @ save r4 (used as scratch) and lr
   37                                        @
   38     ldmia   r1, { r1, r2 }             @ r1 = buf[0], r2 = buf[1]
   39     subs    r0, r0, #1                 @ odd: end at 0; even: end at -1
   40     beq     .mono_singlesample         @ Zero? Only one sample!
   41                                        @
   42 .monoloop:                             @ two samples per iteration
   43     ldmia   r1, { r3, r4 }             @ r3, r4 = Li0, Li1
   44     ldmia   r2, { r12, r14 }           @ r12, r14 = Ri0, Ri1
   45     mov     r3, r3, asr #1             @ Mo0 = Li0 / 2 + Ri0 / 2
   46     mov     r4, r4, asr #1             @ Mo1 = Li1 / 2 + Ri1 / 2
   47     add     r12, r3, r12, asr #1       @ r12 = Mo0
   48     add     r14, r4, r14, asr #1       @ r14 = Mo1
   49     subs    r0, r0, #2                 @ flags are consumed by bgt/ldmlt below
   50     stmia   r1!, { r12, r14 }          @ store Mo0, Mo1 to left, advance
   51     stmia   r2!, { r12, r14 }          @ store Mo0, Mo1 to right, advance
   52     bgt     .monoloop                  @ more than one sample left
   53                                        @
   54     ldmltfd sp!, { r4, pc }            @ if count was even, we're done
   55                                        @ otherwise fall through: one left
   56 .mono_singlesample:                    @
   57     ldr     r3, [r1]                   @ r3 = Ls
   58     ldr     r12, [r2]                  @ r12 = Rs
   59     mov     r3, r3, asr #1             @ Mo = Ls / 2 + Rs / 2
   60     add     r12, r3, r12, asr #1       @ r12 = Mo
   61     str     r12, [r1]                  @ store Mo
   62     str     r12, [r2]                  @ store Mo
   63                                        @
   64     ldmfd   sp!, { r4, pc }            @ restore r4 and return
   65     .size   channels_process_sound_chan_mono, \
   66                 .-channels_process_sound_chan_mono
  67
   68 /****************************************************************************
   69  *  void channels_process_sound_chan_karaoke(int count, int32_t *buf[])
       *
       *  In-place channel-difference ("karaoke") filter on buf[0] (left) and
       *  buf[1] (right):  Lo = (Li >> 1) - (Ri >> 1),  Ro = -Lo.
       *
   70  *  NOTE: The main loop processes two samples per iteration. A trailing odd
   71  *        sample (including count == 1) is handled at .karaoke_singlesample.
   72  *        count == 0 is not checked: the loop body would still run once.
   73  */
   74     .section .icode, "ax", %progbits
   75     .align  2
   76     .global channels_process_sound_chan_karaoke
   77     .type   channels_process_sound_chan_karaoke, %function
   78 channels_process_sound_chan_karaoke:
   79     @ input: r0 = count, r1 = buf
   80     stmfd   sp!, { r4, lr }            @ save r4 (used as scratch) and lr
   81                                        @
   82     ldmia   r1, { r1, r2 }             @ r1 = buf[0], r2 = buf[1]
   83     subs    r0, r0, #1                 @ odd: end at 0; even: end at -1
   84     beq     .karaoke_singlesample      @ Zero? Only one sample!
   85                                        @
   86 .karaokeloop:                          @ two samples per iteration
   87     ldmia   r1, { r3, r4 }             @ r3, r4  = Li0, Li1
   88     ldmia   r2, { r12, r14 }           @ r12, r14 = Ri0, Ri1
   89     mov     r3, r3, asr #1             @ Lo0 = Li0 / 2 - Ri0 / 2
   90     mov     r4, r4, asr #1             @ Lo1 = Li1 / 2 - Ri1 / 2
   91     sub     r3, r3, r12, asr #1        @ r3 = Lo0
   92     sub     r4, r4, r14, asr #1        @ r4 = Lo1
   93     rsb     r12, r3, #0                @ Ro0 = -Lo0 = Ri0 / 2 - Li0 / 2
   94     rsb     r14, r4, #0                @ Ro1 = -Lo1 = Ri1 / 2 - Li1 / 2
   95     subs    r0, r0, #2                 @ flags are consumed by bgt/ldmlt below
   96     stmia   r1!, { r3, r4 }            @ store Lo0, Lo1
   97     stmia   r2!, { r12, r14 }          @ store Ro0, Ro1
   98     bgt     .karaokeloop               @ more than one sample left
   99                                        @
  100     ldmltfd sp!, { r4, pc }            @ if count was even, we're done
  101                                        @ otherwise fall through: one left
  102 .karaoke_singlesample:                 @
  103     ldr     r3, [r1]                   @ r3 = Li
  104     ldr     r12, [r2]                  @ r12 = Ri
  105     mov     r3, r3, asr #1             @ Lo = Li / 2 - Ri / 2
  106     sub     r3, r3, r12, asr #1        @ r3 = Lo
  107     rsb     r12, r3, #0                @ Ro = -Lo = Ri / 2 - Li / 2
  108     str     r3, [r1]                   @ store Lo
  109     str     r12, [r2]                  @ store Ro
  110                                        @
  111     ldmfd   sp!, { r4, pc }            @ restore r4 and return
  112     .size   channels_process_sound_chan_karaoke, \
  113                 .-channels_process_sound_chan_karaoke
 114
 115 #if ARM_ARCH < 6
  116 /****************************************************************************
  117  *  void sample_output_mono(int count, struct dsp_data *data,
  118  *                          const int32_t *src[], int16_t *dst)
       *
       *  Converts src[0] to 16-bit output: each sample is rounded and shifted
       *  right by data->output_scale (first word of *data), clipped to
       *  -32768...+32767, and stored as one 32-bit word holding the same
       *  16-bit value in both halfwords.
       *
  119  *  NOTE: The main loop processes two samples per iteration. A trailing odd
  120  *        sample (including count == 1) is handled at .som_singlesample.
  121  *        count == 0 is not checked: the loop body would still run once.
  122  */
  123     .section .icode, "ax", %progbits
  124     .align  2
  125     .global sample_output_mono
  126     .type   sample_output_mono, %function
  127 sample_output_mono:
  128     @ input: r0 = count, r1 = data, r2 = src, r3 = dst
  129     stmfd   sp!, { r4-r6, lr }
  130
  131     ldr     r1, [r1]                   @ r1 = data->output_scale
  132     ldr     r2, [r2]                   @ r2 = src[0]
  133
  134     mov     r4, #1
  135     mov     r4, r4, lsl r1             @ r4 = 1 << (scale-1)
  136     mov     r4, r4, lsr #1             @ (rounding bias; 0 when scale == 0)
  137     mvn     r14, #0x8000               @ r14 = 0xffff7fff, needed for
  138                                        @ clipping and masking
  139     subs    r0, r0, #1                 @ odd: end at 0; even: end at -1
  140     beq     .som_singlesample          @ Zero? Only one sample!
  141
  142 .somloop:
  143     ldmia   r2!, { r5, r6 }            @ r5, r6 = next two input samples
  144     add     r5, r5, r4                 @ r5 = (r5 + 1<<(scale-1)) >> scale
  145     mov     r5, r5, asr r1
  146     mov     r12, r5, asr #15           @ r12 is 0 or -1 iff r5 fits in 16 bits
  147     teq     r12, r12, asr #31          @ Z set when no clipping is needed
  148     eorne   r5, r14, r5, asr #31       @ Clip (-32768...+32767)
  149     add     r6, r6, r4
  150     mov     r6, r6, asr r1             @ r6 = (r6 + 1<<(scale-1)) >> scale
  151     mov     r12, r6, asr #15
  152     teq     r12, r12, asr #31
  153     eorne   r6, r14, r6, asr #31       @ Clip (-32768...+32767)
  154
  155     and     r5, r5, r14, lsr #16       @ keep low 16 bits (mask 0x0000ffff)
  156     and     r6, r6, r14, lsr #16
  157     orr     r5, r5, r5, lsl #16        @ pack first 2 halfwords into 1 word
  158     orr     r6, r6, r6, lsl #16        @ pack last 2 halfwords into 1 word
  159     stmia   r3!, { r5, r6 }            @ write two output words, advance dst
  160
  161     subs    r0, r0, #2
  162     bgt     .somloop
  163
  164     ldmltfd sp!, { r4-r6, pc }         @ even 'count'? return
  165
  166 .som_singlesample:
  167     ldr     r5, [r2]                   @ do odd sample
  168     add     r5, r5, r4                 @ r5 = (r5 + 1<<(scale-1)) >> scale
  169     mov     r5, r5, asr r1
  170     mov     r12, r5, asr #15
  171     teq     r12, r12, asr #31
  172     eorne   r5, r14, r5, asr #31       @ Clip (-32768...+32767)
  173
  174     and     r5, r5, r14, lsr #16       @ pack 2 halfwords into 1 word
  175     orr     r5, r5, r5, lsl #16
  176     str     r5, [r3]
  177
  178     ldmfd   sp!, { r4-r6, pc }
  179     .size   sample_output_mono, .-sample_output_mono
 180
  181 /****************************************************************************
  182  * void sample_output_stereo(int count, struct dsp_data *data,
  183  *                           const int32_t *src[], int16_t *dst)
       *
       *  Converts src[0] (left) and src[1] (right) to 16-bit output: each
       *  sample is rounded and shifted right by data->output_scale (first word
       *  of *data) and clipped to -32768...+32767. Each left/right pair is
       *  stored as one 32-bit word: left in the low halfword, right in the
       *  high halfword.
       *
  184  *  NOTE: The main loop processes two sample pairs per iteration. A trailing
  185  *        odd pair (including count == 1) is handled at .sos_singlesample.
  186  *        count == 0 is not checked: the loop body would still run once.
  187  */
  188     .section .icode, "ax", %progbits
  189     .align  2
  190     .global sample_output_stereo
  191     .type   sample_output_stereo, %function
  192 sample_output_stereo:
  193     @ input: r0 = count, r1 = data, r2 = src, r3 = dst
  194     stmfd   sp!, { r4-r9, lr }
  195
  196     ldr     r1, [r1]                   @ r1 = data->output_scale
  197     ldmia   r2, { r2, r5 }             @ r2 = src[0], r5 = src[1]
  198
  199     mov     r4, #1
  200     mov     r4, r4, lsl r1             @ r4 = 1 << (scale-1)
  201     mov     r4, r4, lsr #1             @ (rounding bias; 0 when scale == 0)
  202
  203     mvn     r14, #0x8000               @ r14 = 0xffff7fff, needed for
  204                                        @ clipping and masking
  205     subs    r0, r0, #1                 @ odd: end at 0; even: end at -1
  206     beq     .sos_singlesample          @ Zero? Only one sample!
  207
  208 .sosloop:
  209     ldmia   r2!, { r6, r7 }            @ 2 left
  210     ldmia   r5!, { r8, r9 }            @ 2 right
  211
  212     add     r6, r6, r4                 @ r6 = (r6 + 1<<(scale-1)) >> scale
  213     mov     r6, r6, asr r1
  214     mov     r12, r6, asr #15           @ r12 is 0 or -1 iff r6 fits in 16 bits
  215     teq     r12, r12, asr #31          @ Z set when no clipping is needed
  216     eorne   r6, r14, r6, asr #31       @ Clip (-32768...+32767)
  217     add     r7, r7, r4
  218     mov     r7, r7, asr r1             @ r7 = (r7 + 1<<(scale-1)) >> scale
  219     mov     r12, r7, asr #15
  220     teq     r12, r12, asr #31
  221     eorne   r7, r14, r7, asr #31       @ Clip (-32768...+32767)
  222
  223     add     r8, r8, r4                 @ r8 = (r8 + 1<<(scale-1)) >> scale
  224     mov     r8, r8, asr r1
  225     mov     r12, r8, asr #15
  226     teq     r12, r12, asr #31
  227     eorne   r8, r14, r8, asr #31       @ Clip (-32768...+32767)
  228     add     r9, r9, r4                 @ r9 = (r9 + 1<<(scale-1)) >> scale
  229     mov     r9, r9, asr r1
  230     mov     r12, r9, asr #15
  231     teq     r12, r12, asr #31
  232     eorne   r9, r14, r9, asr #31       @ Clip (-32768...+32767)
  233
  234     and     r6, r6, r14, lsr #16       @ pack first 2 halfwords into 1 word
  235     orr     r8, r6, r8, asl #16        @ r8 = (R0 << 16) | (L0 & 0xffff)
  236     and     r7, r7, r14, lsr #16       @ pack last 2 halfwords into 1 word
  237     orr     r9, r7, r9, asl #16        @ r9 = (R1 << 16) | (L1 & 0xffff)
  238
  239     stmia   r3!, { r8, r9 }            @ write two output words, advance dst
  240
  241     subs    r0, r0, #2
  242     bgt     .sosloop
  243
  244     ldmltfd sp!, { r4-r9, pc }         @ even 'count'? return
  245
  246 .sos_singlesample:
  247     ldr     r6, [r2]                   @ left odd sample
  248     ldr     r8, [r5]                   @ right odd sample
  249
  250     add     r6, r6, r4                 @ r6 = (r6 + 1<<(scale-1)) >> scale
  251     mov     r6, r6, asr r1
  252     mov     r12, r6, asr #15
  253     teq     r12, r12, asr #31
  254     eorne   r6, r14, r6, asr #31       @ Clip (-32768...+32767)
  255     add     r8, r8, r4                 @ r8 = (r8 + 1<<(scale-1)) >> scale
  256     mov     r8, r8, asr r1
  257     mov     r12, r8, asr #15
  258     teq     r12, r12, asr #31
  259     eorne   r8, r14, r8, asr #31       @ Clip (-32768...+32767)
  260
  261     and     r6, r6, r14, lsr #16       @ pack 2 halfwords into 1 word
  262     orr     r8, r6, r8, asl #16        @ r8 = (R << 16) | (L & 0xffff)
  263
  264     str     r8, [r3]
  265
  266     ldmfd   sp!, { r4-r9, pc }
  267     .size   sample_output_stereo, .-sample_output_stereo
 268 #endif /* ARM_ARCH < 6 */
 269
  270 /****************************************************************************
  271  * void apply_crossfeed(int count, int32_t* src[])
       *
       * Processes src[0] (left) and src[1] (right) in place: each output is
       * the gain-scaled input plus a first-order filtered copy of the
       * opposite channel taken from a 13-sample delay line.
       *
       * State lives in the external symbol crossfeed_data. Word layout as
       * accessed by this routine:
       *   [0]      direct gain
       *   [1..3]   b0, b1, a1 (filter coefs)
       *   [4..7]   filter history
       *   [8..33]  delay line, 13 left/right pairs
       *   [34]     current delay line pointer
       *
       * NOTE: count is only tested after the first iteration, so the loop
       *       body always runs at least once.
  272  */
  273     .section .text
  274     .global apply_crossfeed
  275 apply_crossfeed:
  276     @ unfortunately, we ended up in a bit of a register squeeze here, and need
  277     @ to keep the count on the stack :/
  278     stmdb   sp!, { r4-r11, lr }        @ stack modified regs
  279     ldmia   r1, { r2-r3 }              @ r2 = src[0], r3 = src[1]
  280
  281     ldr     r1, =crossfeed_data
  282     ldmia   r1!, { r4-r11 }            @ load direct gain and filter data
  283     add     r12, r1, #13*4*2           @ calculate end of delay
  284     stmdb   sp!, { r0, r12 }           @ stack count and end of delay adr
  285     ldr     r0, [r1, #13*4*2]          @ fetch current delay line address
  286
  287     /* Register usage in loop:
  288      * r0 = &delay[index][0], r1 = accumulator high, r2 = src[0], r3 = src[1],
  289      * r4 = direct gain, r5-r7 = b0, b1, a1 (filter coefs),
  290      * r8-r11 = filter history, r12 = temp, r14 = accumulator low
  291      */
  292 .cfloop:
  293     smull   r14, r1, r6, r8            @ acc = b1*dr[n - 1]
  294     smlal   r14, r1, r7, r9            @ acc += a1*y_l[n - 1]
  295     ldr     r8, [r0, #4]               @ r8 = dr[n]
  296     smlal   r14, r1, r5, r8            @ acc += b0*dr[n]
  297     mov     r9, r1, lsl #1             @ fix format for filter history
  298     ldr     r12, [r2]                  @ load left input
  299     smlal   r14, r1, r4, r12           @ acc += gain*x_l[n]
  300     mov     r1, r1, lsl #1             @ fix format
  301     str     r1, [r2], #4               @ save result
  302
  303     smull   r14, r1, r6, r10           @ acc = b1*dl[n - 1]
  304     smlal   r14, r1, r7, r11           @ acc += a1*y_r[n - 1]
  305     ldr     r10, [r0]                  @ r10 = dl[n]
  306     str     r12, [r0], #4              @ save left input to delay line
  307     smlal   r14, r1, r5, r10           @ acc += b0*dl[n]
  308     mov     r11, r1, lsl #1            @ fix format for filter history
  309     ldr     r12, [r3]                  @ load right input
  310     smlal   r14, r1, r4, r12           @ acc += gain*x_r[n]
  311     str     r12, [r0], #4              @ save right input to delay line
  312     mov     r1, r1, lsl #1             @ fix format
  313     str     r1, [r3], #4               @ save result
  314
  315     ldr     r12, [sp, #4]              @ fetch delay line end addr from stack
  316     cmp     r0, r12                    @ need to wrap to start of delay?
  317     subeq   r0, r0, #13*4*2            @ wrap back delay line ptr to start
  318
  319     ldr     r1, [sp]                   @ fetch count from stack
  320     subs    r1, r1, #1                 @ are we finished?
  321     strne   r1, [sp]                   @ nope, save count back to stack
  322     bne     .cfloop
  323
  324     @ save data back to struct
  325     ldr     r12, =crossfeed_data + 4*4 @ r12 = address of filter history
  326     stmia   r12, { r8-r11 }            @ save filter history
  327     str     r0, [r12, #30*4]           @ save delay line ptr (slot read at entry)
  328     add     sp, sp, #8                 @ remove temp variables from stack
  329     ldmia   sp!, { r4-r11, pc }
  330     .size   apply_crossfeed, .-apply_crossfeed
 331
  332 /****************************************************************************
  333  * int dsp_downsample(int count, struct dsp_data *data,
  334  *                    int32_t *src[], int32_t *dst[])
       *
       * Linear-interpolation resampler. For each channel, walks a 16.16
       * fixed-point phase through src[ch] in steps of resample_data.delta and
       * writes s[pos - 1] + frac * (s[pos] - s[pos - 1]) to dst[ch] while
       * pos < count. The last input sample of each channel is kept in
       * last_sample[ch] and serves as s[-1] for the next call.
       *
       * Fields of *data as accessed here (byte offsets): +4 num_channels,
       * +8 resample_data.delta, +12 resample_data.phase, +16 last_sample[].
       *
       * Returns (in r0) the number of samples written to dst[0].
  335  */
  336     .section    .text
  337     .global     dsp_downsample
  338 dsp_downsample:
  339     stmdb   sp!, { r4-r11, lr }     @ stack modified regs
  340     ldmib   r1, { r5-r6 }           @ r5 = num_channels,r6 = resample_data.delta
  341     sub     r5, r5, #1              @ pre-decrement num_channels for use
  342     add     r4, r1, #12             @ r4 = &resample_data.phase
  343     mov     r12, #0xff
  344     orr     r12, r12, #0xff00       @ r12 = 0xffff
  345 .dschannel_loop:
  346     ldr     r1, [r4]                @ r1 = resample_data.phase
  347     ldr     r7, [r2, r5, lsl #2]    @ r7 = s = src[ch - 1]
  348     ldr     r8, [r3, r5, lsl #2]    @ r8 = d = dst[ch - 1]
  349     add     r9, r4, #4              @ r9 = &last_sample[0]
  350     ldr     r10, [r9, r5, lsl #2]   @ r10 = last_sample[ch - 1]
  351     sub     r11, r0, #1             @ r11 = count - 1
  352     ldr     r14, [r7, r11, lsl #2]  @ load last sample in s[] ...
  353     str     r14, [r9, r5, lsl #2]   @ and write as next frame's last_sample
  354     movs    r9, r1, lsr #16         @ r9 = pos = phase >> 16
  355     ldreq   r11, [r7]               @ if pos = 0, load s[0] and jump into loop
  356     beq     .dsuse_last_start       @ (r10 = last_sample stands in for s[-1])
  357     cmp     r9, r0                  @ if pos >= count, we're already done
  358     bge     .dsloop_skip
  359
  360     @ Register usage in loop:
  361     @ r0 = count, r1 = phase, r4 = &resample_data.phase, r5 = cur_channel,
  362     @ r6 = delta, r7 = s, r8 = d, r9 = pos, r10 = s[pos - 1], r11 = s[pos]
  363 .dsloop:
  364     add     r9, r7, r9, lsl #2      @ r9 = &s[pos]
  365     ldmda   r9, { r10, r11 }        @ r10 = s[pos - 1], r11 = s[pos]
  366 .dsuse_last_start:
  367     sub     r11, r11, r10           @ r11 = diff = s[pos] - s[pos - 1]
  368     @ keep frac in lower bits to take advantage of multiplier early termination
  369     and     r9, r1, r12             @ frac = phase & 0xffff
  370     smull   r9, r14, r11, r9        @ r14:r9 = diff*frac (64-bit)
  371     add     r1, r1, r6              @ phase += delta
  372     add     r10, r10, r9, lsr #16   @ r10 = out = s[pos - 1] + frac*diff
  373     add     r10, r10, r14, lsl #16  @ (product >> 16, from low and high words)
  374     str     r10, [r8], #4           @ *d++ = out
  375     mov     r9, r1, lsr #16         @ pos = phase >> 16
  376     cmp     r9, r0                  @ pos < count?
  377     blt     .dsloop                 @ yup, do more samples
  378 .dsloop_skip:
  379     subs    r5, r5, #1
  380     bpl     .dschannel_loop         @ if (--ch) >= 0, do another channel
  381     sub     r1, r1, r0, lsl #16     @ wrap phase back to start
  382     str     r1, [r4]                @ store back
  383     ldr     r1, [r3]                @ r1 = dst[0] (channel 0 was done last)
  384     sub     r8, r8, r1              @ bytes written = d - dst[0]
  385     mov     r0, r8, lsr #2          @ convert bytes->samples
  386     ldmia   sp!, { r4-r11, pc }     @ ... and we're out
  387     .size   dsp_downsample, .-dsp_downsample
 388
  389 /****************************************************************************
  390  * int dsp_upsample(int count, struct dsp_data *dsp,
  391  *                  int32_t *src[], int32_t *dst[])
       *
       * Linear-interpolation resampler for delta < 1.0 (16.16 fixed point).
       * For each channel and each input sample, writes
       * s[pos - 1] + frac * (s[pos] - s[pos - 1]) to dst[ch] repeatedly,
       * adding delta to frac each time, until frac overflows 16 bits. The
       * last input sample of each channel is kept in last_sample[ch] and
       * serves as s[-1] for the next call.
       *
       * Fields of *dsp as accessed here (byte offsets): +4 num_channels,
       * +8 resample_data.delta, +12 resample_data.phase, +16 last_sample[].
       *
       * Returns (in r0) the number of samples written to dst[0].
  392  */
  393     .section    .text
  394     .global     dsp_upsample
  395 dsp_upsample:
  396     stmfd   sp!, { r4-r11, lr }     @ stack modified regs
  397     ldmib   r1, { r5-r6 }           @ r5 = num_channels,r6 = resample_data.delta
  398     sub     r5, r5, #1              @ pre-decrement num_channels for use
  399     add     r4, r1, #12             @ r4 = &resample_data.phase
  400     mov     r6, r6, lsl #16         @ we'll use carry to detect pos increments
  401     stmfd   sp!, { r0, r4 }         @ stack count and &resample_data.phase
  402 .uschannel_loop:
  403     ldr     r12, [r4]               @ r12 = resample_data.phase
  404     ldr     r7, [r2, r5, lsl #2]    @ r7 = s = src[ch - 1]
  405     ldr     r8, [r3, r5, lsl #2]    @ r8 = d = dst[ch - 1]
  406     add     r9, r4, #4              @ r9 = &last_sample[0]
  407     mov     r1, r12, lsl #16        @ we'll use carry to detect pos increments
  408     sub     r11, r0, #1             @ r11 = count - 1
  409     ldr     r14, [r7, r11, lsl #2]  @ load last sample in s[] ...
  410     ldr     r10, [r9, r5, lsl #2]   @ r10 = last_sample[ch - 1]
  411     str     r14, [r9, r5, lsl #2]   @ and write as next frame's last_sample
  412     movs    r14, r12, lsr #16       @ pos = resample_data.phase >> 16
  413     beq     .usstart_0              @ pos = 0
  414     cmp     r14, r0                 @ if pos >= count, we're already done
  415     bge     .usloop_skip
  416     add     r7, r7, r14, lsl #2     @ r7 = &s[pos]
  417     ldr     r10, [r7, #-4]          @ r10 = s[pos - 1]
  418     b       .usstart_0
  419
  420     @ Register usage in loop:
  421     @ r0 = input samples left, r1 = frac << 16, r4 = frac, r5 = cur_channel,
  422     @ r6 = delta << 16, r7 = s, r8 = d, r9 = diff, r10 = s[pos - 1], r11 = s[pos]
  423 .usloop_1:
  424     mov     r10, r11                @ r10 = previous sample
  425 .usstart_0:
  426     ldr     r11, [r7], #4           @ r11 = next sample
  427     mov     r4, r1, lsr #16         @ r4 = frac = (phase << 16) >> 16
  428     sub     r9, r11, r10            @ r9 = diff = s[pos] - s[pos - 1]
  429 .usloop_0:
  430     smull   r12, r14, r4, r9        @ r14:r12 = frac*diff (64-bit)
  431     adds    r1, r1, r6              @ phase += delta << 16
  432     mov     r4, r1, lsr #16         @ r4 = frac for the next output
  433     add     r14, r10, r14, lsl #16
  434     add     r14, r14, r12, lsr #16  @ r14 = out = s[pos - 1] + frac*diff
  435     str     r14, [r8], #4           @ *d++ = out
  436     bcc     .usloop_0               @ no carry from adds: same pos, output again
  437     subs    r0, r0, #1              @ if count > 0, do another sample
  438     bgt     .usloop_1
  439 .usloop_skip:
  440     subs    r5, r5, #1
  441     ldmfd   sp, { r0, r4 }          @ reload count and &resample_data.phase
  442     bpl     .uschannel_loop         @ if (--ch) >= 0, do another channel
  443     mov     r1, r1, lsr #16         @ wrap phase back to start of next frame
  444     ldr     r2, [r3]                @ r2 = dst[0] (channel 0 was done last)
  445     str     r1, [r4]                @ store phase
  446     sub     r8, r8, r2              @ bytes written = d - dst[0]
  447     mov     r0, r8, lsr #2          @ convert bytes->samples
  448     add     sp, sp, #8              @ adjust stack for temp variables
  449     ldmfd   sp!, { r4-r11, pc }     @ ... and we're out
  450     .size       dsp_upsample, .-dsp_upsample
 451
  452 /****************************************************************************
  453  *  void dsp_apply_gain(int count, struct dsp_data *data, int32_t *buf[])
       *
       *  Multiplies every sample of each channel buffer in place by the gain
       *  word at data + 32, keeping the 64-bit product shifted right by 23.
       *  The channel count is the word at data + 4. The inner loop handles
       *  two samples per iteration; an odd trailing sample (including
       *  count == 1) is handled at .dag_singlesample.
  454  */
  455     .section .icode, "ax", %progbits
  456     .align  2
  457     .global dsp_apply_gain
  458     .type   dsp_apply_gain, %function
  459 dsp_apply_gain:
  460     @ input: r0 = count, r1 = data, r2 = buf[]
  461     stmfd   sp!, { r4-r8, lr }
  462
  463     ldr     r3, [r1,  #4]           @ r3 = data->num_channels
  464     ldr     r4, [r1, #32]           @ r4 = data->gain
  465
  466 .dag_outerloop:
  467     ldr     r1, [r2], #4            @ r1 = next channel buffer (*buf++)
  468     subs    r12, r0, #1             @ r12 = count - 1 (r0 is left untouched)
  469     beq     .dag_singlesample       @ Zero? Only one sample!
  470
  471 .dag_innerloop:
  472     ldmia   r1, { r5, r6 }          @ load r5, r6 from r1
  473     smull   r7, r8, r5, r4          @ r7 = FRACMUL_SHL(r5, r4, 8)
  474     smull   r14, r5, r6, r4         @ r14 = FRACMUL_SHL(r6, r4, 8)
  475     subs    r12, r12, #2            @ flags are consumed by bgt/blt below
  476     mov     r7, r7, lsr #23         @ result = (hi:lo) >> 23:
  477     mov     r14, r14, lsr #23       @   low word >> 23 ...
  478     orr     r7, r7, r8, asl #9      @   ... combined with high word << 9
  479     orr     r14, r14, r5, asl #9
  480     stmia   r1!, { r7, r14 }        @ save r7, r14 to [r1] and increment r1
  481     bgt     .dag_innerloop          @ end of inner loop
  482
  483     blt     .dag_evencount          @ < 0? even count
  484
  485 .dag_singlesample:
  486     ldr     r5, [r1]                @ handle odd sample
  487     smull   r7, r8, r5, r4          @ r7 = FRACMUL_SHL(r5, r4, 8)
  488     mov     r7, r7, lsr #23
  489     orr     r7, r7, r8, asl #9
  490     str     r7, [r1]
  491
  492 .dag_evencount:
  493     subs    r3, r3, #1              @ one channel done
  494     bgt     .dag_outerloop          @ end of outer loop
  495
  496     ldmfd   sp!, { r4-r8, pc }
  497     .size   dsp_apply_gain, .-dsp_apply_gain