apps/codecs/libwavpack/arm.S

   1 /***************************************************************************
   2  *             __________               __   ___.
   3  *   Open      \______   \ ____   ____ |  | _\_ |__   _______  ___
   4  *   Source     |       _//  _ \_/ ___\|  |/ /| __ \ /  _ \  \/  /
   5  *   Jukebox    |    |   (  <_> )  \___|    < | \_\ (  <_> > <  <
   6  *   Firmware   |____|_  /\____/ \___  >__|_ \|___  /\____/__/\_ \
   7  *                     \/            \/     \/    \/            \/
   8  * $Id$
   9  *
  10  * Copyright (C) 2006 by David Bryant
  11  *
  12  * All files in this archive are subject to the GNU General Public License.
  13  * See the file COPYING in the source tree root for full license agreement.
  14  *
  15  * This software is distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY
  16  * KIND, either express or implied.
  17  *
  18  ****************************************************************************/
  19
  20 /* This is an assembly optimized version of the following WavPack function:
  21  *
  22  * void decorr_stereo_pass_cont_arm (struct decorr_pass *dpp,
  23  *                                   long *buffer, long sample_count);
  24  *
  25  * It performs a single pass of stereo decorrelation on the provided buffer.
  26  * Note that this version of the function requires that the 8 previous stereo
  27  * samples are visible and correct. In other words, it ignores the "samples_*"
  28  * fields in the decorr_pass structure and gets the history data directly
  29  * from the buffer. It does, however, return the appropriate history samples
  30  * to the decorr_pass structure before returning.
  31  *
  32  * This is written to work on an ARM7TDMI processor. This version only uses the
  33  * 32-bit multiply-accumulate instruction and so will overflow with 24-bit
  34  * WavPack files.
  35  */
  36         .text
  37         .align
  38         .global         decorr_stereo_pass_cont_arm
  39
  40 /*
  41  * on entry:
  42  *
  43  * r0 = struct decorr_pass *dpp
  44  * r1 = long *buffer
  45  * r2 = long sample_count
  46  */
  47
  48 decorr_stereo_pass_cont_arm:
  49
  50         stmfd   sp!, {r4 - r8, r10, r11, lr}    @ save work registers & return address
  51         mov     r5, r0                  @ r5 = dpp
  52         mov     r11, #512               @ r11 = 512 for rounding
  53         ldrsh   r6, [r0, #2]            @ r6 = dpp->delta
  54         ldrsh   r4, [r0, #4]            @ r4 = dpp->weight_A
  55         ldrsh   r0, [r0, #6]            @ r0 = dpp->weight_B
  56         cmp     r2, #0                  @ exit if sample_count <= 0 (every loop
  57         ble     common_exit             @  below runs its body at least once)
  58
  59         add     r7, r1, r2, asl #3      @ r7 = buffer ending position
  60         ldrsh   r2, [r5, #0]            @ r2 = dpp->term
  61         cmp     r2, #0
  62         bmi     minus_term              @ negative terms are dispatched separately
  63
  64         ldr     lr, [r1, #-16]          @ load 2 sample history from buffer
  65         ldr     r10, [r1, #-12]         @  for terms 2, 17, and 18
  66         ldr     r8, [r1, #-8]           @ (lr/r10 = left/right two pairs back,
  67         ldr     r3, [r1, #-4]           @  r8/r3 = left/right one pair back)
  68         cmp     r2, #17
  69         beq     term_17_loop
  70         cmp     r2, #18
  71         beq     term_18_loop
  72         cmp     r2, #2
  73         beq     term_2_loop
  74         b       term_default_loop       @ else handle default (1-8, except 2)
  75
  76 minus_term:                             @ dispatch for negative terms (r2 < 0)
  77         mov     r10, #1024              @ r10 = -1024 for weight clipping
  78         rsb     r10, r10, #0            @  (only used for negative terms)
  79         cmn     r2, #1                  @ term == -1? (cmn tests r2 + 1)
  80         beq     term_minus_1
  81         cmn     r2, #2                  @ term == -2?
  82         beq     term_minus_2
  83         cmn     r2, #3                  @ term == -3?
  84         beq     term_minus_3
  85         b       common_exit             @ any other negative term: process nothing
  86
  87 /*
  88  ******************************************************************************
  89  * Loop to handle term = 17 condition
  90  *
  91  * r0 = dpp->weight_B           r8 = previous left sample
  92  * r1 = bptr                    r9 =
  93  * r2 = current sample          r10 = second previous right sample
  94  * r3 = previous right sample   r11 = 512 (for rounding)
  95  * r4 = dpp->weight_A           ip = current decorrelation value
  96  * r5 = dpp                     sp =
  97  * r6 = dpp->delta              lr = second previous left sample
  98  * r7 = eptr                    pc =
  99  *******************************************************************************
 100  */
 101
 102 term_17_loop:                           @ one stereo pair (left, right) per pass
 103         rsbs    ip, lr, r8, asl #1      @ decorr value = (2 * prev) - 2nd prev
 104         mov     lr, r8                  @ previous becomes 2nd previous
 105         ldr     r2, [r1], #4            @ get sample & update pointer
 106         mla     r8, ip, r4, r11         @ mult decorr value by weight, round,
 107         add     r8, r2, r8, asr #10     @  shift, and add to new sample
 108         strne   r8, [r1, #-4]           @ if change possible, store sample back
 109         cmpne   r2, #0                  @ NE is still from rsbs (decorr value != 0)
 110         beq     .L325                   @ decorr value or sample is 0: keep weight
 111         teq     ip, r2                  @ update weight based on signs
 112         submi   r4, r4, r6              @ signs differ: weight_A -= delta
 113         addpl   r4, r4, r6              @ signs agree:  weight_A += delta
 114
 115 .L325:  rsbs    ip, r10, r3, asl #1     @ do same thing for right channel
 116         mov     r10, r3                 @ previous right becomes 2nd previous
 117         ldr     r2, [r1], #4            @ get right sample & update pointer
 118         mla     r3, ip, r0, r11         @ r3 = decorr value * weight_B + 512
 119         add     r3, r2, r3, asr #10     @ r3 = sample + (r3 >> 10)
 120         strne   r3, [r1, #-4]           @ store back only if decorr value != 0
 121         cmpne   r2, #0                  @ NE is still from rsbs at .L325
 122         beq     .L329                   @ decorr value or sample is 0: keep weight
 123         teq     ip, r2                  @ N = sign(decorr value) ^ sign(sample)
 124         submi   r0, r0, r6              @ signs differ: weight_B -= delta
 125         addpl   r0, r0, r6              @ signs agree:  weight_B += delta
 126
 127 .L329:  cmp     r7, r1                  @ loop back if more samples to do
 128         bhi     term_17_loop            @ (unsigned compare: eptr > bptr)
 129         b       store_1718              @ common exit for terms 17 & 18
 130
 131 /*
 132  ******************************************************************************
 133  * Loop to handle term = 18 condition
 134  *
 135  * r0 = dpp->weight_B           r8 = previous left sample
 136  * r1 = bptr                    r9 =
 137  * r2 = current sample          r10 = second previous right sample
 138  * r3 = previous right sample   r11 = 512 (for rounding)
 139  * r4 = dpp->weight_A           ip = decorrelation value
 140  * r5 = dpp                     sp =
 141  * r6 = dpp->delta              lr = second previous left sample
 142  * r7 = eptr                    pc =
 143  *******************************************************************************
 144  */
 145
 146 term_18_loop:                           @ one stereo pair (left, right) per pass
 147         sub     ip, r8, lr              @ decorr value =
 148         mov     lr, r8                  @  ((3 * prev) - 2nd prev) >> 1
 149         adds    ip, r8, ip, asr #1      @  formed as prev + ((prev - 2nd prev) >> 1)
 150         ldr     r2, [r1], #4            @ get sample & update pointer
 151         mla     r8, ip, r4, r11         @ mult decorr value by weight, round,
 152         add     r8, r2, r8, asr #10     @  shift, and add to new sample
 153         strne   r8, [r1, #-4]           @ if change possible, store sample back
 154         cmpne   r2, #0                  @ NE is still from adds (decorr value != 0)
 155         beq     .L337                   @ decorr value or sample is 0: keep weight
 156         teq     ip, r2                  @ update weight based on signs
 157         submi   r4, r4, r6              @ signs differ: weight_A -= delta
 158         addpl   r4, r4, r6              @ signs agree:  weight_A += delta
 159
 160 .L337:  sub     ip, r3, r10             @ do same thing for right channel
 161         mov     r10, r3                 @ previous right becomes 2nd previous
 162         adds    ip, r3, ip, asr #1      @ ip = prev + ((prev - 2nd prev) >> 1)
 163         ldr     r2, [r1], #4            @ get right sample & update pointer
 164         mla     r3, ip, r0, r11         @ r3 = decorr value * weight_B + 512
 165         add     r3, r2, r3, asr #10     @ r3 = sample + (r3 >> 10)
 166         strne   r3, [r1, #-4]           @ store back only if decorr value != 0
 167         cmpne   r2, #0                  @ NE is still from adds above
 168         beq     .L341                   @ decorr value or sample is 0: keep weight
 169         teq     ip, r2                  @ N = sign(decorr value) ^ sign(sample)
 170         submi   r0, r0, r6              @ signs differ: weight_B -= delta
 171         addpl   r0, r0, r6              @ signs agree:  weight_B += delta
 172
 173 .L341:  cmp     r7, r1                  @ loop back if more samples to do
 174         bhi     term_18_loop            @ else fall through to store_1718
 175
 176 /* common exit for terms 17 & 18 */
 177
 178 store_1718:
 179         str     r3, [r5, #40]           @ store sample history into struct
 180         str     r8, [r5, #8]            @ (r3/r8 = last right/left output; offsets 40
 181         str     r10, [r5, #44]          @  and 8 presumably samples_B[0]/samples_A[0],
 182         str     lr, [r5, #12]           @  r10/lr = the pair before; TODO confirm layout)
 183         b       common_exit             @ and return
 184
 185 /*
 186  ******************************************************************************
 187  * Loop to handle term = 2 condition
 188  * (note that this case can be handled by the default term handler (1-8), but
 189  * this special case is faster because it doesn't have to read memory twice)
 190  *
 191  * r0 = dpp->weight_B           r8 = previous left sample
 192  * r1 = bptr                    r9 =
 193  * r2 = current sample          r10 = second previous right sample
 194  * r3 = previous right sample   r11 = 512 (for rounding)
 195  * r4 = dpp->weight_A           ip = decorrelation value
 196  * r5 = dpp                     sp =
 197  * r6 = dpp->delta              lr = second previous left sample
 198  * r7 = eptr                    pc =
 199  *******************************************************************************
 200  */
 201
 202 term_2_loop:                            @ one stereo pair (left, right) per pass
 203         movs    ip, lr                  @ get decorrelation value & test
 204         mov     lr, r8                  @ previous becomes 2nd previous
 205         ldr     r2, [r1], #4            @ get sample & update pointer
 206         mla     r8, ip, r4, r11         @ mult decorr value by weight, round,
 207         add     r8, r2, r8, asr #10     @  shift, and add to new sample
 208         strne   r8, [r1, #-4]           @ if change possible, store sample back
 209         cmpne   r2, #0                  @ NE is still from movs (decorr value != 0)
 210         beq     .L225                   @ decorr value or sample is 0: keep weight
 211         teq     ip, r2                  @ update weight based on signs
 212         submi   r4, r4, r6              @ signs differ: weight_A -= delta
 213         addpl   r4, r4, r6              @ signs agree:  weight_A += delta
 214
 215 .L225:  movs    ip, r10                 @ do same thing for right channel
 216         mov     r10, r3                 @ previous right becomes 2nd previous
 217         ldr     r2, [r1], #4            @ get right sample & update pointer
 218         mla     r3, ip, r0, r11         @ r3 = decorr value * weight_B + 512
 219         add     r3, r2, r3, asr #10     @ r3 = sample + (r3 >> 10)
 220         strne   r3, [r1, #-4]           @ store back only if decorr value != 0
 221         cmpne   r2, #0                  @ NE is still from movs at .L225
 222         beq     .L229                   @ decorr value or sample is 0: keep weight
 223         teq     ip, r2                  @ N = sign(decorr value) ^ sign(sample)
 224         submi   r0, r0, r6              @ signs differ: weight_B -= delta
 225         addpl   r0, r0, r6              @ signs agree:  weight_B += delta
 226
 227 .L229:  cmp     r7, r1                  @ loop back if more samples to do
 228         bhi     term_2_loop
 229         b       default_term_exit       @ this exit updates all dpp->samples
 230
 231 /*
 232  ******************************************************************************
 233  * Loop to handle default term condition
 234  *
 235  * r0 = dpp->weight_B           r8 = result accumulator
 236  * r1 = bptr                    r9 =
 237  * r2 = dpp->term               r10 =
 238  * r3 = decorrelation value     r11 = 512 (for rounding)
 239  * r4 = dpp->weight_A           ip = current sample
 240  * r5 = dpp                     sp =
 241  * r6 = dpp->delta              lr =
 242  * r7 = eptr                    pc =
 243  *******************************************************************************
 244  */
 245
 246 term_default_loop:                      @ one stereo pair (left, right) per pass
 247         ldr     ip, [r1]                @ get original sample
 248         ldr     r3, [r1, -r2, asl #3]   @ get decorrelation value based on term
 249         mla     r8, r3, r4, r11         @ mult decorr value by weight, round,
 250         add     r8, ip, r8, asr #10     @  shift and add to new sample
 251         str     r8, [r1], #4            @ store update sample
 252         cmp     r3, #0                  @ skip the weight update if the decorr
 253         cmpne   ip, #0                  @  value or the original sample is zero
 254         beq     .L350
 255         teq     ip, r3                  @ update weight based on signs
 256         submi   r4, r4, r6              @ signs differ: weight_A -= delta
 257         addpl   r4, r4, r6              @ signs agree:  weight_A += delta
 258
 259 .L350:  ldr     ip, [r1]                @ do the same thing for right channel
 260         ldr     r3, [r1, -r2, asl #3]   @ r3 = word at r1 - 8 * term
 261         mla     r8, r3, r0, r11         @ r8 = decorr value * weight_B + 512
 262         add     r8, ip, r8, asr #10     @ r8 = sample + (r8 >> 10)
 263         str     r8, [r1], #4            @ store updated sample & update pointer
 264         cmp     r3, #0                  @ skip the weight update if the decorr
 265         cmpne   ip, #0                  @  value or the original sample is zero
 266         beq     .L354
 267         teq     ip, r3                  @ N = sign(sample) ^ sign(decorr value)
 268         submi   r0, r0, r6              @ signs differ: weight_B -= delta
 269         addpl   r0, r0, r6              @ signs agree:  weight_B += delta
 270
 271 .L354:  cmp     r7, r1                  @ loop back if more samples to do
 272         bhi     term_default_loop       @ else fall through to default_term_exit
 273
 274 /*
 275  * This exit is used by terms 1-8 to store the previous 8 samples into the decorr
 276  * structure (even if they are not all used for the given term)
 277  */
 278
 279 default_term_exit:
 280         ldrsh   r3, [r5, #0]            @ reload dpp->term (r2 may hold a sample)
 281         sub     ip, r3, #1              @ ip = term - 1: slot for the newest pair
 282         mov     lr, #7                  @ lr counts 7 down to -1: 8 passes
 283
 284 .L358:  and     r3, ip, #7              @ r3 = slot index, wrapped to 0..7
 285         add     r3, r5, r3, asl #2      @ r3 = dpp + 4 * slot
 286         ldr     r2, [r1, #-4]           @ right sample of the pair before r1
 287         str     r2, [r3, #40]           @  goes to dpp + 40 + 4 * slot
 288         ldr     r2, [r1, #-8]!          @ left sample of that pair; r1 -= 8
 289         str     r2, [r3, #8]            @  goes to dpp + 8 + 4 * slot
 290         sub     ip, ip, #1              @ older pair goes to the slot below
 291         sub     lr, lr, #1
 292         cmn     lr, #1                  @ done once lr reaches -1
 293         bne     .L358
 294         b       common_exit
 295
 296 /*
 297  ******************************************************************************
 298  * Loop to handle term = -1 condition
 299  *
 300  * r0 = dpp->weight_B           r8 =
 301  * r1 = bptr                    r9 =
 302  * r2 = intermediate result     r10 = -1024 (for clipping)
 303  * r3 = previous right sample   r11 = 512 (for rounding)
 304  * r4 = dpp->weight_A           ip = current sample
 305  * r5 = dpp                     sp =
 306  * r6 = dpp->delta              lr = updated left sample
 307  * r7 = eptr                    pc =
 308  *******************************************************************************
 309  */
 310
 311 term_minus_1:
 312         ldr     r3, [r1, #-4]           @ r3 = previous right sample
 313
 314 term_minus_1_loop:
 315         ldr     ip, [r1]                @ for left channel the decorrelation value
 316         mla     r2, r3, r4, r11         @  is the previous right sample (in r3)
 317         add     lr, ip, r2, asr #10     @ lr = updated left sample
 318         str     lr, [r1], #8            @ store it & step pointer past this pair
 319         cmp     r3, #0                  @ skip the weight update if the decorr
 320         cmpne   ip, #0                  @  value or the original sample is zero
 321         beq     .L361
 322         teq     ip, r3                  @ update weight based on signs
 323         submi   r4, r4, r6              @ signs differ: weight_A -= delta
 324         addpl   r4, r4, r6              @ signs agree:  weight_A += delta
 325         cmp     r4, #1024               @ then clip weight to +/-1024
 326         movgt   r4, #1024
 327         cmp     r4, r10
 328         movlt   r4, r10
 329
 330 .L361:  ldr     r2, [r1, #-4]           @ for right channel the decorrelation value
 331         mla     r3, lr, r0, r11         @  is the just updated left sample (in lr)
 332         add     r3, r2, r3, asr #10     @ r3 = updated right sample
 333         str     r3, [r1, #-4]           @ store it back in place
 334         cmp     lr, #0                  @ skip the weight update if the decorr
 335         cmpne   r2, #0                  @  value or the original sample is zero
 336         beq     .L369
 337         teq     r2, lr                  @ N = sign(sample) ^ sign(decorr value)
 338         submi   r0, r0, r6              @ signs differ: weight_B -= delta
 339         addpl   r0, r0, r6              @ signs agree:  weight_B += delta
 340         cmp     r0, #1024               @ then clip weight to +/-1024
 341         movgt   r0, #1024
 342         cmp     r0, r10
 343         movlt   r0, r10
 344
 345 .L369:  cmp     r7, r1                  @ loop back if more samples to do
 346         bhi     term_minus_1_loop
 347
 348         str     r3, [r5, #8]            @ else store right sample and exit
 349         b       common_exit
 350
 351 /*
 352  ******************************************************************************
 353  * Loop to handle term = -2 condition
 354  * (note that the channels are processed in the reverse order here)
 355  *
 356  * r0 = dpp->weight_B           r8 =
 357  * r1 = bptr                    r9 =
 358  * r2 = intermediate result     r10 = -1024 (for clipping)
 359  * r3 = previous left sample    r11 = 512 (for rounding)
 360  * r4 = dpp->weight_A           ip = current sample
 361  * r5 = dpp                     sp =
 362  * r6 = dpp->delta              lr = updated right sample
 363  * r7 = eptr                    pc =
 364  *******************************************************************************
 365  */
 366
 367 term_minus_2:
 368         ldr     r3, [r1, #-8]           @ r3 = previous left sample
 369
 370 term_minus_2_loop:
 371         ldr     ip, [r1, #4]            @ for right channel the decorrelation value
 372         mla     r2, r3, r0, r11         @  is the previous left sample (in r3)
 373         add     lr, ip, r2, asr #10     @ lr = updated right sample
 374         str     lr, [r1, #4]            @ store it back in place
 375         cmp     r3, #0                  @ skip the weight update if the decorr
 376         cmpne   ip, #0                  @  value or the original sample is zero
 377         beq     .L380
 378         teq     ip, r3                  @ update weight based on signs
 379         submi   r0, r0, r6              @ signs differ: weight_B -= delta
 380         addpl   r0, r0, r6              @ signs agree:  weight_B += delta
 381         cmp     r0, #1024               @ then clip weight to +/-1024
 382         movgt   r0, #1024
 383         cmp     r0, r10
 384         movlt   r0, r10
 385
 386 .L380:  ldr     r2, [r1, #0]            @ for left channel the decorrelation value
 387         mla     r3, lr, r4, r11         @  is the just updated right sample (in lr)
 388         add     r3, r2, r3, asr #10     @ r3 = updated left sample
 389         str     r3, [r1], #8            @ store it & step pointer past this pair
 390         cmp     lr, #0                  @ skip the weight update if the decorr
 391         cmpne   r2, #0                  @  value or the original sample is zero
 392         beq     .L388
 393         teq     r2, lr                  @ N = sign(sample) ^ sign(decorr value)
 394         submi   r4, r4, r6              @ signs differ: weight_A -= delta
 395         addpl   r4, r4, r6              @ signs agree:  weight_A += delta
 396         cmp     r4, #1024               @ then clip weight to +/-1024
 397         movgt   r4, #1024
 398         cmp     r4, r10
 399         movlt   r4, r10
 400
 401 .L388:  cmp     r7, r1                  @ loop back if more samples to do
 402         bhi     term_minus_2_loop
 403
 404         str     r3, [r5, #40]           @ else store left channel and exit
 405         b       common_exit
 406
 407 /*
 408  ******************************************************************************
 409  * Loop to handle term = -3 condition
 410  *
 411  * r0 = dpp->weight_B           r8 = previous left sample
 412  * r1 = bptr                    r9 =
 413  * r2 = current left sample     r10 = -1024 (for clipping)
 414  * r3 = previous right sample   r11 = 512 (for rounding)
 415  * r4 = dpp->weight_A           ip = intermediate result
 416  * r5 = dpp                     sp =
 417  * r6 = dpp->delta              lr =
 418  * r7 = eptr                    pc =
 419  *******************************************************************************
 420  */
 421
 422 term_minus_3:
 423         ldr     r3, [r1, #-4]           @ load previous samples
 424         ldr     r8, [r1, #-8]           @ (r3 = previous right, r8 = previous left)
 425
 426 term_minus_3_loop:
 427         ldr     ip, [r1]                @ ip = current left sample
 428         mla     r2, r3, r4, r11         @ decorr value is previous right (in r3)
 429         add     r2, ip, r2, asr #10     @ r2 = updated left sample
 430         str     r2, [r1], #4            @ store it & update pointer
 431         cmp     r3, #0                  @ skip the weight update if the decorr
 432         cmpne   ip, #0                  @  value or the original sample is zero
 433         beq     .L399
 434         teq     ip, r3                  @ update weight based on signs
 435         submi   r4, r4, r6              @ signs differ: weight_A -= delta
 436         addpl   r4, r4, r6              @ signs agree:  weight_A += delta
 437         cmp     r4, #1024               @ then clip weight to +/-1024
 438         movgt   r4, #1024
 439         cmp     r4, r10
 440         movlt   r4, r10
 441
 442 .L399:  movs    ip, r8                  @ ip = previous left we use now
 443         mov     r8, r2                  @ r8 = current left we use next time
 444         ldr     r2, [r1], #4            @ r2 = current right sample; update pointer
 445         mla     r3, ip, r0, r11         @ r3 = decorr value * weight_B + 512
 446         add     r3, r2, r3, asr #10     @ r3 = updated right sample
 447         strne   r3, [r1, #-4]           @ store back only if decorr value != 0
 448         cmpne   r2, #0                  @ NE is still from movs (decorr value != 0)
 449         beq     .L407                   @ decorr value or sample is 0: keep weight
 450         teq     ip, r2                  @ N = sign(decorr value) ^ sign(sample)
 451         submi   r0, r0, r6              @ signs differ: weight_B -= delta
 452         addpl   r0, r0, r6              @ signs agree:  weight_B += delta
 453         cmp     r0, #1024               @ then clip weight to +/-1024
 454         movgt   r0, #1024
 455         cmp     r0, r10
 456         movlt   r0, r10
 457
 458 .L407:  cmp     r7, r1                  @ loop back if more samples to do
 459         bhi     term_minus_3_loop
 460
 461         str     r3, [r5, #8]            @ else store previous samples & exit
 462         str     r8, [r5, #40]           @ (then fall through to common_exit)
 463
 464 /*
 465  * Before finally exiting we must store weights back for next time
 466  */
 467
 468 common_exit:
 469         strh    r4, [r5, #4]            @ dpp->weight_A = r4
 470         strh    r0, [r5, #6]            @ dpp->weight_B = r0
 471         ldmfd   sp!, {r4 - r8, r10, r11, pc}    @ restore saved registers & return
 472