apps/codecs/libmad/imdct_mcf5249.S

   1 /***************************************************************************
   2  *             __________               __   ___.
   3  *   Open      \______   \ ____   ____ |  | _\_ |__   _______  ___
   4  *   Source     |       _//  _ \_/ ___\|  |/ /| __ \ /  _ \  \/  /
   5  *   Jukebox    |    |   (  <_> )  \___|    < | \_\ (  <_> > <  <
   6  *   Firmware   |____|_  /\____/ \___  >__|_ \|___  /\____/__/\_ \
   7  *                     \/            \/     \/    \/            \/
   8  * $Id$
   9  *
  10  * Copyright (C) 2005 by Thom Johansen
  11  *
  12  * This program is free software; you can redistribute it and/or
  13  * modify it under the terms of the GNU General Public License
  14  * as published by the Free Software Foundation; either version 2
  15  * of the License, or (at your option) any later version.
  16  *
  17  * This software is distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY
  18  * KIND, either express or implied.
  19  *
  20  ****************************************************************************/
  21 /* this will also be the home to III_imdct_l in the future */
  22
   23     .global III_imdct_s
      /*
       * III_imdct_s: short-block IMDCT followed by windowing/overlap.
       *
       * Stack arguments (offsets valid once the local frame is allocated):
       *   (45*4 + 4, %sp)  X: input; three groups of 6 longwords are read
       *   (45*4 + 8, %sp)  z: output; z[0] .. z[35] are written
       * Presumably the ColdFire counterpart of libmad's C routine of the
       * same name - TODO confirm the prototype against the C source.
       *
       * External data (defined outside this listing):
       *   imdct_s   coefficient table, read sequentially through a1
       *   window_s  window coefficients; entries [0..11] are read
       *
       * Stack frame: 45 longwords.
       *   (0, %sp)     .. 36 longword temp buffer (yptr)
       *   (36*4, %sp)  .. saved d2-d7/a2-a4
       *
       * Registers: d2-d7/a2-a4 are saved and restored. d0, d1, a0, a1,
       * acc0 and the condition codes are modified and not restored.
       *
       * NOTE(review): acc0 is never cleared before the first mac below, so
       * it looks like it is assumed to be zero on entry; likewise MACSR is
       * not configured here, although the shift comments depend on its
       * mode. Confirm both are guaranteed by the caller.
       */
   24 III_imdct_s:
   25     /* we need to save 9 registers and 36 samples of temp buffer */
   26     lea.l (-45*4, %sp), %sp             /* allocate 45 longwords on the stack */
   27     movem.l %d2-%d7/%a2-%a4, (36*4, %sp) /* saved regs live above the temp buffer */
   28     move.l (45*4 + 4, %sp), %a2  /* a2 = X (first stack argument) */
   29     move.l %sp, %a3              /* a3 = yptr = start of temp buffer */
   30
   31     /* IMDCT */
   32
   33     /* if additional precision is needed in this block, it is possible to
   34      * get more low bits out of the accext01 register _before_ doing the
   35      * movclrs.
   36      */
          /* Register use in this block:
           *   d0-d4/a0  six input samples of the current group
           *   a1        running pointer into imdct_s
           *   a4        coefficient used by the next mac
           *   a2        X pointer, advanced by 6 longwords per outer pass
           *   a3        yptr, advanced by 12 longwords per outer pass
           *   d5        outer loop counter (3 .. 1)
           *   d7        inner loop index i (0 .. 2), temporarily negated
           *   d6        scratch: result value / loop bound
           */
   37     moveq.l #3, %d5                     /* we do three outer loop iterations */
   38 .imdctloop:                             /* outer loop label */
   39     lea.l imdct_s, %a1                  /* load pointer to imdct coefs in a1 */
   40     movem.l (%a2), %d0-%d4/%a0          /* load some input data in d0-d4/a0 */
   41     lea.l (6*4, %a2), %a2               /* X += 6 for the next outer pass */
   42
   43     clr.l %d7                           /* clear loop variable */
   44     move.l (%a1)+, %a4                  /* load imdct coef in a4 */
          /* Every mac.l below multiplies one sample by the coefficient that is
           * in a4 and, in the same instruction, fetches the following
           * coefficient into a4 through (a1)+. The code relies on the multiply
           * seeing the a4 value from before that load, which is why the first
           * coefficient is preloaded above. Per outer pass 37 longwords are
           * read from imdct_s but only 36 are multiplied: the value fetched
           * by the last mac of the third inner pass is never used.
           */
   45 .macloop:                               /* inner loop label */
   46     mac.l %d0, %a4, (%a1)+, %a4, %acc0  /* acc0 += d0 * a4; a4 = next coef */
   47     mac.l %d1, %a4, (%a1)+, %a4, %acc0
   48     mac.l %d2, %a4, (%a1)+, %a4, %acc0
   49     mac.l %d3, %a4, (%a1)+, %a4, %acc0
   50     mac.l %d4, %a4, (%a1)+, %a4, %acc0
   51     mac.l %a0, %a4, (%a1)+, %a4, %acc0
   52     movclr.l %acc0, %d6                 /* get result, left shifted once; acc0 is cleared */
   53     asl.l #3, %d6                       /* one shift free, shift three more */
   54     move.l %d6, (%a3, %d7.l*4)          /* yptr[i] = result */
   55     neg.l %d6                           /* d6 = -result */
   56     neg.l %d7                           /* d7 = -i, to index downwards */
   57     move.l %d6, (5*4, %a3, %d7.l*4)     /* yptr[5 - i] = -result */
   58     mac.l %d0, %a4, (%a1)+, %a4, %acc0  /* second sum, next 6 coefs (d7 is still -i) */
   59     mac.l %d1, %a4, (%a1)+, %a4, %acc0
   60     mac.l %d2, %a4, (%a1)+, %a4, %acc0
   61     mac.l %d3, %a4, (%a1)+, %a4, %acc0
   62     mac.l %d4, %a4, (%a1)+, %a4, %acc0
   63     mac.l %a0, %a4, (%a1)+, %a4, %acc0
   64     movclr.l %acc0, %d6                 /* get result */
   65     asl.l #3, %d6                       /* same scaling as the first sum */
   66     move.l %d6, (11*4, %a3, %d7.l*4)    /* yptr[11 - i] = result */
   67     neg.l %d7                           /* d7 = i again */
   68     move.l %d6, (6*4, %a3, %d7.l*4)     /* yptr[i + 6] = result */
   69     addq.l #1, %d7                      /* increment inner loop variable */
   70     moveq.l #3, %d6                     /* d6 is free here; holds the loop bound */
   71     cmp.l %d6, %d7                      /* we do three inner loop iterations */
   72     jne .macloop
   73
   74     lea.l (12*4, %a3), %a3              /* yptr += 12: next block of the temp buffer */
   75     subq.l #1, %d5                      /* decrement outer loop variable */
   76     jne .imdctloop
   77
   78     /* windowing, overlapping and concatenation */
          /* Register use in this block:
           *   a2  z pointer, a3 yptr (temp buffer), a4 wptr (window_s);
           *       all three advance by one longword per iteration
           *   d0  wptr[0], d1 wptr[6]
           *   d2  yptr sample for the next mac (refilled by the mac loads)
           *   d6  scratch result, d7 loop counter (6 .. 1)
           * Each movclr.l leaves acc0 cleared, so every sum starts from zero.
           */
   79
   80     move.l (45*4 + 8, %sp), %a2       /* a2 = z */
   81     move.l %sp, %a3                   /* a3 = tmp buffer ptr */
   82     lea.l window_s, %a4               /* a4 = window coef pointer */
   83
   84     moveq.l #6, %d7                   /* six iterations */
   85 .overlaploop:
   86     clr.l (%a2)                       /* z[i + 0] = 0 */
   87     move.l (%a4), %d0                 /* d0 = wptr[0] */
   88     move.l (%a3), %d2                 /* d2 = yptr[0] */
   89     mac.l %d0, %d2, (6*4, %a4), %d1, %acc0   /* acc0 = wptr[0] * yptr[0]; d1 = wptr[6] */
   90     move.l (6*4, %a3), %d2            /* d2 = yptr[6] */
   91     movclr.l %acc0, %d6               /* fetch product and clear acc0 */
   92     asl.l #3, %d6                     /* same scaling shift as in the IMDCT block */
   93     move.l %d6, (6*4, %a2)            /* z[i + 6] = result */
   94
   95     mac.l %d1, %d2, (12*4, %a3), %d2, %acc0  /* acc0 = wptr[6] * yptr[6]; d2 = yptr[12] */
   96     mac.l %d0, %d2, (18*4, %a3), %d2, %acc0  /* acc0 += wptr[0] * yptr[12]; d2 = yptr[18] */
   97     movclr.l %acc0, %d6
   98     asl.l #3, %d6
   99     move.l %d6, (12*4, %a2)           /* z[i + 12] = result */
  100
  101     mac.l %d1, %d2, (24*4, %a3), %d2, %acc0  /* acc0 = wptr[6] * yptr[18]; d2 = yptr[24] */
  102     mac.l %d0, %d2, (30*4, %a3), %d2, %acc0  /* acc0 += wptr[0] * yptr[24]; d2 = yptr[30] */
  103     movclr.l %acc0, %d6
  104     asl.l #3, %d6
  105     move.l %d6, (18*4, %a2)           /* z[i + 18] = result */
  106
  107     mac.l %d1, %d2, %acc0             /* acc0 = wptr[6] * yptr[30] */
  108     movclr.l %acc0, %d6
  109     asl.l #3, %d6
  110     move.l %d6, (24*4, %a2)           /* z[i + 24] = result */
  111
  112     clr.l (30*4, %a2)                 /* z[i + 30] = 0 */
  113     addq.l #4, %a2                    /* increment all pointers */
  114     addq.l #4, %a3
  115     addq.l #4, %a4
  116     subq.l #1, %d7                    /* decrement loop counter */
  117     jne .overlaploop
  118     /* fall through to exit if we're done */
  119
  120     /* clean up */
  121     movem.l (36*4, %sp), %d2-%d7/%a2-%a4 /* restore the registers saved on entry */
  122     lea.l (45*4, %sp), %sp              /* release the 45 longword frame */
  123     rts
 124