/***************************************************************************
 *             __________               __   ___.
 *   Open      \______   \ ____   ____ |  | _\_ |__   _______  ___
 *   Source     |       _//  _ \_/ ___\|  |/ /| __ \ /  _ \  \/  /
 *   Jukebox    |    |   (  <_> )  \___|    < | \_\ (  <_> > <  <
 *   Firmware   |____|_  /\____/ \___  >__|_ \|___  /\____/__/\_ \
 *                     \/            \/     \/    \/            \/
 *
 * Copyright (C) 2005 by Pedro Vasconcelos
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation; either version 2
 * of the License, or (at your option) any later version.
 *
 * This software is distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY
 * KIND, either express or implied.
 *
 ****************************************************************************/
/* asm routines for wide math on the MCF5249 */

#include <stdint.h> /* int32_t */

#if defined(CPU_COLDFIRE)

#ifndef _V_WIDE_MATH
#define _V_WIDE_MATH

static inline int32_t MULT32(int32_t x, int32_t y) {

    asm volatile ("mac.l %[x], %[y], %%acc0;"  /* multiply & shift */
                  "movclr.l %%acc0, %[x];"     /* move & clear acc */
                  "asr.l #1, %[x];"            /* no overflow test */
                  : [x] "+&d" (x)
                  : [y] "r" (y)
                  : "cc");
    return x;
}
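
/* Reference only: a portable C sketch of what the EMAC sequence above
   computes -- the high 32 bits of the signed 64-bit product, (x*y)>>32,
   matching the usual Tremor MULT32 definition. Illustrative and disabled;
   the asm version above is the one that is used. */
#if 0
static inline int32_t MULT32_ref(int32_t x, int32_t y) {
    return (int32_t)(((int64_t)x * (int64_t)y) >> 32);
}
#endif
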
static inline int32_t MULT31(int32_t x, int32_t y) {
    asm volatile ("mac.l %[x], %[y], %%acc0;"  /* multiply */
                  "movclr.l %%acc0, %[x];"     /* move and clear */
                  : [x] "+&r" (x)
                  : [y] "r" (y)
                  : "cc");
    return x;
}

static inline int32_t MULT31_SHIFT15(int32_t x, int32_t y) {
    int32_t r;

    asm volatile ("mac.l %[x], %[y], %%acc0;"  /* multiply */
                  "mulu.l %[y], %[x];"         /* get lower half, avoid emac stall */
                  "movclr.l %%acc0, %[r];"     /* get higher half */
                  "asl.l #8, %[r];"            /* hi<<16, plus one free */
                  "asl.l #8, %[r];"
                  "lsr.l #8, %[x];"            /* (unsigned)lo >> 15 */
                  "lsr.l #7, %[x];"
                  "or.l %[x], %[r];"           /* logical-or results */
                  : [r] "=&d" (r), [x] "+d" (x)
                  : [y] "d" (y)
                  : "cc");
    return r;
}
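
/* Reference only: portable C sketches of the two routines above, under the
   usual Tremor definitions -- MULT31 returns (x*y)>>31 and MULT31_SHIFT15
   returns (x*y)>>15 of the signed 64-bit product. Illustrative and disabled;
   the EMAC versions above are used. */
#if 0
static inline int32_t MULT31_ref(int32_t x, int32_t y) {
    return (int32_t)(((int64_t)x * (int64_t)y) >> 31);
}

static inline int32_t MULT31_SHIFT15_ref(int32_t x, int32_t y) {
    return (int32_t)(((int64_t)x * (int64_t)y) >> 15);
}
#endif
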
static inline
void XPROD31(int32_t  a, int32_t  b,
             int32_t  t, int32_t  v,
             int32_t *x, int32_t *y)
{
    asm volatile ("mac.l %[a], %[t], %%acc0;"
                  "mac.l %[b], %[v], %%acc0;"
                  "mac.l %[b], %[t], %%acc1;"
                  "msac.l %[a], %[v], %%acc1;"
                  "movclr.l %%acc0, %[a];"
                  "move.l %[a], (%[x]);"
                  "movclr.l %%acc1, %[a];"
                  "move.l %[a], (%[y]);"
                  : [a] "+&r" (a)
                  : [x] "a" (x), [y] "a" (y),
                    [b] "r" (b), [t] "r" (t), [v] "r" (v)
                  : "cc", "memory");
}

static inline
void XNPROD31(int32_t  a, int32_t  b,
              int32_t  t, int32_t  v,
              int32_t *x, int32_t *y)
{
    asm volatile ("mac.l %[a], %[t], %%acc0;"
                  "msac.l %[b], %[v], %%acc0;"
                  "mac.l %[b], %[t], %%acc1;"
                  "mac.l %[a], %[v], %%acc1;"
                  "movclr.l %%acc0, %[a];"
                  "move.l %[a], (%[x]);"
                  "movclr.l %%acc1, %[a];"
                  "move.l %[a], (%[y]);"
                  : [a] "+&r" (a)
                  : [x] "a" (x), [y] "a" (y),
                    [b] "r" (b), [t] "r" (t), [v] "r" (v)
                  : "cc", "memory");
}
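
/* Reference only: a C sketch of the two cross products above, written with
   MULT31 as in Tremor's generic code (results can differ in the LSB, since
   the asm accumulates before truncating). XPROD31 stores a*t+b*v and b*t-a*v,
   XNPROD31 stores a*t-b*v and b*t+a*v, each scaled by >>31. Illustrative and
   disabled. */
#if 0
static inline
void XPROD31_ref(int32_t a, int32_t b, int32_t t, int32_t v,
                 int32_t *x, int32_t *y)
{
    *x = MULT31(a, t) + MULT31(b, v);
    *y = MULT31(b, t) - MULT31(a, v);
}

static inline
void XNPROD31_ref(int32_t a, int32_t b, int32_t t, int32_t v,
                  int32_t *x, int32_t *y)
{
    *x = MULT31(a, t) - MULT31(b, v);
    *y = MULT31(b, t) + MULT31(a, v);
}
#endif
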
#if 0 /* canonical Tremor definition */
#define XPROD32(_a, _b, _t, _v, _x, _y) \
    { (_x)=MULT32(_a,_t)+MULT32(_b,_v); \
      (_y)=MULT32(_b,_t)-MULT32(_a,_v); }
#endif

/* this could lose the LSB by overflow, but I don't think it'll ever happen.
   If anyone thinks they can hear a bug caused by this, please try the above
   version. */
#define XPROD32(_a, _b, _t, _v, _x, _y) \
    asm volatile ("mac.l %[a], %[t], %%acc0;" \
                  "mac.l %[b], %[v], %%acc0;" \
                  "mac.l %[b], %[t], %%acc1;" \
                  "msac.l %[a], %[v], %%acc1;" \
                  "movclr.l %%acc0, %[x];" \
                  "asr.l #1, %[x];" \
                  "movclr.l %%acc1, %[y];" \
                  "asr.l #1, %[y];" \
                  : [x] "=d" (_x), [y] "=d" (_y) \
                  : [a] "r" (_a), [b] "r" (_b), \
                    [t] "r" (_t), [v] "r" (_v) \
                  : "cc");

#define XPROD31_R(_a, _b, _t, _v, _x, _y) \
    asm volatile ("mac.l %[a], %[t], %%acc0;" \
                  "mac.l %[b], %[v], %%acc0;" \
                  "mac.l %[b], %[t], %%acc1;" \
                  "msac.l %[a], %[v], %%acc1;" \
                  "movclr.l %%acc0, %[x];" \
                  "movclr.l %%acc1, %[y];" \
                  : [x] "=r" (_x), [y] "=r" (_y) \
                  : [a] "r" (_a), [b] "r" (_b), \
                    [t] "r" (_t), [v] "r" (_v) \
                  : "cc");

#define XNPROD31_R(_a, _b, _t, _v, _x, _y) \
    asm volatile ("mac.l %[a], %[t], %%acc0;" \
                  "msac.l %[b], %[v], %%acc0;" \
                  "mac.l %[b], %[t], %%acc1;" \
                  "mac.l %[a], %[v], %%acc1;" \
                  "movclr.l %%acc0, %[x];" \
                  "movclr.l %%acc1, %[y];" \
                  : [x] "=r" (_x), [y] "=r" (_y) \
                  : [a] "r" (_a), [b] "r" (_b), \
                    [t] "r" (_t), [v] "r" (_v) \
                  : "cc");
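
/* Reference only: the _R variants above follow the same pattern but write to
   plain lvalues and omit the final asr, so they produce MULT31-scaled (>>31)
   results rather than MULT32-scaled ones (again up to LSB rounding, since the
   asm accumulates first). A disabled, illustrative sketch of the generic form: */
#if 0
#define XPROD31_R_ref(_a, _b, _t, _v, _x, _y) \
    { (_x) = MULT31(_a, _t) + MULT31(_b, _v); \
      (_y) = MULT31(_b, _t) - MULT31(_a, _v); }

#define XNPROD31_R_ref(_a, _b, _t, _v, _x, _y) \
    { (_x) = MULT31(_a, _t) - MULT31(_b, _v); \
      (_y) = MULT31(_b, _t) + MULT31(_a, _v); }
#endif
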
#ifndef _V_VECT_OPS
#define _V_VECT_OPS

/* asm versions of vector operations for block.c, window.c */
/* assumes MAC is initialized & accumulators cleared */
static inline
void vect_add(int32_t *x, const int32_t *y, int n)
{
    /* align to 16 bytes */
    while(n>0 && (int)x&15) {
        *x++ += *y++;
        n--;
    }
    asm volatile ("bra 1f;"
                  "0:"                          /* loop start */
                  "movem.l (%[x]), %%d0-%%d3;"  /* fetch values */
                  "movem.l (%[y]), %%a0-%%a3;"
                  /* add */
                  "add.l %%a0, %%d0;"
                  "add.l %%a1, %%d1;"
                  "add.l %%a2, %%d2;"
                  "add.l %%a3, %%d3;"
                  /* store and advance */
                  "movem.l %%d0-%%d3, (%[x]);"
                  "lea.l (4*4, %[x]), %[x];"
                  "lea.l (4*4, %[y]), %[y];"
                  "subq.l #4, %[n];"            /* done 4 elements */
                  "1: cmpi.l #4, %[n];"
                  "bge 0b;"
                  : [n] "+d" (n), [x] "+a" (x), [y] "+a" (y)
                  : : "%d0", "%d1", "%d2", "%d3", "%a0", "%a1", "%a2", "%a3",
                      "cc", "memory");
    /* add final elements */
    while (n>0) {
        *x++ += *y++;
        n--;
    }
}
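
/* Reference only: the scalar loop that the movem block above unrolls four
   elements at a time. Illustrative and disabled. */
#if 0
static inline void vect_add_ref(int32_t *x, const int32_t *y, int n)
{
    int i;
    for (i = 0; i < n; i++)
        x[i] += y[i];
}
#endif
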
static inline
void vect_copy(int32_t *x, int32_t *y, int n)
{
    /* align to 16 bytes */
    while(n>0 && (int)x&15) {
        *x++ = *y++;
        n--;
    }
    asm volatile ("bra 1f;"
                  "0:"                          /* loop start */
                  "movem.l (%[y]), %%d0-%%d3;"  /* fetch values */
                  "movem.l %%d0-%%d3, (%[x]);"  /* store */
                  "lea.l (4*4, %[x]), %[x];"    /* advance */
                  "lea.l (4*4, %[y]), %[y];"
                  "subq.l #4, %[n];"            /* done 4 elements */
                  "1: cmpi.l #4, %[n];"
                  "bge 0b;"
                  : [n] "+d" (n), [x] "+a" (x), [y] "+a" (y)
                  : : "%d0", "%d1", "%d2", "%d3", "cc", "memory");
    /* copy final elements */
    while (n>0) {
        *x++ = *y++;
        n--;
    }
}

static inline
void vect_mult_fw(int32_t *data, int32_t *window, int n)
{
    /* ensure data is aligned to 16 bytes */
    while(n>0 && (int)data&15) {
        *data = MULT31(*data, *window);
        data++;
        window++;
        n--;
    }
    asm volatile ("movem.l (%[d]), %%d0-%%d3;"  /* loop start */
                  "movem.l (%[w]), %%a0-%%a3;"  /* pre-fetch registers */
                  "lea.l (4*4, %[w]), %[w];"
                  "bra 1f;"                     /* jump to loop condition */
                  "0:"                          /* loop body */
                  /* multiply and load next window values */
                  "mac.l %%d0, %%a0, (%[w])+, %%a0, %%acc0;"
                  "mac.l %%d1, %%a1, (%[w])+, %%a1, %%acc1;"
                  "mac.l %%d2, %%a2, (%[w])+, %%a2, %%acc2;"
                  "mac.l %%d3, %%a3, (%[w])+, %%a3, %%acc3;"
                  "movclr.l %%acc0, %%d0;"      /* get the products */
                  "movclr.l %%acc1, %%d1;"
                  "movclr.l %%acc2, %%d2;"
                  "movclr.l %%acc3, %%d3;"
                  /* store and advance */
                  "movem.l %%d0-%%d3, (%[d]);"
                  "lea.l (4*4, %[d]), %[d];"
                  "movem.l (%[d]), %%d0-%%d3;"
                  "subq.l #4, %[n];"            /* done 4 elements */
                  "1: cmpi.l #4, %[n];"
                  "bge 0b;"
                  /* multiply final elements */
                  "tst.l %[n];"
                  "beq 1f;"                     /* n=0 */
                  "mac.l %%d0, %%a0, %%acc0;"
                  "movclr.l %%acc0, %%d0;"
                  "move.l %%d0, (%[d])+;"
                  "subq.l #1, %[n];"
                  "beq 1f;"                     /* n=1 */
                  "mac.l %%d1, %%a1, %%acc0;"
                  "movclr.l %%acc0, %%d1;"
                  "move.l %%d1, (%[d])+;"
                  "subq.l #1, %[n];"
                  "beq 1f;"                     /* n=2 */
                  /* otherwise n = 3 */
                  "mac.l %%d2, %%a2, %%acc0;"
                  "movclr.l %%acc0, %%d2;"
                  "move.l %%d2, (%[d])+;"
                  "1:"
                  : [n] "+d" (n), [d] "+a" (data), [w] "+a" (window)
                  : : "%d0", "%d1", "%d2", "%d3", "%a0", "%a1", "%a2", "%a3",
                      "cc", "memory");
}
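
/* Reference only: vect_mult_fw scales each data element by the matching
   window element, walking the window forwards -- the same operation as the
   alignment pre-loop above. A disabled, illustrative C sketch: */
#if 0
static inline void vect_mult_fw_ref(int32_t *data, int32_t *window, int n)
{
    while (n > 0) {
        *data = MULT31(*data, *window);
        data++;
        window++;
        n--;
    }
}
#endif
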
static inline
void vect_mult_bw(int32_t *data, int32_t *window, int n)
{
    /* ensure at least data is aligned to 16 bytes */
    while(n>0 && (int)data&15) {
        *data = MULT31(*data, *window);
        data++;
        window--;
        n--;
    }
    asm volatile ("lea.l (-3*4, %[w]), %[w];"   /* loop start */
                  "movem.l (%[d]), %%d0-%%d3;"  /* pre-fetch registers */
                  "movem.l (%[w]), %%a0-%%a3;"
                  "bra 1f;"                     /* jump to loop condition */
                  "0:"                          /* loop body */
                  /* multiply and load next window value */
                  "mac.l %%d0, %%a3, -(%[w]), %%a3, %%acc0;"
                  "mac.l %%d1, %%a2, -(%[w]), %%a2, %%acc1;"
                  "mac.l %%d2, %%a1, -(%[w]), %%a1, %%acc2;"
                  "mac.l %%d3, %%a0, -(%[w]), %%a0, %%acc3;"
                  "movclr.l %%acc0, %%d0;"      /* get the products */
                  "movclr.l %%acc1, %%d1;"
                  "movclr.l %%acc2, %%d2;"
                  "movclr.l %%acc3, %%d3;"
                  /* store and advance */
                  "movem.l %%d0-%%d3, (%[d]);"
                  "lea.l (4*4, %[d]), %[d];"
                  "movem.l (%[d]), %%d0-%%d3;"
                  "subq.l #4, %[n];"            /* done 4 elements */
                  "1: cmpi.l #4, %[n];"
                  "bge 0b;"
                  /* multiply final elements */
                  "tst.l %[n];"
                  "beq 1f;"                     /* n=0 */
                  "mac.l %%d0, %%a3, %%acc0;"
                  "movclr.l %%acc0, %%d0;"
                  "move.l %%d0, (%[d])+;"
                  "subq.l #1, %[n];"
                  "beq 1f;"                     /* n=1 */
                  "mac.l %%d1, %%a2, %%acc0;"
                  "movclr.l %%acc0, %%d1;"
                  "move.l %%d1, (%[d])+;"
                  "subq.l #1, %[n];"
                  "beq 1f;"                     /* n=2 */
                  /* otherwise n = 3 */
                  "mac.l %%d2, %%a1, %%acc0;"
                  "movclr.l %%acc0, %%d2;"
                  "move.l %%d2, (%[d])+;"
                  "1:"
                  : [n] "+d" (n), [d] "+a" (data), [w] "+a" (window)
                  : : "%d0", "%d1", "%d2", "%d3", "%a0", "%a1", "%a2", "%a3",
                      "cc", "memory");
}
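
/* Reference only: vect_mult_bw is the same as vect_mult_fw except that the
   window pointer walks backwards, as in the alignment pre-loop above. A
   disabled, illustrative C sketch: */
#if 0
static inline void vect_mult_bw_ref(int32_t *data, int32_t *window, int n)
{
    while (n > 0) {
        *data = MULT31(*data, *window);
        data++;
        window--;
        n--;
    }
}
#endif
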
#endif

#endif
/* not used anymore */

#ifndef _V_CLIP_MATH
#define _V_CLIP_MATH

/* this is portable C and simple; why not use this as default? */
static inline int32_t CLIP_TO_15(register int32_t x) {
    register int32_t hi=32767, lo=-32768;
    return (x>=hi ? hi : (x<=lo ? lo : x));
}

#endif

#endif