apps/codecs/libwmapro/wmapro_math.h

   1 #ifndef _WMAPRO_MATH_H_
   2 #define _WMAPRO_MATH_H_
   3
   4 #include <inttypes.h>
   5
   6 /* rockbox: not used
   7 #define fixtof16(x)       (float)((float)(x) / (float)(1 << 16))
   8 #define fixtof31(x)       (float)((float)(x) / (float)(1 << 31))
   9 #define ftofix16(x)       ((int32_t)((x) * (float)(1 << 16) + ((x) < 0 ? -0.5:0.5)))
  10 #define ftofix31(x)       ((int32_t)((x) * (float)(1 << 31) + ((x) < 0 ? -0.5:0.5)))
  11 */
  12
  13 #if defined(CPU_ARM)
  14     /* Calculates: result = (X*Y)>>Z */
  15     #define fixmulshift(X,Y,Z) \
  16     ({ \
  17         int32_t lo; \
  18         int32_t hi; \
  19         asm volatile ( \
  20             "smull %[lo], %[hi], %[x], %[y] \n\t"   /* multiply */ \
  21             "mov   %[lo], %[lo], lsr %[shr] \n\t"   /* lo >>= Z */ \
  22             "orr   %[lo], %[lo], %[hi], lsl %[shl]" /* lo |= (hi << (32-Z)) */ \
  23             : [lo]"=&r"(lo), [hi]"=&r"(hi) \
  24             : [x]"r"(X), [y]"r"(Y), [shr]"r"(Z), [shl]"r"(32-Z)); \
  25         lo; \
  26     })
  27
  28     /* Calculates: result = (X*Y)>>16 */
  29     #define fixmul16(X,Y) \
  30      ({ \
  31         int32_t lo; \
  32         int32_t hi; \
  33         asm volatile ( \
  34            "smull %[lo], %[hi], %[x], %[y] \n\t" /* multiply */ \
  35            "mov   %[lo], %[lo], lsr #16    \n\t" /* lo >>= 16 */ \
  36            "orr   %[lo], %[lo], %[hi], lsl #16"  /* lo |= (hi << 16) */ \
  37            : [lo]"=&r"(lo), [hi]"=&r"(hi) \
  38            : [x]"r"(X), [y]"r"(Y)); \
  39         lo; \
  40      })
  41
  42     /* Calculates: result = (X*Y)>>24 */
  43     #define fixmul24(X,Y) \
  44      ({ \
  45         int32_t lo; \
  46         int32_t hi; \
  47         asm volatile ( \
  48            "smull %[lo], %[hi], %[x], %[y] \n\t" /* multiply */ \
  49            "mov   %[lo], %[lo], lsr #24    \n\t" /* lo >>= 24 */ \
  50            "orr   %[lo], %[lo], %[hi], lsl #8"   /* lo |= (hi << 8) */ \
  51            : [lo]"=&r"(lo), [hi]"=&r"(hi) \
  52            : [x]"r"(X), [y]"r"(Y)); \
  53         lo; \
  54      })
  55
  56     /* Calculates: result = (X*Y)>>31, loose 1 bit precision */
  57     #define fixmul31(X,Y) \
  58      ({ \
  59         int32_t lo; \
  60         int32_t hi; \
  61         asm volatile ( \
  62            "smull %[lo], %[hi], %[x], %[y] \n\t" /* multiply */ \
  63            "mov   %[hi], %[hi], lsl #1"          /* hi <<= 1 */ \
  64            : [lo]"=&r"(lo), [hi]"=&r"(hi) \
  65            : [x]"r"(X), [y]"r"(Y)); \
  66         hi; \
  67      })
  68 #elif defined(CPU_COLDFIRE)
  69     /* Calculates: result = (X*Y)>>Z */
  70     #define fixmulshift(X,Y,Z) \
  71     ({ \
  72         int32_t t1; \
  73         int32_t t2; \
  74         asm volatile ( \
  75             "mac.l   %[x],%[y],%%acc0\n\t" /* multiply */ \
  76             "mulu.l  %[y],%[x]       \n\t" /* get lower half, avoid emac stall */ \
  77             "movclr.l %%acc0,%[t1]   \n\t" /* get higher half */ \
  78             "moveq.l #31,%[t2]       \n\t" \
  79             "sub.l   %[sh],%[t2]     \n\t" /* t2 = 31 - shift */ \
  80             "ble.s   1f              \n\t" \
  81             "asl.l   %[t2],%[t1]     \n\t" /* hi <<= 31 - shift */ \
  82             "lsr.l   %[sh],%[x]      \n\t" /* (unsigned)lo >>= shift */ \
  83             "or.l    %[x],%[t1]      \n\t" /* combine result */ \
  84             "bra.s   2f              \n\t" \
  85          "1:                         \n\t" \
  86             "neg.l   %[t2]           \n\t" /* t2 = shift - 31 */ \
  87             "asr.l   %[t2],%[t1]     \n\t" /* hi >>= t2 */ \
  88          "2:                         \n" \
  89         : [t1]"=&d"(t1), [t2]"=&d"(t2) \
  90         : [x] "d"((X)), [y] "d"((Y)), [sh]"d"((Z))); \
  91         t1; \
  92     })
  93
  94     /* Calculates: result = (X*Y)>>16 */
  95     #define fixmul16(X,Y) \
  96     ({ \
  97         int32_t t, x = (X); \
  98         asm volatile ( \
  99             "mac.l    %[x],%[y],%%acc0\n\t" /* multiply */ \
 100             "mulu.l   %[y],%[x]       \n\t" /* get lower half, avoid emac stall */ \
 101             "movclr.l %%acc0,%[t]     \n\t" /* get higher half */ \
 102             "lsr.l    #1,%[t]         \n\t" /* hi >>= 1 to compensate emac shift */ \
 103             "move.w   %[t],%[x]       \n\t" /* combine halfwords */\
 104             "swap     %[x]            \n\t" \
 105             : [t]"=&d"(t), [x] "+d" (x) \
 106             : [y] "d" ((Y))); \
 107         x; \
 108     })
 109
 110     /* Calculates: result = (X*Y)>>31 (may lose msb to overflow) */
 111     #define fixmul31(X,Y) \
 112     ({ \
 113        int32_t t; \
 114        asm volatile ( \
 115           "mac.l %[x], %[y], %%acc0\n\t"   /* multiply */ \
 116           "movclr.l %%acc0, %[t]\n\t"      /* get higher half as result */ \
 117           : [t] "=d" (t) \
 118           : [x] "r" ((X)), [y] "r" ((Y))); \
 119        t; \
 120     })
 121 #else
 122     static inline int32_t fixmulshift(int32_t x, int32_t y, int shamt)
 123     {
 124         int64_t temp;
 125         temp = x;
 126         temp *= y;
 127
 128         temp >>= shamt;
 129
 130         return (int32_t)temp;
 131     }
 132
 133     static inline int32_t fixmul31(int32_t x, int32_t y)
 134     {
 135         int64_t temp;
 136         temp = x;
 137         temp *= y;
 138
 139         temp >>= 31;
 140
 141         return (int32_t)temp;
 142     }
 143
 144     static inline int32_t fixmul24(int32_t x, int32_t y)
 145     {
 146         int64_t temp;
 147         temp = x;
 148         temp *= y;
 149
 150         temp >>= 24;
 151
 152         return (int32_t)temp;
 153     }
 154
 155     static inline int32_t fixmul16(int32_t x, int32_t y)
 156     {
 157         int64_t temp;
 158         temp = x;
 159         temp *= y;
 160
 161         temp >>= 16;
 162
 163         return (int32_t)temp;
 164     }
 165 #endif /* CPU_COLDFIRE, CPU_ARM */
 166
 167 #if defined(CPU_COLDFIRE)
 168     #define VECT_MUL_WIN_KERNEL(i, j, s0, s1, wi, wj) \
 169         asm volatile ( \
 170             "mac.l    %[s0], %[wj], %%acc0 \n\t" \
 171             "msac.l   %[s1], %[wi], %%acc0 \n\t" \
 172             "mac.l    %[s0], %[wi], %%acc1 \n\t" \
 173             "mac.l    %[s1], %[wj], %%acc1 \n\t" \
 174             "movclr.l %%acc0, %[s0]        \n\t" \
 175             "move.l   %[s0], (%[dst_i])    \n\t" \
 176             "movclr.l %%acc1, %[s0]        \n\t" \
 177             "move.l   %[s0], (%[dst_j])    \n\t" \
 178             : [s0] "+r" (s0) /* register is clobbered so specify it as an input */ \
 179             : [dst_i] "a" (&dst[i]), [dst_j] "a" (&dst[j]), \
 180               [s1] "r" (s1), [wi] "r" (wi), [wj] "r" (wj) \
 181             : "cc", "memory");
 182 #else
 183     #define VECT_MUL_WIN_KERNEL(i, j, s0, s1, wi, wj) \
 184         dst[i] = fixmul31(wj, s0) - fixmul31(wi, s1); \
 185         dst[j] = fixmul31(wi, s0) + fixmul31(wj, s1);
 186 #endif /* CPU_COLDFIRE */
 187
 188 static inline void vector_fixmul_window(int32_t *dst, const int32_t *src0,
 189                                    const int32_t *src1, const int32_t *win,
 190                                    int len)
 191 {
 192     int i, j;
 193     dst += len;
 194     win += len;
 195     src0+= len;
 196     for(i=-len, j=len-1; i<0; i++, j--) {
 197         int32_t s0 = src0[i]; /* s0 = src0[      0 ... len-1] */
 198         int32_t s1 = src1[j]; /* s1 = src1[2*len-1 ... len]   */
 199         int32_t wi = -win[i]; /* wi = -win[      0 ... len-1] */
 200         int32_t wj = -win[j]; /* wj = -win[2*len-1 ... len]   */
 201         VECT_MUL_WIN_KERNEL(i, j, s0, s1, wi, wj);
 202     }
 203 }
 204
 205 #if defined(CPU_ARM)
 206     #define VECT_MUL_SCALAR_KERNEL(dst, src, mul) \
 207         asm volatile ( \
 208             "ldmia %[src]!, {r1-r4}   \n\t" \
 209             "smull r0, r5, r1, %[mul] \n\t" \
 210             "mov   r0, r0, lsr #16    \n\t" \
 211             "orr   r0, r0, r5, lsl #16\n\t" \
 212             "smull r1, r5, r2, %[mul] \n\t" \
 213             "mov   r1, r1, lsr #16    \n\t" \
 214             "orr   r1, r1, r5, lsl #16\n\t" \
 215             "smull r2, r5, r3, %[mul] \n\t" \
 216             "mov   r2, r2, lsr #16    \n\t" \
 217             "orr   r2, r2, r5, lsl #16\n\t" \
 218             "smull r3, r5, r4, %[mul] \n\t" \
 219             "mov   r3, r3, lsr #16    \n\t" \
 220             "orr   r3, r3, r5, lsl #16\n\t" \
 221             "stmia %[dst]!, {r0-r3}   \n"   \
 222             : [dst]"+r"(dst), [src]"+r"(src) \
 223             : [mul]"r"(mul) \
 224             : "r0", "r1", "r2", "r3", "r4", "r5", "memory");
 225 #else
 226     #define VECT_MUL_SCALAR_KERNEL(dst, src, mul) \
 227         dst[i  ] = fixmul16(src[i  ], mul); \
 228         dst[i+1] = fixmul16(src[i+1], mul); \
 229         dst[i+2] = fixmul16(src[i+2], mul); \
 230         dst[i+3] = fixmul16(src[i+3], mul);
 231 #endif /* CPU_ARM, CPU_COLDFIRE */
 232
 233 static inline void vector_fixmul_scalar(int32_t *dst, const int32_t *src,
 234                                         int32_t mul, int len)
 235 {
 236     /* len is _always_ a multiple of 4, because len is the difference of sfb's
 237      * which themselves are always a multiple of 4. */
 238     int i;
 239     for (i=0; i<len; i+=4) {
 240         VECT_MUL_SCALAR_KERNEL(dst, src, mul);
 241     }
 242 }
 243
 244 static inline int av_clip(int a, int amin, int amax)
 245 {
 246     if      (a < amin) return amin;
 247     else if (a > amax) return amax;
 248     else               return a;
 249 }
 250 #endif /* _WMAPRO_MATH_H_ */