extern/ffmpeg/libavcodec/vp3dsp.c

   1 /*
   2  * Copyright (C) 2004 the ffmpeg project
   3  *
   4  * This library is free software; you can redistribute it and/or
   5  * modify it under the terms of the GNU Lesser General Public
   6  * License as published by the Free Software Foundation; either
   7  * version 2 of the License, or (at your option) any later version.
   8  *
   9  * This library is distributed in the hope that it will be useful,
  10  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  11  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  12  * Lesser General Public License for more details.
  13  *
  14  * You should have received a copy of the GNU Lesser General Public
  15  * License along with this library; if not, write to the Free Software
  16  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  17  */
  18
  19 /**
  20  * @file vp3dsp.c
  21  * Standard C DSP-oriented functions cribbed from the original VP3
  22  * source code.
  23  */
  24
  25 #include "common.h"
  26 #include "avcodec.h"
  27 #include "dsputil.h"
  28
   29 #define IdctAdjustBeforeShift 8 /* rounding bias added before the final >>4 of the column pass (half of 1<<4) */
   30 #define xC1S7 64277 /* 65536 * cos(1*pi/16), rounded; products are shifted right by 16 after use */
   31 #define xC2S6 60547 /* 65536 * cos(2*pi/16), rounded */
   32 #define xC3S5 54491 /* 65536 * cos(3*pi/16), rounded */
   33 #define xC4S4 46341 /* 65536 * cos(4*pi/16), rounded */
   34 #define xC5S3 36410 /* 65536 * cos(5*pi/16), rounded */
   35 #define xC6S2 25080 /* 65536 * cos(6*pi/16), rounded */
   36 #define xC7S1 12785 /* 65536 * cos(7*pi/16), rounded */
  37
  38 static always_inline void idct(uint8_t *dst, int stride, int16_t *input, int type)
  39 {
  40     int16_t *ip = input;
  41     uint8_t *cm = cropTbl + MAX_NEG_CROP;
  42
  43     int A_, B_, C_, D_, _Ad, _Bd, _Cd, _Dd, E_, F_, G_, H_;
  44     int _Ed, _Gd, _Add, _Bdd, _Fd, _Hd;
  45     int t1, t2;
  46
  47     int i;
  48
  49     /* Inverse DCT on the rows now */
  50     for (i = 0; i < 8; i++) {
  51         /* Check for non-zero values */
  52         if ( ip[0] | ip[1] | ip[2] | ip[3] | ip[4] | ip[5] | ip[6] | ip[7] ) {
  53             t1 = (int32_t)(xC1S7 * ip[1]);
  54             t2 = (int32_t)(xC7S1 * ip[7]);
  55             t1 >>= 16;
  56             t2 >>= 16;
  57             A_ = t1 + t2;
  58
  59             t1 = (int32_t)(xC7S1 * ip[1]);
  60             t2 = (int32_t)(xC1S7 * ip[7]);
  61             t1 >>= 16;
  62             t2 >>= 16;
  63             B_ = t1 - t2;
  64
  65             t1 = (int32_t)(xC3S5 * ip[3]);
  66             t2 = (int32_t)(xC5S3 * ip[5]);
  67             t1 >>= 16;
  68             t2 >>= 16;
  69             C_ = t1 + t2;
  70
  71             t1 = (int32_t)(xC3S5 * ip[5]);
  72             t2 = (int32_t)(xC5S3 * ip[3]);
  73             t1 >>= 16;
  74             t2 >>= 16;
  75             D_ = t1 - t2;
  76
  77
  78             t1 = (int32_t)(xC4S4 * (A_ - C_));
  79             t1 >>= 16;
  80             _Ad = t1;
  81
  82             t1 = (int32_t)(xC4S4 * (B_ - D_));
  83             t1 >>= 16;
  84             _Bd = t1;
  85
  86
  87             _Cd = A_ + C_;
  88             _Dd = B_ + D_;
  89
  90             t1 = (int32_t)(xC4S4 * (ip[0] + ip[4]));
  91             t1 >>= 16;
  92             E_ = t1;
  93
  94             t1 = (int32_t)(xC4S4 * (ip[0] - ip[4]));
  95             t1 >>= 16;
  96             F_ = t1;
  97
  98             t1 = (int32_t)(xC2S6 * ip[2]);
  99             t2 = (int32_t)(xC6S2 * ip[6]);
 100             t1 >>= 16;
 101             t2 >>= 16;
 102             G_ = t1 + t2;
 103
 104             t1 = (int32_t)(xC6S2 * ip[2]);
 105             t2 = (int32_t)(xC2S6 * ip[6]);
 106             t1 >>= 16;
 107             t2 >>= 16;
 108             H_ = t1 - t2;
 109
 110
 111             _Ed = E_ - G_;
 112             _Gd = E_ + G_;
 113
 114             _Add = F_ + _Ad;
 115             _Bdd = _Bd - H_;
 116
 117             _Fd = F_ - _Ad;
 118             _Hd = _Bd + H_;
 119
 120             /*  Final sequence of operations over-write original inputs. */
 121             ip[0] = _Gd + _Cd ;
 122             ip[7] = _Gd - _Cd ;
 123
 124             ip[1] = _Add + _Hd;
 125             ip[2] = _Add - _Hd;
 126
 127             ip[3] = _Ed + _Dd ;
 128             ip[4] = _Ed - _Dd ;
 129
 130             ip[5] = _Fd + _Bdd;
 131             ip[6] = _Fd - _Bdd;
 132
 133         }
 134
 135         ip += 8;            /* next row */
 136     }
 137
 138     ip = input;
 139
 140     for ( i = 0; i < 8; i++) {
 141         /* Check for non-zero values (bitwise or faster than ||) */
 142         if ( ip[1 * 8] | ip[2 * 8] | ip[3 * 8] |
 143              ip[4 * 8] | ip[5 * 8] | ip[6 * 8] | ip[7 * 8] ) {
 144
 145             t1 = (int32_t)(xC1S7 * ip[1*8]);
 146             t2 = (int32_t)(xC7S1 * ip[7*8]);
 147             t1 >>= 16;
 148             t2 >>= 16;
 149             A_ = t1 + t2;
 150
 151             t1 = (int32_t)(xC7S1 * ip[1*8]);
 152             t2 = (int32_t)(xC1S7 * ip[7*8]);
 153             t1 >>= 16;
 154             t2 >>= 16;
 155             B_ = t1 - t2;
 156
 157             t1 = (int32_t)(xC3S5 * ip[3*8]);
 158             t2 = (int32_t)(xC5S3 * ip[5*8]);
 159             t1 >>= 16;
 160             t2 >>= 16;
 161             C_ = t1 + t2;
 162
 163             t1 = (int32_t)(xC3S5 * ip[5*8]);
 164             t2 = (int32_t)(xC5S3 * ip[3*8]);
 165             t1 >>= 16;
 166             t2 >>= 16;
 167             D_ = t1 - t2;
 168
 169
 170             t1 = (int32_t)(xC4S4 * (A_ - C_));
 171             t1 >>= 16;
 172             _Ad = t1;
 173
 174             t1 = (int32_t)(xC4S4 * (B_ - D_));
 175             t1 >>= 16;
 176             _Bd = t1;
 177
 178
 179             _Cd = A_ + C_;
 180             _Dd = B_ + D_;
 181
 182             t1 = (int32_t)(xC4S4 * (ip[0*8] + ip[4*8]));
 183             t1 >>= 16;
 184             E_ = t1;
 185
 186             t1 = (int32_t)(xC4S4 * (ip[0*8] - ip[4*8]));
 187             t1 >>= 16;
 188             F_ = t1;
 189
 190             t1 = (int32_t)(xC2S6 * ip[2*8]);
 191             t2 = (int32_t)(xC6S2 * ip[6*8]);
 192             t1 >>= 16;
 193             t2 >>= 16;
 194             G_ = t1 + t2;
 195
 196             t1 = (int32_t)(xC6S2 * ip[2*8]);
 197             t2 = (int32_t)(xC2S6 * ip[6*8]);
 198             t1 >>= 16;
 199             t2 >>= 16;
 200             H_ = t1 - t2;
 201
 202
 203             _Ed = E_ - G_;
 204             _Gd = E_ + G_;
 205
 206             _Add = F_ + _Ad;
 207             _Bdd = _Bd - H_;
 208
 209             _Fd = F_ - _Ad;
 210             _Hd = _Bd + H_;
 211
 212             if(type==1){  //HACK
 213                 _Gd += 16*128;
 214                 _Add+= 16*128;
 215                 _Ed += 16*128;
 216                 _Fd += 16*128;
 217             }
 218             _Gd += IdctAdjustBeforeShift;
 219             _Add += IdctAdjustBeforeShift;
 220             _Ed += IdctAdjustBeforeShift;
 221             _Fd += IdctAdjustBeforeShift;
 222
 223             /* Final sequence of operations over-write original inputs. */
 224             if(type==0){
 225                 ip[0*8] = (_Gd + _Cd )  >> 4;
 226                 ip[7*8] = (_Gd - _Cd )  >> 4;
 227
 228                 ip[1*8] = (_Add + _Hd ) >> 4;
 229                 ip[2*8] = (_Add - _Hd ) >> 4;
 230
 231                 ip[3*8] = (_Ed + _Dd )  >> 4;
 232                 ip[4*8] = (_Ed - _Dd )  >> 4;
 233
 234                 ip[5*8] = (_Fd + _Bdd ) >> 4;
 235                 ip[6*8] = (_Fd - _Bdd ) >> 4;
 236             }else if(type==1){
 237                 dst[0*stride] = cm[(_Gd + _Cd )  >> 4];
 238                 dst[7*stride] = cm[(_Gd - _Cd )  >> 4];
 239
 240                 dst[1*stride] = cm[(_Add + _Hd ) >> 4];
 241                 dst[2*stride] = cm[(_Add - _Hd ) >> 4];
 242
 243                 dst[3*stride] = cm[(_Ed + _Dd )  >> 4];
 244                 dst[4*stride] = cm[(_Ed - _Dd )  >> 4];
 245
 246                 dst[5*stride] = cm[(_Fd + _Bdd ) >> 4];
 247                 dst[6*stride] = cm[(_Fd - _Bdd ) >> 4];
 248             }else{
 249                 dst[0*stride] = cm[dst[0*stride] + ((_Gd + _Cd )  >> 4)];
 250                 dst[7*stride] = cm[dst[7*stride] + ((_Gd - _Cd )  >> 4)];
 251
 252                 dst[1*stride] = cm[dst[1*stride] + ((_Add + _Hd ) >> 4)];
 253                 dst[2*stride] = cm[dst[2*stride] + ((_Add - _Hd ) >> 4)];
 254
 255                 dst[3*stride] = cm[dst[3*stride] + ((_Ed + _Dd )  >> 4)];
 256                 dst[4*stride] = cm[dst[4*stride] + ((_Ed - _Dd )  >> 4)];
 257
 258                 dst[5*stride] = cm[dst[5*stride] + ((_Fd + _Bdd ) >> 4)];
 259                 dst[6*stride] = cm[dst[6*stride] + ((_Fd - _Bdd ) >> 4)];
 260             }
 261
 262         } else {
 263             if(type==0){
 264                 ip[0*8] =
 265                 ip[1*8] =
 266                 ip[2*8] =
 267                 ip[3*8] =
 268                 ip[4*8] =
 269                 ip[5*8] =
 270                 ip[6*8] =
 271                 ip[7*8] = ((xC4S4 * ip[0*8] + (IdctAdjustBeforeShift<<16))>>20);
 272             }else if(type==1){
 273                 dst[0*stride]=
 274                 dst[1*stride]=
 275                 dst[2*stride]=
 276                 dst[3*stride]=
 277                 dst[4*stride]=
 278                 dst[5*stride]=
 279                 dst[6*stride]=
 280                 dst[7*stride]= 128 + ((xC4S4 * ip[0*8] + (IdctAdjustBeforeShift<<16))>>20);
 281             }else{
 282                 if(ip[0*8]){
 283                     int v= ((xC4S4 * ip[0*8] + (IdctAdjustBeforeShift<<16))>>20);
 284                     dst[0*stride] = cm[dst[0*stride] + v];
 285                     dst[1*stride] = cm[dst[1*stride] + v];
 286                     dst[2*stride] = cm[dst[2*stride] + v];
 287                     dst[3*stride] = cm[dst[3*stride] + v];
 288                     dst[4*stride] = cm[dst[4*stride] + v];
 289                     dst[5*stride] = cm[dst[5*stride] + v];
 290                     dst[6*stride] = cm[dst[6*stride] + v];
 291                     dst[7*stride] = cm[dst[7*stride] + v];
 292                 }
 293             }
 294         }
 295
 296         ip++;            /* next column */
 297         dst++;
 298     }
 299 }
 300
 301 void ff_vp3_idct_c(DCTELEM *block/* align 16*/){
 302     idct(NULL, 0, block, 0);
 303 }
 304
 305 void ff_vp3_idct_put_c(uint8_t *dest/*align 8*/, int line_size, DCTELEM *block/*align 16*/){
 306     idct(dest, line_size, block, 1);
 307 }
 308
 309 void ff_vp3_idct_add_c(uint8_t *dest/*align 8*/, int line_size, DCTELEM *block/*align 16*/){
 310     idct(dest, line_size, block, 2);
 311 }