/*
 * High quality image resampling with polyphase filters
 * Copyright (c) 2001 Fabrice Bellard.
 *
 * This file is part of FFmpeg.
 *
 * FFmpeg is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * FFmpeg is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with FFmpeg; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 */

/**
 * @file imgresample.c
 * High quality image resampling with polyphase filters.
 */

#include "avcodec.h"
#include "swscale.h"
#include "dsputil.h"

#define NB_COMPONENTS 3

#define PHASE_BITS 4
#define NB_PHASES  (1 << PHASE_BITS)
#define NB_TAPS    4
#define FCENTER    1  /* index of the center of the filter */
//#define TEST    1  /* Test it */

#define POS_FRAC_BITS 16
#define POS_FRAC      (1 << POS_FRAC_BITS)
/* 6 bits precision is needed for MMX */
#define FILTER_BITS   8

#define LINE_BUF_HEIGHT (NB_TAPS * 4)

struct SwsContext {
    AVClass *av_class;
    struct ImgReSampleContext *resampling_ctx;
    enum PixelFormat src_pix_fmt, dst_pix_fmt;
};

struct ImgReSampleContext {
    int iwidth, iheight, owidth, oheight;
    int topBand, bottomBand, leftBand, rightBand;
    int padtop, padbottom, padleft, padright;
    int pad_owidth, pad_oheight;
    int h_incr, v_incr;
    DECLARE_ALIGNED_8(int16_t, h_filters[NB_PHASES][NB_TAPS]); /* horizontal filters */
    DECLARE_ALIGNED_8(int16_t, v_filters[NB_PHASES][NB_TAPS]); /* vertical filters */
    uint8_t *line_buf;
};

void av_build_filter(int16_t *filter, double factor, int tap_count, int phase_count, int scale, int type);

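/*
 * Source positions are kept in 16.16 fixed point (POS_FRAC_BITS fractional
 * bits). The integer part (pos >> POS_FRAC_BITS) selects the first source
 * sample of the filter window, and the top PHASE_BITS of the fraction select
 * one of the NB_PHASES precomputed filter phases. For example, pos = 0x2A800
 * means source sample 2 with phase (0x2A800 >> 12) & 0xF = 0xA.
 */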
static inline int get_phase(int pos)
{
    return ((pos) >> (POS_FRAC_BITS - PHASE_BITS)) & ((1 << PHASE_BITS) - 1);
}

/* This function must be optimized */
static void h_resample_fast(uint8_t *dst, int dst_width, const uint8_t *src,
                            int src_width, int src_start, int src_incr,
                            int16_t *filters)
{
    int src_pos, phase, sum, i;
    const uint8_t *s;
    int16_t *filter;

    src_pos = src_start;
    for(i=0;i<dst_width;i++) {
#ifdef TEST
        /* test */
        if ((src_pos >> POS_FRAC_BITS) < 0 ||
            (src_pos >> POS_FRAC_BITS) > (src_width - NB_TAPS))
            av_abort();
#endif
        s = src + (src_pos >> POS_FRAC_BITS);
        phase = get_phase(src_pos);
        filter = filters + phase * NB_TAPS;
#if NB_TAPS == 4
        sum = s[0] * filter[0] +
              s[1] * filter[1] +
              s[2] * filter[2] +
              s[3] * filter[3];
#else
        {
            int j;
            sum = 0;
            for(j=0;j<NB_TAPS;j++)
                sum += s[j] * filter[j];
        }
#endif
        sum = sum >> FILTER_BITS;
        if (sum < 0)
            sum = 0;
        else if (sum > 255)
            sum = 255;
        dst[0] = sum;
        src_pos += src_incr;
        dst++;
    }
}

/* This function must be optimized */
static void v_resample(uint8_t *dst, int dst_width, const uint8_t *src,
                       int wrap, int16_t *filter)
{
    int sum, i;
    const uint8_t *s;

    s = src;
    for(i=0;i<dst_width;i++) {
#if NB_TAPS == 4
        sum = s[0 * wrap] * filter[0] +
              s[1 * wrap] * filter[1] +
              s[2 * wrap] * filter[2] +
              s[3 * wrap] * filter[3];
#else
        {
            int j;
            uint8_t *s1 = s;

            sum = 0;
            for(j=0;j<NB_TAPS;j++) {
                sum += s1[0] * filter[j];
                s1 += wrap;
            }
        }
#endif
        sum = sum >> FILTER_BITS;
        if (sum < 0)
            sum = 0;
        else if (sum > 255)
            sum = 255;
        dst[0] = sum;
        dst++;
        s++;
    }
}

#ifdef HAVE_MMX

#include "i386/mmx.h"

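/*
 * FILTER4(reg): compute one horizontally filtered output pixel into `reg'.
 * It loads 4 source bytes at the current integer position, zero-extends them
 * to 16 bits, multiplies them with the 4 taps of the current phase (pmaddwd
 * yields two 32-bit partial sums), adds the two halves together and shifts
 * the result right by FILTER_BITS. src_pos is advanced by src_incr, so each
 * expansion produces the next output pixel. mm6 and mm7 (zero) are scratch.
 */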
#define FILTER4(reg) \
{\
        s = src + (src_pos >> POS_FRAC_BITS);\
        phase = get_phase(src_pos);\
        filter = filters + phase * NB_TAPS;\
        movq_m2r(*s, reg);\
        punpcklbw_r2r(mm7, reg);\
        movq_m2r(*filter, mm6);\
        pmaddwd_r2r(reg, mm6);\
        movq_r2r(mm6, reg);\
        psrlq_i2r(32, reg);\
        paddd_r2r(mm6, reg);\
        psrad_i2r(FILTER_BITS, reg);\
        src_pos += src_incr;\
}

#define DUMP(reg) movq_r2m(reg, tmp); printf(#reg "=%016"PRIx64"\n", tmp.uq);

/* XXX: do four pixels at a time */
static void h_resample_fast4_mmx(uint8_t *dst, int dst_width,
                                 const uint8_t *src, int src_width,
                                 int src_start, int src_incr, int16_t *filters)
{
    int src_pos, phase;
    const uint8_t *s;
    int16_t *filter;
    mmx_t tmp;

    src_pos = src_start;
    pxor_r2r(mm7, mm7);

    while (dst_width >= 4) {

        FILTER4(mm0);
        FILTER4(mm1);
        FILTER4(mm2);
        FILTER4(mm3);

        packuswb_r2r(mm7, mm0);
        packuswb_r2r(mm7, mm1);
        packuswb_r2r(mm7, mm3);
        packuswb_r2r(mm7, mm2);
        movq_r2m(mm0, tmp);
        dst[0] = tmp.ub[0];
        movq_r2m(mm1, tmp);
        dst[1] = tmp.ub[0];
        movq_r2m(mm2, tmp);
        dst[2] = tmp.ub[0];
        movq_r2m(mm3, tmp);
        dst[3] = tmp.ub[0];
        dst += 4;
        dst_width -= 4;
    }
    while (dst_width > 0) {
        FILTER4(mm0);
        packuswb_r2r(mm7, mm0);
        movq_r2m(mm0, tmp);
        dst[0] = tmp.ub[0];
        dst++;
        dst_width--;
    }
    emms();
}

static void v_resample4_mmx(uint8_t *dst, int dst_width, const uint8_t *src,
                            int wrap, int16_t *filter)
{
    int sum, i, v;
    const uint8_t *s;
    mmx_t tmp;
    mmx_t coefs[4];

    for(i=0;i<4;i++) {
        v = filter[i];
        coefs[i].uw[0] = v;
        coefs[i].uw[1] = v;
        coefs[i].uw[2] = v;
        coefs[i].uw[3] = v;
    }

    pxor_r2r(mm7, mm7);
    s = src;
    while (dst_width >= 4) {
        movq_m2r(s[0 * wrap], mm0);
        punpcklbw_r2r(mm7, mm0);
        movq_m2r(s[1 * wrap], mm1);
        punpcklbw_r2r(mm7, mm1);
        movq_m2r(s[2 * wrap], mm2);
        punpcklbw_r2r(mm7, mm2);
        movq_m2r(s[3 * wrap], mm3);
        punpcklbw_r2r(mm7, mm3);

        pmullw_m2r(coefs[0], mm0);
        pmullw_m2r(coefs[1], mm1);
        pmullw_m2r(coefs[2], mm2);
        pmullw_m2r(coefs[3], mm3);

        paddw_r2r(mm1, mm0);
        paddw_r2r(mm3, mm2);
        paddw_r2r(mm2, mm0);
        psraw_i2r(FILTER_BITS, mm0);

        packuswb_r2r(mm7, mm0);
        movq_r2m(mm0, tmp);

        *(uint32_t *)dst = tmp.ud[0];
        dst += 4;
        s += 4;
        dst_width -= 4;
    }
    while (dst_width > 0) {
        sum = s[0 * wrap] * filter[0] +
              s[1 * wrap] * filter[1] +
              s[2 * wrap] * filter[2] +
              s[3 * wrap] * filter[3];
        sum = sum >> FILTER_BITS;
        if (sum < 0)
            sum = 0;
        else if (sum > 255)
            sum = 255;
        dst[0] = sum;
        dst++;
        s++;
        dst_width--;
    }
    emms();
}
#endif /* HAVE_MMX */

#ifdef HAVE_ALTIVEC
typedef union {
    vector unsigned char v;
    unsigned char c[16];
} vec_uc_t;

typedef union {
    vector signed short v;
    signed short s[8];
} vec_ss_t;

void v_resample16_altivec(uint8_t *dst, int dst_width, const uint8_t *src,
                          int wrap, int16_t *filter)
{
    int sum, i;
    const uint8_t *s;
    vector unsigned char *tv, tmp, dstv, zero;
    vec_ss_t srchv[4], srclv[4], fv[4];
    vector signed short zeros, sumhv, sumlv;
    s = src;

    for(i=0;i<4;i++)
    {
        /*
           The vec_madds later on does an implicit >>15 on the result.
           Since FILTER_BITS is 8, and we have 15 bits of magnitude in
           a signed short, we have just enough bits to pre-shift our
           filter constants <<7 to compensate for vec_madds.
        */
        fv[i].s[0] = filter[i] << (15-FILTER_BITS);
        fv[i].v = vec_splat(fv[i].v, 0);
    }

    zero = vec_splat_u8(0);
    zeros = vec_splat_s16(0);

    /*
       When we're resampling, we'd ideally like both our input buffers
       and output buffers to be 16-byte aligned, so we can do both aligned
       reads and writes. Sadly we can't always have this at the moment, so
       we opt for aligned writes, as unaligned writes have a huge overhead.
       To do this, do enough scalar resamples to get dst 16-byte aligned.
    */
    i = (-(int)dst) & 0xf;
    while(i>0) {
        sum = s[0 * wrap] * filter[0] +
              s[1 * wrap] * filter[1] +
              s[2 * wrap] * filter[2] +
              s[3 * wrap] * filter[3];
        sum = sum >> FILTER_BITS;
        if (sum<0) sum = 0; else if (sum>255) sum=255;
        dst[0] = sum;
        dst++;
        s++;
        dst_width--;
        i--;
    }

    /* Do our altivec resampling on 16 pixels at once. */
    while(dst_width>=16) {
        /*
           Read 16 (potentially unaligned) bytes from each of
           4 lines into 4 vectors, and split them into shorts.
           Interleave the multiply/accumulate for the resample
           filter with the loads to hide the 3 cycle latency
           the vec_madds have.
        */
        tv = (vector unsigned char *) &s[0 * wrap];
        tmp = vec_perm(tv[0], tv[1], vec_lvsl(0, &s[i * wrap]));
        srchv[0].v = (vector signed short) vec_mergeh(zero, tmp);
        srclv[0].v = (vector signed short) vec_mergel(zero, tmp);
        sumhv = vec_madds(srchv[0].v, fv[0].v, zeros);
        sumlv = vec_madds(srclv[0].v, fv[0].v, zeros);

        tv = (vector unsigned char *) &s[1 * wrap];
        tmp = vec_perm(tv[0], tv[1], vec_lvsl(0, &s[1 * wrap]));
        srchv[1].v = (vector signed short) vec_mergeh(zero, tmp);
        srclv[1].v = (vector signed short) vec_mergel(zero, tmp);
        sumhv = vec_madds(srchv[1].v, fv[1].v, sumhv);
        sumlv = vec_madds(srclv[1].v, fv[1].v, sumlv);

        tv = (vector unsigned char *) &s[2 * wrap];
        tmp = vec_perm(tv[0], tv[1], vec_lvsl(0, &s[2 * wrap]));
        srchv[2].v = (vector signed short) vec_mergeh(zero, tmp);
        srclv[2].v = (vector signed short) vec_mergel(zero, tmp);
        sumhv = vec_madds(srchv[2].v, fv[2].v, sumhv);
        sumlv = vec_madds(srclv[2].v, fv[2].v, sumlv);

        tv = (vector unsigned char *) &s[3 * wrap];
        tmp = vec_perm(tv[0], tv[1], vec_lvsl(0, &s[3 * wrap]));
        srchv[3].v = (vector signed short) vec_mergeh(zero, tmp);
        srclv[3].v = (vector signed short) vec_mergel(zero, tmp);
        sumhv = vec_madds(srchv[3].v, fv[3].v, sumhv);
        sumlv = vec_madds(srclv[3].v, fv[3].v, sumlv);

        /*
           Pack the results into our destination vector,
           and do an aligned write of that back to memory.
        */
        dstv = vec_packsu(sumhv, sumlv);
        vec_st(dstv, 0, (vector unsigned char *) dst);

        dst+=16;
        s+=16;
        dst_width-=16;
    }

    /*
       If there are any leftover pixels, resample them
       with the slow scalar method.
    */
    while(dst_width>0) {
        sum = s[0 * wrap] * filter[0] +
              s[1 * wrap] * filter[1] +
              s[2 * wrap] * filter[2] +
              s[3 * wrap] * filter[3];
        sum = sum >> FILTER_BITS;
        if (sum<0) sum = 0; else if (sum>255) sum=255;
        dst[0] = sum;
        dst++;
        s++;
        dst_width--;
    }
}
#endif /* HAVE_ALTIVEC */

/* slow version to handle limit cases. Does not need optimisation */
static void h_resample_slow(uint8_t *dst, int dst_width,
                            const uint8_t *src, int src_width,
                            int src_start, int src_incr, int16_t *filters)
{
    int src_pos, phase, sum, j, v, i;
    const uint8_t *s, *src_end;
    int16_t *filter;

    src_end = src + src_width;
    src_pos = src_start;
    for(i=0;i<dst_width;i++) {
        s = src + (src_pos >> POS_FRAC_BITS);
        phase = get_phase(src_pos);
        filter = filters + phase * NB_TAPS;
        sum = 0;
        for(j=0;j<NB_TAPS;j++) {
            if (s < src)
                v = src[0];
            else if (s >= src_end)
                v = src_end[-1];
            else
                v = s[0];
            sum += v * filter[j];
            s++;
        }
        sum = sum >> FILTER_BITS;
        if (sum < 0)
            sum = 0;
        else if (sum > 255)
            sum = 255;
        dst[0] = sum;
        src_pos += src_incr;
        dst++;
    }
}

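/*
 * h_resample() splits an output line into up to three parts: pixels whose
 * filter window starts before the first source sample are handled by
 * h_resample_slow(), the central part where all NB_TAPS source samples are
 * in range goes through the fast (plain C or MMX) path, and pixels whose
 * window would read past the end of the source line fall back to the slow
 * version again.
 */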
static void h_resample(uint8_t *dst, int dst_width, const uint8_t *src,
                       int src_width, int src_start, int src_incr,
                       int16_t *filters)
{
    int n, src_end;

    if (src_start < 0) {
        n = (0 - src_start + src_incr - 1) / src_incr;
        h_resample_slow(dst, n, src, src_width, src_start, src_incr, filters);
        dst += n;
        dst_width -= n;
        src_start += n * src_incr;
    }
    src_end = src_start + dst_width * src_incr;
    if (src_end > ((src_width - NB_TAPS) << POS_FRAC_BITS)) {
        n = (((src_width - NB_TAPS + 1) << POS_FRAC_BITS) - 1 - src_start) /
            src_incr;
    } else {
        n = dst_width;
    }
#ifdef HAVE_MMX
    if ((mm_flags & MM_MMX) && NB_TAPS == 4)
        h_resample_fast4_mmx(dst, n,
                             src, src_width, src_start, src_incr, filters);
    else
#endif
        h_resample_fast(dst, n,
                        src, src_width, src_start, src_incr, filters);
    if (n < dst_width) {
        dst += n;
        dst_width -= n;
        src_start += n * src_incr;
        h_resample_slow(dst, dst_width,
                        src, src_width, src_start, src_incr, filters);
    }
}

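/*
 * component_resample() resamples one image plane. Source lines are first
 * filtered horizontally into a ring buffer of LINE_BUF_HEIGHT lines
 * (s->line_buf); whenever a line lands in the last NB_TAPS positions of the
 * ring, it is also mirrored into the first NB_TAPS slots, so the vertical
 * filter can always read NB_TAPS consecutive lines starting at
 * (ring_y - NB_TAPS + 1) without wrapping.
 */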
static void component_resample(ImgReSampleContext *s,
                               uint8_t *output, int owrap, int owidth, int oheight,
                               uint8_t *input, int iwrap, int iwidth, int iheight)
{
    int src_y, src_y1, last_src_y, ring_y, phase_y, y1, y;
    uint8_t *new_line, *src_line;

    last_src_y = - FCENTER - 1;
    /* position of the bottom of the filter in the source image */
    src_y = (last_src_y + NB_TAPS) * POS_FRAC;
    ring_y = NB_TAPS; /* position in ring buffer */
    for(y=0;y<oheight;y++) {
        /* apply horizontal filter on new lines from input if needed */
        src_y1 = src_y >> POS_FRAC_BITS;
        while (last_src_y < src_y1) {
            if (++ring_y >= LINE_BUF_HEIGHT + NB_TAPS)
                ring_y = NB_TAPS;
            last_src_y++;
            /* handle limit conditions: replicate line (slightly
               inefficient because we filter multiple times) */
            y1 = last_src_y;
            if (y1 < 0) {
                y1 = 0;
            } else if (y1 >= iheight) {
                y1 = iheight - 1;
            }
            src_line = input + y1 * iwrap;
            new_line = s->line_buf + ring_y * owidth;
            /* apply filter and handle limit cases correctly */
            h_resample(new_line, owidth,
                       src_line, iwidth, - FCENTER * POS_FRAC, s->h_incr,
                       &s->h_filters[0][0]);
            /* handle ring buffer wrapping */
            if (ring_y >= LINE_BUF_HEIGHT) {
                memcpy(s->line_buf + (ring_y - LINE_BUF_HEIGHT) * owidth,
                       new_line, owidth);
            }
        }
        /* apply vertical filter */
        phase_y = get_phase(src_y);
#ifdef HAVE_MMX
        /* MMX version deactivated because of loss of precision */
        if ((mm_flags & MM_MMX) && NB_TAPS == 4 && 0)
            v_resample4_mmx(output, owidth,
                            s->line_buf + (ring_y - NB_TAPS + 1) * owidth, owidth,
                            &s->v_filters[phase_y][0]);
        else
#endif
#ifdef HAVE_ALTIVEC
        if ((mm_flags & MM_ALTIVEC) && NB_TAPS == 4 && FILTER_BITS <= 6)
            v_resample16_altivec(output, owidth,
                                 s->line_buf + (ring_y - NB_TAPS + 1) * owidth, owidth,
                                 &s->v_filters[phase_y][0]);
        else
#endif
            v_resample(output, owidth,
                       s->line_buf + (ring_y - NB_TAPS + 1) * owidth, owidth,
                       &s->v_filters[phase_y][0]);

        src_y += s->v_incr;

        output += owrap;
    }
}

ImgReSampleContext *img_resample_init(int owidth, int oheight,
                                      int iwidth, int iheight)
{
    return img_resample_full_init(owidth, oheight, iwidth, iheight,
                                  0, 0, 0, 0, 0, 0, 0, 0);
}

ImgReSampleContext *img_resample_full_init(int owidth, int oheight,
                                           int iwidth, int iheight,
                                           int topBand, int bottomBand,
                                           int leftBand, int rightBand,
                                           int padtop, int padbottom,
                                           int padleft, int padright)
{
    ImgReSampleContext *s;

    if (!owidth || !oheight || !iwidth || !iheight)
        return NULL;

    s = av_mallocz(sizeof(ImgReSampleContext));
    if (!s)
        return NULL;
    if ((unsigned)owidth >= UINT_MAX / (LINE_BUF_HEIGHT + NB_TAPS))
        goto fail;
    s->line_buf = av_mallocz(owidth * (LINE_BUF_HEIGHT + NB_TAPS));
    if (!s->line_buf)
        goto fail;

    s->owidth = owidth;
    s->oheight = oheight;
    s->iwidth = iwidth;
    s->iheight = iheight;

    s->topBand = topBand;
    s->bottomBand = bottomBand;
    s->leftBand = leftBand;
    s->rightBand = rightBand;

    s->padtop = padtop;
    s->padbottom = padbottom;
    s->padleft = padleft;
    s->padright = padright;

    s->pad_owidth = owidth - (padleft + padright);
    s->pad_oheight = oheight - (padtop + padbottom);

    s->h_incr = ((iwidth - leftBand - rightBand) * POS_FRAC) / s->pad_owidth;
    s->v_incr = ((iheight - topBand - bottomBand) * POS_FRAC) / s->pad_oheight;

    av_build_filter(&s->h_filters[0][0], (float) s->pad_owidth /
                    (float) (iwidth - leftBand - rightBand), NB_TAPS, NB_PHASES, 1<<FILTER_BITS, 0);
    av_build_filter(&s->v_filters[0][0], (float) s->pad_oheight /
                    (float) (iheight - topBand - bottomBand), NB_TAPS, NB_PHASES, 1<<FILTER_BITS, 0);

    return s;
fail:
    av_free(s);
    return NULL;
}

void img_resample(ImgReSampleContext *s,
                  AVPicture *output, const AVPicture *input)
{
    int i, shift;
    uint8_t* optr;

    for (i=0;i<3;i++) {
        shift = (i == 0) ? 0 : 1;

        optr = output->data[i] + (((output->linesize[i] *
                        s->padtop) + s->padleft) >> shift);

        component_resample(s, optr, output->linesize[i],
                s->pad_owidth >> shift, s->pad_oheight >> shift,
                input->data[i] + (input->linesize[i] *
                    (s->topBand >> shift)) + (s->leftBand >> shift),
                input->linesize[i], ((s->iwidth - s->leftBand -
                        s->rightBand) >> shift),
                (s->iheight - s->topBand - s->bottomBand) >> shift);
    }
}

void img_resample_close(ImgReSampleContext *s)
{
    av_free(s->line_buf);
    av_free(s);
}

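/*
 * Minimal usage sketch of the ImgReSampleContext API above. `in_pic' and
 * `out_pic' are assumed to be caller-allocated planar YUV 4:2:0 AVPictures
 * of the input and output sizes:
 *
 *     ImgReSampleContext *rs = img_resample_init(out_w, out_h, in_w, in_h);
 *     if (rs) {
 *         img_resample(rs, &out_pic, &in_pic);
 *         img_resample_close(rs);
 *     }
 */
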
struct SwsContext *sws_getContext(int srcW, int srcH, int srcFormat,
                                  int dstW, int dstH, int dstFormat,
                                  int flags, SwsFilter *srcFilter,
                                  SwsFilter *dstFilter, double *param)
{
    struct SwsContext *ctx;

    ctx = av_malloc(sizeof(struct SwsContext));
    if (ctx)
        ctx->av_class = av_mallocz(sizeof(AVClass));
    if (!ctx || !ctx->av_class) {
        av_log(NULL, AV_LOG_ERROR, "Cannot allocate a resampling context!\n");
        av_free(ctx);
        return NULL;
    }

    if ((srcH != dstH) || (srcW != dstW)) {
        if ((srcFormat != PIX_FMT_YUV420P) || (dstFormat != PIX_FMT_YUV420P)) {
            av_log(NULL, AV_LOG_INFO, "PIX_FMT_YUV420P will be used as an intermediate format for rescaling\n");
        }
        ctx->resampling_ctx = img_resample_init(dstW, dstH, srcW, srcH);
    } else {
        ctx->resampling_ctx = av_malloc(sizeof(ImgReSampleContext));
        ctx->resampling_ctx->iheight = srcH;
        ctx->resampling_ctx->iwidth = srcW;
        ctx->resampling_ctx->oheight = dstH;
        ctx->resampling_ctx->owidth = dstW;
    }
    ctx->src_pix_fmt = srcFormat;
    ctx->dst_pix_fmt = dstFormat;

    return ctx;
}

void sws_freeContext(struct SwsContext *ctx)
{
    if (!ctx)
        return;
    if ((ctx->resampling_ctx->iwidth != ctx->resampling_ctx->owidth) ||
        (ctx->resampling_ctx->iheight != ctx->resampling_ctx->oheight)) {
        img_resample_close(ctx->resampling_ctx);
    } else {
        av_free(ctx->resampling_ctx);
    }
    av_free(ctx->av_class);
    av_free(ctx);
}

/**
 * Checks if the context is valid or reallocates a new one instead.
 * If context is NULL, just calls sws_getContext() to get a new one.
 * Otherwise, checks whether the parameters are the ones already saved in the
 * context. If that is the case, returns the current context.
 * Otherwise, frees the context and gets a new one.
 *
 * Be warned that srcFilter and dstFilter are not checked; they are
 * assumed to remain valid.
 */
struct SwsContext *sws_getCachedContext(struct SwsContext *ctx,
                                        int srcW, int srcH, int srcFormat,
                                        int dstW, int dstH, int dstFormat, int flags,
                                        SwsFilter *srcFilter, SwsFilter *dstFilter, double *param)
{
    if (ctx != NULL) {
        if ((ctx->resampling_ctx->iwidth != srcW) ||
            (ctx->resampling_ctx->iheight != srcH) ||
            (ctx->src_pix_fmt != srcFormat) ||
            (ctx->resampling_ctx->owidth != dstW) ||
            (ctx->resampling_ctx->oheight != dstH) ||
            (ctx->dst_pix_fmt != dstFormat))
        {
            sws_freeContext(ctx);
            ctx = NULL;
        }
    }
    if (ctx == NULL) {
        return sws_getContext(srcW, srcH, srcFormat,
                              dstW, dstH, dstFormat, flags,
                              srcFilter, dstFilter, param);
    }
    return ctx;
}

int sws_scale(struct SwsContext *ctx, uint8_t* src[], int srcStride[],
              int srcSliceY, int srcSliceH, uint8_t* dst[], int dstStride[])
{
    AVPicture src_pict, dst_pict;
    int i, res = 0;
    AVPicture picture_format_temp;
    AVPicture picture_resample_temp, *formatted_picture, *resampled_picture;
    uint8_t *buf1 = NULL, *buf2 = NULL;
    enum PixelFormat current_pix_fmt;

    for (i = 0; i < 4; i++) {
        src_pict.data[i] = src[i];
        src_pict.linesize[i] = srcStride[i];
        dst_pict.data[i] = dst[i];
        dst_pict.linesize[i] = dstStride[i];
    }

    if ((ctx->resampling_ctx->iwidth != ctx->resampling_ctx->owidth) ||
        (ctx->resampling_ctx->iheight != ctx->resampling_ctx->oheight)) {
        /* We have to rescale the picture, but only YUV420P rescaling is supported... */

        if (ctx->src_pix_fmt != PIX_FMT_YUV420P) {
            int size;

            /* create temporary picture for rescaling input */
            size = avpicture_get_size(PIX_FMT_YUV420P, ctx->resampling_ctx->iwidth, ctx->resampling_ctx->iheight);
            buf1 = av_malloc(size);
            if (!buf1) {
                res = -1;
                goto the_end;
            }
            formatted_picture = &picture_format_temp;
            avpicture_fill((AVPicture*)formatted_picture, buf1,
                           PIX_FMT_YUV420P, ctx->resampling_ctx->iwidth, ctx->resampling_ctx->iheight);

            if (img_convert((AVPicture*)formatted_picture, PIX_FMT_YUV420P,
                            &src_pict, ctx->src_pix_fmt,
                            ctx->resampling_ctx->iwidth, ctx->resampling_ctx->iheight) < 0) {

                av_log(NULL, AV_LOG_ERROR, "pixel format conversion not handled\n");
                res = -1;
                goto the_end;
            }
        } else {
            formatted_picture = &src_pict;
        }

        if (ctx->dst_pix_fmt != PIX_FMT_YUV420P) {
            int size;

            /* create temporary picture for rescaling output */
            size = avpicture_get_size(PIX_FMT_YUV420P, ctx->resampling_ctx->owidth, ctx->resampling_ctx->oheight);
            buf2 = av_malloc(size);
            if (!buf2) {
                res = -1;
                goto the_end;
            }
            resampled_picture = &picture_resample_temp;
            avpicture_fill((AVPicture*)resampled_picture, buf2,
                           PIX_FMT_YUV420P, ctx->resampling_ctx->owidth, ctx->resampling_ctx->oheight);

        } else {
            resampled_picture = &dst_pict;
        }

        /* ...and finally rescale!!! */
        img_resample(ctx->resampling_ctx, resampled_picture, formatted_picture);
        current_pix_fmt = PIX_FMT_YUV420P;
    } else {
        resampled_picture = &src_pict;
        current_pix_fmt = ctx->src_pix_fmt;
    }

    if (current_pix_fmt != ctx->dst_pix_fmt) {
        if (img_convert(&dst_pict, ctx->dst_pix_fmt,
                        resampled_picture, current_pix_fmt,
                        ctx->resampling_ctx->owidth, ctx->resampling_ctx->oheight) < 0) {

            av_log(NULL, AV_LOG_ERROR, "pixel format conversion not handled\n");

            res = -1;
            goto the_end;
        }
    } else if (resampled_picture != &dst_pict) {
        av_picture_copy(&dst_pict, resampled_picture, current_pix_fmt,
                        ctx->resampling_ctx->owidth, ctx->resampling_ctx->oheight);
    }

the_end:
    av_free(buf1);
    av_free(buf2);
    return res;
}

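/*
 * Minimal usage sketch of the SwsContext wrapper above. src_data/src_linesize
 * and dst_data/dst_linesize are assumed to be caller-managed plane pointer and
 * stride arrays (e.g. filled in with avpicture_fill()):
 *
 *     struct SwsContext *sc = sws_getContext(in_w, in_h, PIX_FMT_YUV420P,
 *                                            out_w, out_h, PIX_FMT_YUV420P,
 *                                            0, NULL, NULL, NULL);
 *     if (sc) {
 *         sws_scale(sc, src_data, src_linesize, 0, in_h,
 *                   dst_data, dst_linesize);
 *         sws_freeContext(sc);
 *     }
 */
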
#ifdef TEST
#include <stdio.h>
#undef exit

/* input */
#define XSIZE 256
#define YSIZE 256
uint8_t img[XSIZE * YSIZE];

/* output */
#define XSIZE1 512
#define YSIZE1 512
uint8_t img1[XSIZE1 * YSIZE1];
uint8_t img2[XSIZE1 * YSIZE1];

void save_pgm(const char *filename, uint8_t *img, int xsize, int ysize)
{
#undef fprintf
    FILE *f;
    f = fopen(filename, "w");
    fprintf(f, "P5\n%d %d\n%d\n", xsize, ysize, 255);
    fwrite(img, 1, xsize * ysize, f);
    fclose(f);
#define fprintf please_use_av_log
}

static void dump_filter(int16_t *filter)
{
    int i, ph;

    for(ph=0;ph<NB_PHASES;ph++) {
        av_log(NULL, AV_LOG_INFO, "%2d: ", ph);
        for(i=0;i<NB_TAPS;i++) {
            av_log(NULL, AV_LOG_INFO, " %5.2f", filter[ph * NB_TAPS + i] / 256.0);
        }
        av_log(NULL, AV_LOG_INFO, "\n");
    }
}

#ifdef HAVE_MMX
int mm_flags;
#endif

int main(int argc, char **argv)
{
    int x, y, v, i, xsize, ysize;
    ImgReSampleContext *s;
    float fact, factors[] = { 1/2.0, 3.0/4.0, 1.0, 4.0/3.0, 16.0/9.0, 2.0 };
    char buf[256];

    /* build test image */
    for(y=0;y<YSIZE;y++) {
        for(x=0;x<XSIZE;x++) {
            if (x < XSIZE/2 && y < YSIZE/2) {
                if (x < XSIZE/4 && y < YSIZE/4) {
                    if ((x % 10) <= 6 &&
                        (y % 10) <= 6)
                        v = 0xff;
                    else
                        v = 0x00;
                } else if (x < XSIZE/4) {
                    if (x & 1)
                        v = 0xff;
                    else
                        v = 0;
                } else if (y < XSIZE/4) {
                    if (y & 1)
                        v = 0xff;
                    else
                        v = 0;
                } else {
                    if (y < YSIZE*3/8) {
                        if ((y+x) & 1)
                            v = 0xff;
                        else
                            v = 0;
                    } else {
                        if (((x+3) % 4) <= 1 &&
                            ((y+3) % 4) <= 1)
                            v = 0xff;
                        else
                            v = 0x00;
                    }
                }
            } else if (x < XSIZE/2) {
                v = ((x - (XSIZE/2)) * 255) / (XSIZE/2);
            } else if (y < XSIZE/2) {
                v = ((y - (XSIZE/2)) * 255) / (XSIZE/2);
            } else {
                v = ((x + y - XSIZE) * 255) / XSIZE;
            }
            img[(YSIZE - 1 - y) * XSIZE + (XSIZE - 1 - x)] = v;
        }
    }
    save_pgm("/tmp/in.pgm", img, XSIZE, YSIZE);
    for(i=0;i<sizeof(factors)/sizeof(float);i++) {
        fact = factors[i];
        xsize = (int)(XSIZE * fact);
        ysize = (int)((YSIZE - 100) * fact);
        s = img_resample_full_init(xsize, ysize, XSIZE, YSIZE, 50, 50, 0, 0, 0, 0, 0, 0);
        av_log(NULL, AV_LOG_INFO, "Factor=%0.2f\n", fact);
        dump_filter(&s->h_filters[0][0]);
        component_resample(s, img1, xsize, xsize, ysize,
                           img + 50 * XSIZE, XSIZE, XSIZE, YSIZE - 100);
        img_resample_close(s);

        snprintf(buf, sizeof(buf), "/tmp/out%d.pgm", i);
        save_pgm(buf, img1, xsize, ysize);
    }

    /* mmx test */
#ifdef HAVE_MMX
    av_log(NULL, AV_LOG_INFO, "MMX test\n");
    fact = 0.72;
    xsize = (int)(XSIZE * fact);
    ysize = (int)(YSIZE * fact);
    mm_flags = MM_MMX;
    s = img_resample_init(xsize, ysize, XSIZE, YSIZE);
    component_resample(s, img1, xsize, xsize, ysize,
                       img, XSIZE, XSIZE, YSIZE);

    mm_flags = 0;
    s = img_resample_init(xsize, ysize, XSIZE, YSIZE);
    component_resample(s, img2, xsize, xsize, ysize,
                       img, XSIZE, XSIZE, YSIZE);
    if (memcmp(img1, img2, xsize * ysize) != 0) {
        av_log(NULL, AV_LOG_ERROR, "mmx error\n");
        exit(1);
    }
    av_log(NULL, AV_LOG_INFO, "MMX OK\n");
#endif /* HAVE_MMX */
    return 0;
}

#endif /* TEST */