libmpeg2/libmpeg-0.4.1.diff

   1 --- libmpeg2/cpu_accel.c        2006-06-16 20:12:26.000000000 +0200
   2 +++ libmpeg2/cpu_accel.c        2006-06-16 20:12:50.000000000 +0200
   3 @@ -22,6 +26,7 @@
   4   */
   5
   6  #include "config.h"
   7 +#include "cpudetect.h"
   8
   9  #include <inttypes.h>
  10
  11 @@ -30,9 +35,17 @@
  12  #include "mpeg2_internal.h"
  13
  14  #ifdef ACCEL_DETECT
  15 -#ifdef ARCH_X86
  16 +#if defined(ARCH_X86) || defined(ARCH_X86_64)
  17 +
  18 +/* MPlayer bundles libmpeg2 as a decoder. libmpeg2 detects MMX / 3DNow!
  19 + * support with its own assembly, which duplicates MPlayer's CPU detection,
  20 + * so MPlayer's implementation (gCpuCaps) is enforced here instead.
  21 + */
  22 +#define USE_MPLAYER_CPUDETECT
  23 +
  24  static inline uint32_t arch_accel (void)
  25  {
  26 +#if !defined(USE_MPLAYER_CPUDETECT)
  27      uint32_t eax, ebx, ecx, edx;
  28      int AMD;
  29      uint32_t caps;
  30 @@ -105,7 +120,21 @@
  31         caps |= MPEG2_ACCEL_X86_MMXEXT;
  32
  33      return caps;
  34 +#else /* USE_MPLAYER_CPUDETECT: Use MPlayer's CPU capability property. */
  35 +    uint32_t caps = 0; /* declared here: the other branch's locals are compiled out */
  36 +    if (gCpuCaps.hasMMX)
  37 +        caps |= MPEG2_ACCEL_X86_MMX;
  38 +    if (gCpuCaps.hasSSE2)
  39 +       caps |= MPEG2_ACCEL_X86_SSE2;
  40 +    if (gCpuCaps.hasMMX2)
  41 +       caps |= MPEG2_ACCEL_X86_MMXEXT;
  42 +    if (gCpuCaps.has3DNow)
  43 +       caps |= MPEG2_ACCEL_X86_3DNOW;
  44 +
  45 +    return caps;
  46 +
  47 +#endif /* USE_MPLAYER_CPUDETECT */
  48  }
  49 -#endif /* ARCH_X86 */
  50 +#endif /* ARCH_X86 || ARCH_X86_64 */
  51
  52  #if defined(ARCH_PPC) || defined(ARCH_SPARC)
  53 @@ -166,10 +168,10 @@
  54
  55      canjump = 1;
  56
  57 -#ifdef HAVE_ALTIVEC_H  /* gnu */
  58 -#define VAND(a,b,c) "vand " #a "," #b "," #c "\n\t"
  59 -#else                  /* apple */
  60 +#if defined(__APPLE_CC__)      /* apple */
  61  #define VAND(a,b,c) "vand v" #a ",v" #b ",v" #c "\n\t"
  62 +#else                  /* gnu */
  63 +#define VAND(a,b,c) "vand " #a "," #b "," #c "\n\t"
  64  #endif
  65      asm volatile ("mtspr 256, %0\n\t"
  66                   VAND (0, 0, 0)
  67 @@ -212,7 +241,7 @@
  68
  69      accel = 0;
  70  #ifdef ACCEL_DETECT
  71 -#if defined (ARCH_X86) || defined (ARCH_PPC) || defined (ARCH_ALPHA) || defined (ARCH_SPARC)
  72 +#if defined (ARCH_X86) || defined (ARCH_X86_64) || defined (ARCH_PPC) || defined (ARCH_ALPHA) || defined (ARCH_SPARC)
  73      accel = arch_accel ();
  74  #endif
  75  #endif
  76 --- libmpeg2/cpu_state.c        2006-06-16 20:12:26.000000000 +0200
  77 +++ libmpeg2/cpu_state.c        2006-06-16 20:12:50.000000000 +0200
  78 @@ -29,14 +33,14 @@
  79  #include "mpeg2.h"
  80  #include "attributes.h"
  81  #include "mpeg2_internal.h"
  82 -#ifdef ARCH_X86
  83 +#if defined(ARCH_X86) || defined(ARCH_X86_64)
  84  #include "mmx.h"
  85  #endif
  86
  87  void (* mpeg2_cpu_state_save) (cpu_state_t * state) = NULL;
  88  void (* mpeg2_cpu_state_restore) (cpu_state_t * state) = NULL;
  89
  90 -#ifdef ARCH_X86
  91 +#if defined(ARCH_X86) || defined(ARCH_X86_64)
  92  static void state_restore_mmx (cpu_state_t * state)
  93  {
  94      emms ();
  95 @@ -48,18 +48,18 @@
  96  #endif
  97
  98  #ifdef ARCH_PPC
  99 -#ifdef HAVE_ALTIVEC_H  /* gnu */
 100 -#define LI(a,b) "li " #a "," #b "\n\t"
 101 -#define STVX0(a,b,c) "stvx " #a ",0," #c "\n\t"
 102 -#define STVX(a,b,c) "stvx " #a "," #b "," #c "\n\t"
 103 -#define LVX0(a,b,c) "lvx " #a ",0," #c "\n\t"
 104 -#define LVX(a,b,c) "lvx " #a "," #b "," #c "\n\t"
 105 -#else                  /* apple */
 106 +#if defined(__APPLE_CC__)      /* apple */
 107  #define LI(a,b) "li r" #a "," #b "\n\t"
 108  #define STVX0(a,b,c) "stvx v" #a ",0,r" #c "\n\t"
 109  #define STVX(a,b,c) "stvx v" #a ",r" #b ",r" #c "\n\t"
 110  #define LVX0(a,b,c) "lvx v" #a ",0,r" #c "\n\t"
 111  #define LVX(a,b,c) "lvx v" #a ",r" #b ",r" #c "\n\t"
 112 +#else                  /* gnu */
 113 +#define LI(a,b) "li " #a "," #b "\n\t"
 114 +#define STVX0(a,b,c) "stvx " #a ",0," #c "\n\t"
 115 +#define STVX(a,b,c) "stvx " #a "," #b "," #c "\n\t"
 116 +#define LVX0(a,b,c) "lvx " #a ",0," #c "\n\t"
 117 +#define LVX(a,b,c) "lvx " #a "," #b "," #c "\n\t"
 118  #endif
 119
 120  static void state_save_altivec (cpu_state_t * state)
 121 @@ -115,9 +119,9 @@
 122
 123  void mpeg2_cpu_state_init (uint32_t accel)
 124  {
 125 -#ifdef ARCH_X86
 126 +#if defined(ARCH_X86) || defined(ARCH_X86_64)
 127      if (accel & MPEG2_ACCEL_X86_MMX) {
 128         mpeg2_cpu_state_restore = state_restore_mmx;
 129      }
 130  #endif
 131  #ifdef ARCH_PPC
 132 --- libmpeg2/decode.c   2006-06-16 20:12:26.000000000 +0200
 133 +++ libmpeg2/decode.c   2006-06-16 20:12:50.000000000 +0200
 134 @@ -351,6 +355,15 @@
 135      fbuf->buf[1] = buf[1];
 136      fbuf->buf[2] = buf[2];
 137      fbuf->id = id;
 138 +    // HACK! FIXME! At first I frame, copy pointers to prediction frame too!
 139 +    if (mpeg2dec->custom_fbuf && !mpeg2dec->fbuf[1]->buf[0]){
 140 +       mpeg2dec->fbuf[1]->buf[0]=buf[0];
 141 +       mpeg2dec->fbuf[1]->buf[1]=buf[1];
 142 +       mpeg2dec->fbuf[1]->buf[2]=buf[2];
 143 +       mpeg2dec->fbuf[1]->id=NULL;
 144 +    }
 145 +//        printf("libmpeg2: FBUF 0:%p 1:%p 2:%p\n",
 146 +//         mpeg2dec->fbuf[0]->buf[0],mpeg2dec->fbuf[1]->buf[0],mpeg2dec->fbuf[2]->buf[0]);
 147  }
 148
 149  void mpeg2_custom_fbuf (mpeg2dec_t * mpeg2dec, int custom_fbuf)
 150 --- libmpeg2/header.c   2006-06-16 20:12:26.000000000 +0200
 151 +++ libmpeg2/header.c   2006-06-16 20:12:50.000000000 +0200
 152 @@ -100,6 +104,9 @@
 153      mpeg2dec->decoder.convert = NULL;
 154      mpeg2dec->decoder.convert_id = NULL;
 155      mpeg2dec->picture = mpeg2dec->pictures;
 156 +    memset(&mpeg2dec->fbuf_alloc[0].fbuf, 0, sizeof(mpeg2_fbuf_t));
 157 +    memset(&mpeg2dec->fbuf_alloc[1].fbuf, 0, sizeof(mpeg2_fbuf_t));
 158 +    memset(&mpeg2dec->fbuf_alloc[2].fbuf, 0, sizeof(mpeg2_fbuf_t));
 159      mpeg2dec->fbuf[0] = &mpeg2dec->fbuf_alloc[0].fbuf;
 160      mpeg2dec->fbuf[1] = &mpeg2dec->fbuf_alloc[1].fbuf;
 161      mpeg2dec->fbuf[2] = &mpeg2dec->fbuf_alloc[2].fbuf;
 162 @@ -551,6 +558,7 @@
 163         if (!(mpeg2dec->sequence.flags & SEQ_FLAG_PROGRESSIVE_SEQUENCE)) {
 164             picture->nb_fields = (buffer[3] & 2) ? 3 : 2;
 165             flags |= (buffer[3] & 128) ? PIC_FLAG_TOP_FIELD_FIRST : 0;
 166 +           flags |= (buffer[3] &   2) ? PIC_FLAG_REPEAT_FIRST_FIELD : 0;
 167         } else
 168             picture->nb_fields = (buffer[3]&2) ? ((buffer[3]&128) ? 6 : 4) : 2;
 169         break;
 170 @@ -799,6 +807,7 @@
 171         mpeg2dec->scaled[index] = mpeg2dec->q_scale_type;
 172         for (i = 0; i < 32; i++) {
 173             k = mpeg2dec->q_scale_type ? non_linear_scale[i] : (i << 1);
 174 +           decoder->quantizer_scales[i] = k;
 175             for (j = 0; j < 64; j++)
 176                 decoder->quantizer_prescale[index][i][j] =
 177                     k * mpeg2dec->quantizer_matrix[index][j];
 178 --- libmpeg2/idct.c     (revision 26652)
 179 +++ libmpeg2/idct.c     (working copy)
 180 @@ -250,7 +254,7 @@
 181         mpeg2_idct_mmx_init ();
 182      } else
 183  #endif
 184 -#ifdef ARCH_PPC
 185 +#ifdef HAVE_ALTIVEC
 186      if (accel & MPEG2_ACCEL_PPC_ALTIVEC) {
 187         mpeg2_idct_copy = mpeg2_idct_copy_altivec;
 188         mpeg2_idct_add = mpeg2_idct_add_altivec;
 189 --- libmpeg2/idct_mmx.c 2006-06-16 20:12:26.000000000 +0200
 190 +++ libmpeg2/idct_mmx.c 2006-06-16 20:12:50.000000000 +0200
 191 @@ -23,7 +27,7 @@
 192
 193  #include "config.h"
 194
 195 -#ifdef ARCH_X86
 196 +#if defined(ARCH_X86) || defined(ARCH_X86_64)
 197
 198  #include <inttypes.h>
 199
 200 --- libmpeg2/motion_comp.c      2006-06-16 20:12:26.000000000 +0200
 201 +++ libmpeg2/motion_comp.c      2006-06-16 20:12:50.000000000 +0200
 202 @@ -46,7 +46,7 @@
 203         mpeg2_mc = mpeg2_mc_mmx;
 204      else
 205  #endif
 206 -#ifdef ARCH_PPC
 207 +#ifdef HAVE_ALTIVEC
 208      if (accel & MPEG2_ACCEL_PPC_ALTIVEC)
 209         mpeg2_mc = mpeg2_mc_altivec;
 210      else
 211 @@ -67,6 +61,13 @@
 212         mpeg2_mc = mpeg2_mc_vis;
 213      else
 214  #endif
 215 +#ifdef ARCH_ARM
 216 +    if (accel & MPEG2_ACCEL_ARM_IWMMXT)
 217 +       mpeg2_mc = mpeg2_mc_iwmmxt;
 218 +    else if (accel & MPEG2_ACCEL_ARM)
 219 +       mpeg2_mc = mpeg2_mc_arm;
 220 +    else
 221 +#endif
 222         mpeg2_mc = mpeg2_mc_c;
 223  }
 224
 225 --- libmpeg2/motion_comp_mmx.c  2006-06-16 20:12:26.000000000 +0200
 226 +++ libmpeg2/motion_comp_mmx.c  2006-06-16 20:12:50.000000000 +0200
 227 @@ -23,7 +27,7 @@
 228
 229  #include "config.h"
 230
 231 -#ifdef ARCH_X86
 232 +#if defined(ARCH_X86) || defined(ARCH_X86_64)
 233
 234  #include <inttypes.h>
 235
 236 --- include/mpeg2.h     2006-06-16 20:12:26.000000000 +0200
 237 +++ libmpeg2/mpeg2.h    2006-06-16 20:12:50.000000000 +0200
 238 @@ -82,6 +86,7 @@
 239  #define PIC_FLAG_COMPOSITE_DISPLAY 32
 240  #define PIC_FLAG_SKIP 64
 241  #define PIC_FLAG_TAGS 128
 242 +#define PIC_FLAG_REPEAT_FIRST_FIELD 256
 243  #define PIC_MASK_COMPOSITE_DISPLAY 0xfffff000
 244
 245  typedef struct mpeg2_picture_s {
 246 @@ -156,10 +160,13 @@
 247  #define MPEG2_ACCEL_X86_3DNOW 2
 248  #define MPEG2_ACCEL_X86_MMXEXT 4
 249 +#define MPEG2_ACCEL_X86_SSE2 8
 250  #define MPEG2_ACCEL_PPC_ALTIVEC 1
 251  #define MPEG2_ACCEL_ALPHA 1
 252  #define MPEG2_ACCEL_ALPHA_MVI 2
 253  #define MPEG2_ACCEL_SPARC_VIS 1
 254  #define MPEG2_ACCEL_SPARC_VIS2 2
 255 +#define MPEG2_ACCEL_ARM 1
 256 +#define MPEG2_ACCEL_ARM_IWMMXT 2
 257  #define MPEG2_ACCEL_DETECT 0x80000000
 258
 259  uint32_t mpeg2_accel (uint32_t accel);
 260 --- libmpeg2/mpeg2_internal.h   2006-06-16 20:12:26.000000000 +0200
 261 +++ libmpeg2/mpeg2_internal.h   2006-06-16 20:12:50.000000000 +0200
 262 @@ -144,6 +148,11 @@
 263      int second_field;
 264
 265      int mpeg1;
 266 +
 267 +    int quantizer_scales[32];
 268 +    int quantizer_scale;
 269 +    char* quant_store;
 270 +    int quant_stride;
 271  };
 272
 273  typedef struct {
 274 @@ -214,6 +224,9 @@
 275      int8_t q_scale_type, scaled[4];
 276      uint8_t quantizer_matrix[4][64];
 277      uint8_t new_quantizer_matrix[4][64];
 278 +
 279 +    unsigned char *pending_buffer;
 280 +    int pending_length;
 281  };
 282
 283  typedef struct {
 284 @@ -312,3 +312,5 @@
 285  extern mpeg2_mc_t mpeg2_mc_altivec;
 286  extern mpeg2_mc_t mpeg2_mc_alpha;
 287  extern mpeg2_mc_t mpeg2_mc_vis;
 288 +extern mpeg2_mc_t mpeg2_mc_arm;
 289 +extern mpeg2_mc_t mpeg2_mc_iwmmxt;
 290 --- libmpeg2/slice.c    2006-06-16 20:12:26.000000000 +0200
 291 +++ libmpeg2/slice.c    2006-06-16 20:12:50.000000000 +0200
 292 @@ -142,6 +146,7 @@
 293
 294      quantizer_scale_code = UBITS (bit_buf, 5);
 295      DUMPBITS (bit_buf, bits, 5);
 296 +    decoder->quantizer_scale = decoder->quantizer_scales[quantizer_scale_code];
 297
 298      decoder->quantizer_matrix[0] =
 299         decoder->quantizer_prescale[0][quantizer_scale_code];
 300 @@ -1568,6 +1569,18 @@
 301
 302  #define NEXT_MACROBLOCK                                                        \
 303  do {                                                                   \
 304 +    if(decoder->quant_store) {                                          \
 305 +       if (decoder->picture_structure == TOP_FIELD)                     \
 306 +        decoder->quant_store[2*decoder->quant_stride*(decoder->v_offset>>4) \
 307 +                    +(decoder->offset>>4)] = decoder->quantizer_scale;  \
 308 +       else if (decoder->picture_structure == BOTTOM_FIELD)             \
 309 +        decoder->quant_store[2*decoder->quant_stride*(decoder->v_offset>>4) \
 310 +                   + decoder->quant_stride                             \
 311 +                    +(decoder->offset>>4)] = decoder->quantizer_scale;  \
 312 +       else                                                             \
 313 +        decoder->quant_store[decoder->quant_stride*(decoder->v_offset>>4) \
 314 +                    +(decoder->offset>>4)] = decoder->quantizer_scale;  \
 315 +    }                                                                   \
 316      decoder->offset += 16;                                             \
 317      if (decoder->offset == decoder->width) {                           \
 318         do { /* just so we can use the break statement */               \
 319 @@ -1604,6 +1604,12 @@
 320      }                                                                  \
 321  } while (0)
 322
 323 +static void motion_dummy (mpeg2_decoder_t * const decoder,
 324 +                          motion_t * const motion,
 325 +                          mpeg2_mc_fct * const * const table)
 326 +{ /* no-op motion parser; installed in the MPEG-1 MC_FIELD and MC_DMV slots */
 327 +}
 328 +
 329  void mpeg2_init_fbuf (mpeg2_decoder_t * decoder, uint8_t * current_fbuf[3],
 330                       uint8_t * forward_fbuf[3], uint8_t * backward_fbuf[3])
 331  {
 332 @@ -1661,7 +1667,9 @@
 333
 334      if (decoder->mpeg1) {
 335         decoder->motion_parser[0] = motion_zero_420;
 336 +        decoder->motion_parser[MC_FIELD] = motion_dummy;
 337         decoder->motion_parser[MC_FRAME] = motion_mp1;
 338 +        decoder->motion_parser[MC_DMV] = motion_dummy;
 339         decoder->motion_parser[4] = motion_reuse_420;
 340      } else if (decoder->picture_structure == FRAME_PICTURE) {
 341         if (decoder->chroma_format == 0) {
 342 --- libmpeg2/idct_altivec.c     2004/08/02 11:26:43     12933
 343 +++ libmpeg2/idct_altivec.c     2005/05/15 20:11:34     15484
 344 @@ -41,7 +41,7 @@
 345  typedef vector signed int vector_s32_t;
 346  typedef vector unsigned int vector_u32_t;
 347
 348 -#if defined(HAVE_ALTIVEC_H) && (__GNUC__ * 100 + __GNUC_MINOR__ < 303)
 349 +#if defined(HAVE_ALTIVEC_H) && !defined(__APPLE_CC__) && (__GNUC__ * 100 + __GNUC_MINOR__ < 303)
 350  /* work around gcc <3.3 vec_mergel bug */
 351  static inline vector_s16_t my_vec_mergel (vector_s16_t const A,
 352                                           vector_s16_t const B)
 353 @@ -56,10 +56,10 @@
 354  #define vec_mergel my_vec_mergel
 355  #endif
 356
 357 -#ifdef HAVE_ALTIVEC_H  /* gnu */
 358 -#define VEC_S16(a,b,c,d,e,f,g,h) {a, b, c, d, e, f, g, h}
 359 -#else                  /* apple */
 360 +#if defined(__APPLE_CC__)      /* apple */
 361  #define VEC_S16(a,b,c,d,e,f,g,h) (vector_s16_t) (a, b, c, d, e, f, g, h)
 362 +#else                  /* gnu */
 363 +#define VEC_S16(a,b,c,d,e,f,g,h) {a, b, c, d, e, f, g, h}
 364  #endif
 365
 366  static const vector_s16_t constants ATTR_ALIGN(16) =
 367 Index: libmpeg2/motion_comp_arm.c
 368 ===================================================================
 369 --- libmpeg2/motion_comp_arm.c  (revision 0)
 370 +++ libmpeg2/motion_comp_arm.c  (revision 0)
 371 @@ -0,0 +1,187 @@
 372 +/*
 373 + * motion_comp_arm.c
 374 + * Copyright (C) 2004 AGAWA Koji <i (AT) atty (DOT) jp>
 375 + *
 376 + * This file is part of mpeg2dec, a free MPEG-2 video stream decoder.
 377 + * See http://libmpeg2.sourceforge.net/ for updates.
 378 + *
 379 + * mpeg2dec is free software; you can redistribute it and/or modify
 380 + * it under the terms of the GNU General Public License as published by
 381 + * the Free Software Foundation; either version 2 of the License, or
 382 + * (at your option) any later version.
 383 + *
 384 + * mpeg2dec is distributed in the hope that it will be useful,
 385 + * but WITHOUT ANY WARRANTY; without even the implied warranty of
 386 + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 387 + * GNU General Public License for more details.
 388 + *
 389 + * You should have received a copy of the GNU General Public License
 390 + * along with this program; if not, write to the Free Software
 391 + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
 392 + */
 393 +
 394 +#include "config.h"
 395 +
 396 +#ifdef ARCH_ARM
 397 +
 398 +#include <inttypes.h>
 399 +
 400 +#include "mpeg2.h"
 401 +#include "attributes.h"
 402 +#include "mpeg2_internal.h"
 403 +
 404 +#define avg2(a,b) ((a+b+1)>>1)
 405 +#define avg4(a,b,c,d) ((a+b+c+d+2)>>2)
 406 +
 407 +#define predict_o(i) (ref[i])
 408 +#define predict_x(i) (avg2 (ref[i], ref[i+1]))
 409 +#define predict_y(i) (avg2 (ref[i], (ref+stride)[i]))
 410 +#define predict_xy(i) (avg4 (ref[i], ref[i+1], \
 411 +                            (ref+stride)[i], (ref+stride)[i+1]))
 412 +
 413 +#define put(predictor,i) dest[i] = predictor (i)
 414 +#define avg(predictor,i) dest[i] = avg2 (predictor (i), dest[i])
 415 +
 416 +/* mc function template: MC_FUNC(op,xy) defines MC_<op>_<xy>_16_c and MC_<op>_<xy>_8_c */
 417 +
 418 +#define MC_FUNC(op,xy)                                                 \
 419 +static inline void MC_##op##_##xy##_16_c (uint8_t * dest, const uint8_t * ref, \
 420 +                                  const int stride, int height)        \
 421 +{                                                                      \
 422 +    do {                                                               \
 423 +       op (predict_##xy, 0);                                           \
 424 +       op (predict_##xy, 1);                                           \
 425 +       op (predict_##xy, 2);                                           \
 426 +       op (predict_##xy, 3);                                           \
 427 +       op (predict_##xy, 4);                                           \
 428 +       op (predict_##xy, 5);                                           \
 429 +       op (predict_##xy, 6);                                           \
 430 +       op (predict_##xy, 7);                                           \
 431 +       op (predict_##xy, 8);                                           \
 432 +       op (predict_##xy, 9);                                           \
 433 +       op (predict_##xy, 10);                                          \
 434 +       op (predict_##xy, 11);                                          \
 435 +       op (predict_##xy, 12);                                          \
 436 +       op (predict_##xy, 13);                                          \
 437 +       op (predict_##xy, 14);                                          \
 438 +       op (predict_##xy, 15);                                          \
 439 +       ref += stride;                                                  \
 440 +       dest += stride;                                                 \
 441 +    } while (--height);                                                        \
 442 +}                                                                      \
 443 +static void MC_##op##_##xy##_8_c (uint8_t * dest, const uint8_t * ref, \
 444 +                                 const int stride, int height)         \
 445 +{                                                                      \
 446 +    do {                                                               \
 447 +       op (predict_##xy, 0);                                           \
 448 +       op (predict_##xy, 1);                                           \
 449 +       op (predict_##xy, 2);                                           \
 450 +       op (predict_##xy, 3);                                           \
 451 +       op (predict_##xy, 4);                                           \
 452 +       op (predict_##xy, 5);                                           \
 453 +       op (predict_##xy, 6);                                           \
 454 +       op (predict_##xy, 7);                                           \
 455 +       ref += stride;                                                  \
 456 +       dest += stride;                                                 \
 457 +    } while (--height);                                                        \
 458 +}
 459 +/* definitions of the actual mc functions */
 460 +
 461 +MC_FUNC (put,o)
 462 +MC_FUNC (avg,o)
 463 +MC_FUNC (put,x)
 464 +MC_FUNC (avg,x)
 465 +MC_FUNC (put,y)
 466 +MC_FUNC (avg,y)
 467 +MC_FUNC (put,xy)
 468 +MC_FUNC (avg,xy)
 469 +
 470 +
 471 +extern void MC_put_o_16_arm (uint8_t * dest, const uint8_t * ref,
 472 +                            int stride, int height);
 473 +
 474 +extern void MC_put_x_16_arm (uint8_t * dest, const uint8_t * ref,
 475 +                            int stride, int height);
 476 +
 477 +
 478 +static void MC_put_y_16_arm (uint8_t * dest, const uint8_t * ref,
 479 +                             int stride, int height)
 480 +{
 481 +    MC_put_y_16_c(dest, ref, stride, height);
 482 +}
 483 +
 484 +static void MC_put_xy_16_arm (uint8_t * dest, const uint8_t * ref,
 485 +                              int stride, int height)
 486 +{
 487 +    MC_put_xy_16_c(dest, ref, stride, height);
 488 +}
 489 +
 490 +extern void MC_put_o_8_arm (uint8_t * dest, const uint8_t * ref,
 491 +                               int stride, int height);
 492 +
 493 +extern void MC_put_x_8_arm (uint8_t * dest, const uint8_t * ref,
 494 +                           int stride, int height);
 495 +
 496 +static void MC_put_y_8_arm (uint8_t * dest, const uint8_t * ref,
 497 +                            int stride, int height)
 498 +{
 499 +    MC_put_y_8_c(dest, ref, stride, height);
 500 +}
 501 +
 502 +static void MC_put_xy_8_arm (uint8_t * dest, const uint8_t * ref,
 503 +                             int stride, int height)
 504 +{
 505 +    MC_put_xy_8_c(dest, ref, stride, height);
 506 +}
 507 +
 508 +static void MC_avg_o_16_arm (uint8_t * dest, const uint8_t * ref,
 509 +                             int stride, int height)
 510 +{
 511 +    MC_avg_o_16_c(dest, ref, stride, height);
 512 +}
 513 +
 514 +static void MC_avg_x_16_arm (uint8_t * dest, const uint8_t * ref,
 515 +                             int stride, int height)
 516 +{
 517 +    MC_avg_x_16_c(dest, ref, stride, height);
 518 +}
 519 +
 520 +static void MC_avg_y_16_arm (uint8_t * dest, const uint8_t * ref,
 521 +                             int stride, int height)
 522 +{
 523 +    MC_avg_y_16_c(dest, ref, stride, height);
 524 +}
 525 +
 526 +static void MC_avg_xy_16_arm (uint8_t * dest, const uint8_t * ref,
 527 +                              int stride, int height)
 528 +{
 529 +    MC_avg_xy_16_c(dest, ref, stride, height);
 530 +}
 531 +
 532 +static void MC_avg_o_8_arm (uint8_t * dest, const uint8_t * ref,
 533 +                            int stride, int height)
 534 +{
 535 +    MC_avg_o_8_c(dest, ref, stride, height);
 536 +}
 537 +
 538 +static void MC_avg_x_8_arm (uint8_t * dest, const uint8_t * ref,
 539 +                            int stride, int height)
 540 +{
 541 +    MC_avg_x_8_c(dest, ref, stride, height);
 542 +}
 543 +
 544 +static void MC_avg_y_8_arm (uint8_t * dest, const uint8_t * ref,
 545 +                            int stride, int height)
 546 +{
 547 +    MC_avg_y_8_c(dest, ref, stride, height);
 548 +}
 549 +
 550 +static void MC_avg_xy_8_arm (uint8_t * dest, const uint8_t * ref,
 551 +                             int stride, int height)
 552 +{
 553 +    MC_avg_xy_8_c(dest, ref, stride, height);
 554 +}
 555 +
 556 +MPEG2_MC_EXTERN (arm)
 557 +
 558 +#endif
 559 Index: libmpeg2/motion_comp_arm_s.S
 560 ===================================================================
 561 --- libmpeg2/motion_comp_arm_s.S        (revision 0)
 562 +++ libmpeg2/motion_comp_arm_s.S        (revision 0)
 563 @@ -0,0 +1,322 @@
 564 +@ motion_comp_arm_s.S
 565 +@ Copyright (C) 2004 AGAWA Koji <i (AT) atty (DOT) jp>
 566 +@
 567 +@ This file is part of mpeg2dec, a free MPEG-2 video stream decoder.
 568 +@ See http://libmpeg2.sourceforge.net/ for updates.
 569 +@
 570 +@ mpeg2dec is free software; you can redistribute it and/or modify
 571 +@ it under the terms of the GNU General Public License as published by
 572 +@ the Free Software Foundation; either version 2 of the License, or
 573 +@ (at your option) any later version.
 574 +@
 575 +@ mpeg2dec is distributed in the hope that it will be useful,
 576 +@ but WITHOUT ANY WARRANTY; without even the implied warranty of
 577 +@ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 578 +@ GNU General Public License for more details.
 579 +@
 580 +@ You should have received a copy of the GNU General Public License
 581 +@ along with this program; if not, write to the Free Software
 582 +@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
 583 +
 584 +       .text
 585 +
 586 +@ ----------------------------------------------------------------
 587 +       .align
 588 +       .global MC_put_o_16_arm
 589 +MC_put_o_16_arm:
 590 +       @@ void func(uint8_t * dest, const uint8_t * ref, int stride, int height)
 591 +       pld [r1]
 592 +        stmfd sp!, {r4-r11, lr} @ R14 is also called LR
 593 +       and r4, r1, #3
 594 +       adr r5, MC_put_o_16_arm_align_jt
 595 +       add r5, r5, r4, lsl #2
 596 +       ldr pc, [r5]
 597 +
 598 +MC_put_o_16_arm_align0:
 599 +       ldmia r1, {r4-r7}
 600 +       add r1, r1, r2
 601 +       pld [r1]
 602 +       stmia r0, {r4-r7}
 603 +       subs r3, r3, #1
 604 +       add r0, r0, r2
 605 +       bne MC_put_o_16_arm_align0
 606 +        ldmfd sp!, {r4-r11, pc} @@ update PC with LR content.
 607 +
 606 +.macro PROC shift
 607 +       ldmia r1, {r4-r8}                   @ PROC: realign one 16-byte row; load 5 words (w0..w4) from r1
 608 +       add r1, r1, r2                      @ step r1 (ref) by r2 (stride)
 609 +       mov r9, r4, lsr #(\shift)           @ r9  = w0 >> shift
 610 +       pld [r1]                            @ prefetch the next row
 611 +       mov r10, r5, lsr #(\shift)          @ r10 = w1 >> shift
 612 +       orr r9, r9, r5, lsl #(32-\shift)    @ r9  |= w1 << (32 - shift)
 613 +       mov r11, r6, lsr #(\shift)          @ r11 = w2 >> shift
 614 +       orr r10, r10, r6, lsl #(32-\shift)  @ r10 |= w2 << (32 - shift)
 615 +       mov r12, r7, lsr #(\shift)          @ r12 = w3 >> shift
 616 +       orr r11, r11, r7, lsl #(32-\shift)  @ r11 |= w3 << (32 - shift)
 617 +       orr r12, r12, r8, lsl #(32-\shift)  @ r12 |= w4 << (32 - shift)
 618 +       stmia r0, {r9-r12}                  @ store the 4 realigned words at r0 (dest)
 619 +       subs r3, r3, #1                     @ one row done; flags feed the bne after each PROC use
 620 +       add r0, r0, r2                      @ step r0 (dest) by r2; plain add keeps the flags
 621 +.endm
 624 +
 625 +MC_put_o_16_arm_align1:
 626 +       and r1, r1, #0xFFFFFFFC
 627 +1:     PROC(8)
 628 +       bne 1b
 629 +        ldmfd sp!, {r4-r11, pc} @@ update PC with LR content.
 630 +MC_put_o_16_arm_align2:
 631 +       and r1, r1, #0xFFFFFFFC
 632 +1:     PROC(16)
 633 +       bne 1b
 634 +        ldmfd sp!, {r4-r11, pc} @@ update PC with LR content.
 635 +MC_put_o_16_arm_align3:
 636 +       and r1, r1, #0xFFFFFFFC
 637 +1:     PROC(24)
 638 +       bne 1b
 639 +        ldmfd sp!, {r4-r11, pc} @@ update PC with LR content.
 640 +MC_put_o_16_arm_align_jt:
 641 +       .word MC_put_o_16_arm_align0
 642 +       .word MC_put_o_16_arm_align1
 643 +       .word MC_put_o_16_arm_align2
 644 +       .word MC_put_o_16_arm_align3
 645 +
 646 +@ ----------------------------------------------------------------
 647 +       .align
 648 +       .global MC_put_o_8_arm
 649 +MC_put_o_8_arm:
 650 +       @@ void func(uint8_t * dest, const uint8_t * ref, int stride, int height)
 651 +       pld [r1]
 652 +        stmfd sp!, {r4-r10, lr} @ R14 is also called LR
 653 +       and r4, r1, #3
 654 +       adr r5, MC_put_o_8_arm_align_jt
 655 +       add r5, r5, r4, lsl #2
 656 +       ldr pc, [r5]
 657 +MC_put_o_8_arm_align0:
 658 +       ldmia r1, {r4-r5}
 659 +       add r1, r1, r2
 660 +       pld [r1]
 661 +       stmia r0, {r4-r5}
 662 +       add r0, r0, r2
 663 +       subs r3, r3, #1
 664 +       bne MC_put_o_8_arm_align0
 665 +        ldmfd sp!, {r4-r10, pc} @@ update PC with LR content.
 666 +
 667 +.macro PROC8 shift
 668 +       ldmia r1, {r4-r6}
 669 +       add r1, r1, r2
 670 +       mov r9, r4, lsr #(\shift)
 671 +       pld [r1]
 672 +       mov r10, r5, lsr #(\shift)
 673 +       orr r9, r9, r5, lsl #(32-\shift)
 674 +       orr r10, r10, r6, lsl #(32-\shift)
 675 +       stmia r0, {r9-r10}
 676 +       subs r3, r3, #1
 677 +       add r0, r0, r2
 678 +.endm
 679 +
 680 +MC_put_o_8_arm_align1:
 681 +       and r1, r1, #0xFFFFFFFC
 682 +1:     PROC8(8)
 683 +       bne 1b
 684 +        ldmfd sp!, {r4-r10, pc} @@ update PC with LR content.
 685 +
 686 +MC_put_o_8_arm_align2:
 687 +       and r1, r1, #0xFFFFFFFC
 688 +1:     PROC8(16)
 689 +       bne 1b
 690 +        ldmfd sp!, {r4-r10, pc} @@ update PC with LR content.
 691 +
 692 +MC_put_o_8_arm_align3:
 693 +       and r1, r1, #0xFFFFFFFC
 694 +1:     PROC8(24)
 695 +       bne 1b
 696 +        ldmfd sp!, {r4-r10, pc} @@ update PC with LR content.
 697 +
 698 +MC_put_o_8_arm_align_jt:
 699 +       .word MC_put_o_8_arm_align0
 700 +       .word MC_put_o_8_arm_align1
 701 +       .word MC_put_o_8_arm_align2
 702 +       .word MC_put_o_8_arm_align3
 703 +
 704 +@ ----------------------------------------------------------------
 703 +.macro AVG_PW rW1, rW2
 704 +       mov \rW2, \rW2, lsl #24         @ AVG_PW: move the low byte of rW2 to the top byte
 705 +       orr \rW2, \rW2, \rW1, lsr #8    @ b = (rW1 >> 8) | that byte; a = rW1
 706 +       eor r9, \rW1, \rW2              @ r9 = a ^ b
 707 +       and \rW2, \rW1, \rW2            @ rW2 = a & b
 708 +       and r10, r9, r12                @ r12 = ~r11 (set by callers): clear bit 0 of each byte before shifting
 709 +       add \rW2, \rW2, r10, lsr #1     @ rW2 += (a ^ b) >> 1, per byte
 710 +       and r10, r9, r11                @ keep bit 0 of each byte (r11 expected to be 0x01010101)
 711 +       add \rW2, \rW2, r10             @ add rounding bit: rW2 = per-byte (a + b + 1) >> 1
 712 +.endm
 715 +
 716 +       .align
 717 +       .global MC_put_x_16_arm
 718 +MC_put_x_16_arm:
 719 +       @@ void func(uint8_t * dest, const uint8_t * ref, int stride, int height)
 720 +       pld [r1]
 721 +        stmfd sp!, {r4-r11,lr} @ R14 is also called LR
 722 +       and r4, r1, #3
 723 +       adr r5, MC_put_x_16_arm_align_jt
 724 +       ldr r11, [r5]
 725 +       mvn r12, r11
 726 +       add r5, r5, r4, lsl #2
 727 +       ldr pc, [r5, #4]
 728 +
 729 +.macro ADJ_ALIGN_QW shift, R0, R1, R2, R3, R4
 730 +       mov \R0, \R0, lsr #(\shift)
 731 +       orr \R0, \R0, \R1, lsl #(32 - \shift)
 732 +       mov \R1, \R1, lsr #(\shift)
 733 +       orr \R1, \R1, \R2, lsl #(32 - \shift)
 734 +       mov \R2, \R2, lsr #(\shift)
 735 +       orr \R2, \R2, \R3, lsl #(32 - \shift)
 736 +       mov \R3, \R3, lsr #(\shift)
 737 +       orr \R3, \R3, \R4, lsl #(32 - \shift)
 738 +       mov \R4, \R4, lsr #(\shift)
 739 +@      and \R4, \R4, #0xFF
 740 +.endm
 741 +
 742 +MC_put_x_16_arm_align0:
 743 +       ldmia r1, {r4-r8}
 744 +       add r1, r1, r2
 745 +       pld [r1]
 746 +       AVG_PW r7, r8
 747 +       AVG_PW r6, r7
 748 +       AVG_PW r5, r6
 749 +       AVG_PW r4, r5
 750 +       stmia r0, {r5-r8}
 751 +       subs r3, r3, #1
 752 +       add r0, r0, r2
 753 +       bne MC_put_x_16_arm_align0
 754 +        ldmfd sp!, {r4-r11,pc} @@ update PC with LR content.
 755 +MC_put_x_16_arm_align1:
 756 +       and r1, r1, #0xFFFFFFFC
 757 +1:     ldmia r1, {r4-r8}
 758 +       add r1, r1, r2
 759 +       pld [r1]
 760 +       ADJ_ALIGN_QW 8, r4, r5, r6, r7, r8
 761 +       AVG_PW r7, r8
 762 +       AVG_PW r6, r7
 763 +       AVG_PW r5, r6
 764 +       AVG_PW r4, r5
 765 +       stmia r0, {r5-r8}
 766 +       subs r3, r3, #1
 767 +       add r0, r0, r2
 768 +       bne 1b
 769 +        ldmfd sp!, {r4-r11,pc} @@ update PC with LR content.
 770 +MC_put_x_16_arm_align2:
 771 +       and r1, r1, #0xFFFFFFFC
 772 +1:     ldmia r1, {r4-r8}
 773 +       add r1, r1, r2
 774 +       pld [r1]
 775 +       ADJ_ALIGN_QW 16, r4, r5, r6, r7, r8
 776 +       AVG_PW r7, r8
 777 +       AVG_PW r6, r7
 778 +       AVG_PW r5, r6
 779 +       AVG_PW r4, r5
 780 +       stmia r0, {r5-r8}
 781 +       subs r3, r3, #1
 782 +       add r0, r0, r2
 783 +       bne 1b
 784 +        ldmfd sp!, {r4-r11,pc} @@ update PC with LR content.
 785 +MC_put_x_16_arm_align3:
 786 +       and r1, r1, #0xFFFFFFFC
 787 +1:     ldmia r1, {r4-r8}
 788 +       add r1, r1, r2
 789 +       pld [r1]
 790 +       ADJ_ALIGN_QW 24, r4, r5, r6, r7, r8
 791 +       AVG_PW r7, r8
 792 +       AVG_PW r6, r7
 793 +       AVG_PW r5, r6
 794 +       AVG_PW r4, r5
 795 +       stmia r0, {r5-r8}
 796 +       subs r3, r3, #1
 797 +       add r0, r0, r2
 798 +       bne 1b
 799 +        ldmfd sp!, {r4-r11,pc} @@ update PC with LR content.
 800 +MC_put_x_16_arm_align_jt:
 801 +       .word 0x01010101
 802 +       .word MC_put_x_16_arm_align0
 803 +       .word MC_put_x_16_arm_align1
 804 +       .word MC_put_x_16_arm_align2
 805 +       .word MC_put_x_16_arm_align3
 806 +
 807 +@ ----------------------------------------------------------------
 808 +       .align
 809 +       .global MC_put_x_8_arm
 810 +MC_put_x_8_arm:                         @ entry point: save registers, then jump to the row loop matching (ref & 3)
 811 +       @@ void func(uint8_t * dest, const uint8_t * ref, int stride, int height)
 812 +       pld [r1]                        @ preload hint for the first source row (r1 is used as ref)
 813 +        stmfd sp!, {r4-r11,lr} @ save r4-r11 and LR (R14); every loop exit pops them again
 814 +       and r4, r1, #3                  @ r4 = ref & 3, the byte offset of ref within its word
 815 +       adr r5, MC_put_x_8_arm_align_jt @ r5 = address of the constant/jump table
 816 +       ldr r11, [r5]                   @ r11 = first table word (0x01010101)
 817 +       mvn r12, r11                    @ r12 = ~r11 (0xFEFEFEFE); r11/r12 are presumably the masks AVG_PW expects (macro not shown here)
 818 +       add r5, r5, r4, lsl #2          @ r5 += 4 * r4
 819 +       ldr pc, [r5, #4]                @ jump through table entry r4 + 1, i.e. MC_put_x_8_arm_align<r4>
 820 +
 821 +.macro ADJ_ALIGN_DW shift, R0, R1, R2
 822 +       mov \R0, \R0, lsr #(\shift)            @ shifts the three-word value R2:R1:R0 right by shift bits, in place; R0 = (R0 >> shift) ...
 823 +       orr \R0, \R0, \R1, lsl #(32 - \shift)  @ ... | (R1 << (32 - shift))
 824 +       mov \R1, \R1, lsr #(\shift)            @ R1 = (R1 >> shift) ...
 825 +       orr \R1, \R1, \R2, lsl #(32 - \shift)  @ ... | (R2 << (32 - shift))
 826 +       mov \R2, \R2, lsr #(\shift)            @ R2 = R2 >> shift; its top shift bits become zero
 827 +@      and \R4, \R4, #0xFF                    (disabled instruction; R4 is not a parameter of this macro)
 828 +.endm
 829 +
 830 +MC_put_x_8_arm_align0:                 @ row loop reached when ref & 3 == 0; no realignment needed
 831 +       ldmia r1, {r4-r6}               @ load three consecutive words from ref into r4, r5, r6
 832 +       add r1, r1, r2                  @ ref += stride
 833 +       pld [r1]                        @ preload hint for the next source row
 834 +       AVG_PW r5, r6                   @ AVG_PW is defined outside this excerpt; presumably leaves its result in the 2nd operand (r6) -- confirm
 835 +       AVG_PW r4, r5                   @ presumably leaves its result in r5; the loaded r5 was already consumed by the line above
 836 +       stmia r0, {r5-r6}               @ store r5 and r6 (8 bytes) to dest
 837 +       subs r3, r3, #1                 @ height -= 1, setting the flags tested by bne
 838 +       add r0, r0, r2                  @ dest += stride (plain add, so the flags from subs survive)
 839 +       bne MC_put_x_8_arm_align0       @ repeat while rows remain; the body always runs at least once
 840 +        ldmfd sp!, {r4-r11,pc} @@ restore r4-r11 and return by popping the saved LR into PC.
 841 +MC_put_x_8_arm_align1:                 @ handler reached when ref & 3 == 1
 842 +       and r1, r1, #0xFFFFFFFC         @ round ref down to a word boundary, once; it stays aligned only if stride is a multiple of 4 (presumably guaranteed by callers)
 843 +1:     ldmia r1, {r4-r6}               @ row loop: load three words starting at the rounded-down address
 844 +       add r1, r1, r2                  @ ref += stride
 845 +       pld [r1]                        @ preload hint for the next source row
 846 +       ADJ_ALIGN_DW 8, r4, r5, r6      @ shift r6:r5:r4 right by 8 bits, discarding the low byte of r4 (the byte before ref, assuming little-endian loads)
 847 +       AVG_PW r5, r6                   @ AVG_PW is defined outside this excerpt; presumably leaves its result in the 2nd operand (r6) -- confirm
 848 +       AVG_PW r4, r5                   @ presumably leaves its result in r5
 849 +       stmia r0, {r5-r6}               @ store r5 and r6 (8 bytes) to dest
 850 +       subs r3, r3, #1                 @ height -= 1, setting the flags tested by bne
 851 +       add r0, r0, r2                  @ dest += stride (plain add, so the flags from subs survive)
 852 +       bne 1b                          @ repeat from the local label 1 above while rows remain
 853 +        ldmfd sp!, {r4-r11,pc} @@ restore r4-r11 and return by popping the saved LR into PC.
 854 +MC_put_x_8_arm_align2:                 @ handler reached when ref & 3 == 2
 855 +       and r1, r1, #0xFFFFFFFC         @ round ref down to a word boundary, once; it stays aligned only if stride is a multiple of 4 (presumably guaranteed by callers)
 856 +1:     ldmia r1, {r4-r6}               @ row loop: load three words starting at the rounded-down address
 857 +       add r1, r1, r2                  @ ref += stride
 858 +       pld [r1]                        @ preload hint for the next source row
 859 +       ADJ_ALIGN_DW 16, r4, r5, r6     @ shift r6:r5:r4 right by 16 bits, discarding the low halfword of r4 (the 2 bytes before ref, assuming little-endian loads)
 860 +       AVG_PW r5, r6                   @ AVG_PW is defined outside this excerpt; presumably leaves its result in the 2nd operand (r6) -- confirm
 861 +       AVG_PW r4, r5                   @ presumably leaves its result in r5
 862 +       stmia r0, {r5-r6}               @ store r5 and r6 (8 bytes) to dest
 863 +       subs r3, r3, #1                 @ height -= 1, setting the flags tested by bne
 864 +       add r0, r0, r2                  @ dest += stride (plain add, so the flags from subs survive)
 865 +       bne 1b                          @ repeat from the local label 1 above while rows remain
 866 +        ldmfd sp!, {r4-r11,pc} @@ restore r4-r11 and return by popping the saved LR into PC.
 867 +MC_put_x_8_arm_align3:                 @ handler reached when ref & 3 == 3
 868 +       and r1, r1, #0xFFFFFFFC         @ round ref down to a word boundary, once; it stays aligned only if stride is a multiple of 4 (presumably guaranteed by callers)
 869 +1:     ldmia r1, {r4-r6}               @ row loop: load three words starting at the rounded-down address
 870 +       add r1, r1, r2                  @ ref += stride
 871 +       pld [r1]                        @ preload hint for the next source row
 872 +       ADJ_ALIGN_DW 24, r4, r5, r6     @ shift r6:r5:r4 right by 24 bits, discarding the low 3 bytes of r4 (the 3 bytes before ref, assuming little-endian loads)
 873 +       AVG_PW r5, r6                   @ AVG_PW is defined outside this excerpt; presumably leaves its result in the 2nd operand (r6) -- confirm
 874 +       AVG_PW r4, r5                   @ presumably leaves its result in r5
 875 +       stmia r0, {r5-r6}               @ store r5 and r6 (8 bytes) to dest
 876 +       subs r3, r3, #1                 @ height -= 1, setting the flags tested by bne
 877 +       add r0, r0, r2                  @ dest += stride (plain add, so the flags from subs survive)
 878 +       bne 1b                          @ repeat from the local label 1 above while rows remain
 879 +        ldmfd sp!, {r4-r11,pc} @@ restore r4-r11 and return by popping the saved LR into PC.
 880 +MC_put_x_8_arm_align_jt:               @ constant word followed by the jump table read by MC_put_x_8_arm
 881 +       .word 0x01010101                @ word 0: loaded into r11 by the entry code (r12 is set to its complement)
 882 +       .word MC_put_x_8_arm_align0     @ target when ref & 3 == 0
 883 +       .word MC_put_x_8_arm_align1     @ target when ref & 3 == 1
 884 +       .word MC_put_x_8_arm_align2     @ target when ref & 3 == 2
 885 +       .word MC_put_x_8_arm_align3     @ target when ref & 3 == 3
 886 Index: libmpeg2/motion_comp_iwmmxt.c
 887 ===================================================================
 888 --- libmpeg2/motion_comp_iwmmxt.c       (revision 0)
 889 +++ libmpeg2/motion_comp_iwmmxt.c       (revision 0)
 890 @@ -0,0 +1,59 @@
 891 +/*
 892 + * motion_comp_iwmmxt.c
 893 + * Copyright (C) 2004 AGAWA Koji <i (AT) atty (DOT) jp>
 894 + *
 895 + * This file is part of mpeg2dec, a free MPEG-2 video stream decoder.
 896 + * See http://libmpeg2.sourceforge.net/ for updates.
 897 + *
 898 + * mpeg2dec is free software; you can redistribute it and/or modify
 899 + * it under the terms of the GNU General Public License as published by
 900 + * the Free Software Foundation; either version 2 of the License, or
 901 + * (at your option) any later version.
 902 + *
 903 + * mpeg2dec is distributed in the hope that it will be useful,
 904 + * but WITHOUT ANY WARRANTY; without even the implied warranty of
 905 + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 906 + * GNU General Public License for more details.
 907 + *
 908 + * You should have received a copy of the GNU General Public License
 909 + * along with this program; if not, write to the Free Software
 910 + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
 911 + */
 912 +
 913 +#include "config.h"
 914 +
 915 +#if defined(ARCH_ARM) && defined(HAVE_IWMMXT)
 916 +
 917 +#include <inttypes.h>
 918 +
 919 +#include "mpeg2.h"
 920 +#include "attributes.h"
 921 +#include "mpeg2_internal.h"
 922 +
 923 +/* The *_iwmmxt pixel routines below are implemented in libavcodec; only their prototypes are declared here. */
 924 +
 925 +extern void put_pixels16_iwmmxt(uint8_t * dest, const uint8_t * ref, const int stride, int height);
 926 +extern void put_pixels16_x2_iwmmxt(uint8_t * dest, const uint8_t * ref, const int stride, int height);
 927 +extern void put_pixels16_y2_iwmmxt(uint8_t * dest, const uint8_t * ref, const int stride, int height);
 928 +extern void put_pixels16_xy2_iwmmxt(uint8_t * dest, const uint8_t * ref, const int stride, int height);
 929 +extern void put_pixels8_iwmmxt(uint8_t * dest, const uint8_t * ref, const int stride, int height);
 930 +extern void put_pixels8_x2_iwmmxt(uint8_t * dest, const uint8_t * ref, const int stride, int height);
 931 +extern void put_pixels8_y2_iwmmxt(uint8_t * dest, const uint8_t * ref, const int stride, int height);
 932 +extern void put_pixels8_xy2_iwmmxt(uint8_t * dest, const uint8_t * ref, const int stride, int height);
 933 +extern void avg_pixels16_iwmmxt(uint8_t * dest, const uint8_t * ref, const int stride, int height);
 934 +extern void avg_pixels16_x2_iwmmxt(uint8_t * dest, const uint8_t * ref, const int stride, int height);
 935 +extern void avg_pixels16_y2_iwmmxt(uint8_t * dest, const uint8_t * ref, const int stride, int height);
 936 +extern void avg_pixels16_xy2_iwmmxt(uint8_t * dest, const uint8_t * ref, const int stride, int height);
 937 +extern void avg_pixels8_iwmmxt(uint8_t * dest, const uint8_t * ref, const int stride, int height);
 938 +extern void avg_pixels8_x2_iwmmxt(uint8_t * dest, const uint8_t * ref, const int stride, int height);
 939 +extern void avg_pixels8_y2_iwmmxt(uint8_t * dest, const uint8_t * ref, const int stride, int height);
 940 +extern void avg_pixels8_xy2_iwmmxt(uint8_t * dest, const uint8_t * ref, const int stride, int height);
 941 +
 942 +mpeg2_mc_t mpeg2_mc_iwmmxt = {   /* function table filled with the libavcodec iwMMXt routines declared above */
 943 +    {put_pixels16_iwmmxt, put_pixels16_x2_iwmmxt, put_pixels16_y2_iwmmxt, put_pixels16_xy2_iwmmxt,   /* first group: put_* routines, 16-wide ... */
 944 +     put_pixels8_iwmmxt, put_pixels8_x2_iwmmxt,  put_pixels8_y2_iwmmxt,  put_pixels8_xy2_iwmmxt},      /* ... then 8-wide */
 945 +    {avg_pixels16_iwmmxt, avg_pixels16_x2_iwmmxt, avg_pixels16_y2_iwmmxt, avg_pixels16_xy2_iwmmxt,   /* second group: avg_* routines, 16-wide ... */
 946 +     avg_pixels8_iwmmxt, avg_pixels8_x2_iwmmxt,  avg_pixels8_y2_iwmmxt,  avg_pixels8_xy2_iwmmxt},      /* ... then 8-wide */
 947 +};
 948 +
 949 +#endif /* defined(ARCH_ARM) && defined(HAVE_IWMMXT) */