libmpeg2/libmpeg-0.4.1.diff

   1 --- include/attributes.h        2006-06-16 20:12:26.000000000 +0200
   2 +++ libmpeg2/attributes.h       2006-06-16 20:12:50.000000000 +0200
   3 @@ -25,7 +29,7 @@
   4  #ifdef ATTRIBUTE_ALIGNED_MAX
   5  #define ATTR_ALIGN(align) __attribute__ ((__aligned__ ((ATTRIBUTE_ALIGNED_MAX < align) ? ATTRIBUTE_ALIGNED_MAX : align)))
   6  #else
   7 -#define ATTR_ALIGN(align)
   8 +#define ATTR_ALIGN(align) __attribute__ ((__aligned__ ((16 < align) ? 16 : align)))
   9  #endif
  10
  11  #ifdef HAVE_BUILTIN_EXPECT
  12 --- libmpeg2/cpu_accel.c        2006-06-16 20:12:26.000000000 +0200
  13 +++ libmpeg2/cpu_accel.c        2006-06-16 20:12:50.000000000 +0200
  14 @@ -22,6 +26,7 @@
  15   */
  16
  17  #include "config.h"
  18 +#include "cpudetect.h"
  19
  20  #include <inttypes.h>
  21
  22 @@ -30,9 +35,17 @@
  23  #include "mpeg2_internal.h"
  24
  25  #ifdef ACCEL_DETECT
  26 -#ifdef ARCH_X86
  27 +#if defined(ARCH_X86) || defined(ARCH_X86_64)
  28 +
  29 +/* MPlayer imports libmpeg2 as a decoder; libmpeg2 detects MMX / 3DNow!
  30 + * support via inline assembly. MPlayer regards that as duplicated work,
  31 + * so we force the use of MPlayer's own CPU detection instead.
  32 + */
  33 +#define USE_MPLAYER_CPUDETECT
  34 +
  35  static inline uint32_t arch_accel (void)
  36  {
  37 +#if !defined(USE_MPLAYER_CPUDETECT)
  38      uint32_t eax, ebx, ecx, edx;
  39      int AMD;
  40      uint32_t caps;
  41 @@ -105,10 +118,24 @@
  42         caps |= MPEG2_ACCEL_X86_MMXEXT;
  43
  44      return caps;
  45 +#else /* USE_MPLAYER_CPUDETECT: Use MPlayer's cpu capability property */
  46 +    uint32_t caps = 0; /* declared here: the other declaration of caps sits in the #if branch that is compiled out */
  47 +    if (gCpuCaps.hasMMX)
  48 +        caps |= MPEG2_ACCEL_X86_MMX;
  49 +    if (gCpuCaps.hasSSE2)
  50 +       caps |= MPEG2_ACCEL_X86_SSE2;
  51 +    if (gCpuCaps.hasMMX2)
  52 +       caps |= MPEG2_ACCEL_X86_MMXEXT;
  53 +    if (gCpuCaps.has3DNow)
  54 +       caps |= MPEG2_ACCEL_X86_3DNOW;
  55 +
  56 +    return caps;
  57 +
  58 +#endif /* USE_MPLAYER_CPUDETECT */
  59  }
  60 -#endif /* ARCH_X86 */
  61 +#endif /* ARCH_X86 || ARCH_X86_64 */
  62
  63 -#if defined(ARCH_PPC) || defined(ARCH_SPARC)
  64 +#if defined(ARCH_PPC) || (defined(ARCH_SPARC) && defined(HAVE_VIS))
  65  #include <signal.h>
  66  #include <setjmp.h>
  67
  68 @@ -166,10 +166,10 @@
  69
  70      canjump = 1;
  71
  72 -#ifdef HAVE_ALTIVEC_H  /* gnu */
  73 -#define VAND(a,b,c) "vand " #a "," #b "," #c "\n\t"
  74 -#else                  /* apple */
  75 +#if defined( __APPLE_CC__ ) && defined( __APPLE_ALTIVEC__ ) /* apple */
  76  #define VAND(a,b,c) "vand v" #a ",v" #b ",v" #c "\n\t"
  77 +#else                  /* gnu */
  78 +#define VAND(a,b,c) "vand " #a "," #b "," #c "\n\t"
  79  #endif
  80      asm volatile ("mtspr 256, %0\n\t"
  81                   VAND (0, 0, 0)
  82 @@ -195,6 +222,7 @@
  83  #ifdef ARCH_ALPHA
  84  static inline uint32_t arch_accel (void)
  85  {
  86 +#ifdef CAN_COMPILE_ALPHA_MVI
  87      uint64_t no_mvi;
  88
  89      asm volatile ("amask %1, %0"
  90 @@ -202,6 +230,9 @@
  91                   : "rI" (256));        /* AMASK_MVI */
  92      return no_mvi ? MPEG2_ACCEL_ALPHA : (MPEG2_ACCEL_ALPHA |
  93                                          MPEG2_ACCEL_ALPHA_MVI);
  94 +#else
  95 +    return MPEG2_ACCEL_ALPHA;
  96 +#endif
  97  }
  98  #endif /* ARCH_ALPHA */
  99  #endif /* ACCEL_DETECT */
 100 @@ -212,7 +243,7 @@
 101
 102      accel = 0;
 103  #ifdef ACCEL_DETECT
 104 -#if defined (ARCH_X86) || defined (ARCH_PPC) || defined (ARCH_ALPHA) || defined (ARCH_SPARC)
 105 +#if defined (ARCH_X86) || defined (ARCH_X86_64) || defined (ARCH_PPC) || defined (ARCH_ALPHA) || defined (ARCH_SPARC)
 106      accel = arch_accel ();
 107  #endif
 108  #endif
 109 --- libmpeg2/cpu_state.c        2006-06-16 20:12:26.000000000 +0200
 110 +++ libmpeg2/cpu_state.c        2006-06-16 20:12:50.000000000 +0200
 111 @@ -29,14 +33,14 @@
 112  #include "mpeg2.h"
 113  #include "attributes.h"
 114  #include "mpeg2_internal.h"
 115 -#ifdef ARCH_X86
 116 +#if defined(ARCH_X86) || defined(ARCH_X86_64)
 117  #include "mmx.h"
 118  #endif
 119
 120  void (* mpeg2_cpu_state_save) (cpu_state_t * state) = NULL;
 121  void (* mpeg2_cpu_state_restore) (cpu_state_t * state) = NULL;
 122
 123 -#ifdef ARCH_X86
 124 +#if defined(ARCH_X86) || defined(ARCH_X86_64)
 125  static void state_restore_mmx (cpu_state_t * state)
 126  {
 127      emms ();
 128 @@ -48,18 +48,18 @@
 129  #endif
 130
 131 -#ifdef ARCH_PPC
 132 +#if defined(ARCH_PPC) && defined(HAVE_ALTIVEC)
 133 -#ifdef HAVE_ALTIVEC_H  /* gnu */
 134 -#define LI(a,b) "li " #a "," #b "\n\t"
 135 -#define STVX0(a,b,c) "stvx " #a ",0," #c "\n\t"
 136 -#define STVX(a,b,c) "stvx " #a "," #b "," #c "\n\t"
 137 -#define LVX0(a,b,c) "lvx " #a ",0," #c "\n\t"
 138 -#define LVX(a,b,c) "lvx " #a "," #b "," #c "\n\t"
 139 -#else                  /* apple */
 140 +#if defined( __APPLE_CC__ ) && defined( __APPLE_ALTIVEC__ )    /* apple */
 141  #define LI(a,b) "li r" #a "," #b "\n\t"
 142  #define STVX0(a,b,c) "stvx v" #a ",0,r" #c "\n\t"
 143  #define STVX(a,b,c) "stvx v" #a ",r" #b ",r" #c "\n\t"
 144  #define LVX0(a,b,c) "lvx v" #a ",0,r" #c "\n\t"
 145  #define LVX(a,b,c) "lvx v" #a ",r" #b ",r" #c "\n\t"
 146 +#else                  /* gnu */
 147 +#define LI(a,b) "li " #a "," #b "\n\t"
 148 +#define STVX0(a,b,c) "stvx " #a ",0," #c "\n\t"
 149 +#define STVX(a,b,c) "stvx " #a "," #b "," #c "\n\t"
 150 +#define LVX0(a,b,c) "lvx " #a ",0," #c "\n\t"
 151 +#define LVX(a,b,c) "lvx " #a "," #b "," #c "\n\t"
 152  #endif
 153
 154  static void state_save_altivec (cpu_state_t * state)
 155 @@ -115,12 +119,12 @@
 156
 157  void mpeg2_cpu_state_init (uint32_t accel)
 158  {
 159 -#ifdef ARCH_X86
 160 +#if defined(ARCH_X86) || defined(ARCH_X86_64)
 161      if (accel & MPEG2_ACCEL_X86_MMX) {
 162         mpeg2_cpu_state_restore = state_restore_mmx;
 163      }
 164  #endif
 165 -#ifdef ARCH_PPC
 166 +#if defined(ARCH_PPC) && defined(HAVE_ALTIVEC)
 167      if (accel & MPEG2_ACCEL_PPC_ALTIVEC) {
 168         mpeg2_cpu_state_save = state_save_altivec;
 169         mpeg2_cpu_state_restore = state_restore_altivec;
 170 --- libmpeg2/decode.c   2006-06-16 20:12:26.000000000 +0200
 171 +++ libmpeg2/decode.c   2006-06-16 20:12:50.000000000 +0200
 172 @@ -351,6 +355,15 @@
 173      fbuf->buf[1] = buf[1];
 174      fbuf->buf[2] = buf[2];
 175      fbuf->id = id;
 176 +    // HACK! FIXME! At first I frame, copy pointers to prediction frame too!
 177 +    if (mpeg2dec->custom_fbuf && !mpeg2dec->fbuf[1]->buf[0]){
 178 +       mpeg2dec->fbuf[1]->buf[0]=buf[0];
 179 +       mpeg2dec->fbuf[1]->buf[1]=buf[1];
 180 +       mpeg2dec->fbuf[1]->buf[2]=buf[2];
 181 +       mpeg2dec->fbuf[1]->id=NULL;
 182 +    }
 183 +//        printf("libmpeg2: FBUF 0:%p 1:%p 2:%p\n",
 184 +//         mpeg2dec->fbuf[0]->buf[0],mpeg2dec->fbuf[1]->buf[0],mpeg2dec->fbuf[2]->buf[0]);
 185  }
 186
 187  void mpeg2_custom_fbuf (mpeg2dec_t * mpeg2dec, int custom_fbuf)
 188 --- libmpeg2/header.c   2006-06-16 20:12:26.000000000 +0200
 189 +++ libmpeg2/header.c   2006-06-16 20:12:50.000000000 +0200
 190 @@ -100,6 +104,9 @@
 191      mpeg2dec->decoder.convert = NULL;
 192      mpeg2dec->decoder.convert_id = NULL;
 193      mpeg2dec->picture = mpeg2dec->pictures;
 194 +    memset(&mpeg2dec->fbuf_alloc[0].fbuf, 0, sizeof(mpeg2_fbuf_t));
 195 +    memset(&mpeg2dec->fbuf_alloc[1].fbuf, 0, sizeof(mpeg2_fbuf_t));
 196 +    memset(&mpeg2dec->fbuf_alloc[2].fbuf, 0, sizeof(mpeg2_fbuf_t));
 197      mpeg2dec->fbuf[0] = &mpeg2dec->fbuf_alloc[0].fbuf;
 198      mpeg2dec->fbuf[1] = &mpeg2dec->fbuf_alloc[1].fbuf;
 199      mpeg2dec->fbuf[2] = &mpeg2dec->fbuf_alloc[2].fbuf;
 200 @@ -551,6 +558,7 @@
 201         if (!(mpeg2dec->sequence.flags & SEQ_FLAG_PROGRESSIVE_SEQUENCE)) {
 202             picture->nb_fields = (buffer[3] & 2) ? 3 : 2;
 203             flags |= (buffer[3] & 128) ? PIC_FLAG_TOP_FIELD_FIRST : 0;
 204 +           flags |= (buffer[3] &   2) ? PIC_FLAG_REPEAT_FIRST_FIELD : 0;
 205         } else
 206             picture->nb_fields = (buffer[3]&2) ? ((buffer[3]&128) ? 6 : 4) : 2;
 207         break;
 208 @@ -799,6 +807,7 @@
 209         mpeg2dec->scaled[index] = mpeg2dec->q_scale_type;
 210         for (i = 0; i < 32; i++) {
 211             k = mpeg2dec->q_scale_type ? non_linear_scale[i] : (i << 1);
 212 +           decoder->quantizer_scales[i] = k;
 213             for (j = 0; j < 64; j++)
 214                 decoder->quantizer_prescale[index][i][j] =
 215                     k * mpeg2dec->quantizer_matrix[index][j];
 216 --- libmpeg2/idct.c     2006-06-16 20:12:26.000000000 +0200
 217 +++ libmpeg2/idct.c     2006-06-16 20:12:50.000000000 +0200
 218 @@ -239,12 +239,15 @@
 219
 220  void mpeg2_idct_init (uint32_t accel)
 221  {
 222 -#ifdef ARCH_X86
 223 +#ifdef HAVE_MMX2
 224      if (accel & MPEG2_ACCEL_X86_MMXEXT) {
 225         mpeg2_idct_copy = mpeg2_idct_copy_mmxext;
 226         mpeg2_idct_add = mpeg2_idct_add_mmxext;
 227         mpeg2_idct_mmx_init ();
 228 -    } else if (accel & MPEG2_ACCEL_X86_MMX) {
 229 +    } else
 230 +#endif
 231 +#ifdef HAVE_MMX
 232 +    if (accel & MPEG2_ACCEL_X86_MMX) {
 233         mpeg2_idct_copy = mpeg2_idct_copy_mmx;
 234         mpeg2_idct_add = mpeg2_idct_add_mmx;
 235         mpeg2_idct_mmx_init ();
 236 @@ -254,11 +261,14 @@
 237      } else
 238  #endif
 239  #ifdef ARCH_ALPHA
 240 +#ifdef CAN_COMPILE_ALPHA_MVI
 241      if (accel & MPEG2_ACCEL_ALPHA_MVI) {
 242         mpeg2_idct_copy = mpeg2_idct_copy_mvi;
 243         mpeg2_idct_add = mpeg2_idct_add_mvi;
 244         mpeg2_idct_alpha_init ();
 245 -    } else if (accel & MPEG2_ACCEL_ALPHA) {
 246 +    } else
 247 +#endif
 248 +    if (accel & MPEG2_ACCEL_ALPHA) {
 249         int i;
 250
 251         mpeg2_idct_copy = mpeg2_idct_copy_alpha;
 252 --- libmpeg2/idct_alpha.c       2006-06-16 20:12:26.000000000 +0200
 253 +++ libmpeg2/idct_alpha.c       2006-06-16 20:12:50.000000000 +0200
 254 @@ -157,6 +161,7 @@
 255      block[8*7] = (a0 - b0) >> 17;
 256  }
 257
 258 +#ifdef CAN_COMPILE_ALPHA_MVI
 259  void mpeg2_idct_copy_mvi (int16_t * block, uint8_t * dest, const int stride)
 260  {
 261      uint64_t clampmask;
 262 @@ -289,6 +294,7 @@
 263         stq (p7, dest + 7 * stride);
 264      }
 265  }
 266 +#endif
 267
 268  void mpeg2_idct_copy_alpha (int16_t * block, uint8_t * dest, const int stride)
 269  {
 270 --- libmpeg2/idct_mmx.c 2006-06-16 20:12:26.000000000 +0200
 271 +++ libmpeg2/idct_mmx.c 2006-06-16 20:12:50.000000000 +0200
 272 @@ -23,7 +27,7 @@
 273
 274  #include "config.h"
 275
 276 -#ifdef ARCH_X86
 277 +#if defined(ARCH_X86) || defined(ARCH_X86_64)
 278
 279  #include <inttypes.h>
 280
 281 --- libmpeg2/motion_comp.c      2006-06-16 20:12:26.000000000 +0200
 282 +++ libmpeg2/motion_comp.c      2006-06-16 20:12:50.000000000 +0200
 283 @@ -37,16 +37,22 @@
 284
 285  void mpeg2_mc_init (uint32_t accel)
 286  {
 287 -#ifdef ARCH_X86
 288 +#ifdef HAVE_MMX2
 289      if (accel & MPEG2_ACCEL_X86_MMXEXT)
 290         mpeg2_mc = mpeg2_mc_mmxext;
 291 -    else if (accel & MPEG2_ACCEL_X86_3DNOW)
 292 +    else
 293 +#endif
 294 +#ifdef HAVE_3DNOW
 295 +    if (accel & MPEG2_ACCEL_X86_3DNOW)
 296         mpeg2_mc = mpeg2_mc_3dnow;
 297 -    else if (accel & MPEG2_ACCEL_X86_MMX)
 298 +    else
 299 +#endif
 300 +#ifdef HAVE_MMX
 301 +    if (accel & MPEG2_ACCEL_X86_MMX)
 302         mpeg2_mc = mpeg2_mc_mmx;
 303      else
 304  #endif
 305 -#ifdef ARCH_PPC
 306 +#if defined(ARCH_PPC) && defined(HAVE_ALTIVEC)
 307      if (accel & MPEG2_ACCEL_PPC_ALTIVEC)
 308         mpeg2_mc = mpeg2_mc_altivec;
 309      else
 310 @@ -52,7 +62,7 @@
 311         mpeg2_mc = mpeg2_mc_alpha;
 312      else
 313  #endif
 314 -#ifdef ARCH_SPARC
 315 +#if defined(ARCH_SPARC) && defined(HAVE_VIS)
 316      if (accel & MPEG2_ACCEL_SPARC_VIS)
 317         mpeg2_mc = mpeg2_mc_vis;
 318      else
 319 @@ -67,6 +67,16 @@
 320         mpeg2_mc = mpeg2_mc_vis;
 321      else
 322  #endif
 323 +#ifdef ARCH_ARM
 324 +    if (1 /*accel & MPEG2_ACCEL_ARM*/) {
 325 +#ifdef HAVE_IWMMXT
 326 +       if (1 /*accel & MPEG2_ACCEL_ARM_IWMMXT*/)
 327 +           mpeg2_mc = mpeg2_mc_iwmmxt;
 328 +       else
 329 +#endif
 330 +           mpeg2_mc = mpeg2_mc_arm;
 331 +    } else
 332 +#endif
 333         mpeg2_mc = mpeg2_mc_c;
 334  }
 335
 336 --- libmpeg2/motion_comp_mmx.c  2006-06-16 20:12:26.000000000 +0200
 337 +++ libmpeg2/motion_comp_mmx.c  2006-06-16 20:12:50.000000000 +0200
 338 @@ -23,7 +27,7 @@
 339
 340  #include "config.h"
 341
 342 -#ifdef ARCH_X86
 343 +#if defined(ARCH_X86) || defined(ARCH_X86_64)
 344
 345  #include <inttypes.h>
 346
 347 --- include/mpeg2.h     2006-06-16 20:12:26.000000000 +0200
 348 +++ libmpeg2/mpeg2.h    2006-06-16 20:12:50.000000000 +0200
 349 @@ -82,6 +86,7 @@
 350  #define PIC_FLAG_COMPOSITE_DISPLAY 32
 351  #define PIC_FLAG_SKIP 64
 352  #define PIC_FLAG_TAGS 128
 353 +#define PIC_FLAG_REPEAT_FIRST_FIELD 256
 354  #define PIC_MASK_COMPOSITE_DISPLAY 0xfffff000
 355
 356  typedef struct mpeg2_picture_s {
 357 @@ -154,6 +159,7 @@
 358  #define MPEG2_ACCEL_X86_MMX 1
 359  #define MPEG2_ACCEL_X86_3DNOW 2
 360  #define MPEG2_ACCEL_X86_MMXEXT 4
 361 +#define MPEG2_ACCEL_X86_SSE2 8
 362  #define MPEG2_ACCEL_PPC_ALTIVEC 1
 363  #define MPEG2_ACCEL_ALPHA 1
 364  #define MPEG2_ACCEL_ALPHA_MVI 2
 365 --- libmpeg2/mpeg2_internal.h   2006-06-16 20:12:26.000000000 +0200
 366 +++ libmpeg2/mpeg2_internal.h   2006-06-16 20:12:50.000000000 +0200
 367 @@ -144,6 +148,12 @@
 368      int second_field;
 369
 370      int mpeg1;
 371 +
 372 +    /* for MPlayer: */
 373 +    int quantizer_scales[32];
 374 +    int quantizer_scale;
 375 +    char* quant_store;
 376 +    int quant_stride;
 377  };
 378
 379  typedef struct {
 380 @@ -214,6 +224,10 @@
 381      int8_t q_scale_type, scaled[4];
 382      uint8_t quantizer_matrix[4][64];
 383      uint8_t new_quantizer_matrix[4][64];
 384 +
 385 +    /* for MPlayer: */
 386 +    unsigned char *pending_buffer;
 387 +    int pending_length;
 388  };
 389
 390  typedef struct {
 391 @@ -312,3 +312,5 @@
 392  extern mpeg2_mc_t mpeg2_mc_altivec;
 393  extern mpeg2_mc_t mpeg2_mc_alpha;
 394  extern mpeg2_mc_t mpeg2_mc_vis;
 395 +extern mpeg2_mc_t mpeg2_mc_arm;
 396 +extern mpeg2_mc_t mpeg2_mc_iwmmxt;
 397 --- libmpeg2/slice.c    2006-06-16 20:12:26.000000000 +0200
 398 +++ libmpeg2/slice.c    2006-06-16 20:12:50.000000000 +0200
 399 @@ -142,6 +146,7 @@
 400
 401      quantizer_scale_code = UBITS (bit_buf, 5);
 402      DUMPBITS (bit_buf, bits, 5);
 403 +    decoder->quantizer_scale = decoder->quantizer_scales[quantizer_scale_code];
 404
 405      decoder->quantizer_matrix[0] =
 406         decoder->quantizer_prescale[0][quantizer_scale_code];
 407 @@ -1568,6 +1569,18 @@
 408
 409  #define NEXT_MACROBLOCK                                                        \
 410  do {                                                                   \
 411 +    if(decoder->quant_store) {                                          \
 412 +       if (decoder->picture_structure == TOP_FIELD)                     \
 413 +        decoder->quant_store[2*decoder->quant_stride*(decoder->v_offset>>4) \
 414 +                    +(decoder->offset>>4)] = decoder->quantizer_scale;  \
 415 +       else if (decoder->picture_structure == BOTTOM_FIELD)             \
 416 +        decoder->quant_store[2*decoder->quant_stride*(decoder->v_offset>>4) \
 417 +                   + decoder->quant_stride                             \
 418 +                    +(decoder->offset>>4)] = decoder->quantizer_scale;  \
 419 +       else                                                             \
 420 +        decoder->quant_store[decoder->quant_stride*(decoder->v_offset>>4) \
 421 +                    +(decoder->offset>>4)] = decoder->quantizer_scale;  \
 422 +    }                                                                   \
 423      decoder->offset += 16;                                             \
 424      if (decoder->offset == decoder->width) {                           \
 425         do { /* just so we can use the break statement */               \
 426 @@ -1604,6 +1604,12 @@
 427      }                                                                  \
 428  } while (0)
 429
 430 +static void motion_dummy (mpeg2_decoder_t * const decoder,
 431 +                          motion_t * const motion,
 432 +                          mpeg2_mc_fct * const * const table)
 433 +{   /* Deliberately empty: a do-nothing motion parser; every argument is ignored. */
 434 +}
 435 +
 436  void mpeg2_init_fbuf (mpeg2_decoder_t * decoder, uint8_t * current_fbuf[3],
 437                       uint8_t * forward_fbuf[3], uint8_t * backward_fbuf[3])
 438  {
 439 @@ -1661,7 +1667,9 @@
 440
 441      if (decoder->mpeg1) {
 442         decoder->motion_parser[0] = motion_zero_420;
 443 +        decoder->motion_parser[MC_FIELD] = motion_dummy;
 444         decoder->motion_parser[MC_FRAME] = motion_mp1;
 445 +        decoder->motion_parser[MC_DMV] = motion_dummy;
 446         decoder->motion_parser[4] = motion_reuse_420;
 447      } else if (decoder->picture_structure == FRAME_PICTURE) {
 448         if (decoder->chroma_format == 0) {
 449 --- libmpeg2/idct.c     2006-06-16 20:12:26.000000000 +0200
 450 +++ libmpeg2/idct.c     2006-06-16 20:12:50.000000000 +0200
 451 @@ -253,7 +253,7 @@
 452         mpeg2_idct_mmx_init ();
 453      } else
 454  #endif
 455 -#ifdef ARCH_PPC
 456 +#if defined(ARCH_PPC) && defined(HAVE_ALTIVEC)
 457      if (accel & MPEG2_ACCEL_PPC_ALTIVEC) {
 458         mpeg2_idct_copy = mpeg2_idct_copy_altivec;
 459         mpeg2_idct_add = mpeg2_idct_add_altivec;
 460 --- libmpeg2/idct_altivec.c     2004/08/02 11:26:43     12933
 461 +++ libmpeg2/idct_altivec.c     2005/05/15 20:11:34     15484
 462 @@ -41,7 +41,7 @@
 463  typedef vector signed int vector_s32_t;
 464  typedef vector unsigned int vector_u32_t;
 465
 466 -#if defined(HAVE_ALTIVEC_H) && (__GNUC__ * 100 + __GNUC_MINOR__ < 303)
 467 +#if defined( HAVE_ALTIVEC_H ) && !defined( __APPLE_ALTIVEC__ ) && (__GNUC__ * 100 + __GNUC_MINOR__ < 303)
 468  /* work around gcc <3.3 vec_mergel bug */
 469  static inline vector_s16_t my_vec_mergel (vector_s16_t const A,
 470                                           vector_s16_t const B)
 471 @@ -56,10 +56,10 @@
 472  #define vec_mergel my_vec_mergel
 473  #endif
 474
 475 -#ifdef HAVE_ALTIVEC_H  /* gnu */
 476 -#define VEC_S16(a,b,c,d,e,f,g,h) {a, b, c, d, e, f, g, h}
 477 -#else                  /* apple */
 478 +#if defined( __APPLE_CC__ ) && defined( __APPLE_ALTIVEC__ ) /* apple */
 479  #define VEC_S16(a,b,c,d,e,f,g,h) (vector_s16_t) (a, b, c, d, e, f, g, h)
 480 +#else                  /* gnu */
 481 +#define VEC_S16(a,b,c,d,e,f,g,h) {a, b, c, d, e, f, g, h}
 482  #endif
 483
 484  static const vector_s16_t constants ATTR_ALIGN(16) =
 485 Index: libmpeg2/motion_comp_arm.c
 486 ===================================================================
 487 --- libmpeg2/motion_comp_arm.c  (revision 0)
 488 +++ libmpeg2/motion_comp_arm.c  (revision 0)
 489 @@ -0,0 +1,187 @@
 490 +/*
 491 + * motion_comp_arm.c
 492 + * Copyright (C) 2004 AGAWA Koji <i (AT) atty (DOT) jp>
 493 + *
 494 + * This file is part of mpeg2dec, a free MPEG-2 video stream decoder.
 495 + * See http://libmpeg2.sourceforge.net/ for updates.
 496 + *
 497 + * mpeg2dec is free software; you can redistribute it and/or modify
 498 + * it under the terms of the GNU General Public License as published by
 499 + * the Free Software Foundation; either version 2 of the License, or
 500 + * (at your option) any later version.
 501 + *
 502 + * mpeg2dec is distributed in the hope that it will be useful,
 503 + * but WITHOUT ANY WARRANTY; without even the implied warranty of
 504 + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 505 + * GNU General Public License for more details.
 506 + *
 507 + * You should have received a copy of the GNU General Public License
 508 + * along with this program; if not, write to the Free Software
 509 + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
 510 + */
 511 +
 512 +#include "config.h"
 513 +
 514 +#ifdef ARCH_ARM
 515 +
 516 +#include <inttypes.h>
 517 +
 518 +#include "mpeg2.h"
 519 +#include "attributes.h"
 520 +#include "mpeg2_internal.h"
 521 +
 522 +#define avg2(a,b) ((a+b+1)>>1)
 523 +#define avg4(a,b,c,d) ((a+b+c+d+2)>>2)
 524 +
 525 +#define predict_o(i) (ref[i])
 526 +#define predict_x(i) (avg2 (ref[i], ref[i+1]))
 527 +#define predict_y(i) (avg2 (ref[i], (ref+stride)[i]))
 528 +#define predict_xy(i) (avg4 (ref[i], ref[i+1], \
 529 +                            (ref+stride)[i], (ref+stride)[i+1]))
 530 +
 531 +#define put(predictor,i) dest[i] = predictor (i)
 532 +#define avg(predictor,i) dest[i] = avg2 (predictor (i), dest[i])
 533 +
 534 +/* mc function template */
 535 +
 536 +#define MC_FUNC(op,xy)                                                 \
 537 +static void inline MC_##op##_##xy##_16_c (uint8_t * dest, const uint8_t * ref, \
 538 +                                  const int stride, int height)        \
 539 +{                                                                      \
 540 +    do {                                                               \
 541 +       op (predict_##xy, 0);                                           \
 542 +       op (predict_##xy, 1);                                           \
 543 +       op (predict_##xy, 2);                                           \
 544 +       op (predict_##xy, 3);                                           \
 545 +       op (predict_##xy, 4);                                           \
 546 +       op (predict_##xy, 5);                                           \
 547 +       op (predict_##xy, 6);                                           \
 548 +       op (predict_##xy, 7);                                           \
 549 +       op (predict_##xy, 8);                                           \
 550 +       op (predict_##xy, 9);                                           \
 551 +       op (predict_##xy, 10);                                          \
 552 +       op (predict_##xy, 11);                                          \
 553 +       op (predict_##xy, 12);                                          \
 554 +       op (predict_##xy, 13);                                          \
 555 +       op (predict_##xy, 14);                                          \
 556 +       op (predict_##xy, 15);                                          \
 557 +       ref += stride;                                                  \
 558 +       dest += stride;                                                 \
 559 +    } while (--height);                                                        \
 560 +}                                                                      \
 561 +static void MC_##op##_##xy##_8_c (uint8_t * dest, const uint8_t * ref, \
 562 +                                 const int stride, int height)         \
 563 +{                                                                      \
 564 +    do {                                                               \
 565 +       op (predict_##xy, 0);                                           \
 566 +       op (predict_##xy, 1);                                           \
 567 +       op (predict_##xy, 2);                                           \
 568 +       op (predict_##xy, 3);                                           \
 569 +       op (predict_##xy, 4);                                           \
 570 +       op (predict_##xy, 5);                                           \
 571 +       op (predict_##xy, 6);                                           \
 572 +       op (predict_##xy, 7);                                           \
 573 +       ref += stride;                                                  \
 574 +       dest += stride;                                                 \
 575 +    } while (--height);                                                        \
 576 +}                                                                      \
 577 +/* definitions of the actual mc functions */
 578 +
 579 +MC_FUNC (put,o)
 580 +MC_FUNC (avg,o)
 581 +MC_FUNC (put,x)
 582 +MC_FUNC (avg,x)
 583 +MC_FUNC (put,y)
 584 +MC_FUNC (avg,y)
 585 +MC_FUNC (put,xy)
 586 +MC_FUNC (avg,xy)
 587 +
 588 +
 589 +extern void MC_put_o_16_arm (uint8_t * dest, const uint8_t * ref,
 590 +                            int stride, int height);
 591 +
 592 +extern void MC_put_x_16_arm (uint8_t * dest, const uint8_t * ref,
 593 +                            int stride, int height);
 594 +
 595 +
 596 +static void MC_put_y_16_arm (uint8_t * dest, const uint8_t * ref,
 597 +                             int stride, int height)
 598 +{
 599 +    MC_put_y_16_c(dest, ref, stride, height);
 600 +}
 601 +
 602 +static void MC_put_xy_16_arm (uint8_t * dest, const uint8_t * ref,
 603 +                              int stride, int height)
 604 +{
 605 +    MC_put_xy_16_c(dest, ref, stride, height);
 606 +}
 607 +
 608 +extern void MC_put_o_8_arm (uint8_t * dest, const uint8_t * ref,
 609 +                               int stride, int height);
 610 +
 611 +extern void MC_put_x_8_arm (uint8_t * dest, const uint8_t * ref,
 612 +                           int stride, int height);
 613 +
 614 +static void MC_put_y_8_arm (uint8_t * dest, const uint8_t * ref,
 615 +                            int stride, int height)
 616 +{
 617 +    MC_put_y_8_c(dest, ref, stride, height);
 618 +}
 619 +
 620 +static void MC_put_xy_8_arm (uint8_t * dest, const uint8_t * ref,
 621 +                             int stride, int height)
 622 +{
 623 +    MC_put_xy_8_c(dest, ref, stride, height);
 624 +}
 625 +
 626 +static void MC_avg_o_16_arm (uint8_t * dest, const uint8_t * ref,
 627 +                             int stride, int height)
 628 +{
 629 +    MC_avg_o_16_c(dest, ref, stride, height);
 630 +}
 631 +
 632 +static void MC_avg_x_16_arm (uint8_t * dest, const uint8_t * ref,
 633 +                             int stride, int height)
 634 +{
 635 +    MC_avg_x_16_c(dest, ref, stride, height);
 636 +}
 637 +
 638 +static void MC_avg_y_16_arm (uint8_t * dest, const uint8_t * ref,
 639 +                             int stride, int height)
 640 +{
 641 +    MC_avg_y_16_c(dest, ref, stride, height);
 642 +}
 643 +
 644 +static void MC_avg_xy_16_arm (uint8_t * dest, const uint8_t * ref,
 645 +                              int stride, int height)
 646 +{
 647 +    MC_avg_xy_16_c(dest, ref, stride, height);
 648 +}
 649 +
 650 +static void MC_avg_o_8_arm (uint8_t * dest, const uint8_t * ref,
 651 +                            int stride, int height)
 652 +{
 653 +    MC_avg_o_8_c(dest, ref, stride, height);
 654 +}
 655 +
 656 +static void MC_avg_x_8_arm (uint8_t * dest, const uint8_t * ref,
 657 +                            int stride, int height)
 658 +{
 659 +    MC_avg_x_8_c(dest, ref, stride, height);
 660 +}
 661 +
 662 +static void MC_avg_y_8_arm (uint8_t * dest, const uint8_t * ref,
 663 +                            int stride, int height)
 664 +{
 665 +    MC_avg_y_8_c(dest, ref, stride, height);
 666 +}
 667 +
 668 +static void MC_avg_xy_8_arm (uint8_t * dest, const uint8_t * ref,
 669 +                             int stride, int height)
 670 +{
 671 +    MC_avg_xy_8_c(dest, ref, stride, height);
 672 +}
 673 +
 674 +MPEG2_MC_EXTERN (arm)
 675 +
 676 +#endif
 677 Index: libmpeg2/motion_comp_arm_s.S
 678 ===================================================================
 679 --- libmpeg2/motion_comp_arm_s.S        (revision 0)
 680 +++ libmpeg2/motion_comp_arm_s.S        (revision 0)
 681 @@ -0,0 +1,322 @@
 682 +@ motion_comp_arm_s.S
 683 +@ Copyright (C) 2004 AGAWA Koji <i (AT) atty (DOT) jp>
 684 +@
 685 +@ This file is part of mpeg2dec, a free MPEG-2 video stream decoder.
 686 +@ See http://libmpeg2.sourceforge.net/ for updates.
 687 +@
 688 +@ mpeg2dec is free software; you can redistribute it and/or modify
 689 +@ it under the terms of the GNU General Public License as published by
 690 +@ the Free Software Foundation; either version 2 of the License, or
 691 +@ (at your option) any later version.
 692 +@
 693 +@ mpeg2dec is distributed in the hope that it will be useful,
 694 +@ but WITHOUT ANY WARRANTY; without even the implied warranty of
 695 +@ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 696 +@ GNU General Public License for more details.
 697 +@
 698 +@ You should have received a copy of the GNU General Public License
 699 +@ along with this program; if not, write to the Free Software
 700 +@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
 701 +
 702 +       .text
 703 +
 704 +@ ----------------------------------------------------------------
 705 +       .align
 706 +       .global MC_put_o_16_arm
 707 +MC_put_o_16_arm:
 708 +       @@ void func(uint8_t * dest, const uint8_t * ref, int stride, int height)
 709 +       pld [r1]
 710 +        stmfd sp!, {r4-r11, lr} @ R14 is also called LR
 711 +       and r4, r1, #3
 712 +       adr r5, MC_put_o_16_arm_align_jt
 713 +       add r5, r5, r4, lsl #2
 714 +       ldr pc, [r5]
 715 +
 716 +MC_put_o_16_arm_align0:
 717 +       ldmia r1, {r4-r7}
 718 +       add r1, r1, r2
 719 +       pld [r1]
 720 +       stmia r0, {r4-r7}
 721 +       subs r3, r3, #1
 722 +       add r0, r0, r2
 723 +       bne MC_put_o_16_arm_align0
 724 +        ldmfd sp!, {r4-r11, pc} @@ update PC with LR content.
 725 +
 726 +.macro PROC shift
 727 +       ldmia r1, {r4-r8}
 728 +       add r1, r1, r2
 729 +       mov r9, r4, lsr #(\shift)
 730 +       pld [r1]
 731 +       mov r10, r5, lsr #(\shift)
 732 +       orr r9, r9, r5, lsl #(32-\shift)
 733 +       mov r11, r6, lsr #(\shift)
 734 +       orr r10, r10, r6, lsl #(32-\shift)
 735 +       mov r12, r7, lsr #(\shift)
 736 +       orr r11, r11, r7, lsl #(32-\shift)
 737 +       orr r12, r12, r8, lsl #(32-\shift)
 738 +       stmia r0, {r9-r12}
 739 +       subs r3, r3, #1
 740 +       add r0, r0, r2
 741 +.endm
 742 +
 743 +MC_put_o_16_arm_align1:
 744 +       and r1, r1, #0xFFFFFFFC
 745 +1:     PROC(8)
 746 +       bne 1b
 747 +        ldmfd sp!, {r4-r11, pc} @@ update PC with LR content.
 748 +MC_put_o_16_arm_align2:
 749 +       and r1, r1, #0xFFFFFFFC
 750 +1:     PROC(16)
 751 +       bne 1b
 752 +        ldmfd sp!, {r4-r11, pc} @@ update PC with LR content.
 753 +MC_put_o_16_arm_align3:
 754 +       and r1, r1, #0xFFFFFFFC
 755 +1:     PROC(24)
 756 +       bne 1b
 757 +        ldmfd sp!, {r4-r11, pc} @@ update PC with LR content.
 758 +MC_put_o_16_arm_align_jt:
 759 +       .word MC_put_o_16_arm_align0
 760 +       .word MC_put_o_16_arm_align1
 761 +       .word MC_put_o_16_arm_align2
 762 +       .word MC_put_o_16_arm_align3
 763 +
 764 +@ ----------------------------------------------------------------
 765 +       .align
 766 +       .global MC_put_o_8_arm
 767 +MC_put_o_8_arm:
 768 +       @@ void func(uint8_t * dest, const uint8_t * ref, int stride, int height)
 769 +       pld [r1]
 770 +        stmfd sp!, {r4-r10, lr} @ R14 is also called LR
 771 +       and r4, r1, #3
 772 +       adr r5, MC_put_o_8_arm_align_jt
 773 +       add r5, r5, r4, lsl #2
 774 +       ldr pc, [r5]
 775 +MC_put_o_8_arm_align0:
 776 +       ldmia r1, {r4-r5}
 777 +       add r1, r1, r2
 778 +       pld [r1]
 779 +       stmia r0, {r4-r5}
 780 +       add r0, r0, r2
 781 +       subs r3, r3, #1
 782 +       bne MC_put_o_8_arm_align0
 783 +        ldmfd sp!, {r4-r10, pc} @@ update PC with LR content.
 784 +
 785 +.macro PROC8 shift
 786 +       ldmia r1, {r4-r6}
 787 +       add r1, r1, r2
 788 +       mov r9, r4, lsr #(\shift)
 789 +       pld [r1]
 790 +       mov r10, r5, lsr #(\shift)
 791 +       orr r9, r9, r5, lsl #(32-\shift)
 792 +       orr r10, r10, r6, lsl #(32-\shift)
 793 +       stmia r0, {r9-r10}
 794 +       subs r3, r3, #1
 795 +       add r0, r0, r2
 796 +.endm
 797 +
 798 +MC_put_o_8_arm_align1:
 799 +       and r1, r1, #0xFFFFFFFC @ round ref down to a word boundary, PROC8 restores the 1-byte offset with shifts (the same shift is reused for every row, which presumably relies on stride being a multiple of 4 -- confirm with callers)
 800 +1:     PROC8(8)
 801 +       bne 1b @ flags come from the subs inside PROC8: loop while rows remain
 802 +        ldmfd sp!, {r4-r10, pc} @@ restore r4-r10 and return by popping the saved LR into PC
 803 +
 804 +MC_put_o_8_arm_align2:
 805 +       and r1, r1, #0xFFFFFFFC @ as above, for a source that starts 2 bytes into its word
 806 +1:     PROC8(16)
 807 +       bne 1b @ loop while rows remain
 808 +        ldmfd sp!, {r4-r10, pc} @@ restore r4-r10 and return by popping the saved LR into PC
 809 +
 810 +MC_put_o_8_arm_align3:
 811 +       and r1, r1, #0xFFFFFFFC @ as above, for a source that starts 3 bytes into its word
 812 +1:     PROC8(24)
 813 +       bne 1b @ loop while rows remain
 814 +        ldmfd sp!, {r4-r10, pc} @@ restore r4-r10 and return by popping the saved LR into PC
 815 +
 816 +MC_put_o_8_arm_align_jt:
 817 +       .word MC_put_o_8_arm_align0 @ jump table indexed by (ref & 3), read by the ldr pc in MC_put_o_8_arm
 818 +       .word MC_put_o_8_arm_align1
 819 +       .word MC_put_o_8_arm_align2
 820 +       .word MC_put_o_8_arm_align3
 821 +
 822 +@ ----------------------------------------------------------------
 823 +.macro AVG_PW rW1, rW2
 824 +       mov \rW2, \rW2, lsl #24 @ AVG_PW: rW2 = per-byte rounded-up average of rW1 and the word starting one byte after it, clobbers r9 and r10, expects r11 = 0x01010101 and r12 = NOT r11. Here: keep only the low byte of the following word, moved to the top
 825 +       orr \rW2, \rW2, \rW1, lsr #8 @ rW2 = the four bytes that start one byte after rW1
 826 +       eor r9, \rW1, \rW2 @ r9 = bits that differ between the two words
 827 +       and \rW2, \rW1, \rW2 @ rW2 = bits common to both words
 828 +       and r10, r9, r12 @ clear bit 0 of every byte so the shift below cannot leak into the neighbouring byte
 829 +       add \rW2, \rW2, r10, lsr #1 @ common + half of the difference = truncated average of each byte
 830 +       and r10, r9, r11 @ bit 0 of the difference of every byte, i.e. the rounding carry
 831 +       add \rW2, \rW2, r10 @ add it so each byte becomes (a + b + 1) >> 1
 832 +.endm
 833 +
 834 +       .align
 835 +       .global MC_put_x_16_arm
 836 +MC_put_x_16_arm:
 837 +       @@ void func(uint8_t * dest, const uint8_t * ref, int stride, int height) -- per row writes 16 bytes, each the rounded-up average of a source byte and its right-hand neighbour, used below as r0=dest r1=ref r2=stride r3=row count
 838 +       pld [r1] @ prefetch hint for the first source row
 839 +        stmfd sp!, {r4-r11,lr} @ save r4-r11 and the return address (R14 is also called LR)
 840 +       and r4, r1, #3 @ r4 = byte offset of ref inside its 32-bit word (0..3)
 841 +       adr r5, MC_put_x_16_arm_align_jt @ r5 = address of the table (constant word followed by four code addresses)
 842 +       ldr r11, [r5] @ r11 = 0x01010101, the first word of the table
 843 +       mvn r12, r11 @ r12 = 0xFEFEFEFE, both masks are consumed by AVG_PW
 844 +       add r5, r5, r4, lsl #2 @ step to the entry for this offset (4 bytes per entry)
 845 +       ldr pc, [r5, #4] @ the extra 4 skips the constant word, then jump to the loop for that offset
 846 +
 847 +.macro ADJ_ALIGN_QW shift, R0, R1, R2, R3, R4
 848 +       mov \R0, \R0, lsr #(\shift) @ ADJ_ALIGN_QW: shift the five-word sequence R0..R4 right by shift bits across word boundaries. Here: drop the low-order bits of R0
 849 +       orr \R0, \R0, \R1, lsl #(32 - \shift) @ and refill the top of R0 from the low bits of R1
 850 +       mov \R1, \R1, lsr #(\shift) @ same step for R1
 851 +       orr \R1, \R1, \R2, lsl #(32 - \shift)
 852 +       mov \R2, \R2, lsr #(\shift) @ same step for R2
 853 +       orr \R2, \R2, \R3, lsl #(32 - \shift)
 854 +       mov \R3, \R3, lsr #(\shift) @ same step for R3
 855 +       orr \R3, \R3, \R4, lsl #(32 - \shift)
 856 +       mov \R4, \R4, lsr #(\shift) @ last word has nothing to refill from, its top bits become zero
 857 +@      and \R4, \R4, #0xFF  -- left disabled: AVG_PW only consumes the low byte of the last word
 858 +.endm
 859 +
 860 +MC_put_x_16_arm_align0:
 861 +       ldmia r1, {r4-r8} @ word-aligned source: load 5 words, i.e. the 16 bytes of the row plus the word holding the neighbour of the last byte
 862 +       add r1, r1, r2 @ advance ref by one stride
 863 +       pld [r1] @ prefetch hint for the next source row
 864 +       AVG_PW r7, r8
 865 +       AVG_PW r6, r7
 866 +       AVG_PW r5, r6
 867 +       AVG_PW r4, r5
 868 +       stmia r0, {r5-r8} @ AVG_PW ran from the last word to the first so each call still saw an unmodified left-hand word, the four results sit in r5-r8
 869 +       subs r3, r3, #1 @ decrement the row counter and set the flags
 870 +       add r0, r0, r2 @ advance dest by one stride without touching the flags
 871 +       bne MC_put_x_16_arm_align0 @ loop while rows remain
 872 +        ldmfd sp!, {r4-r11,pc} @@ restore r4-r11 and return by popping the saved LR into PC
 873 +MC_put_x_16_arm_align1:
 874 +       and r1, r1, #0xFFFFFFFC @ round ref down to a word boundary, ADJ_ALIGN_QW restores the 1-byte offset with shifts (the same shift is reused for every row, which presumably relies on stride being a multiple of 4 -- confirm with callers)
 875 +1:     ldmia r1, {r4-r8} @ load the 5 words that cover the 17 source bytes needed for this row
 876 +       add r1, r1, r2 @ advance ref by one stride
 877 +       pld [r1] @ prefetch hint for the next source row
 878 +       ADJ_ALIGN_QW 8, r4, r5, r6, r7, r8
 879 +       AVG_PW r7, r8
 880 +       AVG_PW r6, r7
 881 +       AVG_PW r5, r6
 882 +       AVG_PW r4, r5
 883 +       stmia r0, {r5-r8} @ store the 16 averaged bytes, AVG_PW leaves its results in r5-r8
 884 +       subs r3, r3, #1 @ decrement the row counter and set the flags
 885 +       add r0, r0, r2 @ advance dest by one stride without touching the flags
 886 +       bne 1b @ loop while rows remain
 887 +        ldmfd sp!, {r4-r11,pc} @@ restore r4-r11 and return by popping the saved LR into PC
 888 +MC_put_x_16_arm_align2:
 889 +       and r1, r1, #0xFFFFFFFC @ as align1, for a source that starts 2 bytes into its word
 890 +1:     ldmia r1, {r4-r8} @ load the 5 words that cover this row
 891 +       add r1, r1, r2 @ advance ref by one stride
 892 +       pld [r1] @ prefetch hint for the next source row
 893 +       ADJ_ALIGN_QW 16, r4, r5, r6, r7, r8
 894 +       AVG_PW r7, r8
 895 +       AVG_PW r6, r7
 896 +       AVG_PW r5, r6
 897 +       AVG_PW r4, r5
 898 +       stmia r0, {r5-r8} @ store the 16 averaged bytes
 899 +       subs r3, r3, #1 @ decrement the row counter and set the flags
 900 +       add r0, r0, r2 @ advance dest by one stride without touching the flags
 901 +       bne 1b @ loop while rows remain
 902 +        ldmfd sp!, {r4-r11,pc} @@ restore r4-r11 and return by popping the saved LR into PC
 903 +MC_put_x_16_arm_align3:
 904 +       and r1, r1, #0xFFFFFFFC @ as align1, for a source that starts 3 bytes into its word
 905 +1:     ldmia r1, {r4-r8} @ load the 5 words that cover this row
 906 +       add r1, r1, r2 @ advance ref by one stride
 907 +       pld [r1] @ prefetch hint for the next source row
 908 +       ADJ_ALIGN_QW 24, r4, r5, r6, r7, r8
 909 +       AVG_PW r7, r8
 910 +       AVG_PW r6, r7
 911 +       AVG_PW r5, r6
 912 +       AVG_PW r4, r5
 913 +       stmia r0, {r5-r8} @ store the 16 averaged bytes
 914 +       subs r3, r3, #1 @ decrement the row counter and set the flags
 915 +       add r0, r0, r2 @ advance dest by one stride without touching the flags
 916 +       bne 1b @ loop while rows remain
 917 +        ldmfd sp!, {r4-r11,pc} @@ restore r4-r11 and return by popping the saved LR into PC
 918 +MC_put_x_16_arm_align_jt:
 919 +       .word 0x01010101 @ per-byte low-bit mask, loaded into r11 by MC_put_x_16_arm before dispatching
 920 +       .word MC_put_x_16_arm_align0 @ code addresses indexed by (ref & 3), reached through the +4 displacement in the dispatching ldr
 921 +       .word MC_put_x_16_arm_align1
 922 +       .word MC_put_x_16_arm_align2
 923 +       .word MC_put_x_16_arm_align3
 924 +
 924 +
 925 +@ ----------------------------------------------------------------
 926 +       .align
 927 +       .global MC_put_x_8_arm
 928 +MC_put_x_8_arm:
 929 +       @@ void func(uint8_t * dest, const uint8_t * ref, int stride, int height) -- per row writes 8 bytes, each the rounded-up average of a source byte and its right-hand neighbour, used below as r0=dest r1=ref r2=stride r3=row count
 930 +       pld [r1] @ prefetch hint for the first source row
 931 +        stmfd sp!, {r4-r11,lr} @ save r4-r11 and the return address (R14 is also called LR)
 932 +       and r4, r1, #3 @ r4 = byte offset of ref inside its 32-bit word (0..3)
 933 +       adr r5, MC_put_x_8_arm_align_jt @ r5 = address of the table (constant word followed by four code addresses)
 934 +       ldr r11, [r5] @ r11 = 0x01010101, the first word of the table
 935 +       mvn r12, r11 @ r12 = 0xFEFEFEFE, both masks are consumed by AVG_PW
 936 +       add r5, r5, r4, lsl #2 @ step to the entry for this offset (4 bytes per entry)
 937 +       ldr pc, [r5, #4] @ the extra 4 skips the constant word, then jump to the loop for that offset
 938 +
 939 +.macro ADJ_ALIGN_DW shift, R0, R1, R2
 940 +       mov \R0, \R0, lsr #(\shift) @ ADJ_ALIGN_DW: shift the three-word sequence R0..R2 right by shift bits across word boundaries. Here: drop the low-order bits of R0
 941 +       orr \R0, \R0, \R1, lsl #(32 - \shift) @ and refill the top of R0 from the low bits of R1
 942 +       mov \R1, \R1, lsr #(\shift) @ same step for R1
 943 +       orr \R1, \R1, \R2, lsl #(32 - \shift)
 944 +       mov \R2, \R2, lsr #(\shift) @ last word has nothing to refill from, its top bits become zero
 945 +@      and \R2, \R2, #0xFF  -- left disabled: AVG_PW only consumes the low byte of the last word
 946 +.endm
 947 +
 948 +MC_put_x_8_arm_align0:
 949 +       ldmia r1, {r4-r6} @ word-aligned source: load 3 words, i.e. the 8 bytes of the row plus the word holding the neighbour of the last byte
 950 +       add r1, r1, r2 @ advance ref by one stride
 951 +       pld [r1] @ prefetch hint for the next source row
 952 +       AVG_PW r5, r6
 953 +       AVG_PW r4, r5
 954 +       stmia r0, {r5-r6} @ AVG_PW ran from the last word to the first so each call still saw an unmodified left-hand word, the two results sit in r5-r6
 955 +       subs r3, r3, #1 @ decrement the row counter and set the flags
 956 +       add r0, r0, r2 @ advance dest by one stride without touching the flags
 957 +       bne MC_put_x_8_arm_align0 @ loop while rows remain
 958 +        ldmfd sp!, {r4-r11,pc} @@ restore r4-r11 and return by popping the saved LR into PC
 959 +MC_put_x_8_arm_align1:
 960 +       and r1, r1, #0xFFFFFFFC @ round ref down to a word boundary, ADJ_ALIGN_DW restores the 1-byte offset with shifts (the same shift is reused for every row, which presumably relies on stride being a multiple of 4 -- confirm with callers)
 961 +1:     ldmia r1, {r4-r6} @ load the 3 words that cover the 9 source bytes needed for this row
 962 +       add r1, r1, r2 @ advance ref by one stride
 963 +       pld [r1] @ prefetch hint for the next source row
 964 +       ADJ_ALIGN_DW 8, r4, r5, r6
 965 +       AVG_PW r5, r6
 966 +       AVG_PW r4, r5
 967 +       stmia r0, {r5-r6} @ store the 8 averaged bytes, AVG_PW leaves its results in r5-r6
 968 +       subs r3, r3, #1 @ decrement the row counter and set the flags
 969 +       add r0, r0, r2 @ advance dest by one stride without touching the flags
 970 +       bne 1b @ loop while rows remain
 971 +        ldmfd sp!, {r4-r11,pc} @@ restore r4-r11 and return by popping the saved LR into PC
 972 +MC_put_x_8_arm_align2:
 973 +       and r1, r1, #0xFFFFFFFC @ as align1, for a source that starts 2 bytes into its word
 974 +1:     ldmia r1, {r4-r6} @ load the 3 words that cover this row
 975 +       add r1, r1, r2 @ advance ref by one stride
 976 +       pld [r1] @ prefetch hint for the next source row
 977 +       ADJ_ALIGN_DW 16, r4, r5, r6
 978 +       AVG_PW r5, r6
 979 +       AVG_PW r4, r5
 980 +       stmia r0, {r5-r6} @ store the 8 averaged bytes
 981 +       subs r3, r3, #1 @ decrement the row counter and set the flags
 982 +       add r0, r0, r2 @ advance dest by one stride without touching the flags
 983 +       bne 1b @ loop while rows remain
 984 +        ldmfd sp!, {r4-r11,pc} @@ restore r4-r11 and return by popping the saved LR into PC
 985 +MC_put_x_8_arm_align3:
 986 +       and r1, r1, #0xFFFFFFFC @ as align1, for a source that starts 3 bytes into its word
 987 +1:     ldmia r1, {r4-r6} @ load the 3 words that cover this row
 988 +       add r1, r1, r2 @ advance ref by one stride
 989 +       pld [r1] @ prefetch hint for the next source row
 990 +       ADJ_ALIGN_DW 24, r4, r5, r6
 991 +       AVG_PW r5, r6
 992 +       AVG_PW r4, r5
 993 +       stmia r0, {r5-r6} @ store the 8 averaged bytes
 994 +       subs r3, r3, #1 @ decrement the row counter and set the flags
 995 +       add r0, r0, r2 @ advance dest by one stride without touching the flags
 996 +       bne 1b @ loop while rows remain
 997 +        ldmfd sp!, {r4-r11,pc} @@ restore r4-r11 and return by popping the saved LR into PC
 998 +MC_put_x_8_arm_align_jt:
 999 +       .word 0x01010101 @ per-byte low-bit mask, loaded into r11 by MC_put_x_8_arm before dispatching
1000 +       .word MC_put_x_8_arm_align0 @ code addresses indexed by (ref & 3), reached through the +4 displacement in the dispatching ldr
1001 +       .word MC_put_x_8_arm_align1
1002 +       .word MC_put_x_8_arm_align2
1003 +       .word MC_put_x_8_arm_align3
1004 Index: libmpeg2/motion_comp_iwmmxt.c
1005 ===================================================================
1006 --- libmpeg2/motion_comp_iwmmxt.c       (revision 0)
1007 +++ libmpeg2/motion_comp_iwmmxt.c       (revision 0)
1008 @@ -0,0 +1,61 @@
1009 +/*
1010 + * motion_comp_iwmmxt.c
1011 + * Copyright (C) 2004 AGAWA Koji <i (AT) atty (DOT) jp>
1012 + *
1013 + * This file is part of mpeg2dec, a free MPEG-2 video stream decoder.
1014 + * See http://libmpeg2.sourceforge.net/ for updates.
1015 + *
1016 + * mpeg2dec is free software; you can redistribute it and/or modify
1017 + * it under the terms of the GNU General Public License as published by
1018 + * the Free Software Foundation; either version 2 of the License, or
1019 + * (at your option) any later version.
1020 + *
1021 + * mpeg2dec is distributed in the hope that it will be useful,
1022 + * but WITHOUT ANY WARRANTY; without even the implied warranty of
1023 + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
1024 + * GNU General Public License for more details.
1025 + *
1026 + * You should have received a copy of the GNU General Public License
1027 + * along with this program; if not, write to the Free Software
1028 + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
1029 + */
1030 +
1031 +#include "config.h"
1032 +
1033 +#ifdef ARCH_ARM
1034 +#ifdef HAVE_IWMMXT
1035 +
1036 +#include <inttypes.h>
1037 +
1038 +#include "mpeg2.h"
1039 +#include "attributes.h"
1040 +#include "mpeg2_internal.h"
1041 +
1042 +/* Prototypes of the iwMMXt pixel routines; their definitions live in libavcodec. */
1043 +
1044 +void put_pixels16_iwmmxt(uint8_t *dest, const uint8_t *ref, int stride, int height);
1045 +void put_pixels16_x2_iwmmxt(uint8_t *dest, const uint8_t *ref, int stride, int height);
1046 +void put_pixels16_y2_iwmmxt(uint8_t *dest, const uint8_t *ref, int stride, int height);
1047 +void put_pixels16_xy2_iwmmxt(uint8_t *dest, const uint8_t *ref, int stride, int height);
1048 +void put_pixels8_iwmmxt(uint8_t *dest, const uint8_t *ref, int stride, int height);
1049 +void put_pixels8_x2_iwmmxt(uint8_t *dest, const uint8_t *ref, int stride, int height);
1050 +void put_pixels8_y2_iwmmxt(uint8_t *dest, const uint8_t *ref, int stride, int height);
1051 +void put_pixels8_xy2_iwmmxt(uint8_t *dest, const uint8_t *ref, int stride, int height);
1052 +void avg_pixels16_iwmmxt(uint8_t *dest, const uint8_t *ref, int stride, int height);
1053 +void avg_pixels16_x2_iwmmxt(uint8_t *dest, const uint8_t *ref, int stride, int height);
1054 +void avg_pixels16_y2_iwmmxt(uint8_t *dest, const uint8_t *ref, int stride, int height);
1055 +void avg_pixels16_xy2_iwmmxt(uint8_t *dest, const uint8_t *ref, int stride, int height);
1056 +void avg_pixels8_iwmmxt(uint8_t *dest, const uint8_t *ref, int stride, int height);
1057 +void avg_pixels8_x2_iwmmxt(uint8_t *dest, const uint8_t *ref, int stride, int height);
1058 +void avg_pixels8_y2_iwmmxt(uint8_t *dest, const uint8_t *ref, int stride, int height);
1059 +void avg_pixels8_xy2_iwmmxt(uint8_t *dest, const uint8_t *ref, int stride, int height);
1060 +/* iwMMXt motion-compensation table: first row put_*, second row avg_*; each row lists the 16-pixel then the 8-pixel routines in the order plain, x2, y2, xy2. */
1061 +mpeg2_mc_t mpeg2_mc_iwmmxt = {
1062 +    { put_pixels16_iwmmxt, put_pixels16_x2_iwmmxt, put_pixels16_y2_iwmmxt, put_pixels16_xy2_iwmmxt,
1063 +      put_pixels8_iwmmxt,  put_pixels8_x2_iwmmxt,  put_pixels8_y2_iwmmxt,  put_pixels8_xy2_iwmmxt },
1064 +    { avg_pixels16_iwmmxt, avg_pixels16_x2_iwmmxt, avg_pixels16_y2_iwmmxt, avg_pixels16_xy2_iwmmxt,
1065 +      avg_pixels8_iwmmxt,  avg_pixels8_x2_iwmmxt,  avg_pixels8_y2_iwmmxt,  avg_pixels8_xy2_iwmmxt },
1066 +};
1067 +
1068 +#endif
1069 +#endif