1 --- include/attributes.h 2006-06-16 20:12:26.000000000 +0200
2 +++ libmpeg2/attributes.h 2006-06-16 20:12:50.000000000 +0200
4 #ifdef ATTRIBUTE_ALIGNED_MAX
5 #define ATTR_ALIGN(align) __attribute__ ((__aligned__ ((ATTRIBUTE_ALIGNED_MAX < align) ? ATTRIBUTE_ALIGNED_MAX : align)))
7 -#define ATTR_ALIGN(align)
8 +#define ATTR_ALIGN(align) __attribute__ ((__aligned__ ((16 < align) ? 16 : align)))
11 #ifdef HAVE_BUILTIN_EXPECT
12 --- libmpeg2/cpu_accel.c 2006-06-16 20:12:26.000000000 +0200
13 +++ libmpeg2/cpu_accel.c 2006-06-16 20:12:50.000000000 +0200
18 +#include "cpudetect.h"
23 #include "mpeg2_internal.h"
27 +#if defined(ARCH_X86) || defined(ARCH_X86_64)
29 +/* MPlayer imports libmpeg2 as decoder, which detects MMX / 3DNow!
30 + * instructions via assembly. However, that duplicates work MPlayer already
31 + * does, so we force the use of MPlayer's own CPU detection instead.
33 +#define USE_MPLAYER_CPUDETECT
35 static inline uint32_t arch_accel (void)
37 +#if !defined(USE_MPLAYER_CPUDETECT)
38 uint32_t eax, ebx, ecx, edx;
42 caps |= MPEG2_ACCEL_X86_MMXEXT;
45 +#else /* USE_MPLAYER_CPUDETECT: Use MPlayer's cpu capability property */
47 + if (gCpuCaps.hasMMX)
48 + caps |= MPEG2_ACCEL_X86_MMX;
49 + if (gCpuCaps.hasSSE2)
50 + caps |= MPEG2_ACCEL_X86_SSE2;
51 + if (gCpuCaps.hasMMX2)
52 + caps |= MPEG2_ACCEL_X86_MMXEXT;
53 + if (gCpuCaps.has3DNow)
54 + caps |= MPEG2_ACCEL_X86_3DNOW;
58 +#endif /* USE_MPLAYER_CPUDETECT */
60 -#endif /* ARCH_X86 */
61 +#endif /* ARCH_X86 || ARCH_X86_64 */
63 -#if defined(ARCH_PPC) || defined(ARCH_SPARC)
64 +#if defined(ARCH_PPC) || (defined(ARCH_SPARC) && defined(HAVE_VIS))
72 -#ifdef HAVE_ALTIVEC_H /* gnu */
73 -#define VAND(a,b,c) "vand " #a "," #b "," #c "\n\t"
75 +#if defined( __APPLE_CC__ ) && defined( __APPLE_ALTIVEC__ ) /* apple */
76 #define VAND(a,b,c) "vand v" #a ",v" #b ",v" #c "\n\t"
78 +#define VAND(a,b,c) "vand " #a "," #b "," #c "\n\t"
80 asm volatile ("mtspr 256, %0\n\t"
84 static inline uint32_t arch_accel (void)
86 +#ifdef CAN_COMPILE_ALPHA_MVI
89 asm volatile ("amask %1, %0"
91 : "rI" (256)); /* AMASK_MVI */
92 return no_mvi ? MPEG2_ACCEL_ALPHA : (MPEG2_ACCEL_ALPHA |
93 MPEG2_ACCEL_ALPHA_MVI);
95 + return MPEG2_ACCEL_ALPHA;
98 #endif /* ARCH_ALPHA */
99 #endif /* ACCEL_DETECT */
104 -#if defined (ARCH_X86) || defined (ARCH_PPC) || defined (ARCH_ALPHA) || defined (ARCH_SPARC)
105 +#if defined (ARCH_X86) || defined (ARCH_X86_64) || defined (ARCH_PPC) || defined (ARCH_ALPHA) || defined (ARCH_SPARC)
106 accel = arch_accel ();
109 --- libmpeg2/cpu_state.c 2006-06-16 20:12:26.000000000 +0200
110 +++ libmpeg2/cpu_state.c 2006-06-16 20:12:50.000000000 +0200
113 #include "attributes.h"
114 #include "mpeg2_internal.h"
116 +#if defined(ARCH_X86) || defined(ARCH_X86_64)
120 void (* mpeg2_cpu_state_save) (cpu_state_t * state) = NULL;
121 void (* mpeg2_cpu_state_restore) (cpu_state_t * state) = NULL;
124 +#if defined(ARCH_X86) || defined(ARCH_X86_64)
125 static void state_restore_mmx (cpu_state_t * state)
132 +#if defined(ARCH_PPC) && defined(HAVE_ALTIVEC)
133 -#ifdef HAVE_ALTIVEC_H /* gnu */
134 -#define LI(a,b) "li " #a "," #b "\n\t"
135 -#define STVX0(a,b,c) "stvx " #a ",0," #c "\n\t"
136 -#define STVX(a,b,c) "stvx " #a "," #b "," #c "\n\t"
137 -#define LVX0(a,b,c) "lvx " #a ",0," #c "\n\t"
138 -#define LVX(a,b,c) "lvx " #a "," #b "," #c "\n\t"
140 +#if defined( __APPLE_CC__ ) && defined( __APPLE_ALTIVEC__ ) /* apple */
141 #define LI(a,b) "li r" #a "," #b "\n\t"
142 #define STVX0(a,b,c) "stvx v" #a ",0,r" #c "\n\t"
143 #define STVX(a,b,c) "stvx v" #a ",r" #b ",r" #c "\n\t"
144 #define LVX0(a,b,c) "lvx v" #a ",0,r" #c "\n\t"
145 #define LVX(a,b,c) "lvx v" #a ",r" #b ",r" #c "\n\t"
147 +#define LI(a,b) "li " #a "," #b "\n\t"
148 +#define STVX0(a,b,c) "stvx " #a ",0," #c "\n\t"
149 +#define STVX(a,b,c) "stvx " #a "," #b "," #c "\n\t"
150 +#define LVX0(a,b,c) "lvx " #a ",0," #c "\n\t"
151 +#define LVX(a,b,c) "lvx " #a "," #b "," #c "\n\t"
154 static void state_save_altivec (cpu_state_t * state)
155 @@ -115,12 +119,12 @@
157 void mpeg2_cpu_state_init (uint32_t accel)
160 +#if defined(ARCH_X86) || defined(ARCH_X86_64)
161 if (accel & MPEG2_ACCEL_X86_MMX) {
162 mpeg2_cpu_state_restore = state_restore_mmx;
166 +#if defined(ARCH_PPC) && defined(HAVE_ALTIVEC)
167 if (accel & MPEG2_ACCEL_PPC_ALTIVEC) {
168 mpeg2_cpu_state_save = state_save_altivec;
169 mpeg2_cpu_state_restore = state_restore_altivec;
170 --- libmpeg2/decode.c 2006-06-16 20:12:26.000000000 +0200
171 +++ libmpeg2/decode.c 2006-06-16 20:12:50.000000000 +0200
173 fbuf->buf[1] = buf[1];
174 fbuf->buf[2] = buf[2];
176 + // HACK! FIXME! At the first I-frame, copy the pointers to the prediction frame too!
177 + if (mpeg2dec->custom_fbuf && !mpeg2dec->fbuf[1]->buf[0]){
178 + mpeg2dec->fbuf[1]->buf[0]=buf[0];
179 + mpeg2dec->fbuf[1]->buf[1]=buf[1];
180 + mpeg2dec->fbuf[1]->buf[2]=buf[2];
181 + mpeg2dec->fbuf[1]->id=NULL;
183 +// printf("libmpeg2: FBUF 0:%p 1:%p 2:%p\n",
184 +// mpeg2dec->fbuf[0]->buf[0],mpeg2dec->fbuf[1]->buf[0],mpeg2dec->fbuf[2]->buf[0]);
187 void mpeg2_custom_fbuf (mpeg2dec_t * mpeg2dec, int custom_fbuf)
188 --- libmpeg2/header.c 2006-06-16 20:12:26.000000000 +0200
189 +++ libmpeg2/header.c 2006-06-16 20:12:50.000000000 +0200
191 mpeg2dec->decoder.convert = NULL;
192 mpeg2dec->decoder.convert_id = NULL;
193 mpeg2dec->picture = mpeg2dec->pictures;
194 + memset(&mpeg2dec->fbuf_alloc[0].fbuf, 0, sizeof(mpeg2_fbuf_t));
195 + memset(&mpeg2dec->fbuf_alloc[1].fbuf, 0, sizeof(mpeg2_fbuf_t));
196 + memset(&mpeg2dec->fbuf_alloc[2].fbuf, 0, sizeof(mpeg2_fbuf_t));
197 mpeg2dec->fbuf[0] = &mpeg2dec->fbuf_alloc[0].fbuf;
198 mpeg2dec->fbuf[1] = &mpeg2dec->fbuf_alloc[1].fbuf;
199 mpeg2dec->fbuf[2] = &mpeg2dec->fbuf_alloc[2].fbuf;
201 if (!(mpeg2dec->sequence.flags & SEQ_FLAG_PROGRESSIVE_SEQUENCE)) {
202 picture->nb_fields = (buffer[3] & 2) ? 3 : 2;
203 flags |= (buffer[3] & 128) ? PIC_FLAG_TOP_FIELD_FIRST : 0;
204 + flags |= (buffer[3] & 2) ? PIC_FLAG_REPEAT_FIRST_FIELD : 0;
206 picture->nb_fields = (buffer[3]&2) ? ((buffer[3]&128) ? 6 : 4) : 2;
209 mpeg2dec->scaled[index] = mpeg2dec->q_scale_type;
210 for (i = 0; i < 32; i++) {
211 k = mpeg2dec->q_scale_type ? non_linear_scale[i] : (i << 1);
212 + decoder->quantizer_scales[i] = k;
213 for (j = 0; j < 64; j++)
214 decoder->quantizer_prescale[index][i][j] =
215 k * mpeg2dec->quantizer_matrix[index][j];
216 --- libmpeg2/idct.c 2006-06-16 20:12:26.000000000 +0200
217 +++ libmpeg2/idct.c 2006-06-16 20:12:50.000000000 +0200
218 @@ -239,12 +239,15 @@
220 void mpeg2_idct_init (uint32_t accel)
224 if (accel & MPEG2_ACCEL_X86_MMXEXT) {
225 mpeg2_idct_copy = mpeg2_idct_copy_mmxext;
226 mpeg2_idct_add = mpeg2_idct_add_mmxext;
227 mpeg2_idct_mmx_init ();
228 - } else if (accel & MPEG2_ACCEL_X86_MMX) {
232 + if (accel & MPEG2_ACCEL_X86_MMX) {
233 mpeg2_idct_copy = mpeg2_idct_copy_mmx;
234 mpeg2_idct_add = mpeg2_idct_add_mmx;
235 mpeg2_idct_mmx_init ();
236 @@ -254,11 +261,14 @@
240 +#ifdef CAN_COMPILE_ALPHA_MVI
241 if (accel & MPEG2_ACCEL_ALPHA_MVI) {
242 mpeg2_idct_copy = mpeg2_idct_copy_mvi;
243 mpeg2_idct_add = mpeg2_idct_add_mvi;
244 mpeg2_idct_alpha_init ();
245 - } else if (accel & MPEG2_ACCEL_ALPHA) {
248 + if (accel & MPEG2_ACCEL_ALPHA) {
251 mpeg2_idct_copy = mpeg2_idct_copy_alpha;
252 --- libmpeg2/idct_alpha.c 2006-06-16 20:12:26.000000000 +0200
253 +++ libmpeg2/idct_alpha.c 2006-06-16 20:12:50.000000000 +0200
255 block[8*7] = (a0 - b0) >> 17;
258 +#ifdef CAN_COMPILE_ALPHA_MVI
259 void mpeg2_idct_copy_mvi (int16_t * block, uint8_t * dest, const int stride)
263 stq (p7, dest + 7 * stride);
268 void mpeg2_idct_copy_alpha (int16_t * block, uint8_t * dest, const int stride)
270 --- libmpeg2/idct_mmx.c 2006-06-16 20:12:26.000000000 +0200
271 +++ libmpeg2/idct_mmx.c 2006-06-16 20:12:50.000000000 +0200
277 +#if defined(ARCH_X86) || defined(ARCH_X86_64)
279 #include <inttypes.h>
281 --- libmpeg2/motion_comp.c 2006-06-16 20:12:26.000000000 +0200
282 +++ libmpeg2/motion_comp.c 2006-06-16 20:12:50.000000000 +0200
285 void mpeg2_mc_init (uint32_t accel)
289 if (accel & MPEG2_ACCEL_X86_MMXEXT)
290 mpeg2_mc = mpeg2_mc_mmxext;
291 - else if (accel & MPEG2_ACCEL_X86_3DNOW)
295 + if (accel & MPEG2_ACCEL_X86_3DNOW)
296 mpeg2_mc = mpeg2_mc_3dnow;
297 - else if (accel & MPEG2_ACCEL_X86_MMX)
301 + if (accel & MPEG2_ACCEL_X86_MMX)
302 mpeg2_mc = mpeg2_mc_mmx;
306 +#if defined(ARCH_PPC) && defined(HAVE_ALTIVEC)
307 if (accel & MPEG2_ACCEL_PPC_ALTIVEC)
308 mpeg2_mc = mpeg2_mc_altivec;
311 mpeg2_mc = mpeg2_mc_alpha;
315 +#if defined(ARCH_SPARC) && defined(HAVE_VIS)
316 if (accel & MPEG2_ACCEL_SPARC_VIS)
317 mpeg2_mc = mpeg2_mc_vis;
320 mpeg2_mc = mpeg2_mc_vis;
324 + if (1 /*accel & MPEG2_ACCEL_ARM*/) {
326 + if (1 /*accel & MPEG2_ACCEL_ARM_IWMMXT*/)
327 + mpeg2_mc = mpeg2_mc_iwmmxt;
330 + mpeg2_mc = mpeg2_mc_arm;
333 mpeg2_mc = mpeg2_mc_c;
336 --- libmpeg2/motion_comp_mmx.c 2006-06-16 20:12:26.000000000 +0200
337 +++ libmpeg2/motion_comp_mmx.c 2006-06-16 20:12:50.000000000 +0200
343 +#if defined(ARCH_X86) || defined(ARCH_X86_64)
345 #include <inttypes.h>
347 --- include/mpeg2.h 2006-06-16 20:12:26.000000000 +0200
348 +++ libmpeg2/mpeg2.h 2006-06-16 20:12:50.000000000 +0200
350 #define PIC_FLAG_COMPOSITE_DISPLAY 32
351 #define PIC_FLAG_SKIP 64
352 #define PIC_FLAG_TAGS 128
353 +#define PIC_FLAG_REPEAT_FIRST_FIELD 256
354 #define PIC_MASK_COMPOSITE_DISPLAY 0xfffff000
356 typedef struct mpeg2_picture_s {
358 #define MPEG2_ACCEL_X86_MMX 1
359 #define MPEG2_ACCEL_X86_3DNOW 2
360 #define MPEG2_ACCEL_X86_MMXEXT 4
361 +#define MPEG2_ACCEL_X86_SSE2 8
362 #define MPEG2_ACCEL_PPC_ALTIVEC 1
363 #define MPEG2_ACCEL_ALPHA 1
364 #define MPEG2_ACCEL_ALPHA_MVI 2
365 --- libmpeg2/mpeg2_internal.h 2006-06-16 20:12:26.000000000 +0200
366 +++ libmpeg2/mpeg2_internal.h 2006-06-16 20:12:50.000000000 +0200
373 + int quantizer_scales[32];
374 + int quantizer_scale;
381 int8_t q_scale_type, scaled[4];
382 uint8_t quantizer_matrix[4][64];
383 uint8_t new_quantizer_matrix[4][64];
386 + unsigned char *pending_buffer;
387 + int pending_length;
392 extern mpeg2_mc_t mpeg2_mc_altivec;
393 extern mpeg2_mc_t mpeg2_mc_alpha;
394 extern mpeg2_mc_t mpeg2_mc_vis;
395 +extern mpeg2_mc_t mpeg2_mc_arm;
396 +extern mpeg2_mc_t mpeg2_mc_iwmmxt;
397 --- libmpeg2/slice.c 2006-06-16 20:12:26.000000000 +0200
398 +++ libmpeg2/slice.c 2006-06-16 20:12:50.000000000 +0200
401 quantizer_scale_code = UBITS (bit_buf, 5);
402 DUMPBITS (bit_buf, bits, 5);
403 + decoder->quantizer_scale = decoder->quantizer_scales[quantizer_scale_code];
405 decoder->quantizer_matrix[0] =
406 decoder->quantizer_prescale[0][quantizer_scale_code];
407 @@ -1568,6 +1569,18 @@
409 #define NEXT_MACROBLOCK \
411 + if(decoder->quant_store) { \
412 + if (decoder->picture_structure == TOP_FIELD) \
413 + decoder->quant_store[2*decoder->quant_stride*(decoder->v_offset>>4) \
414 + +(decoder->offset>>4)] = decoder->quantizer_scale; \
415 + else if (decoder->picture_structure == BOTTOM_FIELD) \
416 + decoder->quant_store[2*decoder->quant_stride*(decoder->v_offset>>4) \
417 + + decoder->quant_stride \
418 + +(decoder->offset>>4)] = decoder->quantizer_scale; \
420 + decoder->quant_store[decoder->quant_stride*(decoder->v_offset>>4) \
421 + +(decoder->offset>>4)] = decoder->quantizer_scale; \
423 decoder->offset += 16; \
424 if (decoder->offset == decoder->width) { \
425 do { /* just so we can use the break statement */ \
426 @@ -1604,6 +1604,12 @@
430 +static void motion_dummy (mpeg2_decoder_t * const decoder,
431 + motion_t * const motion,
432 + mpeg2_mc_fct * const * const table)
436 void mpeg2_init_fbuf (mpeg2_decoder_t * decoder, uint8_t * current_fbuf[3],
437 uint8_t * forward_fbuf[3], uint8_t * backward_fbuf[3])
439 @@ -1661,7 +1667,9 @@
441 if (decoder->mpeg1) {
442 decoder->motion_parser[0] = motion_zero_420;
443 + decoder->motion_parser[MC_FIELD] = motion_dummy;
444 decoder->motion_parser[MC_FRAME] = motion_mp1;
445 + decoder->motion_parser[MC_DMV] = motion_dummy;
446 decoder->motion_parser[4] = motion_reuse_420;
447 } else if (decoder->picture_structure == FRAME_PICTURE) {
448 if (decoder->chroma_format == 0) {
449 --- libmpeg2/idct.c 2006-06-16 20:12:26.000000000 +0200
450 +++ libmpeg2/idct.c 2006-06-16 20:12:50.000000000 +0200
452 mpeg2_idct_mmx_init ();
456 +#if defined(ARCH_PPC) && defined(HAVE_ALTIVEC)
457 if (accel & MPEG2_ACCEL_PPC_ALTIVEC) {
458 mpeg2_idct_copy = mpeg2_idct_copy_altivec;
459 mpeg2_idct_add = mpeg2_idct_add_altivec;
460 --- libmpeg2/idct_altivec.c 2004/08/02 11:26:43 12933
461 +++ libmpeg2/idct_altivec.c 2005/05/15 20:11:34 15484
463 typedef vector signed int vector_s32_t;
464 typedef vector unsigned int vector_u32_t;
466 -#if defined(HAVE_ALTIVEC_H) && (__GNUC__ * 100 + __GNUC_MINOR__ < 303)
467 +#if defined( HAVE_ALTIVEC_H ) && !defined( __APPLE_ALTIVEC__ ) && (__GNUC__ * 100 + __GNUC_MINOR__ < 303)
468 /* work around gcc <3.3 vec_mergel bug */
469 static inline vector_s16_t my_vec_mergel (vector_s16_t const A,
470 vector_s16_t const B)
472 #define vec_mergel my_vec_mergel
475 -#ifdef HAVE_ALTIVEC_H /* gnu */
476 -#define VEC_S16(a,b,c,d,e,f,g,h) {a, b, c, d, e, f, g, h}
478 +#if defined( __APPLE_CC__ ) && defined( __APPLE_ALTIVEC__ ) /* apple */
479 #define VEC_S16(a,b,c,d,e,f,g,h) (vector_s16_t) (a, b, c, d, e, f, g, h)
481 +#define VEC_S16(a,b,c,d,e,f,g,h) {a, b, c, d, e, f, g, h}
484 static const vector_s16_t constants ATTR_ALIGN(16) =
485 Index: libmpeg2/motion_comp_arm.c
486 ===================================================================
487 --- libmpeg2/motion_comp_arm.c (revision 0)
488 +++ libmpeg2/motion_comp_arm.c (revision 0)
491 + * motion_comp_arm.c
492 + * Copyright (C) 2004 AGAWA Koji <i (AT) atty (DOT) jp>
494 + * This file is part of mpeg2dec, a free MPEG-2 video stream decoder.
495 + * See http://libmpeg2.sourceforge.net/ for updates.
497 + * mpeg2dec is free software; you can redistribute it and/or modify
498 + * it under the terms of the GNU General Public License as published by
499 + * the Free Software Foundation; either version 2 of the License, or
500 + * (at your option) any later version.
502 + * mpeg2dec is distributed in the hope that it will be useful,
503 + * but WITHOUT ANY WARRANTY; without even the implied warranty of
504 + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
505 + * GNU General Public License for more details.
507 + * You should have received a copy of the GNU General Public License
508 + * along with this program; if not, write to the Free Software
509 + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
516 +#include <inttypes.h>
519 +#include "attributes.h"
520 +#include "mpeg2_internal.h"
522 +#define avg2(a,b) ((a+b+1)>>1)
523 +#define avg4(a,b,c,d) ((a+b+c+d+2)>>2)
525 +#define predict_o(i) (ref[i])
526 +#define predict_x(i) (avg2 (ref[i], ref[i+1]))
527 +#define predict_y(i) (avg2 (ref[i], (ref+stride)[i]))
528 +#define predict_xy(i) (avg4 (ref[i], ref[i+1], \
529 + (ref+stride)[i], (ref+stride)[i+1]))
531 +#define put(predictor,i) dest[i] = predictor (i)
532 +#define avg(predictor,i) dest[i] = avg2 (predictor (i), dest[i])
534 +/* mc function template */
536 +#define MC_FUNC(op,xy) \
537 +static void inline MC_##op##_##xy##_16_c (uint8_t * dest, const uint8_t * ref, \
538 + const int stride, int height) \
541 + op (predict_##xy, 0); \
542 + op (predict_##xy, 1); \
543 + op (predict_##xy, 2); \
544 + op (predict_##xy, 3); \
545 + op (predict_##xy, 4); \
546 + op (predict_##xy, 5); \
547 + op (predict_##xy, 6); \
548 + op (predict_##xy, 7); \
549 + op (predict_##xy, 8); \
550 + op (predict_##xy, 9); \
551 + op (predict_##xy, 10); \
552 + op (predict_##xy, 11); \
553 + op (predict_##xy, 12); \
554 + op (predict_##xy, 13); \
555 + op (predict_##xy, 14); \
556 + op (predict_##xy, 15); \
559 + } while (--height); \
561 +static void MC_##op##_##xy##_8_c (uint8_t * dest, const uint8_t * ref, \
562 + const int stride, int height) \
565 + op (predict_##xy, 0); \
566 + op (predict_##xy, 1); \
567 + op (predict_##xy, 2); \
568 + op (predict_##xy, 3); \
569 + op (predict_##xy, 4); \
570 + op (predict_##xy, 5); \
571 + op (predict_##xy, 6); \
572 + op (predict_##xy, 7); \
575 + } while (--height); \
577 +/* definitions of the actual mc functions */
589 +extern void MC_put_o_16_arm (uint8_t * dest, const uint8_t * ref,
590 + int stride, int height);
592 +extern void MC_put_x_16_arm (uint8_t * dest, const uint8_t * ref,
593 + int stride, int height);
596 +static void MC_put_y_16_arm (uint8_t * dest, const uint8_t * ref,
597 + int stride, int height)
599 + MC_put_y_16_c(dest, ref, stride, height);
602 +static void MC_put_xy_16_arm (uint8_t * dest, const uint8_t * ref,
603 + int stride, int height)
605 + MC_put_xy_16_c(dest, ref, stride, height);
608 +extern void MC_put_o_8_arm (uint8_t * dest, const uint8_t * ref,
609 + int stride, int height);
611 +extern void MC_put_x_8_arm (uint8_t * dest, const uint8_t * ref,
612 + int stride, int height);
614 +static void MC_put_y_8_arm (uint8_t * dest, const uint8_t * ref,
615 + int stride, int height)
617 + MC_put_y_8_c(dest, ref, stride, height);
620 +static void MC_put_xy_8_arm (uint8_t * dest, const uint8_t * ref,
621 + int stride, int height)
623 + MC_put_xy_8_c(dest, ref, stride, height);
626 +static void MC_avg_o_16_arm (uint8_t * dest, const uint8_t * ref,
627 + int stride, int height)
629 + MC_avg_o_16_c(dest, ref, stride, height);
632 +static void MC_avg_x_16_arm (uint8_t * dest, const uint8_t * ref,
633 + int stride, int height)
635 + MC_avg_x_16_c(dest, ref, stride, height);
638 +static void MC_avg_y_16_arm (uint8_t * dest, const uint8_t * ref,
639 + int stride, int height)
641 + MC_avg_y_16_c(dest, ref, stride, height);
644 +static void MC_avg_xy_16_arm (uint8_t * dest, const uint8_t * ref,
645 + int stride, int height)
647 + MC_avg_xy_16_c(dest, ref, stride, height);
650 +static void MC_avg_o_8_arm (uint8_t * dest, const uint8_t * ref,
651 + int stride, int height)
653 + MC_avg_o_8_c(dest, ref, stride, height);
656 +static void MC_avg_x_8_arm (uint8_t * dest, const uint8_t * ref,
657 + int stride, int height)
659 + MC_avg_x_8_c(dest, ref, stride, height);
662 +static void MC_avg_y_8_arm (uint8_t * dest, const uint8_t * ref,
663 + int stride, int height)
665 + MC_avg_y_8_c(dest, ref, stride, height);
668 +static void MC_avg_xy_8_arm (uint8_t * dest, const uint8_t * ref,
669 + int stride, int height)
671 + MC_avg_xy_8_c(dest, ref, stride, height);
674 +MPEG2_MC_EXTERN (arm)
677 Index: libmpeg2/motion_comp_arm_s.S
678 ===================================================================
679 --- libmpeg2/motion_comp_arm_s.S (revision 0)
680 +++ libmpeg2/motion_comp_arm_s.S (revision 0)
682 +@ motion_comp_arm_s.S
683 +@ Copyright (C) 2004 AGAWA Koji <i (AT) atty (DOT) jp>
685 +@ This file is part of mpeg2dec, a free MPEG-2 video stream decoder.
686 +@ See http://libmpeg2.sourceforge.net/ for updates.
688 +@ mpeg2dec is free software; you can redistribute it and/or modify
689 +@ it under the terms of the GNU General Public License as published by
690 +@ the Free Software Foundation; either version 2 of the License, or
691 +@ (at your option) any later version.
693 +@ mpeg2dec is distributed in the hope that it will be useful,
694 +@ but WITHOUT ANY WARRANTY; without even the implied warranty of
695 +@ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
696 +@ GNU General Public License for more details.
698 +@ You should have received a copy of the GNU General Public License
699 +@ along with this program; if not, write to the Free Software
700 +@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
704 +@ ----------------------------------------------------------------
706 + .global MC_put_o_16_arm
708 + @@ void func(uint8_t * dest, const uint8_t * ref, int stride, int height)
710 + stmfd sp!, {r4-r11, lr} @ R14 is also called LR
712 + adr r5, MC_put_o_16_arm_align_jt
713 + add r5, r5, r4, lsl #2
716 +MC_put_o_16_arm_align0:
723 + bne MC_put_o_16_arm_align0
724 + ldmfd sp!, {r4-r11, pc} @@ update PC with LR content.
729 + mov r9, r4, lsr #(\shift)
731 + mov r10, r5, lsr #(\shift)
732 + orr r9, r9, r5, lsl #(32-\shift)
733 + mov r11, r6, lsr #(\shift)
734 + orr r10, r10, r6, lsl #(32-\shift)
735 + mov r12, r7, lsr #(\shift)
736 + orr r11, r11, r7, lsl #(32-\shift)
737 + orr r12, r12, r8, lsl #(32-\shift)
743 +MC_put_o_16_arm_align1:
744 + and r1, r1, #0xFFFFFFFC
747 + ldmfd sp!, {r4-r11, pc} @@ update PC with LR content.
748 +MC_put_o_16_arm_align2:
749 + and r1, r1, #0xFFFFFFFC
752 + ldmfd sp!, {r4-r11, pc} @@ update PC with LR content.
753 +MC_put_o_16_arm_align3:
754 + and r1, r1, #0xFFFFFFFC
757 + ldmfd sp!, {r4-r11, pc} @@ update PC with LR content.
758 +MC_put_o_16_arm_align_jt:
759 + .word MC_put_o_16_arm_align0
760 + .word MC_put_o_16_arm_align1
761 + .word MC_put_o_16_arm_align2
762 + .word MC_put_o_16_arm_align3
764 +@ ----------------------------------------------------------------
766 + .global MC_put_o_8_arm
768 + @@ void func(uint8_t * dest, const uint8_t * ref, int stride, int height)
770 + stmfd sp!, {r4-r10, lr} @ R14 is also called LR
772 + adr r5, MC_put_o_8_arm_align_jt
773 + add r5, r5, r4, lsl #2
775 +MC_put_o_8_arm_align0:
782 + bne MC_put_o_8_arm_align0
783 + ldmfd sp!, {r4-r10, pc} @@ update PC with LR content.
788 + mov r9, r4, lsr #(\shift)
790 + mov r10, r5, lsr #(\shift)
791 + orr r9, r9, r5, lsl #(32-\shift)
792 + orr r10, r10, r6, lsl #(32-\shift)
798 +MC_put_o_8_arm_align1:
799 + and r1, r1, #0xFFFFFFFC
802 + ldmfd sp!, {r4-r10, pc} @@ update PC with LR content.
804 +MC_put_o_8_arm_align2:
805 + and r1, r1, #0xFFFFFFFC
808 + ldmfd sp!, {r4-r10, pc} @@ update PC with LR content.
810 +MC_put_o_8_arm_align3:
811 + and r1, r1, #0xFFFFFFFC
814 + ldmfd sp!, {r4-r10, pc} @@ update PC with LR content.
816 +MC_put_o_8_arm_align_jt:
817 + .word MC_put_o_8_arm_align0
818 + .word MC_put_o_8_arm_align1
819 + .word MC_put_o_8_arm_align2
820 + .word MC_put_o_8_arm_align3
822 +@ ----------------------------------------------------------------
823 +.macro AVG_PW rW1, rW2
824 + mov \rW2, \rW2, lsl #24
825 + orr \rW2, \rW2, \rW1, lsr #8
827 + and \rW2, \rW1, \rW2
829 + add \rW2, \rW2, r10, lsr #1
831 + add \rW2, \rW2, r10
835 + .global MC_put_x_16_arm
837 + @@ void func(uint8_t * dest, const uint8_t * ref, int stride, int height)
839 + stmfd sp!, {r4-r11,lr} @ R14 is also called LR
841 + adr r5, MC_put_x_16_arm_align_jt
844 + add r5, r5, r4, lsl #2
847 +.macro ADJ_ALIGN_QW shift, R0, R1, R2, R3, R4
848 + mov \R0, \R0, lsr #(\shift)
849 + orr \R0, \R0, \R1, lsl #(32 - \shift)
850 + mov \R1, \R1, lsr #(\shift)
851 + orr \R1, \R1, \R2, lsl #(32 - \shift)
852 + mov \R2, \R2, lsr #(\shift)
853 + orr \R2, \R2, \R3, lsl #(32 - \shift)
854 + mov \R3, \R3, lsr #(\shift)
855 + orr \R3, \R3, \R4, lsl #(32 - \shift)
856 + mov \R4, \R4, lsr #(\shift)
857 +@ and \R4, \R4, #0xFF
860 +MC_put_x_16_arm_align0:
871 + bne MC_put_x_16_arm_align0
872 + ldmfd sp!, {r4-r11,pc} @@ update PC with LR content.
873 +MC_put_x_16_arm_align1:
874 + and r1, r1, #0xFFFFFFFC
875 +1: ldmia r1, {r4-r8}
878 + ADJ_ALIGN_QW 8, r4, r5, r6, r7, r8
887 + ldmfd sp!, {r4-r11,pc} @@ update PC with LR content.
888 +MC_put_x_16_arm_align2:
889 + and r1, r1, #0xFFFFFFFC
890 +1: ldmia r1, {r4-r8}
893 + ADJ_ALIGN_QW 16, r4, r5, r6, r7, r8
902 + ldmfd sp!, {r4-r11,pc} @@ update PC with LR content.
903 +MC_put_x_16_arm_align3:
904 + and r1, r1, #0xFFFFFFFC
905 +1: ldmia r1, {r4-r8}
908 + ADJ_ALIGN_QW 24, r4, r5, r6, r7, r8
917 + ldmfd sp!, {r4-r11,pc} @@ update PC with LR content.
918 +MC_put_x_16_arm_align_jt:
920 + .word MC_put_x_16_arm_align0
921 + .word MC_put_x_16_arm_align1
922 + .word MC_put_x_16_arm_align2
923 + .word MC_put_x_16_arm_align3
925 +@ ----------------------------------------------------------------
927 + .global MC_put_x_8_arm
929 + @@ void func(uint8_t * dest, const uint8_t * ref, int stride, int height)
931 + stmfd sp!, {r4-r11,lr} @ R14 is also called LR
933 + adr r5, MC_put_x_8_arm_align_jt
936 + add r5, r5, r4, lsl #2
939 +.macro ADJ_ALIGN_DW shift, R0, R1, R2
940 + mov \R0, \R0, lsr #(\shift)
941 + orr \R0, \R0, \R1, lsl #(32 - \shift)
942 + mov \R1, \R1, lsr #(\shift)
943 + orr \R1, \R1, \R2, lsl #(32 - \shift)
944 + mov \R2, \R2, lsr #(\shift)
945 +@ and \R4, \R4, #0xFF
948 +MC_put_x_8_arm_align0:
957 + bne MC_put_x_8_arm_align0
958 + ldmfd sp!, {r4-r11,pc} @@ update PC with LR content.
959 +MC_put_x_8_arm_align1:
960 + and r1, r1, #0xFFFFFFFC
961 +1: ldmia r1, {r4-r6}
964 + ADJ_ALIGN_DW 8, r4, r5, r6
971 + ldmfd sp!, {r4-r11,pc} @@ update PC with LR content.
972 +MC_put_x_8_arm_align2:
973 + and r1, r1, #0xFFFFFFFC
974 +1: ldmia r1, {r4-r6}
977 + ADJ_ALIGN_DW 16, r4, r5, r6
984 + ldmfd sp!, {r4-r11,pc} @@ update PC with LR content.
985 +MC_put_x_8_arm_align3:
986 + and r1, r1, #0xFFFFFFFC
987 +1: ldmia r1, {r4-r6}
990 + ADJ_ALIGN_DW 24, r4, r5, r6
997 + ldmfd sp!, {r4-r11,pc} @@ update PC with LR content.
998 +MC_put_x_8_arm_align_jt:
1000 + .word MC_put_x_8_arm_align0
1001 + .word MC_put_x_8_arm_align1
1002 + .word MC_put_x_8_arm_align2
1003 + .word MC_put_x_8_arm_align3
1004 Index: libmpeg2/motion_comp_iwmmxt.c
1005 ===================================================================
1006 --- libmpeg2/motion_comp_iwmmxt.c (revision 0)
1007 +++ libmpeg2/motion_comp_iwmmxt.c (revision 0)
1010 + * motion_comp_iwmmxt.c
1011 + * Copyright (C) 2004 AGAWA Koji <i (AT) atty (DOT) jp>
1013 + * This file is part of mpeg2dec, a free MPEG-2 video stream decoder.
1014 + * See http://libmpeg2.sourceforge.net/ for updates.
1016 + * mpeg2dec is free software; you can redistribute it and/or modify
1017 + * it under the terms of the GNU General Public License as published by
1018 + * the Free Software Foundation; either version 2 of the License, or
1019 + * (at your option) any later version.
1021 + * mpeg2dec is distributed in the hope that it will be useful,
1022 + * but WITHOUT ANY WARRANTY; without even the implied warranty of
1023 + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
1024 + * GNU General Public License for more details.
1026 + * You should have received a copy of the GNU General Public License
1027 + * along with this program; if not, write to the Free Software
1028 + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
1031 +#include "config.h"
1036 +#include <inttypes.h>
1039 +#include "attributes.h"
1040 +#include "mpeg2_internal.h"
1042 +/* defined in libavcodec */
1044 +extern void put_pixels16_iwmmxt(uint8_t * dest, const uint8_t * ref, const int stride, int height);
1045 +extern void put_pixels16_x2_iwmmxt(uint8_t * dest, const uint8_t * ref, const int stride, int height);
1046 +extern void put_pixels16_y2_iwmmxt(uint8_t * dest, const uint8_t * ref, const int stride, int height);
1047 +extern void put_pixels16_xy2_iwmmxt(uint8_t * dest, const uint8_t * ref, const int stride, int height);
1048 +extern void put_pixels8_iwmmxt(uint8_t * dest, const uint8_t * ref, const int stride, int height);
1049 +extern void put_pixels8_x2_iwmmxt(uint8_t * dest, const uint8_t * ref, const int stride, int height);
1050 +extern void put_pixels8_y2_iwmmxt(uint8_t * dest, const uint8_t * ref, const int stride, int height);
1051 +extern void put_pixels8_xy2_iwmmxt(uint8_t * dest, const uint8_t * ref, const int stride, int height);
1052 +extern void avg_pixels16_iwmmxt(uint8_t * dest, const uint8_t * ref, const int stride, int height);
1053 +extern void avg_pixels16_x2_iwmmxt(uint8_t * dest, const uint8_t * ref, const int stride, int height);
1054 +extern void avg_pixels16_y2_iwmmxt(uint8_t * dest, const uint8_t * ref, const int stride, int height);
1055 +extern void avg_pixels16_xy2_iwmmxt(uint8_t * dest, const uint8_t * ref, const int stride, int height);
1056 +extern void avg_pixels8_iwmmxt(uint8_t * dest, const uint8_t * ref, const int stride, int height);
1057 +extern void avg_pixels8_x2_iwmmxt(uint8_t * dest, const uint8_t * ref, const int stride, int height);
1058 +extern void avg_pixels8_y2_iwmmxt(uint8_t * dest, const uint8_t * ref, const int stride, int height);
1059 +extern void avg_pixels8_xy2_iwmmxt(uint8_t * dest, const uint8_t * ref, const int stride, int height);
1061 +mpeg2_mc_t mpeg2_mc_iwmmxt = {
1062 + {put_pixels16_iwmmxt, put_pixels16_x2_iwmmxt, put_pixels16_y2_iwmmxt, put_pixels16_xy2_iwmmxt,
1063 + put_pixels8_iwmmxt, put_pixels8_x2_iwmmxt, put_pixels8_y2_iwmmxt, put_pixels8_xy2_iwmmxt}, \
1064 + {avg_pixels16_iwmmxt, avg_pixels16_x2_iwmmxt, avg_pixels16_y2_iwmmxt, avg_pixels16_xy2_iwmmxt,
1065 + avg_pixels8_iwmmxt, avg_pixels8_x2_iwmmxt, avg_pixels8_y2_iwmmxt, avg_pixels8_xy2_iwmmxt}, \