sync mga_vid.h to revision 265 from the mga_vid repo
[mplayer/glamo.git] / libmpeg2 / libmpeg-0.4.1.diff
blob b54b28a95290fb0fe380b3eace9b9b8312969e8d
1 --- libmpeg2/cpu_accel.c 2006-06-16 20:12:26.000000000 +0200
2 +++ libmpeg2/cpu_accel.c 2006-06-16 20:12:50.000000000 +0200
3 @@ -22,6 +26,7 @@
4 */
6 #include "config.h"
7 +#include "cpudetect.h"
9 #include <inttypes.h>
11 @@ -30,9 +35,17 @@
12 #include "mpeg2_internal.h"
14 #ifdef ACCEL_DETECT
15 -#ifdef ARCH_X86
16 +#if defined(ARCH_X86) || defined(ARCH_X86_64)
18 +/* MPlayer imports libmpeg2 as a decoder. libmpeg2 detects MMX / 3DNow!
19 + * support with its own assembly code, which duplicates MPlayer's CPU
20 + * detection, so we force the use of MPlayer's implementation instead.
21 + */
22 +#define USE_MPLAYER_CPUDETECT
24 static inline uint32_t arch_accel (void)
26 +#if !defined(USE_MPLAYER_CPUDETECT)
27 uint32_t eax, ebx, ecx, edx;
28 int AMD;
29 uint32_t caps;
30 @@ -105,7 +120,21 @@
31 caps |= MPEG2_ACCEL_X86_MMXEXT;
33 return caps;
34 +#else /* USE_MPLAYER_CPUDETECT: Use MPlayer's CPU capability property. */
35 + caps = 0;
36 + if (gCpuCaps.hasMMX)
37 + caps |= MPEG2_ACCEL_X86_MMX;
38 + if (gCpuCaps.hasSSE2)
39 + caps |= MPEG2_ACCEL_X86_SSE2;
40 + if (gCpuCaps.hasMMX2)
41 + caps |= MPEG2_ACCEL_X86_MMXEXT;
42 + if (gCpuCaps.has3DNow)
43 + caps |= MPEG2_ACCEL_X86_3DNOW;
45 + return caps;
47 +#endif /* USE_MPLAYER_CPUDETECT */
49 -#endif /* ARCH_X86 */
50 +#endif /* ARCH_X86 || ARCH_X86_64 */
52 #if defined(ARCH_PPC) || defined(ARCH_SPARC)
53 @@ -166,10 +168,10 @@
55 canjump = 1;
57 -#ifdef HAVE_ALTIVEC_H /* gnu */
58 -#define VAND(a,b,c) "vand " #a "," #b "," #c "\n\t"
59 -#else /* apple */
60 +#if defined(__APPLE_CC__) /* apple */
61 #define VAND(a,b,c) "vand v" #a ",v" #b ",v" #c "\n\t"
62 +#else /* gnu */
63 +#define VAND(a,b,c) "vand " #a "," #b "," #c "\n\t"
64 #endif
65 asm volatile ("mtspr 256, %0\n\t"
66 VAND (0, 0, 0)
67 @@ -212,7 +241,7 @@
69 accel = 0;
70 #ifdef ACCEL_DETECT
71 -#if defined (ARCH_X86) || defined (ARCH_PPC) || defined (ARCH_ALPHA) || defined (ARCH_SPARC)
72 +#if defined (ARCH_X86) || defined (ARCH_X86_64) || defined (ARCH_PPC) || defined (ARCH_ALPHA) || defined (ARCH_SPARC)
73 accel = arch_accel ();
74 #endif
75 #endif
76 --- libmpeg2/cpu_state.c 2006-06-16 20:12:26.000000000 +0200
77 +++ libmpeg2/cpu_state.c 2006-06-16 20:12:50.000000000 +0200
78 @@ -29,14 +33,14 @@
79 #include "mpeg2.h"
80 #include "attributes.h"
81 #include "mpeg2_internal.h"
82 -#ifdef ARCH_X86
83 +#if defined(ARCH_X86) || defined(ARCH_X86_64)
84 #include "mmx.h"
85 #endif
87 void (* mpeg2_cpu_state_save) (cpu_state_t * state) = NULL;
88 void (* mpeg2_cpu_state_restore) (cpu_state_t * state) = NULL;
90 -#ifdef ARCH_X86
91 +#if defined(ARCH_X86) || defined(ARCH_X86_64)
92 static void state_restore_mmx (cpu_state_t * state)
94 emms ();
95 @@ -48,18 +48,18 @@
96 #endif
98 #ifdef ARCH_PPC
99 -#ifdef HAVE_ALTIVEC_H /* gnu */
100 -#define LI(a,b) "li " #a "," #b "\n\t"
101 -#define STVX0(a,b,c) "stvx " #a ",0," #c "\n\t"
102 -#define STVX(a,b,c) "stvx " #a "," #b "," #c "\n\t"
103 -#define LVX0(a,b,c) "lvx " #a ",0," #c "\n\t"
104 -#define LVX(a,b,c) "lvx " #a "," #b "," #c "\n\t"
105 -#else /* apple */
106 +#if defined(__APPLE_CC__) /* apple */
107 #define LI(a,b) "li r" #a "," #b "\n\t"
108 #define STVX0(a,b,c) "stvx v" #a ",0,r" #c "\n\t"
109 #define STVX(a,b,c) "stvx v" #a ",r" #b ",r" #c "\n\t"
110 #define LVX0(a,b,c) "lvx v" #a ",0,r" #c "\n\t"
111 #define LVX(a,b,c) "lvx v" #a ",r" #b ",r" #c "\n\t"
112 +#else /* gnu */
113 +#define LI(a,b) "li " #a "," #b "\n\t"
114 +#define STVX0(a,b,c) "stvx " #a ",0," #c "\n\t"
115 +#define STVX(a,b,c) "stvx " #a "," #b "," #c "\n\t"
116 +#define LVX0(a,b,c) "lvx " #a ",0," #c "\n\t"
117 +#define LVX(a,b,c) "lvx " #a "," #b "," #c "\n\t"
118 #endif
120 static void state_save_altivec (cpu_state_t * state)
121 @@ -115,9 +119,9 @@
123 void mpeg2_cpu_state_init (uint32_t accel)
125 -#ifdef ARCH_X86
126 +#if defined(ARCH_X86) || defined(ARCH_X86_64)
127 if (accel & MPEG2_ACCEL_X86_MMX) {
128 mpeg2_cpu_state_restore = state_restore_mmx;
130 #endif
131 #ifdef ARCH_PPC
132 --- libmpeg2/decode.c 2006-06-16 20:12:26.000000000 +0200
133 +++ libmpeg2/decode.c 2006-06-16 20:12:50.000000000 +0200
134 @@ -351,6 +355,15 @@
135 fbuf->buf[1] = buf[1];
136 fbuf->buf[2] = buf[2];
137 fbuf->id = id;
138 + // HACK! FIXME! On the first I-frame, copy the buffer pointers to the prediction frame too!
139 + if (mpeg2dec->custom_fbuf && !mpeg2dec->fbuf[1]->buf[0]){
140 + mpeg2dec->fbuf[1]->buf[0]=buf[0];
141 + mpeg2dec->fbuf[1]->buf[1]=buf[1];
142 + mpeg2dec->fbuf[1]->buf[2]=buf[2];
143 + mpeg2dec->fbuf[1]->id=NULL;
145 +// printf("libmpeg2: FBUF 0:%p 1:%p 2:%p\n",
146 +// mpeg2dec->fbuf[0]->buf[0],mpeg2dec->fbuf[1]->buf[0],mpeg2dec->fbuf[2]->buf[0]);
149 void mpeg2_custom_fbuf (mpeg2dec_t * mpeg2dec, int custom_fbuf)
150 --- libmpeg2/header.c 2006-06-16 20:12:26.000000000 +0200
151 +++ libmpeg2/header.c 2006-06-16 20:12:50.000000000 +0200
152 @@ -100,6 +104,9 @@
153 mpeg2dec->decoder.convert = NULL;
154 mpeg2dec->decoder.convert_id = NULL;
155 mpeg2dec->picture = mpeg2dec->pictures;
156 + memset(&mpeg2dec->fbuf_alloc[0].fbuf, 0, sizeof(mpeg2_fbuf_t));
157 + memset(&mpeg2dec->fbuf_alloc[1].fbuf, 0, sizeof(mpeg2_fbuf_t));
158 + memset(&mpeg2dec->fbuf_alloc[2].fbuf, 0, sizeof(mpeg2_fbuf_t));
159 mpeg2dec->fbuf[0] = &mpeg2dec->fbuf_alloc[0].fbuf;
160 mpeg2dec->fbuf[1] = &mpeg2dec->fbuf_alloc[1].fbuf;
161 mpeg2dec->fbuf[2] = &mpeg2dec->fbuf_alloc[2].fbuf;
162 @@ -551,6 +558,7 @@
163 if (!(mpeg2dec->sequence.flags & SEQ_FLAG_PROGRESSIVE_SEQUENCE)) {
164 picture->nb_fields = (buffer[3] & 2) ? 3 : 2;
165 flags |= (buffer[3] & 128) ? PIC_FLAG_TOP_FIELD_FIRST : 0;
166 + flags |= (buffer[3] & 2) ? PIC_FLAG_REPEAT_FIRST_FIELD : 0;
167 } else
168 picture->nb_fields = (buffer[3]&2) ? ((buffer[3]&128) ? 6 : 4) : 2;
169 break;
170 @@ -799,6 +807,7 @@
171 mpeg2dec->scaled[index] = mpeg2dec->q_scale_type;
172 for (i = 0; i < 32; i++) {
173 k = mpeg2dec->q_scale_type ? non_linear_scale[i] : (i << 1);
174 + decoder->quantizer_scales[i] = k;
175 for (j = 0; j < 64; j++)
176 decoder->quantizer_prescale[index][i][j] =
177 k * mpeg2dec->quantizer_matrix[index][j];
178 --- libmpeg2/idct.c (revision 26652)
179 +++ libmpeg2/idct.c (working copy)
180 @@ -250,7 +254,7 @@
181 mpeg2_idct_mmx_init ();
182 } else
183 #endif
184 -#ifdef ARCH_PPC
185 +#ifdef HAVE_ALTIVEC
186 if (accel & MPEG2_ACCEL_PPC_ALTIVEC) {
187 mpeg2_idct_copy = mpeg2_idct_copy_altivec;
188 mpeg2_idct_add = mpeg2_idct_add_altivec;
189 --- libmpeg2/idct_mmx.c 2006-06-16 20:12:26.000000000 +0200
190 +++ libmpeg2/idct_mmx.c 2006-06-16 20:12:50.000000000 +0200
191 @@ -23,7 +27,7 @@
193 #include "config.h"
195 -#ifdef ARCH_X86
196 +#if defined(ARCH_X86) || defined(ARCH_X86_64)
198 #include <inttypes.h>
200 --- libmpeg2/motion_comp.c 2006-06-16 20:12:26.000000000 +0200
201 +++ libmpeg2/motion_comp.c 2006-06-16 20:12:50.000000000 +0200
202 @@ -46,7 +46,7 @@
203 mpeg2_mc = mpeg2_mc_mmx;
204 else
205 #endif
206 -#ifdef ARCH_PPC
207 +#ifdef HAVE_ALTIVEC
208 if (accel & MPEG2_ACCEL_PPC_ALTIVEC)
209 mpeg2_mc = mpeg2_mc_altivec;
210 else
211 @@ -67,6 +61,13 @@
212 mpeg2_mc = mpeg2_mc_vis;
213 else
214 #endif
215 +#ifdef ARCH_ARM
216 + if (accel & MPEG2_ACCEL_ARM_IWMMXT)
217 + mpeg2_mc = mpeg2_mc_iwmmxt;
218 + else if (accel & MPEG2_ACCEL_ARM)
219 + mpeg2_mc = mpeg2_mc_arm;
220 + else
221 +#endif
222 mpeg2_mc = mpeg2_mc_c;
225 --- libmpeg2/motion_comp_mmx.c 2006-06-16 20:12:26.000000000 +0200
226 +++ libmpeg2/motion_comp_mmx.c 2006-06-16 20:12:50.000000000 +0200
227 @@ -23,7 +27,7 @@
229 #include "config.h"
231 -#ifdef ARCH_X86
232 +#if defined(ARCH_X86) || defined(ARCH_X86_64)
234 #include <inttypes.h>
236 --- include/mpeg2.h 2006-06-16 20:12:26.000000000 +0200
237 +++ libmpeg2/mpeg2.h 2006-06-16 20:12:50.000000000 +0200
238 @@ -82,6 +86,7 @@
239 #define PIC_FLAG_COMPOSITE_DISPLAY 32
240 #define PIC_FLAG_SKIP 64
241 #define PIC_FLAG_TAGS 128
242 +#define PIC_FLAG_REPEAT_FIRST_FIELD 256
243 #define PIC_MASK_COMPOSITE_DISPLAY 0xfffff000
245 typedef struct mpeg2_picture_s {
246 @@ -156,10 +160,13 @@
247 #define MPEG2_ACCEL_X86_3DNOW 2
248 #define MPEG2_ACCEL_X86_MMXEXT 4
249 +#define MPEG2_ACCEL_X86_SSE2 8
250 #define MPEG2_ACCEL_PPC_ALTIVEC 1
251 #define MPEG2_ACCEL_ALPHA 1
252 #define MPEG2_ACCEL_ALPHA_MVI 2
253 #define MPEG2_ACCEL_SPARC_VIS 1
254 #define MPEG2_ACCEL_SPARC_VIS2 2
255 +#define MPEG2_ACCEL_ARM 1
256 +#define MPEG2_ACCEL_ARM_IWMMXT 2
257 #define MPEG2_ACCEL_DETECT 0x80000000
259 uint32_t mpeg2_accel (uint32_t accel);
260 --- libmpeg2/mpeg2_internal.h 2006-06-16 20:12:26.000000000 +0200
261 +++ libmpeg2/mpeg2_internal.h 2006-06-16 20:12:50.000000000 +0200
262 @@ -144,6 +148,11 @@
263 int second_field;
265 int mpeg1;
267 + int quantizer_scales[32];
268 + int quantizer_scale;
269 + char* quant_store;
270 + int quant_stride;
273 typedef struct {
274 @@ -214,6 +224,9 @@
275 int8_t q_scale_type, scaled[4];
276 uint8_t quantizer_matrix[4][64];
277 uint8_t new_quantizer_matrix[4][64];
279 + unsigned char *pending_buffer;
280 + int pending_length;
283 typedef struct {
284 @@ -312,3 +312,5 @@
285 extern mpeg2_mc_t mpeg2_mc_altivec;
286 extern mpeg2_mc_t mpeg2_mc_alpha;
287 extern mpeg2_mc_t mpeg2_mc_vis;
288 +extern mpeg2_mc_t mpeg2_mc_arm;
289 +extern mpeg2_mc_t mpeg2_mc_iwmmxt;
290 --- libmpeg2/slice.c 2006-06-16 20:12:26.000000000 +0200
291 +++ libmpeg2/slice.c 2006-06-16 20:12:50.000000000 +0200
292 @@ -142,6 +146,7 @@
294 quantizer_scale_code = UBITS (bit_buf, 5);
295 DUMPBITS (bit_buf, bits, 5);
296 + decoder->quantizer_scale = decoder->quantizer_scales[quantizer_scale_code];
298 decoder->quantizer_matrix[0] =
299 decoder->quantizer_prescale[0][quantizer_scale_code];
300 @@ -1568,6 +1569,18 @@
302 #define NEXT_MACROBLOCK \
303 do { \
304 + if(decoder->quant_store) { \
305 + if (decoder->picture_structure == TOP_FIELD) \
306 + decoder->quant_store[2*decoder->quant_stride*(decoder->v_offset>>4) \
307 + +(decoder->offset>>4)] = decoder->quantizer_scale; \
308 + else if (decoder->picture_structure == BOTTOM_FIELD) \
309 + decoder->quant_store[2*decoder->quant_stride*(decoder->v_offset>>4) \
310 + + decoder->quant_stride \
311 + +(decoder->offset>>4)] = decoder->quantizer_scale; \
312 + else \
313 + decoder->quant_store[decoder->quant_stride*(decoder->v_offset>>4) \
314 + +(decoder->offset>>4)] = decoder->quantizer_scale; \
315 + } \
316 decoder->offset += 16; \
317 if (decoder->offset == decoder->width) { \
318 do { /* just so we can use the break statement */ \
319 @@ -1604,6 +1604,12 @@
321 } while (0)
323 +static void motion_dummy (mpeg2_decoder_t * const decoder,
324 + motion_t * const motion,
325 + mpeg2_mc_fct * const * const table)
329 void mpeg2_init_fbuf (mpeg2_decoder_t * decoder, uint8_t * current_fbuf[3],
330 uint8_t * forward_fbuf[3], uint8_t * backward_fbuf[3])
332 @@ -1661,7 +1667,9 @@
334 if (decoder->mpeg1) {
335 decoder->motion_parser[0] = motion_zero_420;
336 + decoder->motion_parser[MC_FIELD] = motion_dummy;
337 decoder->motion_parser[MC_FRAME] = motion_mp1;
338 + decoder->motion_parser[MC_DMV] = motion_dummy;
339 decoder->motion_parser[4] = motion_reuse_420;
340 } else if (decoder->picture_structure == FRAME_PICTURE) {
341 if (decoder->chroma_format == 0) {
342 --- libmpeg2/idct_altivec.c 2004/08/02 11:26:43 12933
343 +++ libmpeg2/idct_altivec.c 2005/05/15 20:11:34 15484
344 @@ -41,7 +41,7 @@
345 typedef vector signed int vector_s32_t;
346 typedef vector unsigned int vector_u32_t;
348 -#if defined(HAVE_ALTIVEC_H) && (__GNUC__ * 100 + __GNUC_MINOR__ < 303)
349 +#if defined(HAVE_ALTIVEC_H) && !defined(__APPLE_CC__) && (__GNUC__ * 100 + __GNUC_MINOR__ < 303)
350 /* work around gcc <3.3 vec_mergel bug */
351 static inline vector_s16_t my_vec_mergel (vector_s16_t const A,
352 vector_s16_t const B)
353 @@ -56,10 +56,10 @@
354 #define vec_mergel my_vec_mergel
355 #endif
357 -#ifdef HAVE_ALTIVEC_H /* gnu */
358 -#define VEC_S16(a,b,c,d,e,f,g,h) {a, b, c, d, e, f, g, h}
359 -#else /* apple */
360 +#if defined(__APPLE_CC__) /* apple */
361 #define VEC_S16(a,b,c,d,e,f,g,h) (vector_s16_t) (a, b, c, d, e, f, g, h)
362 +#else /* gnu */
363 +#define VEC_S16(a,b,c,d,e,f,g,h) {a, b, c, d, e, f, g, h}
364 #endif
366 static const vector_s16_t constants ATTR_ALIGN(16) =
367 Index: libmpeg2/motion_comp_arm.c
368 ===================================================================
369 --- libmpeg2/motion_comp_arm.c (revision 0)
370 +++ libmpeg2/motion_comp_arm.c (revision 0)
371 @@ -0,0 +1,187 @@
373 + * motion_comp_arm.c
374 + * Copyright (C) 2004 AGAWA Koji <i (AT) atty (DOT) jp>
376 + * This file is part of mpeg2dec, a free MPEG-2 video stream decoder.
377 + * See http://libmpeg2.sourceforge.net/ for updates.
379 + * mpeg2dec is free software; you can redistribute it and/or modify
380 + * it under the terms of the GNU General Public License as published by
381 + * the Free Software Foundation; either version 2 of the License, or
382 + * (at your option) any later version.
384 + * mpeg2dec is distributed in the hope that it will be useful,
385 + * but WITHOUT ANY WARRANTY; without even the implied warranty of
386 + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
387 + * GNU General Public License for more details.
389 + * You should have received a copy of the GNU General Public License
390 + * along with this program; if not, write to the Free Software
391 + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
392 + */
394 +#include "config.h"
396 +#ifdef ARCH_ARM
398 +#include <inttypes.h>
400 +#include "mpeg2.h"
401 +#include "attributes.h"
402 +#include "mpeg2_internal.h"
404 +#define avg2(a,b) ((a+b+1)>>1)
405 +#define avg4(a,b,c,d) ((a+b+c+d+2)>>2)
407 +#define predict_o(i) (ref[i])
408 +#define predict_x(i) (avg2 (ref[i], ref[i+1]))
409 +#define predict_y(i) (avg2 (ref[i], (ref+stride)[i]))
410 +#define predict_xy(i) (avg4 (ref[i], ref[i+1], \
411 + (ref+stride)[i], (ref+stride)[i+1]))
413 +#define put(predictor,i) dest[i] = predictor (i)
414 +#define avg(predictor,i) dest[i] = avg2 (predictor (i), dest[i])
416 +/* mc function template */
418 +#define MC_FUNC(op,xy) \
419 +static void inline MC_##op##_##xy##_16_c (uint8_t * dest, const uint8_t * ref, \
420 + const int stride, int height) \
421 +{ \
422 + do { \
423 + op (predict_##xy, 0); \
424 + op (predict_##xy, 1); \
425 + op (predict_##xy, 2); \
426 + op (predict_##xy, 3); \
427 + op (predict_##xy, 4); \
428 + op (predict_##xy, 5); \
429 + op (predict_##xy, 6); \
430 + op (predict_##xy, 7); \
431 + op (predict_##xy, 8); \
432 + op (predict_##xy, 9); \
433 + op (predict_##xy, 10); \
434 + op (predict_##xy, 11); \
435 + op (predict_##xy, 12); \
436 + op (predict_##xy, 13); \
437 + op (predict_##xy, 14); \
438 + op (predict_##xy, 15); \
439 + ref += stride; \
440 + dest += stride; \
441 + } while (--height); \
442 +} \
443 +static void MC_##op##_##xy##_8_c (uint8_t * dest, const uint8_t * ref, \
444 + const int stride, int height) \
445 +{ \
446 + do { \
447 + op (predict_##xy, 0); \
448 + op (predict_##xy, 1); \
449 + op (predict_##xy, 2); \
450 + op (predict_##xy, 3); \
451 + op (predict_##xy, 4); \
452 + op (predict_##xy, 5); \
453 + op (predict_##xy, 6); \
454 + op (predict_##xy, 7); \
455 + ref += stride; \
456 + dest += stride; \
457 + } while (--height); \
458 +} \
459 +/* definitions of the actual mc functions */
461 +MC_FUNC (put,o)
462 +MC_FUNC (avg,o)
463 +MC_FUNC (put,x)
464 +MC_FUNC (avg,x)
465 +MC_FUNC (put,y)
466 +MC_FUNC (avg,y)
467 +MC_FUNC (put,xy)
468 +MC_FUNC (avg,xy)
471 +extern void MC_put_o_16_arm (uint8_t * dest, const uint8_t * ref,
472 + int stride, int height);
474 +extern void MC_put_x_16_arm (uint8_t * dest, const uint8_t * ref,
475 + int stride, int height);
478 +static void MC_put_y_16_arm (uint8_t * dest, const uint8_t * ref,
479 + int stride, int height)
481 + MC_put_y_16_c(dest, ref, stride, height);
484 +static void MC_put_xy_16_arm (uint8_t * dest, const uint8_t * ref,
485 + int stride, int height)
487 + MC_put_xy_16_c(dest, ref, stride, height);
490 +extern void MC_put_o_8_arm (uint8_t * dest, const uint8_t * ref,
491 + int stride, int height);
493 +extern void MC_put_x_8_arm (uint8_t * dest, const uint8_t * ref,
494 + int stride, int height);
496 +static void MC_put_y_8_arm (uint8_t * dest, const uint8_t * ref,
497 + int stride, int height)
499 + MC_put_y_8_c(dest, ref, stride, height);
502 +static void MC_put_xy_8_arm (uint8_t * dest, const uint8_t * ref,
503 + int stride, int height)
505 + MC_put_xy_8_c(dest, ref, stride, height);
508 +static void MC_avg_o_16_arm (uint8_t * dest, const uint8_t * ref,
509 + int stride, int height)
511 + MC_avg_o_16_c(dest, ref, stride, height);
514 +static void MC_avg_x_16_arm (uint8_t * dest, const uint8_t * ref,
515 + int stride, int height)
517 + MC_avg_x_16_c(dest, ref, stride, height);
520 +static void MC_avg_y_16_arm (uint8_t * dest, const uint8_t * ref,
521 + int stride, int height)
523 + MC_avg_y_16_c(dest, ref, stride, height);
526 +static void MC_avg_xy_16_arm (uint8_t * dest, const uint8_t * ref,
527 + int stride, int height)
529 + MC_avg_xy_16_c(dest, ref, stride, height);
532 +static void MC_avg_o_8_arm (uint8_t * dest, const uint8_t * ref,
533 + int stride, int height)
535 + MC_avg_o_8_c(dest, ref, stride, height);
538 +static void MC_avg_x_8_arm (uint8_t * dest, const uint8_t * ref,
539 + int stride, int height)
541 + MC_avg_x_8_c(dest, ref, stride, height);
544 +static void MC_avg_y_8_arm (uint8_t * dest, const uint8_t * ref,
545 + int stride, int height)
547 + MC_avg_y_8_c(dest, ref, stride, height);
550 +static void MC_avg_xy_8_arm (uint8_t * dest, const uint8_t * ref,
551 + int stride, int height)
553 + MC_avg_xy_8_c(dest, ref, stride, height);
556 +MPEG2_MC_EXTERN (arm)
558 +#endif
559 Index: libmpeg2/motion_comp_arm_s.S
560 ===================================================================
561 --- libmpeg2/motion_comp_arm_s.S (revision 0)
562 +++ libmpeg2/motion_comp_arm_s.S (revision 0)
563 @@ -0,0 +1,322 @@
564 +@ motion_comp_arm_s.S
565 +@ Copyright (C) 2004 AGAWA Koji <i (AT) atty (DOT) jp>
567 +@ This file is part of mpeg2dec, a free MPEG-2 video stream decoder.
568 +@ See http://libmpeg2.sourceforge.net/ for updates.
570 +@ mpeg2dec is free software; you can redistribute it and/or modify
571 +@ it under the terms of the GNU General Public License as published by
572 +@ the Free Software Foundation; either version 2 of the License, or
573 +@ (at your option) any later version.
575 +@ mpeg2dec is distributed in the hope that it will be useful,
576 +@ but WITHOUT ANY WARRANTY; without even the implied warranty of
577 +@ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
578 +@ GNU General Public License for more details.
580 +@ You should have received a copy of the GNU General Public License
581 +@ along with this program; if not, write to the Free Software
582 +@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
584 + .text
586 +@ ----------------------------------------------------------------
587 + .align
588 + .global MC_put_o_16_arm
589 +MC_put_o_16_arm:
590 + @@ void func(uint8_t * dest, const uint8_t * ref, int stride, int height)
591 + pld [r1]
592 + stmfd sp!, {r4-r11, lr} @ R14 is also called LR
593 + and r4, r1, #3
594 + adr r5, MC_put_o_16_arm_align_jt
595 + add r5, r5, r4, lsl #2
596 + ldr pc, [r5]
598 +MC_put_o_16_arm_align0:
599 + ldmia r1, {r4-r7}
600 + add r1, r1, r2
601 + pld [r1]
602 + stmia r0, {r4-r7}
603 + subs r3, r3, #1
604 + add r0, r0, r2
605 + bne MC_put_o_16_arm_align0
606 + ldmfd sp!, {r4-r11, pc} @@ update PC with LR content.
608 +.macro PROC shift
609 + ldmia r1, {r4-r8}
610 + add r1, r1, r2
611 + mov r9, r4, lsr #(\shift)
612 + pld [r1]
613 + mov r10, r5, lsr #(\shift)
614 + orr r9, r9, r5, lsl #(32-\shift)
615 + mov r11, r6, lsr #(\shift)
616 + orr r10, r10, r6, lsl #(32-\shift)
617 + mov r12, r7, lsr #(\shift)
618 + orr r11, r11, r7, lsl #(32-\shift)
619 + orr r12, r12, r8, lsl #(32-\shift)
620 + stmia r0, {r9-r12}
621 + subs r3, r3, #1
622 + add r0, r0, r2
623 +.endm
625 +MC_put_o_16_arm_align1:
626 + and r1, r1, #0xFFFFFFFC
627 +1: PROC(8)
628 + bne 1b
629 + ldmfd sp!, {r4-r11, pc} @@ update PC with LR content.
630 +MC_put_o_16_arm_align2:
631 + and r1, r1, #0xFFFFFFFC
632 +1: PROC(16)
633 + bne 1b
634 + ldmfd sp!, {r4-r11, pc} @@ update PC with LR content.
635 +MC_put_o_16_arm_align3:
636 + and r1, r1, #0xFFFFFFFC
637 +1: PROC(24)
638 + bne 1b
639 + ldmfd sp!, {r4-r11, pc} @@ update PC with LR content.
640 +MC_put_o_16_arm_align_jt:
641 + .word MC_put_o_16_arm_align0
642 + .word MC_put_o_16_arm_align1
643 + .word MC_put_o_16_arm_align2
644 + .word MC_put_o_16_arm_align3
646 +@ ----------------------------------------------------------------
647 + .align
648 + .global MC_put_o_8_arm
649 +MC_put_o_8_arm:
650 + @@ void func(uint8_t * dest, const uint8_t * ref, int stride, int height)
651 + pld [r1]
652 + stmfd sp!, {r4-r10, lr} @ R14 is also called LR
653 + and r4, r1, #3
654 + adr r5, MC_put_o_8_arm_align_jt
655 + add r5, r5, r4, lsl #2
656 + ldr pc, [r5]
657 +MC_put_o_8_arm_align0:
658 + ldmia r1, {r4-r5}
659 + add r1, r1, r2
660 + pld [r1]
661 + stmia r0, {r4-r5}
662 + add r0, r0, r2
663 + subs r3, r3, #1
664 + bne MC_put_o_8_arm_align0
665 + ldmfd sp!, {r4-r10, pc} @@ update PC with LR content.
667 +.macro PROC8 shift
668 + ldmia r1, {r4-r6}
669 + add r1, r1, r2
670 + mov r9, r4, lsr #(\shift)
671 + pld [r1]
672 + mov r10, r5, lsr #(\shift)
673 + orr r9, r9, r5, lsl #(32-\shift)
674 + orr r10, r10, r6, lsl #(32-\shift)
675 + stmia r0, {r9-r10}
676 + subs r3, r3, #1
677 + add r0, r0, r2
678 +.endm
680 +MC_put_o_8_arm_align1:
681 + and r1, r1, #0xFFFFFFFC
682 +1: PROC8(8)
683 + bne 1b
684 + ldmfd sp!, {r4-r10, pc} @@ update PC with LR content.
686 +MC_put_o_8_arm_align2:
687 + and r1, r1, #0xFFFFFFFC
688 +1: PROC8(16)
689 + bne 1b
690 + ldmfd sp!, {r4-r10, pc} @@ update PC with LR content.
692 +MC_put_o_8_arm_align3:
693 + and r1, r1, #0xFFFFFFFC
694 +1: PROC8(24)
695 + bne 1b
696 + ldmfd sp!, {r4-r10, pc} @@ update PC with LR content.
698 +MC_put_o_8_arm_align_jt:
699 + .word MC_put_o_8_arm_align0
700 + .word MC_put_o_8_arm_align1
701 + .word MC_put_o_8_arm_align2
702 + .word MC_put_o_8_arm_align3
704 +@ ----------------------------------------------------------------
705 +.macro AVG_PW rW1, rW2
706 + mov \rW2, \rW2, lsl #24
707 + orr \rW2, \rW2, \rW1, lsr #8
708 + eor r9, \rW1, \rW2
709 + and \rW2, \rW1, \rW2
710 + and r10, r9, r12
711 + add \rW2, \rW2, r10, lsr #1
712 + and r10, r9, r11
713 + add \rW2, \rW2, r10
714 +.endm
716 + .align
717 + .global MC_put_x_16_arm
718 +MC_put_x_16_arm:
719 + @@ void func(uint8_t * dest, const uint8_t * ref, int stride, int height)
720 + pld [r1]
721 + stmfd sp!, {r4-r11,lr} @ R14 is also called LR
722 + and r4, r1, #3
723 + adr r5, MC_put_x_16_arm_align_jt
724 + ldr r11, [r5]
725 + mvn r12, r11
726 + add r5, r5, r4, lsl #2
727 + ldr pc, [r5, #4]
729 +.macro ADJ_ALIGN_QW shift, R0, R1, R2, R3, R4
730 + mov \R0, \R0, lsr #(\shift)
731 + orr \R0, \R0, \R1, lsl #(32 - \shift)
732 + mov \R1, \R1, lsr #(\shift)
733 + orr \R1, \R1, \R2, lsl #(32 - \shift)
734 + mov \R2, \R2, lsr #(\shift)
735 + orr \R2, \R2, \R3, lsl #(32 - \shift)
736 + mov \R3, \R3, lsr #(\shift)
737 + orr \R3, \R3, \R4, lsl #(32 - \shift)
738 + mov \R4, \R4, lsr #(\shift)
739 +@ and \R4, \R4, #0xFF
740 +.endm
742 +MC_put_x_16_arm_align0:
743 + ldmia r1, {r4-r8}
744 + add r1, r1, r2
745 + pld [r1]
746 + AVG_PW r7, r8
747 + AVG_PW r6, r7
748 + AVG_PW r5, r6
749 + AVG_PW r4, r5
750 + stmia r0, {r5-r8}
751 + subs r3, r3, #1
752 + add r0, r0, r2
753 + bne MC_put_x_16_arm_align0
754 + ldmfd sp!, {r4-r11,pc} @@ update PC with LR content.
755 +MC_put_x_16_arm_align1:
756 + and r1, r1, #0xFFFFFFFC
757 +1: ldmia r1, {r4-r8}
758 + add r1, r1, r2
759 + pld [r1]
760 + ADJ_ALIGN_QW 8, r4, r5, r6, r7, r8
761 + AVG_PW r7, r8
762 + AVG_PW r6, r7
763 + AVG_PW r5, r6
764 + AVG_PW r4, r5
765 + stmia r0, {r5-r8}
766 + subs r3, r3, #1
767 + add r0, r0, r2
768 + bne 1b
769 + ldmfd sp!, {r4-r11,pc} @@ update PC with LR content.
770 +MC_put_x_16_arm_align2:
771 + and r1, r1, #0xFFFFFFFC
772 +1: ldmia r1, {r4-r8}
773 + add r1, r1, r2
774 + pld [r1]
775 + ADJ_ALIGN_QW 16, r4, r5, r6, r7, r8
776 + AVG_PW r7, r8
777 + AVG_PW r6, r7
778 + AVG_PW r5, r6
779 + AVG_PW r4, r5
780 + stmia r0, {r5-r8}
781 + subs r3, r3, #1
782 + add r0, r0, r2
783 + bne 1b
784 + ldmfd sp!, {r4-r11,pc} @@ update PC with LR content.
785 +MC_put_x_16_arm_align3:
786 + and r1, r1, #0xFFFFFFFC
787 +1: ldmia r1, {r4-r8}
788 + add r1, r1, r2
789 + pld [r1]
790 + ADJ_ALIGN_QW 24, r4, r5, r6, r7, r8
791 + AVG_PW r7, r8
792 + AVG_PW r6, r7
793 + AVG_PW r5, r6
794 + AVG_PW r4, r5
795 + stmia r0, {r5-r8}
796 + subs r3, r3, #1
797 + add r0, r0, r2
798 + bne 1b
799 + ldmfd sp!, {r4-r11,pc} @@ update PC with LR content.
800 +MC_put_x_16_arm_align_jt:
801 + .word 0x01010101
802 + .word MC_put_x_16_arm_align0
803 + .word MC_put_x_16_arm_align1
804 + .word MC_put_x_16_arm_align2
805 + .word MC_put_x_16_arm_align3
807 +@ ----------------------------------------------------------------
808 + .align
809 + .global MC_put_x_8_arm
810 +MC_put_x_8_arm:
811 + @@ void func(uint8_t * dest, const uint8_t * ref, int stride, int height)
812 + pld [r1]
813 + stmfd sp!, {r4-r11,lr} @ R14 is also called LR
814 + and r4, r1, #3
815 + adr r5, MC_put_x_8_arm_align_jt
816 + ldr r11, [r5]
817 + mvn r12, r11
818 + add r5, r5, r4, lsl #2
819 + ldr pc, [r5, #4]
821 +.macro ADJ_ALIGN_DW shift, R0, R1, R2
822 + mov \R0, \R0, lsr #(\shift)
823 + orr \R0, \R0, \R1, lsl #(32 - \shift)
824 + mov \R1, \R1, lsr #(\shift)
825 + orr \R1, \R1, \R2, lsl #(32 - \shift)
826 + mov \R2, \R2, lsr #(\shift)
827 +@ and \R2, \R2, #0xFF
828 +.endm
830 +MC_put_x_8_arm_align0:
831 + ldmia r1, {r4-r6}
832 + add r1, r1, r2
833 + pld [r1]
834 + AVG_PW r5, r6
835 + AVG_PW r4, r5
836 + stmia r0, {r5-r6}
837 + subs r3, r3, #1
838 + add r0, r0, r2
839 + bne MC_put_x_8_arm_align0
840 + ldmfd sp!, {r4-r11,pc} @@ update PC with LR content.
841 +MC_put_x_8_arm_align1:
842 + and r1, r1, #0xFFFFFFFC
843 +1: ldmia r1, {r4-r6}
844 + add r1, r1, r2
845 + pld [r1]
846 + ADJ_ALIGN_DW 8, r4, r5, r6
847 + AVG_PW r5, r6
848 + AVG_PW r4, r5
849 + stmia r0, {r5-r6}
850 + subs r3, r3, #1
851 + add r0, r0, r2
852 + bne 1b
853 + ldmfd sp!, {r4-r11,pc} @@ update PC with LR content.
854 +MC_put_x_8_arm_align2:
855 + and r1, r1, #0xFFFFFFFC
856 +1: ldmia r1, {r4-r6}
857 + add r1, r1, r2
858 + pld [r1]
859 + ADJ_ALIGN_DW 16, r4, r5, r6
860 + AVG_PW r5, r6
861 + AVG_PW r4, r5
862 + stmia r0, {r5-r6}
863 + subs r3, r3, #1
864 + add r0, r0, r2
865 + bne 1b
866 + ldmfd sp!, {r4-r11,pc} @@ update PC with LR content.
867 +MC_put_x_8_arm_align3:
868 + and r1, r1, #0xFFFFFFFC
869 +1: ldmia r1, {r4-r6}
870 + add r1, r1, r2
871 + pld [r1]
872 + ADJ_ALIGN_DW 24, r4, r5, r6
873 + AVG_PW r5, r6
874 + AVG_PW r4, r5
875 + stmia r0, {r5-r6}
876 + subs r3, r3, #1
877 + add r0, r0, r2
878 + bne 1b
879 + ldmfd sp!, {r4-r11,pc} @@ update PC with LR content.
880 +MC_put_x_8_arm_align_jt:
881 + .word 0x01010101
882 + .word MC_put_x_8_arm_align0
883 + .word MC_put_x_8_arm_align1
884 + .word MC_put_x_8_arm_align2
885 + .word MC_put_x_8_arm_align3
886 Index: libmpeg2/motion_comp_iwmmxt.c
887 ===================================================================
888 --- libmpeg2/motion_comp_iwmmxt.c (revision 0)
889 +++ libmpeg2/motion_comp_iwmmxt.c (revision 0)
890 @@ -0,0 +1,59 @@
892 + * motion_comp_iwmmxt.c
893 + * Copyright (C) 2004 AGAWA Koji <i (AT) atty (DOT) jp>
895 + * This file is part of mpeg2dec, a free MPEG-2 video stream decoder.
896 + * See http://libmpeg2.sourceforge.net/ for updates.
898 + * mpeg2dec is free software; you can redistribute it and/or modify
899 + * it under the terms of the GNU General Public License as published by
900 + * the Free Software Foundation; either version 2 of the License, or
901 + * (at your option) any later version.
903 + * mpeg2dec is distributed in the hope that it will be useful,
904 + * but WITHOUT ANY WARRANTY; without even the implied warranty of
905 + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
906 + * GNU General Public License for more details.
908 + * You should have received a copy of the GNU General Public License
909 + * along with this program; if not, write to the Free Software
910 + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
911 + */
913 +#include "config.h"
915 +#if defined(ARCH_ARM) && defined(HAVE_IWMMXT)
917 +#include <inttypes.h>
919 +#include "mpeg2.h"
920 +#include "attributes.h"
921 +#include "mpeg2_internal.h"
923 +/* defined in libavcodec */
925 +extern void put_pixels16_iwmmxt(uint8_t * dest, const uint8_t * ref, const int stride, int height);
926 +extern void put_pixels16_x2_iwmmxt(uint8_t * dest, const uint8_t * ref, const int stride, int height);
927 +extern void put_pixels16_y2_iwmmxt(uint8_t * dest, const uint8_t * ref, const int stride, int height);
928 +extern void put_pixels16_xy2_iwmmxt(uint8_t * dest, const uint8_t * ref, const int stride, int height);
929 +extern void put_pixels8_iwmmxt(uint8_t * dest, const uint8_t * ref, const int stride, int height);
930 +extern void put_pixels8_x2_iwmmxt(uint8_t * dest, const uint8_t * ref, const int stride, int height);
931 +extern void put_pixels8_y2_iwmmxt(uint8_t * dest, const uint8_t * ref, const int stride, int height);
932 +extern void put_pixels8_xy2_iwmmxt(uint8_t * dest, const uint8_t * ref, const int stride, int height);
933 +extern void avg_pixels16_iwmmxt(uint8_t * dest, const uint8_t * ref, const int stride, int height);
934 +extern void avg_pixels16_x2_iwmmxt(uint8_t * dest, const uint8_t * ref, const int stride, int height);
935 +extern void avg_pixels16_y2_iwmmxt(uint8_t * dest, const uint8_t * ref, const int stride, int height);
936 +extern void avg_pixels16_xy2_iwmmxt(uint8_t * dest, const uint8_t * ref, const int stride, int height);
937 +extern void avg_pixels8_iwmmxt(uint8_t * dest, const uint8_t * ref, const int stride, int height);
938 +extern void avg_pixels8_x2_iwmmxt(uint8_t * dest, const uint8_t * ref, const int stride, int height);
939 +extern void avg_pixels8_y2_iwmmxt(uint8_t * dest, const uint8_t * ref, const int stride, int height);
940 +extern void avg_pixels8_xy2_iwmmxt(uint8_t * dest, const uint8_t * ref, const int stride, int height);
942 +mpeg2_mc_t mpeg2_mc_iwmmxt = {
943 + {put_pixels16_iwmmxt, put_pixels16_x2_iwmmxt, put_pixels16_y2_iwmmxt, put_pixels16_xy2_iwmmxt,
944 + put_pixels8_iwmmxt, put_pixels8_x2_iwmmxt, put_pixels8_y2_iwmmxt, put_pixels8_xy2_iwmmxt}, \
945 + {avg_pixels16_iwmmxt, avg_pixels16_x2_iwmmxt, avg_pixels16_y2_iwmmxt, avg_pixels16_xy2_iwmmxt,
946 + avg_pixels8_iwmmxt, avg_pixels8_x2_iwmmxt, avg_pixels8_y2_iwmmxt, avg_pixels8_xy2_iwmmxt}, \
949 +#endif /* defined(ARCH_ARM) && defined(HAVE_IWMMXT) */