3 * Copyright (C) 2000-2003 Michel Lespinasse <walken@zoy.org>
4 * Copyright (C) 1999-2000 Aaron Holtzman <aholtzma@ess.engr.uvic.ca>
6 * This file is part of mpeg2dec, a free MPEG-2 video stream decoder.
7 * See http://libmpeg2.sourceforge.net/ for updates.
9 * mpeg2dec is free software; you can redistribute it and/or modify
10 * it under the terms of the GNU General Public License as published by
11 * the Free Software Foundation; either version 2 of the License, or
12 * (at your option) any later version.
14 * mpeg2dec is distributed in the hope that it will be useful,
15 * but WITHOUT ANY WARRANTY; without even the implied warranty of
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17 * GNU General Public License for more details.
19 * You should have received a copy of the GNU General Public License
20 * along with this program; if not, write to the Free Software
21 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
23 * Modified for use with MPlayer, see libmpeg2_changes.diff for the exact changes.
24 * detailed changelog at http://svn.mplayerhq.hu/mplayer/trunk/
34 #include "attributes.h"
35 #include "mpeg2_internal.h"
/*
 * Integer cosine weights passed to BUTTERFLY by idct_row and idct_col.
 * Each value is 2048 * sqrt(2) * cos(k * pi / 16) rounded to the nearest
 * integer, for k = 1, 2, 3, 5, 6, 7 (no W4 is defined in this group).
 */
37 #define W1 2841 /* 2048 * sqrt (2) * cos (1 * pi / 16) */
38 #define W2 2676 /* 2048 * sqrt (2) * cos (2 * pi / 16) */
39 #define W3 2408 /* 2048 * sqrt (2) * cos (3 * pi / 16) */
40 #define W5 1609 /* 2048 * sqrt (2) * cos (5 * pi / 16) */
41 #define W6 1108 /* 2048 * sqrt (2) * cos (6 * pi / 16) */
42 #define W7 565 /* 2048 * sqrt (2) * cos (7 * pi / 16) */
/*
 * Global function pointers for the two IDCT operations. mpeg2_idct_init
 * assigns both of them, either to the C routines in this file
 * (mpeg2_idct_copy_c / mpeg2_idct_add_c) or to an accelerated pair.
 *
 *   mpeg2_idct_copy (block, dest, stride)
 *   mpeg2_idct_add  (last, block, dest, stride)
 *
 * In the C versions, block is a 64-entry int16_t coefficient array
 * (block[63] is written) and dest receives 8-bit samples. The meaning of
 * stride is not visible in this listing - presumably the byte distance
 * between output rows; confirm against the full source.
 */
44 /* idct main entry point */
45 void (* mpeg2_idct_copy
) (int16_t * block
, uint8_t * dest
, int stride
);
46 void (* mpeg2_idct_add
) (int last
, int16_t * block
,
47 uint8_t * dest
, int stride
);
50 * In legal streams, the IDCT output should be between -384 and +384.
51 * In corrupted streams, it is possible to force the IDCT output to go
52 * to +-3826 - this is the worst case for a column IDCT where the
53 * column inputs are 16-bit values.
/*
 * Saturation lookup table. CLIP(i) indexes mpeg2_clip with a bias of 3840,
 * so valid arguments are -3840 .. 3840 + 255 (3840 * 2 + 256 entries).
 * mpeg2_idct_init fills it (in the branches that use it) so that CLIP(i)
 * is i clamped to the range 0..255.
 * CLIP is also used as an lvalue when the table is filled.
 */
55 uint8_t mpeg2_clip
[3840 * 2 + 256];
56 #define CLIP(i) ((mpeg2_clip + 3840)[i])
/*
 * BUTTERFLY(t0, t1, W0, W1, d0, d1): weighted rotation of the pair (d0, d1)
 *     t0 = W0 * d0 + W1 * d1
 *     t1 = W0 * d1 - W1 * d0
 *
 * Two definitions are visible. The first (listing lines 59-62) is the
 * direct form with four multiplies. The second (listing lines 65-69)
 * produces the same two values with three multiplies by sharing
 * tmp = W0 * (d0 + d1).
 *
 * NOTE(review): the listing numbers skip 60, 63-64, 66 and 70, so the
 * statement wrappers and whatever selects between the two definitions
 * (presumably a preprocessor conditional) are not visible - confirm in the
 * full file.
 * The macro parameter named W1 hides the file-level W1 constant inside the
 * macro body. Arguments are not parenthesized and are evaluated more than
 * once, so pass only simple variables or constants.
 */
59 #define BUTTERFLY(t0,t1,W0,W1,d0,d1) \
61 t0 = W0 * d0 + W1 * d1; \
62 t1 = W0 * d1 - W1 * d0; \
65 #define BUTTERFLY(t0,t1,W0,W1,d0,d1) \
67 int tmp = W0 * (d0 + d1); \
68 t0 = tmp + (W1 - W0) * d1; \
69 t1 = tmp - (W1 + W0) * d0; \
/*
 * idct_row - row pass of the IDCT, performed in place on the eight
 * consecutive int16_t values block[0..7]. The C entry points in this file
 * call it once per row with block + 8 * i.
 *
 * NOTE(review): this listing is lossy. The embedded line numbers skip
 * (for example 76 -> 80, 91 -> 97, 97 -> 107, 108 -> 113), so the
 * declarations of d0..d3 / t0..t3 and several statements of the body are
 * not visible. The comments below describe only the visible lines.
 */
73 static inline void idct_row (int16_t * const block
)
76 int a0
, a1
, a2
, a3
, b0
, b1
, b2
, b3
;
/*
 * Shortcut: taken when block[1] and the int32_t words at index 1..3
 * (which overlay block[2..7]) are all zero, i.e. only block[0] can be
 * non-zero. likely() is defined outside this view - presumably a branch
 * prediction hint.
 * NOTE(review): the int16_t array is accessed through an int32_t pointer;
 * this assumes suitable alignment of block and a compiler that tolerates
 * this kind of type punning.
 */
80 if (likely (!(block
[1] | ((int32_t *)block
)[1] | ((int32_t *)block
)[2] |
81 ((int32_t *)block
)[3]))) {
/* tmp starts as block[0] >> 1 truncated to 16 bits. */
82 uint32_t tmp
= (uint16_t) (block
[0] >> 1);
/*
 * The same tmp is stored to all four 32-bit words of the row.
 * NOTE(review): listing line 83 is missing between the initialisation and
 * these stores - confirm how the upper half of tmp is set.
 */
84 ((int32_t *)block
)[0] = tmp
;
85 ((int32_t *)block
)[1] = tmp
;
86 ((int32_t *)block
)[2] = tmp
;
87 ((int32_t *)block
)[3] = tmp
;
/*
 * General path. block[0] is scaled by 2^11 and 2048 is added, which is
 * half of the 2^12 divisor applied by the final ">> 12" (rounding term).
 */
91 d0
= (block
[0] << 11) + 2048;
/* Rotate (d3, d1) with weights W6 / W2 into t2, t3. */
97 BUTTERFLY (t2
, t3
, W6
, W2
, d3
, d1
);
/* Rotate (d3, d0) with W7 / W1 and (d1, d2) with W3 / W5. */
107 BUTTERFLY (t0
, t1
, W7
, W1
, d3
, d0
);
108 BUTTERFLY (t2
, t3
, W3
, W5
, d1
, d2
);
/* 181 / 256 is approximately 1 / sqrt(2); the >> 8 supplies the 256. */
113 b1
= ((t0
+ t1
) >> 8) * 181;
114 b2
= ((t0
- t1
) >> 8) * 181;
/*
 * Outputs: a[k] + b[k] for positions 0..3, and the mirrored a[k] - b[k]
 * for positions 4..7, each scaled down by 2^12.
 */
116 block
[0] = (a0
+ b0
) >> 12;
117 block
[1] = (a1
+ b1
) >> 12;
118 block
[2] = (a2
+ b2
) >> 12;
119 block
[3] = (a3
+ b3
) >> 12;
120 block
[4] = (a3
- b3
) >> 12;
121 block
[5] = (a2
- b2
) >> 12;
122 block
[6] = (a1
- b1
) >> 12;
123 block
[7] = (a0
- b0
) >> 12;
/*
 * idct_col - column pass of the IDCT, performed in place on the eight
 * int16_t values block[8*0], block[8*1], ..., block[8*7] (element stride
 * of 8). The C entry points in this file call it once per column with
 * block + i, after all row passes.
 *
 * NOTE(review): this listing is lossy. The embedded line numbers skip
 * (for example 129 -> 132, 134 -> 138, 138 -> 148, 149 -> 154), so the
 * declarations of d0..d3 / t0..t3 and several statements are not visible.
 * The comments below describe only the visible lines.
 */
126 static inline void idct_col (int16_t * const block
)
129 int a0
, a1
, a2
, a3
, b0
, b1
, b2
, b3
;
/*
 * First input scaled by 2^11 plus 65536, which is half of the 2^17 divisor
 * applied by the final ">> 17" (rounding term).
 */
132 d0
= (block
[8*0] << 11) + 65536;
/* Third input (row 2 of this column) is scaled by 2^11 as well. */
134 d2
= block
[8*2] << 11;
/* Rotate (d3, d1) with weights W6 / W2 into t2, t3. */
138 BUTTERFLY (t2
, t3
, W6
, W2
, d3
, d1
);
/* Rotate (d3, d0) with W7 / W1 and (d1, d2) with W3 / W5. */
148 BUTTERFLY (t0
, t1
, W7
, W1
, d3
, d0
);
149 BUTTERFLY (t2
, t3
, W3
, W5
, d1
, d2
);
/* 181 / 256 is approximately 1 / sqrt(2); the >> 8 supplies the 256. */
154 b1
= ((t0
+ t1
) >> 8) * 181;
155 b2
= ((t0
- t1
) >> 8) * 181;
/*
 * Outputs: a[k] + b[k] for rows 0..3 of the column, and the mirrored
 * a[k] - b[k] for rows 4..7, each scaled down by 2^17.
 */
157 block
[8*0] = (a0
+ b0
) >> 17;
158 block
[8*1] = (a1
+ b1
) >> 17;
159 block
[8*2] = (a2
+ b2
) >> 17;
160 block
[8*3] = (a3
+ b3
) >> 17;
161 block
[8*4] = (a3
- b3
) >> 17;
162 block
[8*5] = (a2
- b2
) >> 17;
163 block
[8*6] = (a1
- b1
) >> 17;
164 block
[8*7] = (a0
- b0
) >> 17;
/*
 * mpeg2_idct_copy_c - plain C implementation installed into the
 * mpeg2_idct_copy function pointer by mpeg2_idct_init.
 *
 * Visible steps:
 *   1. idct_row on each of the 8 rows (block + 8 * i);
 *   2. idct_col on each of the 8 columns (block + i);
 *   3. eight results block[0..7] are clamped through CLIP and stored to
 *      dest[0..7];
 *   4. the first four int32_t words of block (= block[0..7]) are zeroed.
 *
 * NOTE(review): the listing numbers skip 168-171, 176, 185 and everything
 * after 187, so the remaining parameter(s), the declaration of i, and the
 * code that repeats steps 3-4 for the other rows (presumably a loop that
 * advances dest and block) are not visible - confirm in the full source.
 */
167 static void mpeg2_idct_copy_c (int16_t * block
, uint8_t * dest
,
/* Row passes first, then column passes, all in place. */
172 for (i
= 0; i
< 8; i
++)
173 idct_row (block
+ 8 * i
);
174 for (i
= 0; i
< 8; i
++)
175 idct_col (block
+ i
);
/* Store one row of eight samples, clamped to 0..255 by the CLIP table. */
177 dest
[0] = CLIP (block
[0]);
178 dest
[1] = CLIP (block
[1]);
179 dest
[2] = CLIP (block
[2]);
180 dest
[3] = CLIP (block
[3]);
181 dest
[4] = CLIP (block
[4]);
182 dest
[5] = CLIP (block
[5]);
183 dest
[6] = CLIP (block
[6]);
184 dest
[7] = CLIP (block
[7]);
/*
 * Clear the eight coefficients just consumed, two at a time through an
 * int32_t pointer (same alignment / type-punning caveat as in idct_row).
 */
186 ((int32_t *)block
)[0] = 0; ((int32_t *)block
)[1] = 0;
187 ((int32_t *)block
)[2] = 0; ((int32_t *)block
)[3] = 0;
/*
 * mpeg2_idct_add_c - plain C implementation installed into the
 * mpeg2_idct_add function pointer by mpeg2_idct_init. It adds a residual
 * to the samples already in dest and clamps the sums to 0..255 via CLIP.
 *
 * Parameters:
 *   last   - compared against 129 to choose the path; its exact meaning is
 *            defined by the caller and is not visible here.
 *   block  - 64-entry int16_t coefficient array, modified and cleared.
 *   dest   - 8-bit samples, read and overwritten.
 *   stride - not used in the visible lines; presumably the distance between
 *            output rows - confirm in the full source.
 *
 * NOTE(review): the listing numbers skip (196-198, 204, 213, 216-222,
 * 225-226, 235+), so the declarations, the per-row loops and the "else"
 * separating the two paths are not visible.
 */
194 static void mpeg2_idct_add_c (const int last
, int16_t * block
,
195 uint8_t * dest
, const int stride
)
/*
 * Full transform path: taken unless last == 129 and bits 4..6 of block[0]
 * differ from the pattern 100b.
 */
199 if (last
!= 129 || (block
[0] & (7 << 4)) == (4 << 4)) {
200 for (i
= 0; i
< 8; i
++)
201 idct_row (block
+ 8 * i
);
202 for (i
= 0; i
< 8; i
++)
203 idct_col (block
+ i
);
/* Add one row of eight residuals to dest, clamping each sum. */
205 dest
[0] = CLIP (block
[0] + dest
[0]);
206 dest
[1] = CLIP (block
[1] + dest
[1]);
207 dest
[2] = CLIP (block
[2] + dest
[2]);
208 dest
[3] = CLIP (block
[3] + dest
[3]);
209 dest
[4] = CLIP (block
[4] + dest
[4]);
210 dest
[5] = CLIP (block
[5] + dest
[5]);
211 dest
[6] = CLIP (block
[6] + dest
[6]);
212 dest
[7] = CLIP (block
[7] + dest
[7]);
/* Clear the eight coefficients just consumed (block[0..7]). */
214 ((int32_t *)block
)[0] = 0; ((int32_t *)block
)[1] = 0;
215 ((int32_t *)block
)[2] = 0; ((int32_t *)block
)[3] = 0;
/*
 * The lines below appear to be the alternative path (the "else" itself is
 * not visible): a single value DC = (block[0] + 64) >> 7, i.e. block[0]
 * divided by 128 with rounding, is added to every sample instead of
 * running the transform. Only block[0] and block[63] are reset here.
 */
223 DC
= (block
[0] + 64) >> 7;
224 block
[0] = block
[63] = 0;
/* Add DC to one row of eight samples, clamping each sum. */
227 dest
[0] = CLIP (DC
+ dest
[0]);
228 dest
[1] = CLIP (DC
+ dest
[1]);
229 dest
[2] = CLIP (DC
+ dest
[2]);
230 dest
[3] = CLIP (DC
+ dest
[3]);
231 dest
[4] = CLIP (DC
+ dest
[4]);
232 dest
[5] = CLIP (DC
+ dest
[5]);
233 dest
[6] = CLIP (DC
+ dest
[6]);
234 dest
[7] = CLIP (DC
+ dest
[7]);
/*
 * mpeg2_idct_init - select the IDCT implementation and prepare its tables.
 *
 * accel is a bit mask tested against the MPEG2_ACCEL_* flags (defined
 * outside this file view). For the matching case the function assigns the
 * global mpeg2_idct_copy / mpeg2_idct_add pointers and runs that
 * implementation's init routine; the last visible group installs the plain
 * C routines.
 *
 * NOTE(review): the listing numbers skip between the groups (247-248,
 * 253-254, 259-261, 266-268, 273-274, 276-277, 283-287), so the "else"
 * chaining, any preprocessor guards, the local declarations of i and j,
 * and the closing braces are not visible - confirm the exact control flow
 * in the full source.
 */
240 void mpeg2_idct_init (uint32_t accel
)
/* x86 SSE2 routines; shares mpeg2_idct_mmx_init with the MMX variants. */
243 if (accel
& MPEG2_ACCEL_X86_SSE2
) {
244 mpeg2_idct_copy
= mpeg2_idct_copy_sse2
;
245 mpeg2_idct_add
= mpeg2_idct_add_sse2
;
246 mpeg2_idct_mmx_init ();
/* x86 MMXEXT routines. */
249 if (accel
& MPEG2_ACCEL_X86_MMXEXT
) {
250 mpeg2_idct_copy
= mpeg2_idct_copy_mmxext
;
251 mpeg2_idct_add
= mpeg2_idct_add_mmxext
;
252 mpeg2_idct_mmx_init ();
/* x86 MMX routines. */
255 if (accel
& MPEG2_ACCEL_X86_MMX
) {
256 mpeg2_idct_copy
= mpeg2_idct_copy_mmx
;
257 mpeg2_idct_add
= mpeg2_idct_add_mmx
;
258 mpeg2_idct_mmx_init ();
/* PowerPC AltiVec routines. */
262 if (accel
& MPEG2_ACCEL_PPC_ALTIVEC
) {
263 mpeg2_idct_copy
= mpeg2_idct_copy_altivec
;
264 mpeg2_idct_add
= mpeg2_idct_add_altivec
;
265 mpeg2_idct_altivec_init ();
/* Alpha routines using the MVI flag. */
269 if (accel
& MPEG2_ACCEL_ALPHA_MVI
) {
270 mpeg2_idct_copy
= mpeg2_idct_copy_mvi
;
271 mpeg2_idct_add
= mpeg2_idct_add_mvi
;
272 mpeg2_idct_alpha_init ();
/* Alpha routines without MVI; this case also fills the CLIP table. */
275 if (accel
& MPEG2_ACCEL_ALPHA
) {
278 mpeg2_idct_copy
= mpeg2_idct_copy_alpha
;
279 mpeg2_idct_add
= mpeg2_idct_add_alpha
;
280 mpeg2_idct_alpha_init ();
/* CLIP(i) = i clamped to 0..255 for every index the table covers. */
281 for (i
= -3840; i
< 3840 + 256; i
++)
282 CLIP(i
) = (i
< 0) ? 0 : ((i
> 255) ? 255 : i
);
/*
 * Plain C implementation (how control reaches here is not visible -
 * presumably the final else of the chain above).
 */
288 mpeg2_idct_copy
= mpeg2_idct_copy_c
;
289 mpeg2_idct_add
= mpeg2_idct_add_c
;
/* Same clamp table fill as in the Alpha case. */
290 for (i
= -3840; i
< 3840 + 256; i
++)
291 CLIP(i
) = (i
< 0) ? 0 : ((i
> 255) ? 255 : i
);
/*
 * Rewrite all 64 entries of both scan tables in place: within each 6-bit
 * entry, bits 1, 2, 4, 5 (mask 0x36) move down by one position and bits
 * 0, 3 (mask 0x09) move up by two, i.e. each 3-bit half is rotated right
 * by one bit.
 * NOTE(review): this modifies global tables, so running this branch a
 * second time would permute them again - confirm the function is only
 * called once.
 */
292 for (i
= 0; i
< 64; i
++) {
293 j
= mpeg2_scan_norm
[i
];
294 mpeg2_scan_norm
[i
] = ((j
& 0x36) >> 1) | ((j
& 0x09) << 2);
295 j
= mpeg2_scan_alt
[i
];
296 mpeg2_scan_alt
[i
] = ((j
& 0x36) >> 1) | ((j
& 0x09) << 2);