3 * Copyright (C) 2000-2003 Michel Lespinasse <walken@zoy.org>
4 * Copyright (C) 1999-2000 Aaron Holtzman <aholtzma@ess.engr.uvic.ca>
6 * This file is part of mpeg2dec, a free MPEG-2 video stream decoder.
7 * See http://libmpeg2.sourceforge.net/ for updates.
9 * mpeg2dec is free software; you can redistribute it and/or modify
10 * it under the terms of the GNU General Public License as published by
11 * the Free Software Foundation; either version 2 of the License, or
12 * (at your option) any later version.
14 * mpeg2dec is distributed in the hope that it will be useful,
15 * but WITHOUT ANY WARRANTY; without even the implied warranty of
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17 * GNU General Public License for more details.
19 * You should have received a copy of the GNU General Public License
20 * along with this program; if not, write to the Free Software
21 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
26 #include "mpeg2dec_config.h"
29 #include "attributes.h"
30 #include "mpeg2_internal.h"
/*
 * Fixed-point weights used by the row and column passes: Wk is
 * 2048 * sqrt (2) * cos (k * pi / 16) rounded to the nearest integer,
 * for k = 1, 2, 3, 5, 6 and 7.
 */
32 #define W1 2841 /* 2048 * sqrt (2) * cos (1 * pi / 16) */
33 #define W2 2676 /* 2048 * sqrt (2) * cos (2 * pi / 16) */
34 #define W3 2408 /* 2048 * sqrt (2) * cos (3 * pi / 16) */
35 #define W5 1609 /* 2048 * sqrt (2) * cos (5 * pi / 16) */
36 #define W6 1108 /* 2048 * sqrt (2) * cos (6 * pi / 16) */
37 #define W7 565 /* 2048 * sqrt (2) * cos (7 * pi / 16) */
39 /* idct main entry point */
/*
 * Function pointers through which the IDCT is called; mpeg2_idct_init()
 * points them at mpeg2_idct_copy_c and mpeg2_idct_add_c.
 *
 *   block  - 16-bit coefficients, transformed in place by the row and
 *            column passes
 *   dest   - 8-bit output samples
 *   stride - NOTE(review): presumably the distance between consecutive
 *            output rows in dest; confirm against the callers
 *   last   - (add variant only) compared against 129 to choose between the
 *            full transform and the DC-only shortcut
 */
40 void (* mpeg2_idct_copy
) (int16_t * block
, uint8_t * dest
, int stride
);
41 void (* mpeg2_idct_add
) (int last
, int16_t * block
,
42 uint8_t * dest
, int stride
);
45 * In legal streams, the IDCT output should be between -384 and +384.
46 * In corrupted streams, it is possible to force the IDCT output to go
47 * to +-3826 - this is the worst case for a column IDCT where the
48 * column inputs are 16-bit values.
/*
 * Inline-assembly form of CLIP(): the value is compared with 255 and left
 * untouched when the unsigned comparison says it is not above 255;
 * otherwise only its low byte is forced to a range boundary. Callers must
 * therefore use just the low byte of the result.
 * NOTE(review): the mnemonics look like m68k/ColdFire, so this is presumably
 * the CPU_COLDFIRE variant - confirm against the surrounding #if.
 */
51 static inline unsigned CLIP(int value
)
53 asm ( /* Note: Uses knowledge that only the low byte of the result is used */
54 "cmp.l #255,%[v] \n" /* overflow? */
55 "bls.b 1f \n" /* no: return value */
56 "spl.b %[v] \n" /* yes: set low byte to appropriate boundary */
/*
 * Second inline-assembly form of CLIP(). As in the other asm variant, only
 * the low byte of the result is meaningful.
 * NOTE(review): the mnemonic looks like ARM, so this is presumably the
 * CPU_ARM variant - confirm against the surrounding #if.
 */
64 static inline unsigned CLIP(int value
)
66 asm volatile ( /* Note: Uses knowledge that only the low byte of the result is used */
/*
 * Executed only on an unsigned "higher" condition: replaces v with the
 * bitwise NOT of (v arithmetically shifted right by 31), i.e. 0 for a
 * negative v and all ones (low byte 255) for a non-negative v.
 */
68 "mvnhi %[v], %[v], asr #31 \n"
/*
 * Table-driven form of CLIP(): mpeg2_clip holds 3840 * 2 + 256 bytes and is
 * indexed with an offset of 3840, so CLIP(i) is valid for
 * -3840 <= i < 3840 + 256. mpeg2_idct_init() fills entry i with i saturated
 * to the range 0..255.
 */
75 uint8_t mpeg2_clip
[3840 * 2 + 256] IBSS_ATTR
;
76 #define CLIP(i) ((mpeg2_clip + 3840)[i])
/*
 * BUTTERFLY (t0, t1, W0, W1, d0, d1) computes
 *     t0 = W0 * d0 + W1 * d1
 *     t1 = W0 * d1 - W1 * d0
 * Two definitions follow. The first spells the formula out with four
 * multiplications; the second yields the same values with three, by sharing
 * tmp = W0 * (d0 + d1) between both results.
 * Inside the macro, W0 and W1 are macro parameters, not the W1 constant.
 */
80 #define BUTTERFLY(t0,t1,W0,W1,d0,d1) \
82 t0 = W0 * d0 + W1 * d1; \
83 t1 = W0 * d1 - W1 * d0; \
86 #define BUTTERFLY(t0,t1,W0,W1,d0,d1) \
88 int tmp = W0 * (d0 + d1); \
89 t0 = tmp + (W1 - W0) * d1; \
90 t1 = tmp - (W1 + W0) * d0; \
/*
 * Row pass of the IDCT: transforms the eight 16-bit values at block[0..7]
 * in place using integer arithmetic, with the results scaled down by >> 12.
 */
94 static inline void idct_row (int16_t * const block
)
97 int a0
, a1
, a2
, a3
, b0
, b1
, b2
, b3
;
/*
 * Shortcut test: true when block[1] and the three 32-bit words that overlay
 * block[2]..block[7] are all zero, so only block[0] can be non-zero.
 */
101 if (likely (!(block
[1] | ((int32_t *)block
)[1] | ((int32_t *)block
)[2] |
102 ((int32_t *)block
)[3]))) {
/* Value derived from the first coefficient alone, kept to 16 bits. */
103 uint32_t tmp
= (uint16_t) (block
[0] >> 1);
/* Overwrite the row one 32-bit word (two coefficients) at a time. */
105 ((int32_t *)block
)[0] = tmp
;
106 ((int32_t *)block
)[1] = tmp
;
107 ((int32_t *)block
)[2] = tmp
;
108 ((int32_t *)block
)[3] = tmp
;
/*
 * General case. 2048 is half of 1 << 12, so it presumably acts as the
 * rounding term for the final >> 12.
 */
112 d0
= (block
[0] << 11) + 2048;
/* t2 = W6 * d3 + W2 * d1, t3 = W6 * d1 - W2 * d3 */
118 BUTTERFLY (t2
, t3
, W6
, W2
, d3
, d1
);
/* t0 = W7 * d3 + W1 * d0, t1 = W7 * d0 - W1 * d3 */
128 BUTTERFLY (t0
, t1
, W7
, W1
, d3
, d0
);
/* t2 = W3 * d1 + W5 * d2, t3 = W3 * d2 - W5 * d1 */
129 BUTTERFLY (t2
, t3
, W3
, W5
, d1
, d2
);
/* (x >> 8) * 181 scales x by 181/256, approximately 1 / sqrt (2). */
134 b1
= ((t0
+ t1
) >> 8) * 181;
135 b2
= ((t0
- t1
) >> 8) * 181;
/*
 * Output stage: element k receives a_k + b_k and its mirror element 7 - k
 * receives a_k - b_k, each shifted right by 12.
 */
137 block
[0] = (a0
+ b0
) >> 12;
138 block
[1] = (a1
+ b1
) >> 12;
139 block
[2] = (a2
+ b2
) >> 12;
140 block
[3] = (a3
+ b3
) >> 12;
141 block
[4] = (a3
- b3
) >> 12;
142 block
[5] = (a2
- b2
) >> 12;
143 block
[6] = (a1
- b1
) >> 12;
144 block
[7] = (a0
- b0
) >> 12;
/*
 * Column pass of the IDCT: transforms the eight values block[8*0] ..
 * block[8*7] (one column of an 8-wide block) in place, with the results
 * scaled down by >> 17.
 */
147 static inline void idct_col (int16_t * const block
)
150 int a0
, a1
, a2
, a3
, b0
, b1
, b2
, b3
;
/*
 * 65536 is half of 1 << 17, so it presumably acts as the rounding term for
 * the final >> 17.
 */
153 d0
= (block
[8*0] << 11) + 65536;
155 d2
= block
[8*2] << 11;
/* t2 = W6 * d3 + W2 * d1, t3 = W6 * d1 - W2 * d3 */
159 BUTTERFLY (t2
, t3
, W6
, W2
, d3
, d1
);
/* t0 = W7 * d3 + W1 * d0, t1 = W7 * d0 - W1 * d3 */
169 BUTTERFLY (t0
, t1
, W7
, W1
, d3
, d0
);
/* t2 = W3 * d1 + W5 * d2, t3 = W3 * d2 - W5 * d1 */
170 BUTTERFLY (t2
, t3
, W3
, W5
, d1
, d2
);
/* (x >> 8) * 181 scales x by 181/256, approximately 1 / sqrt (2). */
175 b1
= ((t0
+ t1
) >> 8) * 181;
176 b2
= ((t0
- t1
) >> 8) * 181;
/*
 * Output stage: row k of the column receives a_k + b_k and its mirror row
 * 7 - k receives a_k - b_k, each shifted right by 17.
 */
178 block
[8*0] = (a0
+ b0
) >> 17;
179 block
[8*1] = (a1
+ b1
) >> 17;
180 block
[8*2] = (a2
+ b2
) >> 17;
181 block
[8*3] = (a3
+ b3
) >> 17;
182 block
[8*4] = (a3
- b3
) >> 17;
183 block
[8*5] = (a2
- b2
) >> 17;
184 block
[8*6] = (a1
- b1
) >> 17;
185 block
[8*7] = (a0
- b0
) >> 17;
/*
 * C implementation installed behind mpeg2_idct_copy by mpeg2_idct_init().
 * Runs the row pass and then the column pass over block, stores the clipped
 * results into dest, and zeroes the coefficients it has just written out.
 */
188 static void mpeg2_idct_copy_c (int16_t * block
, uint8_t * dest
,
/* Row pass: each row starts 8 coefficients after the previous one. */
193 for (i
= 0; i
< 8; i
++)
194 idct_row (block
+ 8 * i
);
/* Column pass: each column starts one coefficient after the previous one. */
195 for (i
= 0; i
< 8; i
++)
196 idct_col (block
+ i
);
/* Saturate eight transformed values to 0..255 and store them as pixels. */
198 dest
[0] = CLIP (block
[0]);
199 dest
[1] = CLIP (block
[1]);
200 dest
[2] = CLIP (block
[2]);
201 dest
[3] = CLIP (block
[3]);
202 dest
[4] = CLIP (block
[4]);
203 dest
[5] = CLIP (block
[5]);
204 dest
[6] = CLIP (block
[6]);
205 dest
[7] = CLIP (block
[7]);
/* Zero block[0..7] through four 32-bit stores. */
207 ((int32_t *)block
)[0] = 0; ((int32_t *)block
)[1] = 0;
208 ((int32_t *)block
)[2] = 0; ((int32_t *)block
)[3] = 0;
/*
 * C implementation installed behind mpeg2_idct_add by mpeg2_idct_init().
 * Adds the inverse transform of block to the pixels already in dest,
 * clipping every sum. When last is 129 and the test on block[0] below fails,
 * a DC-only shortcut adds one constant to the pixels instead.
 */
215 static void mpeg2_idct_add_c (const int last
, int16_t * block
,
216 uint8_t * dest
, const int stride
)
/*
 * Take the full transform unless last == 129; even then take it when
 * bits 4..6 of block[0] are exactly binary 100.
 */
220 if (last
!= 129 || (block
[0] & (7 << 4)) == (4 << 4)) {
/* Row pass over the eight rows, then column pass over the eight columns. */
221 for (i
= 0; i
< 8; i
++)
222 idct_row (block
+ 8 * i
);
223 for (i
= 0; i
< 8; i
++)
224 idct_col (block
+ i
);
/* Add eight transformed values to the existing pixels, saturating to 0..255. */
226 dest
[0] = CLIP (block
[0] + dest
[0]);
227 dest
[1] = CLIP (block
[1] + dest
[1]);
228 dest
[2] = CLIP (block
[2] + dest
[2]);
229 dest
[3] = CLIP (block
[3] + dest
[3]);
230 dest
[4] = CLIP (block
[4] + dest
[4]);
231 dest
[5] = CLIP (block
[5] + dest
[5]);
232 dest
[6] = CLIP (block
[6] + dest
[6]);
233 dest
[7] = CLIP (block
[7] + dest
[7]);
/* Zero block[0..7] through four 32-bit stores. */
235 ((int32_t *)block
)[0] = 0; ((int32_t *)block
)[1] = 0;
236 ((int32_t *)block
)[2] = 0; ((int32_t *)block
)[3] = 0;
/*
 * DC-only shortcut (NOTE(review): presumably the else branch of the test
 * above - confirm). DC is block[0] divided by 128 with rounding.
 */
244 DC
= (block
[0] + 64) >> 7;
/* Clear the first and the last coefficient of the block. */
245 block
[0] = block
[63] = 0;
/* Add the same DC value to eight pixels, saturating to 0..255. */
248 dest
[0] = CLIP (DC
+ dest
[0]);
249 dest
[1] = CLIP (DC
+ dest
[1]);
250 dest
[2] = CLIP (DC
+ dest
[2]);
251 dest
[3] = CLIP (DC
+ dest
[3]);
252 dest
[4] = CLIP (DC
+ dest
[4]);
253 dest
[5] = CLIP (DC
+ dest
[5]);
254 dest
[6] = CLIP (DC
+ dest
[6]);
255 dest
[7] = CLIP (DC
+ dest
[7]);
/*
 * IDCT setup: installs the C implementations behind the mpeg2_idct_copy and
 * mpeg2_idct_add pointers, fills the clipping table on builds that use it,
 * and derives mpeg2_scan_norm / mpeg2_scan_alt from their default_
 * counterparts.
 */
261 void mpeg2_idct_init (void)
263 extern uint8_t default_mpeg2_scan_norm
[64];
264 extern uint8_t default_mpeg2_scan_alt
[64];
265 extern uint8_t mpeg2_scan_norm
[64];
266 extern uint8_t mpeg2_scan_alt
[64];
/* Route the public entry points to the portable C code. */
269 mpeg2_idct_copy
= mpeg2_idct_copy_c
;
270 mpeg2_idct_add
= mpeg2_idct_add_c
;
/*
 * The lookup table is only filled when neither CPU_COLDFIRE nor CPU_ARM is
 * defined. Entry i holds i saturated to 0..255, for every i from -3840 up
 * to 3840 + 255.
 */
272 #if !defined(CPU_COLDFIRE) && !defined(CPU_ARM)
273 for (i
= -3840; i
< 3840 + 256; i
++)
274 CLIP(i
) = (i
< 0) ? 0 : ((i
> 255) ? 255 : i
);
/*
 * Remap all 64 scan-table entries. Within the low six bits of each entry,
 * bits 1, 2, 4 and 5 move down one place and bits 0 and 3 move up two, so
 * each 3-bit half is rotated right by one bit; bits 6 and 7 are discarded.
 */
277 for (i
= 0; i
< 64; i
++)
279 j
= default_mpeg2_scan_norm
[i
];
280 mpeg2_scan_norm
[i
] = ((j
& 0x36) >> 1) | ((j
& 0x09) << 2);
282 j
= default_mpeg2_scan_alt
[i
];
283 mpeg2_scan_alt
[i
] = ((j
& 0x36) >> 1) | ((j
& 0x09) << 2);