r665: Merged the official release 2.0.
[cinelerra_cv.git] / quicktime / revdct.c
blob606b35cea73474ff8d69a58f87910486988eecd9
1 #include "sizes.h"
3 #ifdef RIGHT_SHIFT_IS_UNSIGNED
4 #define SHIFT_TEMPS long shift_temp;
5 #define RIGHT_SHIFT(x, shft) \
6 ((shift_temp = (x)) < 0 ? \
7 (shift_temp >> (shft)) | ((~((long) 0)) << (32-(shft))) : \
8 (shift_temp >> (shft)))
9 #else
10 #define SHIFT_TEMPS
11 #define RIGHT_SHIFT(x, shft) ((x) >> (shft))
12 #endif
14 #define MAXJSAMPLE 255
15 #define RANGE_MASK (MAXJSAMPLE * 4 + 3)
16 #define CONST_SCALE (ONE << CONST_BITS)
17 #define ONE ((long)1)
18 #define CONST_BITS 13
19 #define PASS1_BITS 2
20 #define DCTSIZE1 8
21 #define DCTSIZE2 64
22 #define MULTIPLY(var, const) ((var) * (const))
23 #define FIX(x) ((long)((x) * CONST_SCALE + 0.5))
24 #define DESCALE(x,n) RIGHT_SHIFT((x) + (ONE << ((n) - 1)), n)
26 int quicktime_rev_dct(int16_t *data, unsigned char *outptr, unsigned char *rnglimit)
28 long tmp0, tmp1, tmp2, tmp3;
29 long tmp10, tmp11, tmp12, tmp13;
30 long z1, z2, z3, z4, z5;
31 long d0, d1, d2, d3, d4, d5, d6, d7;
32 register int16_t *dataptr;
33 int rowctr;
34 SHIFT_TEMPS
36 /* Pass 1: process rows. */
37 /* Note results are scaled up by sqrt(8) compared to a true IDCT; */
38 /* furthermore, we scale the results by 2**PASS1_BITS. */
40 dataptr = data;
42 for(rowctr = DCTSIZE1 - 1; rowctr >= 0; rowctr--)
44 /* Due to quantization, we will usually find that many of the input
45 * coefficients are zero, especially the AC terms. We can exploit this
46 * by short-circuiting the IDCT calculation for any row in which all
47 * the AC terms are zero. In that case each output is equal to the
48 * DC coefficient (with scale factor as needed).
49 * With typical images and quantization tables, half or more of the
50 * row DCT calculations can be simplified this way.
53 register int *idataptr = (int*)dataptr;
54 d0 = dataptr[0];
55 d1 = dataptr[1];
56 if((d1 == 0) && (idataptr[1] | idataptr[2] | idataptr[3]) == 0)
58 /* AC terms all zero */
59 if(d0)
61 /* Compute a 32 bit value to assign. */
62 int16_t dcval = (int16_t) (d0 << PASS1_BITS);
63 register int v = (dcval & 0xffff) | ((dcval << 16) & 0xffff0000);
65 idataptr[0] = v;
66 idataptr[1] = v;
67 idataptr[2] = v;
68 idataptr[3] = v;
71 /* advance pointer to next row */
72 dataptr += DCTSIZE1;
73 continue;
75 d2 = dataptr[2];
76 d3 = dataptr[3];
77 d4 = dataptr[4];
78 d5 = dataptr[5];
79 d6 = dataptr[6];
80 d7 = dataptr[7];
82 /* Even part: reverse the even part of the forward DCT. */
83 /* The rotator is sqrt(2)*c(-6). */
84 if(d6)
86 if(d4)
88 if(d2)
90 if(d0)
92 /* d0 != 0, d2 != 0, d4 != 0, d6 != 0 */
93 z1 = MULTIPLY(d2 + d6, FIX(0.541196100));
94 tmp2 = z1 + MULTIPLY(d6, -FIX(1.847759065));
95 tmp3 = z1 + MULTIPLY(d2, FIX(0.765366865));
97 tmp0 = (d0 + d4) << CONST_BITS;
98 tmp1 = (d0 - d4) << CONST_BITS;
100 tmp10 = tmp0 + tmp3;
101 tmp13 = tmp0 - tmp3;
102 tmp11 = tmp1 + tmp2;
103 tmp12 = tmp1 - tmp2;
105 else
107 /* d*0 == 0, d2 != 0, d4 != 0, d6 != 0 */
108 z1 = MULTIPLY(d2 + d6, FIX(0.541196100));
109 tmp2 = z1 + MULTIPLY(d6, -FIX(1.847759065));
110 tmp3 = z1 + MULTIPLY(d2, FIX(0.765366865));
112 tmp0 = d4 << CONST_BITS;
114 tmp10 = tmp0 + tmp3;
115 tmp13 = tmp0 - tmp3;
116 tmp11 = tmp2 - tmp0;
117 tmp12 = -(tmp0 + tmp2);
120 else
122 if(d0)
124 /* d0 != 0, d2 == 0, d4 != 0, d6 != 0 */
125 tmp2 = MULTIPLY(d6, -FIX(1.306562965));
126 tmp3 = MULTIPLY(d6, FIX(0.541196100));
128 tmp0 = (d0 + d4) << CONST_BITS;
129 tmp1 = (d0 - d4) << CONST_BITS;
131 tmp10 = tmp0 + tmp3;
132 tmp13 = tmp0 - tmp3;
133 tmp11 = tmp1 + tmp2;
134 tmp12 = tmp1 - tmp2;
136 else
138 /* d0 == 0, d2 == 0, d4 != 0, d6 != 0 */
139 tmp2 = MULTIPLY(d6, -FIX(1.306562965));
140 tmp3 = MULTIPLY(d6, FIX(0.541196100));
142 tmp0 = d4 << CONST_BITS;
144 tmp10 = tmp0 + tmp3;
145 tmp13 = tmp0 - tmp3;
146 tmp11 = tmp2 - tmp0;
147 tmp12 = -(tmp0 + tmp2);
151 else
153 if (d2)
155 if (d0)
157 /* d0 != 0, d2 != 0, d4 == 0, d6 != 0 */
158 z1 = MULTIPLY(d2 + d6, FIX(0.541196100));
159 tmp2 = z1 + MULTIPLY(d6, -FIX(1.847759065));
160 tmp3 = z1 + MULTIPLY(d2, FIX(0.765366865));
162 tmp0 = d0 << CONST_BITS;
164 tmp10 = tmp0 + tmp3;
165 tmp13 = tmp0 - tmp3;
166 tmp11 = tmp0 + tmp2;
167 tmp12 = tmp0 - tmp2;
169 else
171 /* d0 == 0, d2 != 0, d4 == 0, d6 != 0 */
172 z1 = MULTIPLY(d2 + d6, FIX(0.541196100));
173 tmp2 = z1 + MULTIPLY(d6, -FIX(1.847759065));
174 tmp3 = z1 + MULTIPLY(d2, FIX(0.765366865));
176 tmp10 = tmp3;
177 tmp13 = -tmp3;
178 tmp11 = tmp2;
179 tmp12 = -tmp2;
182 else
184 if(d0)
186 /* d0 != 0, d2 == 0, d4 == 0, d6 != 0 */
187 tmp2 = MULTIPLY(d6, -FIX(1.306562965));
188 tmp3 = MULTIPLY(d6, FIX(0.541196100));
190 tmp0 = d0 << CONST_BITS;
192 tmp10 = tmp0 + tmp3;
193 tmp13 = tmp0 - tmp3;
194 tmp11 = tmp0 + tmp2;
195 tmp12 = tmp0 - tmp2;
197 else
199 /* d0 == 0, d2 == 0, d4 == 0, d6 != 0 */
200 tmp2 = MULTIPLY(d6, -FIX(1.306562965));
201 tmp3 = MULTIPLY(d6, FIX(0.541196100));
203 tmp10 = tmp3;
204 tmp13 = -tmp3;
205 tmp11 = tmp2;
206 tmp12 = -tmp2;
211 else
213 if(d4)
215 if(d2)
217 if(d0)
219 /* d*0 != 0, d2 != 0, d4 != 0, d6 == 0 */
220 tmp2 = MULTIPLY(d2, FIX(0.541196100));
221 tmp3 = MULTIPLY(d2, FIX(1.306562965));
223 tmp0 = (d0 + d4) << CONST_BITS;
224 tmp1 = (d0 - d4) << CONST_BITS;
226 tmp10 = tmp0 + tmp3;
227 tmp13 = tmp0 - tmp3;
228 tmp11 = tmp1 + tmp2;
229 tmp12 = tmp1 - tmp2;
231 else
233 /* d0 == 0, d2 != 0, d4 != 0, d6 == 0 */
234 tmp2 = MULTIPLY(d2, FIX(0.541196100));
235 tmp3 = MULTIPLY(d2, FIX(1.306562965));
237 tmp0 = d4 << CONST_BITS;
239 tmp10 = tmp0 + tmp3;
240 tmp13 = tmp0 - tmp3;
241 tmp11 = tmp2 - tmp0;
242 tmp12 = -(tmp0 + tmp2);
245 else
247 if(d0)
249 /* d0 != 0, d2 == 0, d4 != 0, d6 == 0 */
250 tmp10 = tmp13 = (d0 + d4) << CONST_BITS;
251 tmp11 = tmp12 = (d0 - d4) << CONST_BITS;
253 else
255 /* d0 == 0, d2 == 0, d4 != 0, d6 == 0 */
256 tmp10 = tmp13 = d4 << CONST_BITS;
257 tmp11 = tmp12 = -tmp10;
261 else
263 if(d2)
265 if(d0)
267 /* d0 != 0, d2 != 0, d4 == 0, d6 == 0 */
268 tmp2 = MULTIPLY(d2, FIX(0.541196100));
269 tmp3 = MULTIPLY(d2, FIX(1.306562965));
271 tmp0 = d0 << CONST_BITS;
273 tmp10 = tmp0 + tmp3;
274 tmp13 = tmp0 - tmp3;
275 tmp11 = tmp0 + tmp2;
276 tmp12 = tmp0 - tmp2;
278 else
280 /* d0 == 0, d2 != 0, d4 == 0, d6 == 0 */
281 tmp2 = MULTIPLY(d2, FIX(0.541196100));
282 tmp3 = MULTIPLY(d2, FIX(1.306562965));
284 tmp10 = tmp3;
285 tmp13 = -tmp3;
286 tmp11 = tmp2;
287 tmp12 = -tmp2;
290 else
292 if(d0)
294 /* d0 != 0, d2 == 0, d4 == 0, d6 == 0 */
295 tmp10 = tmp13 = tmp11 = tmp12 = d0 << CONST_BITS;
297 else
299 /* d0 == 0, d2 == 0, d4 == 0, d6 == 0 */
300 tmp10 = tmp13 = tmp11 = tmp12 = 0;
307 /* Odd part per figure 8; the matrix is unitary and hence its
308 * transpose is its inverse. i0..i3 are y7,y5,y3,y1 respectively.
311 if(d7)
313 if(d5)
315 if(d3)
317 if(d1)
319 /* d1 != 0, d3 != 0, d5 != 0, d7 != 0 */
320 z1 = d7 + d1;
321 z2 = d5 + d3;
322 z3 = d7 + d3;
323 z4 = d5 + d1;
324 z5 = MULTIPLY(z3 + z4, FIX(1.175875602));
326 tmp0 = MULTIPLY(d7, FIX(0.298631336));
327 tmp1 = MULTIPLY(d5, FIX(2.053119869));
328 tmp2 = MULTIPLY(d3, FIX(3.072711026));
329 tmp3 = MULTIPLY(d1, FIX(1.501321110));
330 z1 = MULTIPLY(z1, -FIX(0.899976223));
331 z2 = MULTIPLY(z2, -FIX(2.562915447));
332 z3 = MULTIPLY(z3, -FIX(1.961570560));
333 z4 = MULTIPLY(z4, -FIX(0.390180644));
335 z3 += z5;
336 z4 += z5;
338 tmp0 += z1 + z3;
339 tmp1 += z2 + z4;
340 tmp2 += z2 + z3;
341 tmp3 += z1 + z4;
343 else
345 /* d1 == 0, d3 != 0, d5 != 0, d7 != 0 */
346 z1 = d7;
347 z2 = d5 + d3;
348 z3 = d7 + d3;
349 z5 = MULTIPLY(z3 + d5, FIX(1.175875602));
351 tmp0 = MULTIPLY(d7, FIX(0.298631336));
352 tmp1 = MULTIPLY(d5, FIX(2.053119869));
353 tmp2 = MULTIPLY(d3, FIX(3.072711026));
354 z1 = MULTIPLY(d7, -FIX(0.899976223));
355 z2 = MULTIPLY(z2, -FIX(2.562915447));
356 z3 = MULTIPLY(z3, -FIX(1.961570560));
357 z4 = MULTIPLY(d5, -FIX(0.390180644));
359 z3 += z5;
360 z4 += z5;
362 tmp0 += z1 + z3;
363 tmp1 += z2 + z4;
364 tmp2 += z2 + z3;
365 tmp3 = z1 + z4;
368 else
370 if(d1)
372 /* d1 != 0, d3 == 0, d5 != 0, d7 != 0 */
373 z1 = d7 + d1;
374 z2 = d5;
375 z3 = d7;
376 z4 = d5 + d1;
377 z5 = MULTIPLY(z3 + z4, FIX(1.175875602));
379 tmp0 = MULTIPLY(d7, FIX(0.298631336));
380 tmp1 = MULTIPLY(d5, FIX(2.053119869));
381 tmp3 = MULTIPLY(d1, FIX(1.501321110));
382 z1 = MULTIPLY(z1, -FIX(0.899976223));
383 z2 = MULTIPLY(d5, -FIX(2.562915447));
384 z3 = MULTIPLY(d7, -FIX(1.961570560));
385 z4 = MULTIPLY(z4, -FIX(0.390180644));
387 z3 += z5;
388 z4 += z5;
390 tmp0 += z1 + z3;
391 tmp1 += z2 + z4;
392 tmp2 = z2 + z3;
393 tmp3 += z1 + z4;
395 else
397 /* d1 == 0, d3 == 0, d5 != 0, d7 != 0 */
398 tmp0 = MULTIPLY(d7, -FIX(0.601344887));
399 z1 = MULTIPLY(d7, -FIX(0.899976223));
400 z3 = MULTIPLY(d7, -FIX(1.961570560));
401 tmp1 = MULTIPLY(d5, -FIX(0.509795578));
402 z2 = MULTIPLY(d5, -FIX(2.562915447));
403 z4 = MULTIPLY(d5, -FIX(0.390180644));
404 z5 = MULTIPLY(d5 + d7, FIX(1.175875602));
406 z3 += z5;
407 z4 += z5;
409 tmp0 += z3;
410 tmp1 += z4;
411 tmp2 = z2 + z3;
412 tmp3 = z1 + z4;
416 else
418 if(d3)
420 if(d1)
422 /* d1 != 0, d3 != 0, d5 == 0, d7 != 0 */
423 z1 = d7 + d1;
424 z3 = d7 + d3;
425 z5 = MULTIPLY(z3 + d1, FIX(1.175875602));
427 tmp0 = MULTIPLY(d7, FIX(0.298631336));
428 tmp2 = MULTIPLY(d3, FIX(3.072711026));
429 tmp3 = MULTIPLY(d1, FIX(1.501321110));
430 z1 = MULTIPLY(z1, -FIX(0.899976223));
431 z2 = MULTIPLY(d3, -FIX(2.562915447));
432 z3 = MULTIPLY(z3, -FIX(1.961570560));
433 z4 = MULTIPLY(d1, -FIX(0.390180644));
435 z3 += z5;
436 z4 += z5;
438 tmp0 += z1 + z3;
439 tmp1 = z2 + z4;
440 tmp2 += z2 + z3;
441 tmp3 += z1 + z4;
443 else
445 /* d1 == 0, d3 != 0, d5 == 0, d7 != 0 */
446 z3 = d7 + d3;
448 tmp0 = MULTIPLY(d7, -FIX(0.601344887));
449 z1 = MULTIPLY(d7, -FIX(0.899976223));
450 tmp2 = MULTIPLY(d3, FIX(0.509795579));
451 z2 = MULTIPLY(d3, -FIX(2.562915447));
452 z5 = MULTIPLY(z3, FIX(1.175875602));
453 z3 = MULTIPLY(z3, -FIX(0.785694958));
455 tmp0 += z3;
456 tmp1 = z2 + z5;
457 tmp2 += z3;
458 tmp3 = z1 + z5;
461 else
463 if(d1)
465 /* d1 != 0, d3 == 0, d5 == 0, d7 != 0 */
466 z1 = d7 + d1;
467 z5 = MULTIPLY(z1, FIX(1.175875602));
469 z1 = MULTIPLY(z1, FIX(0.275899379));
470 z3 = MULTIPLY(d7, -FIX(1.961570560));
471 tmp0 = MULTIPLY(d7, -FIX(1.662939224));
472 z4 = MULTIPLY(d1, -FIX(0.390180644));
473 tmp3 = MULTIPLY(d1, FIX(1.111140466));
475 tmp0 += z1;
476 tmp1 = z4 + z5;
477 tmp2 = z3 + z5;
478 tmp3 += z1;
480 else
482 /* d1 == 0, d3 == 0, d5 == 0, d7 != 0 */
483 tmp0 = MULTIPLY(d7, -FIX(1.387039845));
484 tmp1 = MULTIPLY(d7, FIX(1.175875602));
485 tmp2 = MULTIPLY(d7, -FIX(0.785694958));
486 tmp3 = MULTIPLY(d7, FIX(0.275899379));
491 else
493 if(d5)
495 if(d3)
497 if(d1)
499 /* d1 != 0, d3 != 0, d5 != 0, d7 == 0 */
500 z2 = d5 + d3;
501 z4 = d5 + d1;
502 z5 = MULTIPLY(d3 + z4, FIX(1.175875602));
504 tmp1 = MULTIPLY(d5, FIX(2.053119869));
505 tmp2 = MULTIPLY(d3, FIX(3.072711026));
506 tmp3 = MULTIPLY(d1, FIX(1.501321110));
507 z1 = MULTIPLY(d1, -FIX(0.899976223));
508 z2 = MULTIPLY(z2, -FIX(2.562915447));
509 z3 = MULTIPLY(d3, -FIX(1.961570560));
510 z4 = MULTIPLY(z4, -FIX(0.390180644));
512 z3 += z5;
513 z4 += z5;
515 tmp0 = z1 + z3;
516 tmp1 += z2 + z4;
517 tmp2 += z2 + z3;
518 tmp3 += z1 + z4;
520 else
522 /* d1 == 0, d3 != 0, d5 != 0, d7 == 0 */
523 z2 = d5 + d3;
525 z5 = MULTIPLY(z2, FIX(1.175875602));
526 tmp1 = MULTIPLY(d5, FIX(1.662939225));
527 z4 = MULTIPLY(d5, -FIX(0.390180644));
528 z2 = MULTIPLY(z2, -FIX(1.387039845));
529 tmp2 = MULTIPLY(d3, FIX(1.111140466));
530 z3 = MULTIPLY(d3, -FIX(1.961570560));
532 tmp0 = z3 + z5;
533 tmp1 += z2;
534 tmp2 += z2;
535 tmp3 = z4 + z5;
538 else
540 if(d1)
542 /* d1 != 0, d3 == 0, d5 != 0, d7 == 0 */
543 z4 = d5 + d1;
545 z5 = MULTIPLY(z4, FIX(1.175875602));
546 z1 = MULTIPLY(d1, -FIX(0.899976223));
547 tmp3 = MULTIPLY(d1, FIX(0.601344887));
548 tmp1 = MULTIPLY(d5, -FIX(0.509795578));
549 z2 = MULTIPLY(d5, -FIX(2.562915447));
550 z4 = MULTIPLY(z4, FIX(0.785694958));
552 tmp0 = z1 + z5;
553 tmp1 += z4;
554 tmp2 = z2 + z5;
555 tmp3 += z4;
557 else
559 /* d1 == 0, d3 == 0, d5 != 0, d7 == 0 */
560 tmp0 = MULTIPLY(d5, FIX(1.175875602));
561 tmp1 = MULTIPLY(d5, FIX(0.275899380));
562 tmp2 = MULTIPLY(d5, -FIX(1.387039845));
563 tmp3 = MULTIPLY(d5, FIX(0.785694958));
567 else
569 if(d3)
571 if(d1)
573 /* d1 != 0, d3 != 0, d5 == 0, d7 == 0 */
574 z5 = d1 + d3;
575 tmp3 = MULTIPLY(d1, FIX(0.211164243));
576 tmp2 = MULTIPLY(d3, -FIX(1.451774981));
577 z1 = MULTIPLY(d1, FIX(1.061594337));
578 z2 = MULTIPLY(d3, -FIX(2.172734803));
579 z4 = MULTIPLY(z5, FIX(0.785694958));
580 z5 = MULTIPLY(z5, FIX(1.175875602));
582 tmp0 = z1 - z4;
583 tmp1 = z2 + z4;
584 tmp2 += z5;
585 tmp3 += z5;
587 else
589 /* d1 == 0, d3 != 0, d5 == 0, d7 == 0 */
590 tmp0 = MULTIPLY(d3, -FIX(0.785694958));
591 tmp1 = MULTIPLY(d3, -FIX(1.387039845));
592 tmp2 = MULTIPLY(d3, -FIX(0.275899379));
593 tmp3 = MULTIPLY(d3, FIX(1.175875602));
596 else
598 if(d1)
600 /* d1 != 0, d3 == 0, d5 == 0, d7 == 0 */
601 tmp0 = MULTIPLY(d1, FIX(0.275899379));
602 tmp1 = MULTIPLY(d1, FIX(0.785694958));
603 tmp2 = MULTIPLY(d1, FIX(1.175875602));
604 tmp3 = MULTIPLY(d1, FIX(1.387039845));
606 else
608 /* d1 == 0, d3 == 0, d5 == 0, d7 == 0 */
609 tmp0 = tmp1 = tmp2 = tmp3 = 0;
615 /* Final output stage: inputs are tmp10..tmp13, tmp0..tmp3 */
617 dataptr[0] = (int16_t) DESCALE(tmp10 + tmp3, CONST_BITS - PASS1_BITS);
618 dataptr[7] = (int16_t) DESCALE(tmp10 - tmp3, CONST_BITS - PASS1_BITS);
619 dataptr[1] = (int16_t) DESCALE(tmp11 + tmp2, CONST_BITS - PASS1_BITS);
620 dataptr[6] = (int16_t) DESCALE(tmp11 - tmp2, CONST_BITS - PASS1_BITS);
621 dataptr[2] = (int16_t) DESCALE(tmp12 + tmp1, CONST_BITS - PASS1_BITS);
622 dataptr[5] = (int16_t) DESCALE(tmp12 - tmp1, CONST_BITS - PASS1_BITS);
623 dataptr[3] = (int16_t) DESCALE(tmp13 + tmp0, CONST_BITS - PASS1_BITS);
624 dataptr[4] = (int16_t) DESCALE(tmp13 - tmp0, CONST_BITS - PASS1_BITS);
626 dataptr += DCTSIZE1; /* advance pointer to next row */
629 /* Pass 2: process columns. */
630 /* Note that we must descale the results by a factor of 8 == 2**3, */
631 /* and also undo the PASS1_BITS scaling. */
633 dataptr = data;
634 for(rowctr = DCTSIZE1 - 1; rowctr >= 0; rowctr--)
636 /* Columns of zeroes can be exploited in the same way as we did with rows.
637 * However, the row calculation has created many nonzero AC terms, so the
638 * simplification applies less often (typically 5% to 10% of the time).
639 * On machines with very fast multiplication, it's possible that the
640 * test takes more time than it's worth. In that case this section
641 * may be commented out.
644 d0 = dataptr[DCTSIZE1 * 0];
645 d1 = dataptr[DCTSIZE1 * 1];
646 d2 = dataptr[DCTSIZE1 * 2];
647 d3 = dataptr[DCTSIZE1 * 3];
648 d4 = dataptr[DCTSIZE1 * 4];
649 d5 = dataptr[DCTSIZE1 * 5];
650 d6 = dataptr[DCTSIZE1 * 6];
651 d7 = dataptr[DCTSIZE1 * 7];
653 /* Even part: reverse the even part of the forward DCT. */
654 /* The rotator is sqrt(2)*c(-6). */
655 if(d6)
657 if(d4)
659 if(d2)
661 if(d0)
663 /* d0 != 0, d2 != 0, d4 != 0, d6 != 0 */
664 z1 = MULTIPLY(d2 + d6, FIX(0.541196100));
665 tmp2 = z1 + MULTIPLY(d6, -FIX(1.847759065));
666 tmp3 = z1 + MULTIPLY(d2, FIX(0.765366865));
668 tmp0 = (d0 + d4) << CONST_BITS;
669 tmp1 = (d0 - d4) << CONST_BITS;
671 tmp10 = tmp0 + tmp3;
672 tmp13 = tmp0 - tmp3;
673 tmp11 = tmp1 + tmp2;
674 tmp12 = tmp1 - tmp2;
675 } else {
676 /* d0 == 0, d2 != 0, d4 != 0, d6 != 0 */
677 z1 = MULTIPLY(d2 + d6, FIX(0.541196100));
678 tmp2 = z1 + MULTIPLY(d6, -FIX(1.847759065));
679 tmp3 = z1 + MULTIPLY(d2, FIX(0.765366865));
681 tmp0 = d4 << CONST_BITS;
683 tmp10 = tmp0 + tmp3;
684 tmp13 = tmp0 - tmp3;
685 tmp11 = tmp2 - tmp0;
686 tmp12 = -(tmp0 + tmp2);
689 else
691 if(d0)
693 /* d0 != 0, d2 == 0, d4 != 0, d6 != 0 */
694 tmp2 = MULTIPLY(d6, -FIX(1.306562965));
695 tmp3 = MULTIPLY(d6, FIX(0.541196100));
697 tmp0 = (d0 + d4) << CONST_BITS;
698 tmp1 = (d0 - d4) << CONST_BITS;
700 tmp10 = tmp0 + tmp3;
701 tmp13 = tmp0 - tmp3;
702 tmp11 = tmp1 + tmp2;
703 tmp12 = tmp1 - tmp2;
705 else
707 /* d0 == 0, d2 == 0, d4 != 0, d6 != 0 */
708 tmp2 = MULTIPLY(d6, -FIX(1.306562965));
709 tmp3 = MULTIPLY(d6, FIX(0.541196100));
711 tmp0 = d4 << CONST_BITS;
713 tmp10 = tmp0 + tmp3;
714 tmp13 = tmp0 - tmp3;
715 tmp11 = tmp2 - tmp0;
716 tmp12 = -(tmp0 + tmp2);
720 else
722 if(d2)
724 if(d0)
726 /* d0 != 0, d2 != 0, d4 == 0, d6 != 0 */
727 z1 = MULTIPLY(d2 + d6, FIX(0.541196100));
728 tmp2 = z1 + MULTIPLY(d6, -FIX(1.847759065));
729 tmp3 = z1 + MULTIPLY(d2, FIX(0.765366865));
731 tmp0 = d0 << CONST_BITS;
733 tmp10 = tmp0 + tmp3;
734 tmp13 = tmp0 - tmp3;
735 tmp11 = tmp0 + tmp2;
736 tmp12 = tmp0 - tmp2;
738 else
740 /* d0 == 0, d2 != 0, d4 == 0, d6 != 0 */
741 z1 = MULTIPLY(d2 + d6, FIX(0.541196100));
742 tmp2 = z1 + MULTIPLY(d6, -FIX(1.847759065));
743 tmp3 = z1 + MULTIPLY(d2, FIX(0.765366865));
745 tmp10 = tmp3;
746 tmp13 = -tmp3;
747 tmp11 = tmp2;
748 tmp12 = -tmp2;
751 else
753 if(d0)
755 /* d0 != 0, d2 == 0, d4 == 0, d6 != 0 */
756 tmp2 = MULTIPLY(d6, -FIX(1.306562965));
757 tmp3 = MULTIPLY(d6, FIX(0.541196100));
759 tmp0 = d0 << CONST_BITS;
761 tmp10 = tmp0 + tmp3;
762 tmp13 = tmp0 - tmp3;
763 tmp11 = tmp0 + tmp2;
764 tmp12 = tmp0 - tmp2;
766 else
768 /* d0 == 0, d2 == 0, d4 == 0, d6 != 0 */
769 tmp2 = MULTIPLY(d6, -FIX(1.306562965));
770 tmp3 = MULTIPLY(d6, FIX(0.541196100));
772 tmp10 = tmp3;
773 tmp13 = -tmp3;
774 tmp11 = tmp2;
775 tmp12 = -tmp2;
780 else
782 if(d4)
784 if(d2)
786 if(d0)
788 /* d0 != 0, d2 != 0, d4 != 0, d6 == 0 */
789 tmp2 = MULTIPLY(d2, FIX(0.541196100));
790 tmp3 = MULTIPLY(d2, FIX(1.306562965));
792 tmp0 = (d0 + d4) << CONST_BITS;
793 tmp1 = (d0 - d4) << CONST_BITS;
795 tmp10 = tmp0 + tmp3;
796 tmp13 = tmp0 - tmp3;
797 tmp11 = tmp1 + tmp2;
798 tmp12 = tmp1 - tmp2;
800 else
802 /* d0 == 0, d2 != 0, d4 != 0, d6 == 0 */
803 tmp2 = MULTIPLY(d2, FIX(0.541196100));
804 tmp3 = MULTIPLY(d2, FIX(1.306562965));
806 tmp0 = d4 << CONST_BITS;
808 tmp10 = tmp0 + tmp3;
809 tmp13 = tmp0 - tmp3;
810 tmp11 = tmp2 - tmp0;
811 tmp12 = -(tmp0 + tmp2);
814 else
816 if(d0)
818 /* d0 != 0, d2 == 0, d4 != 0, d6 == 0 */
819 tmp10 = tmp13 = (d0 + d4) << CONST_BITS;
820 tmp11 = tmp12 = (d0 - d4) << CONST_BITS;
822 else
824 /* d0 == 0, d2 == 0, d4 != 0, d6 == 0 */
825 tmp10 = tmp13 = d4 << CONST_BITS;
826 tmp11 = tmp12 = -tmp10;
830 else
832 if(d2)
834 if(d0)
836 /* d0 != 0, d2 != 0, d4 == 0, d6 == 0 */
837 tmp2 = MULTIPLY(d2, FIX(0.541196100));
838 tmp3 = MULTIPLY(d2, FIX(1.306562965));
840 tmp0 = d0 << CONST_BITS;
842 tmp10 = tmp0 + tmp3;
843 tmp13 = tmp0 - tmp3;
844 tmp11 = tmp0 + tmp2;
845 tmp12 = tmp0 - tmp2;
847 else
849 /* d0 == 0, d2 != 0, d4 == 0, d6 == 0 */
850 tmp2 = MULTIPLY(d2, FIX(0.541196100));
851 tmp3 = MULTIPLY(d2, FIX(1.306562965));
853 tmp10 = tmp3;
854 tmp13 = -tmp3;
855 tmp11 = tmp2;
856 tmp12 = -tmp2;
859 else
861 if(d0)
863 /* d0 != 0, d2 == 0, d4 == 0, d6 == 0 */
864 tmp10 = tmp13 = tmp11 = tmp12 = d0 << CONST_BITS;
866 else
868 /* d0 == 0, d2 == 0, d4 == 0, d6 == 0 */
869 tmp10 = tmp13 = tmp11 = tmp12 = 0;
875 /* Odd part per figure 8; the matrix is unitary and hence its
876 * transpose is its inverse. i0..i3 are y7,y5,y3,y1 respectively.
878 if(d7)
880 if(d5)
882 if(d3)
884 if(d1)
886 /* d1 != 0, d3 != 0, d5 != 0, d7 != 0 */
887 z1 = d7 + d1;
888 z2 = d5 + d3;
889 z3 = d7 + d3;
890 z4 = d5 + d1;
891 z5 = MULTIPLY(z3 + z4, FIX(1.175875602));
893 tmp0 = MULTIPLY(d7, FIX(0.298631336));
894 tmp1 = MULTIPLY(d5, FIX(2.053119869));
895 tmp2 = MULTIPLY(d3, FIX(3.072711026));
896 tmp3 = MULTIPLY(d1, FIX(1.501321110));
897 z1 = MULTIPLY(z1, -FIX(0.899976223));
898 z2 = MULTIPLY(z2, -FIX(2.562915447));
899 z3 = MULTIPLY(z3, -FIX(1.961570560));
900 z4 = MULTIPLY(z4, -FIX(0.390180644));
902 z3 += z5;
903 z4 += z5;
905 tmp0 += z1 + z3;
906 tmp1 += z2 + z4;
907 tmp2 += z2 + z3;
908 tmp3 += z1 + z4;
910 else
912 /* d1 == 0, d3 != 0, d5 != 0, d7 != 0 */
913 z1 = d7;
914 z2 = d5 + d3;
915 z3 = d7 + d3;
916 z5 = MULTIPLY(z3 + d5, FIX(1.175875602));
918 tmp0 = MULTIPLY(d7, FIX(0.298631336));
919 tmp1 = MULTIPLY(d5, FIX(2.053119869));
920 tmp2 = MULTIPLY(d3, FIX(3.072711026));
921 z1 = MULTIPLY(d7, -FIX(0.899976223));
922 z2 = MULTIPLY(z2, -FIX(2.562915447));
923 z3 = MULTIPLY(z3, -FIX(1.961570560));
924 z4 = MULTIPLY(d5, -FIX(0.390180644));
926 z3 += z5;
927 z4 += z5;
929 tmp0 += z1 + z3;
930 tmp1 += z2 + z4;
931 tmp2 += z2 + z3;
932 tmp3 = z1 + z4;
935 else
937 if(d1)
939 /* d1 != 0, d3 == 0, d5 != 0, d7 != 0 */
940 z1 = d7 + d1;
941 z2 = d5;
942 z3 = d7;
943 z4 = d5 + d1;
944 z5 = MULTIPLY(z3 + z4, FIX(1.175875602));
946 tmp0 = MULTIPLY(d7, FIX(0.298631336));
947 tmp1 = MULTIPLY(d5, FIX(2.053119869));
948 tmp3 = MULTIPLY(d1, FIX(1.501321110));
949 z1 = MULTIPLY(z1, -FIX(0.899976223));
950 z2 = MULTIPLY(d5, -FIX(2.562915447));
951 z3 = MULTIPLY(d7, -FIX(1.961570560));
952 z4 = MULTIPLY(z4, -FIX(0.390180644));
954 z3 += z5;
955 z4 += z5;
957 tmp0 += z1 + z3;
958 tmp1 += z2 + z4;
959 tmp2 = z2 + z3;
960 tmp3 += z1 + z4;
962 else
964 /* d1 == 0, d3 == 0, d5 != 0, d7 != 0 */
965 tmp0 = MULTIPLY(d7, -FIX(0.601344887));
966 z1 = MULTIPLY(d7, -FIX(0.899976223));
967 z3 = MULTIPLY(d7, -FIX(1.961570560));
968 tmp1 = MULTIPLY(d5, -FIX(0.509795578));
969 z2 = MULTIPLY(d5, -FIX(2.562915447));
970 z4 = MULTIPLY(d5, -FIX(0.390180644));
971 z5 = MULTIPLY(d5 + d7, FIX(1.175875602));
973 z3 += z5;
974 z4 += z5;
976 tmp0 += z3;
977 tmp1 += z4;
978 tmp2 = z2 + z3;
979 tmp3 = z1 + z4;
983 else
985 if(d3)
987 if(d1)
989 /* d1 != 0, d3 != 0, d5 == 0, d7 != 0 */
990 z1 = d7 + d1;
991 z3 = d7 + d3;
992 z5 = MULTIPLY(z3 + d1, FIX(1.175875602));
994 tmp0 = MULTIPLY(d7, FIX(0.298631336));
995 tmp2 = MULTIPLY(d3, FIX(3.072711026));
996 tmp3 = MULTIPLY(d1, FIX(1.501321110));
997 z1 = MULTIPLY(z1, -FIX(0.899976223));
998 z2 = MULTIPLY(d3, -FIX(2.562915447));
999 z3 = MULTIPLY(z3, -FIX(1.961570560));
1000 z4 = MULTIPLY(d1, -FIX(0.390180644));
1002 z3 += z5;
1003 z4 += z5;
1005 tmp0 += z1 + z3;
1006 tmp1 = z2 + z4;
1007 tmp2 += z2 + z3;
1008 tmp3 += z1 + z4;
1010 else
1012 /* d1 == 0, d3 != 0, d5 == 0, d7 != 0 */
1013 z3 = d7 + d3;
1015 tmp0 = MULTIPLY(d7, -FIX(0.601344887));
1016 z1 = MULTIPLY(d7, -FIX(0.899976223));
1017 tmp2 = MULTIPLY(d3, FIX(0.509795579));
1018 z2 = MULTIPLY(d3, -FIX(2.562915447));
1019 z5 = MULTIPLY(z3, FIX(1.175875602));
1020 z3 = MULTIPLY(z3, -FIX(0.785694958));
1022 tmp0 += z3;
1023 tmp1 = z2 + z5;
1024 tmp2 += z3;
1025 tmp3 = z1 + z5;
1028 else
1030 if(d1)
1032 /* d1 != 0, d3 == 0, d5 == 0, d7 != 0 */
1033 z1 = d7 + d1;
1034 z5 = MULTIPLY(z1, FIX(1.175875602));
1036 z1 = MULTIPLY(z1, FIX(0.275899379));
1037 z3 = MULTIPLY(d7, -FIX(1.961570560));
1038 tmp0 = MULTIPLY(d7, -FIX(1.662939224));
1039 z4 = MULTIPLY(d1, -FIX(0.390180644));
1040 tmp3 = MULTIPLY(d1, FIX(1.111140466));
1042 tmp0 += z1;
1043 tmp1 = z4 + z5;
1044 tmp2 = z3 + z5;
1045 tmp3 += z1;
1047 else
1049 /* d1 == 0, d3 == 0, d5 == 0, d7 != 0 */
1050 tmp0 = MULTIPLY(d7, -FIX(1.387039845));
1051 tmp1 = MULTIPLY(d7, FIX(1.175875602));
1052 tmp2 = MULTIPLY(d7, -FIX(0.785694958));
1053 tmp3 = MULTIPLY(d7, FIX(0.275899379));
1058 else
1060 if(d5)
1062 if(d3)
1064 if(d1)
1066 /* d1 != 0, d3 != 0, d5 != 0, d7 == 0 */
1067 z2 = d5 + d3;
1068 z4 = d5 + d1;
1069 z5 = MULTIPLY(d3 + z4, FIX(1.175875602));
1071 tmp1 = MULTIPLY(d5, FIX(2.053119869));
1072 tmp2 = MULTIPLY(d3, FIX(3.072711026));
1073 tmp3 = MULTIPLY(d1, FIX(1.501321110));
1074 z1 = MULTIPLY(d1, -FIX(0.899976223));
1075 z2 = MULTIPLY(z2, -FIX(2.562915447));
1076 z3 = MULTIPLY(d3, -FIX(1.961570560));
1077 z4 = MULTIPLY(z4, -FIX(0.390180644));
1079 z3 += z5;
1080 z4 += z5;
1082 tmp0 = z1 + z3;
1083 tmp1 += z2 + z4;
1084 tmp2 += z2 + z3;
1085 tmp3 += z1 + z4;
1087 else
1089 /* d1 == 0, d3 != 0, d5 != 0, d7 == 0 */
1090 z2 = d5 + d3;
1092 z5 = MULTIPLY(z2, FIX(1.175875602));
1093 tmp1 = MULTIPLY(d5, FIX(1.662939225));
1094 z4 = MULTIPLY(d5, -FIX(0.390180644));
1095 z2 = MULTIPLY(z2, -FIX(1.387039845));
1096 tmp2 = MULTIPLY(d3, FIX(1.111140466));
1097 z3 = MULTIPLY(d3, -FIX(1.961570560));
1099 tmp0 = z3 + z5;
1100 tmp1 += z2;
1101 tmp2 += z2;
1102 tmp3 = z4 + z5;
1105 else
1107 if(d1)
1109 /* d1 != 0, d3 == 0, d5 != 0, d7 == 0 */
1110 z4 = d5 + d1;
1112 z5 = MULTIPLY(z4, FIX(1.175875602));
1113 z1 = MULTIPLY(d1, -FIX(0.899976223));
1114 tmp3 = MULTIPLY(d1, FIX(0.601344887));
1115 tmp1 = MULTIPLY(d5, -FIX(0.509795578));
1116 z2 = MULTIPLY(d5, -FIX(2.562915447));
1117 z4 = MULTIPLY(z4, FIX(0.785694958));
1119 tmp0 = z1 + z5;
1120 tmp1 += z4;
1121 tmp2 = z2 + z5;
1122 tmp3 += z4;
1124 else
1126 /* d1 == 0, d3 == 0, d5 != 0, d7 == 0 */
1127 tmp0 = MULTIPLY(d5, FIX(1.175875602));
1128 tmp1 = MULTIPLY(d5, FIX(0.275899380));
1129 tmp2 = MULTIPLY(d5, -FIX(1.387039845));
1130 tmp3 = MULTIPLY(d5, FIX(0.785694958));
1134 else
1136 if(d3)
1138 if(d1)
1140 /* d1 != 0, d3 != 0, d5 == 0, d7 == 0 */
1141 z5 = d1 + d3;
1142 tmp3 = MULTIPLY(d1, FIX(0.211164243));
1143 tmp2 = MULTIPLY(d3, -FIX(1.451774981));
1144 z1 = MULTIPLY(d1, FIX(1.061594337));
1145 z2 = MULTIPLY(d3, -FIX(2.172734803));
1146 z4 = MULTIPLY(z5, FIX(0.785694958));
1147 z5 = MULTIPLY(z5, FIX(1.175875602));
1149 tmp0 = z1 - z4;
1150 tmp1 = z2 + z4;
1151 tmp2 += z5;
1152 tmp3 += z5;
1154 else
1156 /* d1 == 0, d3 != 0, d5 == 0, d7 == 0 */
1157 tmp0 = MULTIPLY(d3, -FIX(0.785694958));
1158 tmp1 = MULTIPLY(d3, -FIX(1.387039845));
1159 tmp2 = MULTIPLY(d3, -FIX(0.275899379));
1160 tmp3 = MULTIPLY(d3, FIX(1.175875602));
1163 else
1165 if(d1)
1167 /* d1 != 0, d3 == 0, d5 == 0, d7 == 0 */
1168 tmp0 = MULTIPLY(d1, FIX(0.275899379));
1169 tmp1 = MULTIPLY(d1, FIX(0.785694958));
1170 tmp2 = MULTIPLY(d1, FIX(1.175875602));
1171 tmp3 = MULTIPLY(d1, FIX(1.387039845));
1173 else
1175 /* d1 == 0, d3 == 0, d5 == 0, d7 == 0 */
1176 tmp0 = tmp1 = tmp2 = tmp3 = 0;
1182 /* Final output stage: inputs are tmp10..tmp13, tmp0..tmp3 */
1185 outptr[DCTSIZE1 * 0] = rnglimit[(int)DESCALE(tmp10 + tmp3, CONST_BITS + PASS1_BITS + 3) & RANGE_MASK];
1186 outptr[DCTSIZE1 * 7] = rnglimit[(int)DESCALE(tmp10 - tmp3, CONST_BITS + PASS1_BITS + 3) & RANGE_MASK];
1187 outptr[DCTSIZE1 * 1] = rnglimit[(int)DESCALE(tmp11 + tmp2, CONST_BITS + PASS1_BITS + 3) & RANGE_MASK];
1188 outptr[DCTSIZE1 * 6] = rnglimit[(int)DESCALE(tmp11 - tmp2, CONST_BITS + PASS1_BITS + 3) & RANGE_MASK];
1189 outptr[DCTSIZE1 * 2] = rnglimit[(int)DESCALE(tmp12 + tmp1, CONST_BITS + PASS1_BITS + 3) & RANGE_MASK];
1190 outptr[DCTSIZE1 * 5] = rnglimit[(int)DESCALE(tmp12 - tmp1, CONST_BITS + PASS1_BITS + 3) & RANGE_MASK];
1191 outptr[DCTSIZE1 * 3] = rnglimit[(int)DESCALE(tmp13 + tmp0, CONST_BITS + PASS1_BITS + 3) & RANGE_MASK];
1192 outptr[DCTSIZE1 * 4] = rnglimit[(int)DESCALE(tmp13 - tmp0, CONST_BITS + PASS1_BITS + 3) & RANGE_MASK];
1194 dataptr++; /* advance pointer to next column */
1195 outptr++;