4 * Copyright (C) 1991-1998, Thomas G. Lane.
5 * Modification developed 2002-2018 by Guido Vollbeding.
6 * This file is part of the Independent JPEG Group's software.
7 * For conditions of distribution and use, see the accompanying README file.
9 * This file contains a slow-but-accurate integer implementation of the
10 * inverse DCT (Discrete Cosine Transform). In the IJG code, this routine
11 * must also perform dequantization of the input coefficients.
13 * A 2-D IDCT can be done by 1-D IDCT on each column followed by 1-D IDCT
14 * on each row (or vice versa, but it's more convenient to emit a row at
15 * a time). Direct algorithms are also available, but they are much more
16 * complex and seem not to be any faster when reduced to code.
18 * This implementation is based on an algorithm described in
19 * C. Loeffler, A. Ligtenberg and G. Moschytz, "Practical Fast 1-D DCT
20 * Algorithms with 11 Multiplications", Proc. Int'l. Conf. on Acoustics,
21 * Speech, and Signal Processing 1989 (ICASSP '89), pp. 988-991.
22 * The primary algorithm described there uses 11 multiplies and 29 adds.
23 * We use their alternate method with 12 multiplies and 32 adds.
24 * The advantage of this method is that no data path contains more than one
25 * multiplication; this allows a very simple and accurate implementation in
26 * scaled fixed-point arithmetic, with a minimal number of shifts.
28 * We also provide IDCT routines with various output sample block sizes for
29 * direct resolution reduction or enlargement and for directly resolving the
30 * common 2x1 and 1x2 subsampling cases without additional resampling: NxN
31 * (N=1...16), 2NxN, and Nx2N (N=1...8) pixels for one 8x8 input DCT block.
33 * For N<8 we simply take the corresponding low-frequency coefficients of
34 * the 8x8 input DCT block and apply an NxN point IDCT on the sub-block
35 * to yield the downscaled outputs.
36 * This can be seen as direct low-pass downsampling from the DCT domain
37 * point of view rather than the usual spatial domain point of view,
38 * yielding significant computational savings and results at least
39 * as good as common bilinear (averaging) spatial downsampling.
41 * For N>8 we apply a partial NxN IDCT, taking the 8 input coefficients as
42 * the lower frequencies and assuming the higher frequencies to be zero.
43 * It turns out that the computational effort, relative to the output size,
44 * is similar to that of the 8x8 IDCT.
45 * Furthermore, the scaling and descaling is the same for all IDCT sizes.
47 * CAUTION: We rely on the FIX() macro except for the N=1,2,4,8 cases
48 * since there would be too many additional constants to pre-calculate.
51 #define JPEG_INTERNALS
54 #include "jdct.h" /* Private declarations for DCT subsystem */
56 #ifdef DCT_ISLOW_SUPPORTED
60 * This module is specialized to the case DCTSIZE = 8.
/* NOTE(review): the text below is intentionally not valid C; its own comment
 * calls it a deliberate syntax error, meant to stop the build when the block
 * size is not 8x8.  The preprocessor test that presumably encloses it is not
 * visible in this listing (the embedded line numbers skip around it), so
 * confirm against the complete file.
 */
64 Sorry
, this code only copes with
8x8 DCT blocks
. /* deliberate syntax err */
69 * The details of the scaling used here are as follows:
71 * Each 1-D IDCT step produces outputs which are a factor of sqrt(N)
72 * larger than the true IDCT outputs. The final outputs are therefore
73 * a factor of N larger than desired; since N=8 this can be cured by
74 * a simple right shift at the end of the algorithm. The advantage of
75 * this arrangement is that we save two multiplications per 1-D IDCT,
76 * because the y0 and y4 inputs need not be divided by sqrt(N).
78 * We have to do addition and subtraction of the integer inputs, which
79 * is no problem, and multiplication by fractional constants, which is
80 * a problem to do in integer arithmetic. We multiply all the constants
81 * by CONST_SCALE and convert them to integer constants (thus retaining
82 * CONST_BITS bits of precision in the constants). After doing a
83 * multiplication we have to divide the product by CONST_SCALE, with proper
84 * rounding, to produce the correct output. This division can be done
85 * cheaply as a right shift of CONST_BITS bits. We postpone shifting
86 * as long as possible so that partial sums can be added together with
87 * full fractional precision.
89 * The outputs of the first pass are scaled up by PASS1_BITS bits so that
90 * they are represented to better-than-integral precision. These outputs
91 * require BITS_IN_JSAMPLE + PASS1_BITS + 3 bits; this fits in a 16-bit word
92 * with the recommended scaling. (To scale up 12-bit sample data further, an
93 * intermediate INT32 array would be needed.)
95 * To avoid overflow of the 32-bit intermediate results in pass 2, we must
96 * have BITS_IN_JSAMPLE + CONST_BITS + PASS1_BITS <= 26. Error analysis
97 * shows that the values given below are the most effective.
/* Fixed-point parameters for every routine below.
 * CONST_BITS is the number of fractional bits kept in the multiplier
 * constants; PASS1_BITS is the extra scaling carried by pass-1 results.
 * NOTE(review): CONST_BITS is defined twice and PASS1_BITS once in this
 * listing; the 8-bit PASS1_BITS definition and the #else/#endif lines are
 * not visible here, so confirm against the complete file.
 */
100 #if BITS_IN_JSAMPLE == 8
101 #define CONST_BITS 13
104 #define CONST_BITS 13
105 #define PASS1_BITS 1 /* lose a little precision to avoid overflow */
108 /* Some C compilers fail to reduce "FIX(constant)" at compile time, thus
109 * causing a lot of useless floating-point operations at run time.
110 * To get around this we use the following pre-calculated constants.
111 * If you change CONST_BITS you may want to add appropriate values.
112 * (With a reasonable C compiler, you can just rely on the FIX() macro...)
/* Multiplier constants used by the 8-point and 4-point kernels.
 * The first twelve definitions are literal integers; each is consistent with
 * the value in its name scaled by 2**13 and rounded (for example
 * 0.541196100 * 8192 = 4433.48 -> 4433).  The second twelve define the same
 * names through FIX().
 * NOTE(review): both sets appear back to back in this listing; the
 * conditional that selects one of them (presumably a test on CONST_BITS) is
 * not visible here.
 */
116 #define FIX_0_298631336 ((INT32) 2446) /* FIX(0.298631336) */
117 #define FIX_0_390180644 ((INT32) 3196) /* FIX(0.390180644) */
118 #define FIX_0_541196100 ((INT32) 4433) /* FIX(0.541196100) */
119 #define FIX_0_765366865 ((INT32) 6270) /* FIX(0.765366865) */
120 #define FIX_0_899976223 ((INT32) 7373) /* FIX(0.899976223) */
121 #define FIX_1_175875602 ((INT32) 9633) /* FIX(1.175875602) */
122 #define FIX_1_501321110 ((INT32) 12299) /* FIX(1.501321110) */
123 #define FIX_1_847759065 ((INT32) 15137) /* FIX(1.847759065) */
124 #define FIX_1_961570560 ((INT32) 16069) /* FIX(1.961570560) */
125 #define FIX_2_053119869 ((INT32) 16819) /* FIX(2.053119869) */
126 #define FIX_2_562915447 ((INT32) 20995) /* FIX(2.562915447) */
127 #define FIX_3_072711026 ((INT32) 25172) /* FIX(3.072711026) */
129 #define FIX_0_298631336 FIX(0.298631336)
130 #define FIX_0_390180644 FIX(0.390180644)
131 #define FIX_0_541196100 FIX(0.541196100)
132 #define FIX_0_765366865 FIX(0.765366865)
133 #define FIX_0_899976223 FIX(0.899976223)
134 #define FIX_1_175875602 FIX(1.175875602)
135 #define FIX_1_501321110 FIX(1.501321110)
136 #define FIX_1_847759065 FIX(1.847759065)
137 #define FIX_1_961570560 FIX(1.961570560)
138 #define FIX_2_053119869 FIX(2.053119869)
139 #define FIX_2_562915447 FIX(2.562915447)
140 #define FIX_3_072711026 FIX(3.072711026)
144 /* Multiply an INT32 variable by an INT32 constant to yield an INT32 result.
145 * For 8-bit samples with the recommended scaling, all the variable
146 * and constant values involved are no more than 16 bits wide, so a
147 * 16x16->32 bit multiply can be used instead of a full 32x32 multiply.
148 * For 12-bit samples, a full 32-bit multiplication will be needed.
/* MULTIPLY(var,const): product of a variable and one of the scaled constants.
 * With 8-bit samples it forwards to MULTIPLY16C16; the other definition is a
 * plain ((var) * (const)).
 * NOTE(review): the #else/#endif separating the two definitions are not
 * visible in this listing.
 */
151 #if BITS_IN_JSAMPLE == 8
152 #define MULTIPLY(var,const) MULTIPLY16C16(var,const)
154 #define MULTIPLY(var,const) ((var) * (const))
158 /* Dequantize a coefficient by multiplying it by the multiplier-table
159 * entry; produce an int result. In this module, both inputs and result
160 * are 16 bits or less, so either int or short multiply will work.
/* DEQUANTIZE(coef,quantval): cast coef to ISLOW_MULT_TYPE and multiply it by
 * quantval.  Both arguments are parenthesized in the expansion.
 */
163 #define DEQUANTIZE(coef,quantval) (((ISLOW_MULT_TYPE) (coef)) * (quantval))
167 * Perform dequantization and inverse DCT on one block of coefficients.
169 * Optimized algorithm with 12 multiplications in the 1-D kernel.
170 * cK represents sqrt(2) * cos(K*pi/16).
/*
 * jpeg_idct_islow: dequantize one block of coefficients and inverse-transform
 * it in two passes, writing DCTSIZE output rows.
 *
 *   cinfo      - handed to IDCT_range_limit() to obtain the range_limit table.
 *   compptr    - its dct_table member is read as an array of ISLOW_MULT_TYPE
 *                multipliers.
 *   output_buf - array of output rows; row ctr is written starting at
 *                output_buf[ctr] + output_col.
 *
 * Pass 1 counts ctr down from DCTSIZE, one column per iteration.  Each
 * coefficient is multiplied by its table entry (DEQUANTIZE) and the eight
 * results are stored through wsptr with stride DCTSIZE after a right shift of
 * CONST_BITS-PASS1_BITS.  The term ONE << (CONST_BITS-PASS1_BITS-1) added to
 * z2 is half of that divisor (taking ONE to be 1).  When rows 1..7 of a
 * column are all zero, the eight outputs are simply the dequantized row-0
 * term shifted left by PASS1_BITS.
 *
 * Pass 2 runs ctr from 0 to DCTSIZE-1, one workspace row per iteration.
 * RANGE_CENTER << (PASS1_BITS+3) plus ONE << (PASS1_BITS+2) is folded into
 * the first term, and every result is shifted right by
 * CONST_BITS+PASS1_BITS+3 to index range_limit[].  The all-zero-row shortcut
 * is compiled only when NO_ZERO_ROW_TEST is not defined.
 *
 * NOTE(review): this listing has gaps (the embedded line numbers skip).  The
 * return type, the braces, any parameter between compptr and output_buf, the
 * declarations of inptr, wsptr, outptr, ctr and z1..z3, and every assignment
 * to tmp10..tmp13 are not visible here.  Confirm against the complete file
 * before changing any arithmetic.
 */
174 jpeg_idct_islow (j_decompress_ptr cinfo
, jpeg_component_info
* compptr
,
176 JSAMPARRAY output_buf
, JDIMENSION output_col
)
178 INT32 tmp0
, tmp1
, tmp2
, tmp3
;
179 INT32 tmp10
, tmp11
, tmp12
, tmp13
;
182 ISLOW_MULT_TYPE
* quantptr
;
185 JSAMPLE
*range_limit
= IDCT_range_limit(cinfo
);
187 int workspace
[DCTSIZE2
]; /* buffers data between passes */
190 /* Pass 1: process columns from input, store into work array.
191 * Note results are scaled up by sqrt(8) compared to a true IDCT;
192 * furthermore, we scale the results by 2**PASS1_BITS.
196 quantptr
= (ISLOW_MULT_TYPE
*) compptr
->dct_table
;
198 for (ctr
= DCTSIZE
; ctr
> 0; ctr
--) {
199 /* Due to quantization, we will usually find that many of the input
200 * coefficients are zero, especially the AC terms. We can exploit this
201 * by short-circuiting the IDCT calculation for any column in which all
202 * the AC terms are zero. In that case each output is equal to the
203 * DC coefficient (with scale factor as needed).
204 * With typical images and quantization tables, half or more of the
205 * column DCT calculations can be simplified this way.
208 if (inptr
[DCTSIZE
*1] == 0 && inptr
[DCTSIZE
*2] == 0 &&
209 inptr
[DCTSIZE
*3] == 0 && inptr
[DCTSIZE
*4] == 0 &&
210 inptr
[DCTSIZE
*5] == 0 && inptr
[DCTSIZE
*6] == 0 &&
211 inptr
[DCTSIZE
*7] == 0) {
212 /* AC terms all zero */
213 int dcval
= DEQUANTIZE(inptr
[DCTSIZE
*0], quantptr
[DCTSIZE
*0]) << PASS1_BITS
;
215 wsptr
[DCTSIZE
*0] = dcval
;
216 wsptr
[DCTSIZE
*1] = dcval
;
217 wsptr
[DCTSIZE
*2] = dcval
;
218 wsptr
[DCTSIZE
*3] = dcval
;
219 wsptr
[DCTSIZE
*4] = dcval
;
220 wsptr
[DCTSIZE
*5] = dcval
;
221 wsptr
[DCTSIZE
*6] = dcval
;
222 wsptr
[DCTSIZE
*7] = dcval
;
224 inptr
++; /* advance pointers to next column */
230 /* Even part: reverse the even part of the forward DCT.
231 * The rotator is c(-6).
234 z2
= DEQUANTIZE(inptr
[DCTSIZE
*0], quantptr
[DCTSIZE
*0]);
235 z3
= DEQUANTIZE(inptr
[DCTSIZE
*4], quantptr
[DCTSIZE
*4]);
238 /* Add fudge factor here for final descale. */
239 z2
+= ONE
<< (CONST_BITS
-PASS1_BITS
-1);
244 z2
= DEQUANTIZE(inptr
[DCTSIZE
*2], quantptr
[DCTSIZE
*2]);
245 z3
= DEQUANTIZE(inptr
[DCTSIZE
*6], quantptr
[DCTSIZE
*6]);
247 z1
= MULTIPLY(z2
+ z3
, FIX_0_541196100
); /* c6 */
248 tmp2
= z1
+ MULTIPLY(z2
, FIX_0_765366865
); /* c2-c6 */
249 tmp3
= z1
- MULTIPLY(z3
, FIX_1_847759065
); /* c2+c6 */
256 /* Odd part per figure 8; the matrix is unitary and hence its
257 * transpose is its inverse. i0..i3 are y7,y5,y3,y1 respectively.
260 tmp0
= DEQUANTIZE(inptr
[DCTSIZE
*7], quantptr
[DCTSIZE
*7]);
261 tmp1
= DEQUANTIZE(inptr
[DCTSIZE
*5], quantptr
[DCTSIZE
*5]);
262 tmp2
= DEQUANTIZE(inptr
[DCTSIZE
*3], quantptr
[DCTSIZE
*3]);
263 tmp3
= DEQUANTIZE(inptr
[DCTSIZE
*1], quantptr
[DCTSIZE
*1]);
268 z1
= MULTIPLY(z2
+ z3
, FIX_1_175875602
); /* c3 */
269 z2
= MULTIPLY(z2
, - FIX_1_961570560
); /* -c3-c5 */
270 z3
= MULTIPLY(z3
, - FIX_0_390180644
); /* -c3+c5 */
274 z1
= MULTIPLY(tmp0
+ tmp3
, - FIX_0_899976223
); /* -c3+c7 */
275 tmp0
= MULTIPLY(tmp0
, FIX_0_298631336
); /* -c1+c3+c5-c7 */
276 tmp3
= MULTIPLY(tmp3
, FIX_1_501321110
); /* c1+c3-c5-c7 */
280 z1
= MULTIPLY(tmp1
+ tmp2
, - FIX_2_562915447
); /* -c1-c3 */
281 tmp1
= MULTIPLY(tmp1
, FIX_2_053119869
); /* c1+c3-c5+c7 */
282 tmp2
= MULTIPLY(tmp2
, FIX_3_072711026
); /* c1+c3+c5-c7 */
286 /* Final output stage: inputs are tmp10..tmp13, tmp0..tmp3 */
288 wsptr
[DCTSIZE
*0] = (int) RIGHT_SHIFT(tmp10
+ tmp3
, CONST_BITS
-PASS1_BITS
);
289 wsptr
[DCTSIZE
*7] = (int) RIGHT_SHIFT(tmp10
- tmp3
, CONST_BITS
-PASS1_BITS
);
290 wsptr
[DCTSIZE
*1] = (int) RIGHT_SHIFT(tmp11
+ tmp2
, CONST_BITS
-PASS1_BITS
);
291 wsptr
[DCTSIZE
*6] = (int) RIGHT_SHIFT(tmp11
- tmp2
, CONST_BITS
-PASS1_BITS
);
292 wsptr
[DCTSIZE
*2] = (int) RIGHT_SHIFT(tmp12
+ tmp1
, CONST_BITS
-PASS1_BITS
);
293 wsptr
[DCTSIZE
*5] = (int) RIGHT_SHIFT(tmp12
- tmp1
, CONST_BITS
-PASS1_BITS
);
294 wsptr
[DCTSIZE
*3] = (int) RIGHT_SHIFT(tmp13
+ tmp0
, CONST_BITS
-PASS1_BITS
);
295 wsptr
[DCTSIZE
*4] = (int) RIGHT_SHIFT(tmp13
- tmp0
, CONST_BITS
-PASS1_BITS
);
297 inptr
++; /* advance pointers to next column */
302 /* Pass 2: process rows from work array, store into output array.
303 * Note that we must descale the results by a factor of 8 == 2**3,
304 * and also undo the PASS1_BITS scaling.
308 for (ctr
= 0; ctr
< DCTSIZE
; ctr
++) {
309 outptr
= output_buf
[ctr
] + output_col
;
311 /* Add range center and fudge factor for final descale and range-limit. */
312 z2
= (INT32
) wsptr
[0] +
313 ((((INT32
) RANGE_CENTER
) << (PASS1_BITS
+3)) +
314 (ONE
<< (PASS1_BITS
+2)));
316 /* Rows of zeroes can be exploited in the same way as we did with columns.
317 * However, the column calculation has created many nonzero AC terms, so
318 * the simplification applies less often (typically 5% to 10% of the time).
319 * On machines with very fast multiplication, it's possible that the
320 * test takes more time than it's worth. In that case this section
321 * may be commented out.
324 #ifndef NO_ZERO_ROW_TEST
325 if (wsptr
[1] == 0 && wsptr
[2] == 0 && wsptr
[3] == 0 && wsptr
[4] == 0 &&
326 wsptr
[5] == 0 && wsptr
[6] == 0 && wsptr
[7] == 0) {
327 /* AC terms all zero */
328 JSAMPLE dcval
= range_limit
[(int) RIGHT_SHIFT(z2
, PASS1_BITS
+3)
340 wsptr
+= DCTSIZE
; /* advance pointer to next row */
345 /* Even part: reverse the even part of the forward DCT.
346 * The rotator is c(-6).
349 z3
= (INT32
) wsptr
[4];
351 tmp0
= (z2
+ z3
) << CONST_BITS
;
352 tmp1
= (z2
- z3
) << CONST_BITS
;
354 z2
= (INT32
) wsptr
[2];
355 z3
= (INT32
) wsptr
[6];
357 z1
= MULTIPLY(z2
+ z3
, FIX_0_541196100
); /* c6 */
358 tmp2
= z1
+ MULTIPLY(z2
, FIX_0_765366865
); /* c2-c6 */
359 tmp3
= z1
- MULTIPLY(z3
, FIX_1_847759065
); /* c2+c6 */
366 /* Odd part per figure 8; the matrix is unitary and hence its
367 * transpose is its inverse. i0..i3 are y7,y5,y3,y1 respectively.
370 tmp0
= (INT32
) wsptr
[7];
371 tmp1
= (INT32
) wsptr
[5];
372 tmp2
= (INT32
) wsptr
[3];
373 tmp3
= (INT32
) wsptr
[1];
378 z1
= MULTIPLY(z2
+ z3
, FIX_1_175875602
); /* c3 */
379 z2
= MULTIPLY(z2
, - FIX_1_961570560
); /* -c3-c5 */
380 z3
= MULTIPLY(z3
, - FIX_0_390180644
); /* -c3+c5 */
384 z1
= MULTIPLY(tmp0
+ tmp3
, - FIX_0_899976223
); /* -c3+c7 */
385 tmp0
= MULTIPLY(tmp0
, FIX_0_298631336
); /* -c1+c3+c5-c7 */
386 tmp3
= MULTIPLY(tmp3
, FIX_1_501321110
); /* c1+c3-c5-c7 */
390 z1
= MULTIPLY(tmp1
+ tmp2
, - FIX_2_562915447
); /* -c1-c3 */
391 tmp1
= MULTIPLY(tmp1
, FIX_2_053119869
); /* c1+c3-c5+c7 */
392 tmp2
= MULTIPLY(tmp2
, FIX_3_072711026
); /* c1+c3+c5-c7 */
396 /* Final output stage: inputs are tmp10..tmp13, tmp0..tmp3 */
398 outptr
[0] = range_limit
[(int) RIGHT_SHIFT(tmp10
+ tmp3
,
399 CONST_BITS
+PASS1_BITS
+3)
401 outptr
[7] = range_limit
[(int) RIGHT_SHIFT(tmp10
- tmp3
,
402 CONST_BITS
+PASS1_BITS
+3)
404 outptr
[1] = range_limit
[(int) RIGHT_SHIFT(tmp11
+ tmp2
,
405 CONST_BITS
+PASS1_BITS
+3)
407 outptr
[6] = range_limit
[(int) RIGHT_SHIFT(tmp11
- tmp2
,
408 CONST_BITS
+PASS1_BITS
+3)
410 outptr
[2] = range_limit
[(int) RIGHT_SHIFT(tmp12
+ tmp1
,
411 CONST_BITS
+PASS1_BITS
+3)
413 outptr
[5] = range_limit
[(int) RIGHT_SHIFT(tmp12
- tmp1
,
414 CONST_BITS
+PASS1_BITS
+3)
416 outptr
[3] = range_limit
[(int) RIGHT_SHIFT(tmp13
+ tmp0
,
417 CONST_BITS
+PASS1_BITS
+3)
419 outptr
[4] = range_limit
[(int) RIGHT_SHIFT(tmp13
- tmp0
,
420 CONST_BITS
+PASS1_BITS
+3)
423 wsptr
+= DCTSIZE
; /* advance pointer to next row */
427 #ifdef IDCT_SCALING_SUPPORTED
431 * Perform dequantization and inverse DCT on one block of coefficients,
432 * producing a reduced-size 7x7 output block.
434 * Optimized algorithm with 12 multiplications in the 1-D kernel.
435 * cK represents sqrt(2) * cos(K*pi/14).
/*
 * jpeg_idct_7x7: dequantize the first 7 rows and 7 columns of a coefficient
 * block and inverse-transform them into 7 output rows of 7 samples.
 *
 *   cinfo      - handed to IDCT_range_limit() to obtain the range_limit table.
 *   compptr    - its dct_table member is read as ISLOW_MULT_TYPE multipliers.
 *   output_buf - array of output rows; row ctr is written starting at
 *                output_buf[ctr] + output_col.
 *
 * Pass 1 runs ctr over 7 columns, advancing inptr, quantptr and wsptr
 * together.  It reads input rows 0..6 with the DCTSIZE stride and stores 7
 * values per column with stride 7, shifted right by CONST_BITS-PASS1_BITS.
 * Pass 2 consumes 7 values per row (wsptr += 7) and shifts right by
 * CONST_BITS+PASS1_BITS+3 before the range_limit[] lookup.  In both passes
 * the middle output (index 3) is taken from tmp13 alone.
 *
 * NOTE(review): this listing has gaps (the embedded line numbers skip).  The
 * return type, the braces, any parameter between compptr and output_buf, and
 * the declarations of inptr, wsptr, outptr, ctr and z1..z3 are not visible.
 * tmp0 is read at its first visible use in each pass, so its initial
 * assignment is among the missing lines.  Confirm against the complete file
 * before changing any arithmetic.
 */
439 jpeg_idct_7x7 (j_decompress_ptr cinfo
, jpeg_component_info
* compptr
,
441 JSAMPARRAY output_buf
, JDIMENSION output_col
)
443 INT32 tmp0
, tmp1
, tmp2
, tmp10
, tmp11
, tmp12
, tmp13
;
446 ISLOW_MULT_TYPE
* quantptr
;
449 JSAMPLE
*range_limit
= IDCT_range_limit(cinfo
);
451 int workspace
[7*7]; /* buffers data between passes */
454 /* Pass 1: process columns from input, store into work array. */
457 quantptr
= (ISLOW_MULT_TYPE
*) compptr
->dct_table
;
459 for (ctr
= 0; ctr
< 7; ctr
++, inptr
++, quantptr
++, wsptr
++) {
462 tmp13
= DEQUANTIZE(inptr
[DCTSIZE
*0], quantptr
[DCTSIZE
*0]);
463 tmp13
<<= CONST_BITS
;
464 /* Add fudge factor here for final descale. */
465 tmp13
+= ONE
<< (CONST_BITS
-PASS1_BITS
-1);
467 z1
= DEQUANTIZE(inptr
[DCTSIZE
*2], quantptr
[DCTSIZE
*2]);
468 z2
= DEQUANTIZE(inptr
[DCTSIZE
*4], quantptr
[DCTSIZE
*4]);
469 z3
= DEQUANTIZE(inptr
[DCTSIZE
*6], quantptr
[DCTSIZE
*6]);
471 tmp10
= MULTIPLY(z2
- z3
, FIX(0.881747734)); /* c4 */
472 tmp12
= MULTIPLY(z1
- z2
, FIX(0.314692123)); /* c6 */
473 tmp11
= tmp10
+ tmp12
+ tmp13
- MULTIPLY(z2
, FIX(1.841218003)); /* c2+c4-c6 */
476 tmp0
= MULTIPLY(tmp0
, FIX(1.274162392)) + tmp13
; /* c2 */
477 tmp10
+= tmp0
- MULTIPLY(z3
, FIX(0.077722536)); /* c2-c4-c6 */
478 tmp12
+= tmp0
- MULTIPLY(z1
, FIX(2.470602249)); /* c2+c4+c6 */
479 tmp13
+= MULTIPLY(z2
, FIX(1.414213562)); /* c0 */
483 z1
= DEQUANTIZE(inptr
[DCTSIZE
*1], quantptr
[DCTSIZE
*1]);
484 z2
= DEQUANTIZE(inptr
[DCTSIZE
*3], quantptr
[DCTSIZE
*3]);
485 z3
= DEQUANTIZE(inptr
[DCTSIZE
*5], quantptr
[DCTSIZE
*5]);
487 tmp1
= MULTIPLY(z1
+ z2
, FIX(0.935414347)); /* (c3+c1-c5)/2 */
488 tmp2
= MULTIPLY(z1
- z2
, FIX(0.170262339)); /* (c3+c5-c1)/2 */
491 tmp2
= MULTIPLY(z2
+ z3
, - FIX(1.378756276)); /* -c1 */
493 z2
= MULTIPLY(z1
+ z3
, FIX(0.613604268)); /* c5 */
495 tmp2
+= z2
+ MULTIPLY(z3
, FIX(1.870828693)); /* c3+c1-c5 */
497 /* Final output stage */
499 wsptr
[7*0] = (int) RIGHT_SHIFT(tmp10
+ tmp0
, CONST_BITS
-PASS1_BITS
);
500 wsptr
[7*6] = (int) RIGHT_SHIFT(tmp10
- tmp0
, CONST_BITS
-PASS1_BITS
);
501 wsptr
[7*1] = (int) RIGHT_SHIFT(tmp11
+ tmp1
, CONST_BITS
-PASS1_BITS
);
502 wsptr
[7*5] = (int) RIGHT_SHIFT(tmp11
- tmp1
, CONST_BITS
-PASS1_BITS
);
503 wsptr
[7*2] = (int) RIGHT_SHIFT(tmp12
+ tmp2
, CONST_BITS
-PASS1_BITS
);
504 wsptr
[7*4] = (int) RIGHT_SHIFT(tmp12
- tmp2
, CONST_BITS
-PASS1_BITS
);
505 wsptr
[7*3] = (int) RIGHT_SHIFT(tmp13
, CONST_BITS
-PASS1_BITS
);
508 /* Pass 2: process 7 rows from work array, store into output array. */
511 for (ctr
= 0; ctr
< 7; ctr
++) {
512 outptr
= output_buf
[ctr
] + output_col
;
516 /* Add range center and fudge factor for final descale and range-limit. */
517 tmp13
= (INT32
) wsptr
[0] +
518 ((((INT32
) RANGE_CENTER
) << (PASS1_BITS
+3)) +
519 (ONE
<< (PASS1_BITS
+2)));
520 tmp13
<<= CONST_BITS
;
522 z1
= (INT32
) wsptr
[2];
523 z2
= (INT32
) wsptr
[4];
524 z3
= (INT32
) wsptr
[6];
526 tmp10
= MULTIPLY(z2
- z3
, FIX(0.881747734)); /* c4 */
527 tmp12
= MULTIPLY(z1
- z2
, FIX(0.314692123)); /* c6 */
528 tmp11
= tmp10
+ tmp12
+ tmp13
- MULTIPLY(z2
, FIX(1.841218003)); /* c2+c4-c6 */
531 tmp0
= MULTIPLY(tmp0
, FIX(1.274162392)) + tmp13
; /* c2 */
532 tmp10
+= tmp0
- MULTIPLY(z3
, FIX(0.077722536)); /* c2-c4-c6 */
533 tmp12
+= tmp0
- MULTIPLY(z1
, FIX(2.470602249)); /* c2+c4+c6 */
534 tmp13
+= MULTIPLY(z2
, FIX(1.414213562)); /* c0 */
538 z1
= (INT32
) wsptr
[1];
539 z2
= (INT32
) wsptr
[3];
540 z3
= (INT32
) wsptr
[5];
542 tmp1
= MULTIPLY(z1
+ z2
, FIX(0.935414347)); /* (c3+c1-c5)/2 */
543 tmp2
= MULTIPLY(z1
- z2
, FIX(0.170262339)); /* (c3+c5-c1)/2 */
546 tmp2
= MULTIPLY(z2
+ z3
, - FIX(1.378756276)); /* -c1 */
548 z2
= MULTIPLY(z1
+ z3
, FIX(0.613604268)); /* c5 */
550 tmp2
+= z2
+ MULTIPLY(z3
, FIX(1.870828693)); /* c3+c1-c5 */
552 /* Final output stage */
554 outptr
[0] = range_limit
[(int) RIGHT_SHIFT(tmp10
+ tmp0
,
555 CONST_BITS
+PASS1_BITS
+3)
557 outptr
[6] = range_limit
[(int) RIGHT_SHIFT(tmp10
- tmp0
,
558 CONST_BITS
+PASS1_BITS
+3)
560 outptr
[1] = range_limit
[(int) RIGHT_SHIFT(tmp11
+ tmp1
,
561 CONST_BITS
+PASS1_BITS
+3)
563 outptr
[5] = range_limit
[(int) RIGHT_SHIFT(tmp11
- tmp1
,
564 CONST_BITS
+PASS1_BITS
+3)
566 outptr
[2] = range_limit
[(int) RIGHT_SHIFT(tmp12
+ tmp2
,
567 CONST_BITS
+PASS1_BITS
+3)
569 outptr
[4] = range_limit
[(int) RIGHT_SHIFT(tmp12
- tmp2
,
570 CONST_BITS
+PASS1_BITS
+3)
572 outptr
[3] = range_limit
[(int) RIGHT_SHIFT(tmp13
,
573 CONST_BITS
+PASS1_BITS
+3)
576 wsptr
+= 7; /* advance pointer to next row */
582 * Perform dequantization and inverse DCT on one block of coefficients,
583 * producing a reduced-size 6x6 output block.
585 * Optimized algorithm with 3 multiplications in the 1-D kernel.
586 * cK represents sqrt(2) * cos(K*pi/12).
/*
 * jpeg_idct_6x6: dequantize the first 6 rows and 6 columns of a coefficient
 * block and inverse-transform them into 6 output rows of 6 samples.
 *
 *   cinfo      - handed to IDCT_range_limit() to obtain the range_limit table.
 *   compptr    - its dct_table member is read as ISLOW_MULT_TYPE multipliers.
 *   output_buf - array of output rows; row ctr is written starting at
 *                output_buf[ctr] + output_col.
 *
 * Pass 1 runs ctr over 6 columns, advancing inptr, quantptr and wsptr
 * together, and stores 6 values per column with stride 6.  Four of them are
 * shifted right by CONST_BITS-PASS1_BITS at the output stage.  The other two
 * (indices 1 and 4) are stored as tmp11 +/- tmp1 with no shift: tmp11 has
 * already been shifted where it is computed, and tmp1 = (z1 - z2 - z3) is
 * only scaled up by PASS1_BITS.
 * Pass 2 consumes 6 values per row (wsptr += 6).  There tmp1 is scaled by
 * CONST_BITS like the other terms and all six outputs are shifted right by
 * CONST_BITS+PASS1_BITS+3 before the range_limit[] lookup.
 *
 * NOTE(review): this listing has gaps (the embedded line numbers skip).  The
 * return type, the braces, any parameter between compptr and output_buf, and
 * the declarations of inptr, wsptr, outptr, ctr and z1..z3 are not visible.
 * tmp12 is used in both output stages without a visible assignment.
 * Confirm against the complete file before changing any arithmetic.
 */
590 jpeg_idct_6x6 (j_decompress_ptr cinfo
, jpeg_component_info
* compptr
,
592 JSAMPARRAY output_buf
, JDIMENSION output_col
)
594 INT32 tmp0
, tmp1
, tmp2
, tmp10
, tmp11
, tmp12
;
597 ISLOW_MULT_TYPE
* quantptr
;
600 JSAMPLE
*range_limit
= IDCT_range_limit(cinfo
);
602 int workspace
[6*6]; /* buffers data between passes */
605 /* Pass 1: process columns from input, store into work array. */
608 quantptr
= (ISLOW_MULT_TYPE
*) compptr
->dct_table
;
610 for (ctr
= 0; ctr
< 6; ctr
++, inptr
++, quantptr
++, wsptr
++) {
613 tmp0
= DEQUANTIZE(inptr
[DCTSIZE
*0], quantptr
[DCTSIZE
*0]);
615 /* Add fudge factor here for final descale. */
616 tmp0
+= ONE
<< (CONST_BITS
-PASS1_BITS
-1);
617 tmp2
= DEQUANTIZE(inptr
[DCTSIZE
*4], quantptr
[DCTSIZE
*4]);
618 tmp10
= MULTIPLY(tmp2
, FIX(0.707106781)); /* c4 */
620 tmp11
= RIGHT_SHIFT(tmp0
- tmp10
- tmp10
, CONST_BITS
-PASS1_BITS
);
621 tmp10
= DEQUANTIZE(inptr
[DCTSIZE
*2], quantptr
[DCTSIZE
*2]);
622 tmp0
= MULTIPLY(tmp10
, FIX(1.224744871)); /* c2 */
628 z1
= DEQUANTIZE(inptr
[DCTSIZE
*1], quantptr
[DCTSIZE
*1]);
629 z2
= DEQUANTIZE(inptr
[DCTSIZE
*3], quantptr
[DCTSIZE
*3]);
630 z3
= DEQUANTIZE(inptr
[DCTSIZE
*5], quantptr
[DCTSIZE
*5]);
631 tmp1
= MULTIPLY(z1
+ z3
, FIX(0.366025404)); /* c5 */
632 tmp0
= tmp1
+ ((z1
+ z2
) << CONST_BITS
);
633 tmp2
= tmp1
+ ((z3
- z2
) << CONST_BITS
);
634 tmp1
= (z1
- z2
- z3
) << PASS1_BITS
;
636 /* Final output stage */
638 wsptr
[6*0] = (int) RIGHT_SHIFT(tmp10
+ tmp0
, CONST_BITS
-PASS1_BITS
);
639 wsptr
[6*5] = (int) RIGHT_SHIFT(tmp10
- tmp0
, CONST_BITS
-PASS1_BITS
);
640 wsptr
[6*1] = (int) (tmp11
+ tmp1
);
641 wsptr
[6*4] = (int) (tmp11
- tmp1
);
642 wsptr
[6*2] = (int) RIGHT_SHIFT(tmp12
+ tmp2
, CONST_BITS
-PASS1_BITS
);
643 wsptr
[6*3] = (int) RIGHT_SHIFT(tmp12
- tmp2
, CONST_BITS
-PASS1_BITS
);
646 /* Pass 2: process 6 rows from work array, store into output array. */
649 for (ctr
= 0; ctr
< 6; ctr
++) {
650 outptr
= output_buf
[ctr
] + output_col
;
654 /* Add range center and fudge factor for final descale and range-limit. */
655 tmp0
= (INT32
) wsptr
[0] +
656 ((((INT32
) RANGE_CENTER
) << (PASS1_BITS
+3)) +
657 (ONE
<< (PASS1_BITS
+2)));
659 tmp2
= (INT32
) wsptr
[4];
660 tmp10
= MULTIPLY(tmp2
, FIX(0.707106781)); /* c4 */
662 tmp11
= tmp0
- tmp10
- tmp10
;
663 tmp10
= (INT32
) wsptr
[2];
664 tmp0
= MULTIPLY(tmp10
, FIX(1.224744871)); /* c2 */
670 z1
= (INT32
) wsptr
[1];
671 z2
= (INT32
) wsptr
[3];
672 z3
= (INT32
) wsptr
[5];
673 tmp1
= MULTIPLY(z1
+ z3
, FIX(0.366025404)); /* c5 */
674 tmp0
= tmp1
+ ((z1
+ z2
) << CONST_BITS
);
675 tmp2
= tmp1
+ ((z3
- z2
) << CONST_BITS
);
676 tmp1
= (z1
- z2
- z3
) << CONST_BITS
;
678 /* Final output stage */
680 outptr
[0] = range_limit
[(int) RIGHT_SHIFT(tmp10
+ tmp0
,
681 CONST_BITS
+PASS1_BITS
+3)
683 outptr
[5] = range_limit
[(int) RIGHT_SHIFT(tmp10
- tmp0
,
684 CONST_BITS
+PASS1_BITS
+3)
686 outptr
[1] = range_limit
[(int) RIGHT_SHIFT(tmp11
+ tmp1
,
687 CONST_BITS
+PASS1_BITS
+3)
689 outptr
[4] = range_limit
[(int) RIGHT_SHIFT(tmp11
- tmp1
,
690 CONST_BITS
+PASS1_BITS
+3)
692 outptr
[2] = range_limit
[(int) RIGHT_SHIFT(tmp12
+ tmp2
,
693 CONST_BITS
+PASS1_BITS
+3)
695 outptr
[3] = range_limit
[(int) RIGHT_SHIFT(tmp12
- tmp2
,
696 CONST_BITS
+PASS1_BITS
+3)
699 wsptr
+= 6; /* advance pointer to next row */
705 * Perform dequantization and inverse DCT on one block of coefficients,
706 * producing a reduced-size 5x5 output block.
708 * Optimized algorithm with 5 multiplications in the 1-D kernel.
709 * cK represents sqrt(2) * cos(K*pi/10).
/*
 * jpeg_idct_5x5: dequantize the first 5 rows and 5 columns of a coefficient
 * block and inverse-transform them into 5 output rows of 5 samples.
 *
 *   cinfo      - handed to IDCT_range_limit() to obtain the range_limit table.
 *   compptr    - its dct_table member is read as ISLOW_MULT_TYPE multipliers.
 *   output_buf - array of output rows; row ctr is written starting at
 *                output_buf[ctr] + output_col.
 *
 * Pass 1 runs ctr over 5 columns, advancing inptr, quantptr and wsptr
 * together, and stores 5 values per column with stride 5, shifted right by
 * CONST_BITS-PASS1_BITS.  Pass 2 consumes 5 values per row (wsptr += 5) and
 * shifts right by CONST_BITS+PASS1_BITS+3 before the range_limit[] lookup.
 * In both passes the middle output (index 2) is taken from tmp12 alone.
 *
 * NOTE(review): this listing has gaps (the embedded line numbers skip).  The
 * return type, the braces, any parameter between compptr and output_buf, and
 * the declarations of inptr, wsptr, outptr, ctr and z1..z3 are not visible.
 * tmp10 and tmp11 are used in both output stages without a visible
 * assignment, and the even-part z1/z2 products are overwritten before any
 * visible use.  Confirm against the complete file before changing any
 * arithmetic.
 */
713 jpeg_idct_5x5 (j_decompress_ptr cinfo
, jpeg_component_info
* compptr
,
715 JSAMPARRAY output_buf
, JDIMENSION output_col
)
717 INT32 tmp0
, tmp1
, tmp10
, tmp11
, tmp12
;
720 ISLOW_MULT_TYPE
* quantptr
;
723 JSAMPLE
*range_limit
= IDCT_range_limit(cinfo
);
725 int workspace
[5*5]; /* buffers data between passes */
728 /* Pass 1: process columns from input, store into work array. */
731 quantptr
= (ISLOW_MULT_TYPE
*) compptr
->dct_table
;
733 for (ctr
= 0; ctr
< 5; ctr
++, inptr
++, quantptr
++, wsptr
++) {
736 tmp12
= DEQUANTIZE(inptr
[DCTSIZE
*0], quantptr
[DCTSIZE
*0]);
737 tmp12
<<= CONST_BITS
;
738 /* Add fudge factor here for final descale. */
739 tmp12
+= ONE
<< (CONST_BITS
-PASS1_BITS
-1);
740 tmp0
= DEQUANTIZE(inptr
[DCTSIZE
*2], quantptr
[DCTSIZE
*2]);
741 tmp1
= DEQUANTIZE(inptr
[DCTSIZE
*4], quantptr
[DCTSIZE
*4]);
742 z1
= MULTIPLY(tmp0
+ tmp1
, FIX(0.790569415)); /* (c2+c4)/2 */
743 z2
= MULTIPLY(tmp0
- tmp1
, FIX(0.353553391)); /* (c2-c4)/2 */
751 z2
= DEQUANTIZE(inptr
[DCTSIZE
*1], quantptr
[DCTSIZE
*1]);
752 z3
= DEQUANTIZE(inptr
[DCTSIZE
*3], quantptr
[DCTSIZE
*3]);
754 z1
= MULTIPLY(z2
+ z3
, FIX(0.831253876)); /* c3 */
755 tmp0
= z1
+ MULTIPLY(z2
, FIX(0.513743148)); /* c1-c3 */
756 tmp1
= z1
- MULTIPLY(z3
, FIX(2.176250899)); /* c1+c3 */
758 /* Final output stage */
760 wsptr
[5*0] = (int) RIGHT_SHIFT(tmp10
+ tmp0
, CONST_BITS
-PASS1_BITS
);
761 wsptr
[5*4] = (int) RIGHT_SHIFT(tmp10
- tmp0
, CONST_BITS
-PASS1_BITS
);
762 wsptr
[5*1] = (int) RIGHT_SHIFT(tmp11
+ tmp1
, CONST_BITS
-PASS1_BITS
);
763 wsptr
[5*3] = (int) RIGHT_SHIFT(tmp11
- tmp1
, CONST_BITS
-PASS1_BITS
);
764 wsptr
[5*2] = (int) RIGHT_SHIFT(tmp12
, CONST_BITS
-PASS1_BITS
);
767 /* Pass 2: process 5 rows from work array, store into output array. */
770 for (ctr
= 0; ctr
< 5; ctr
++) {
771 outptr
= output_buf
[ctr
] + output_col
;
775 /* Add range center and fudge factor for final descale and range-limit. */
776 tmp12
= (INT32
) wsptr
[0] +
777 ((((INT32
) RANGE_CENTER
) << (PASS1_BITS
+3)) +
778 (ONE
<< (PASS1_BITS
+2)));
779 tmp12
<<= CONST_BITS
;
780 tmp0
= (INT32
) wsptr
[2];
781 tmp1
= (INT32
) wsptr
[4];
782 z1
= MULTIPLY(tmp0
+ tmp1
, FIX(0.790569415)); /* (c2+c4)/2 */
783 z2
= MULTIPLY(tmp0
- tmp1
, FIX(0.353553391)); /* (c2-c4)/2 */
791 z2
= (INT32
) wsptr
[1];
792 z3
= (INT32
) wsptr
[3];
794 z1
= MULTIPLY(z2
+ z3
, FIX(0.831253876)); /* c3 */
795 tmp0
= z1
+ MULTIPLY(z2
, FIX(0.513743148)); /* c1-c3 */
796 tmp1
= z1
- MULTIPLY(z3
, FIX(2.176250899)); /* c1+c3 */
798 /* Final output stage */
800 outptr
[0] = range_limit
[(int) RIGHT_SHIFT(tmp10
+ tmp0
,
801 CONST_BITS
+PASS1_BITS
+3)
803 outptr
[4] = range_limit
[(int) RIGHT_SHIFT(tmp10
- tmp0
,
804 CONST_BITS
+PASS1_BITS
+3)
806 outptr
[1] = range_limit
[(int) RIGHT_SHIFT(tmp11
+ tmp1
,
807 CONST_BITS
+PASS1_BITS
+3)
809 outptr
[3] = range_limit
[(int) RIGHT_SHIFT(tmp11
- tmp1
,
810 CONST_BITS
+PASS1_BITS
+3)
812 outptr
[2] = range_limit
[(int) RIGHT_SHIFT(tmp12
,
813 CONST_BITS
+PASS1_BITS
+3)
816 wsptr
+= 5; /* advance pointer to next row */
822 * Perform dequantization and inverse DCT on one block of coefficients,
823 * producing a reduced-size 4x4 output block.
825 * Optimized algorithm with 3 multiplications in the 1-D kernel.
826 * cK represents sqrt(2) * cos(K*pi/16) [refers to 8-point IDCT].
/*
 * jpeg_idct_4x4: dequantize the first 4 rows and 4 columns of a coefficient
 * block and inverse-transform them into 4 output rows of 4 samples.
 *
 *   cinfo      - handed to IDCT_range_limit() to obtain the range_limit table.
 *   compptr    - its dct_table member is read as ISLOW_MULT_TYPE multipliers.
 *   output_buf - array of output rows; row ctr is written starting at
 *                output_buf[ctr] + output_col.
 *
 * Pass 1 runs ctr over 4 columns, advancing inptr, quantptr and wsptr
 * together, and stores 4 values per column with stride 4.  The sum and
 * difference of rows 0 and 2 are only scaled up by PASS1_BITS.  The rotated
 * terms from rows 1 and 3 get the rounding term added to z1 and are shifted
 * right by CONST_BITS-PASS1_BITS as they are computed, so the output stage
 * adds and subtracts without a further shift.
 * Pass 2 consumes 4 values per row (wsptr += 4), scales the even terms by
 * CONST_BITS and shifts every result right by CONST_BITS+PASS1_BITS+3 before
 * the range_limit[] lookup.  It reuses the FIX_0_541196100, FIX_0_765366865
 * and FIX_1_847759065 multipliers of the 8-point kernel.
 *
 * NOTE(review): this listing has gaps (the embedded line numbers skip).  The
 * return type, the braces, any parameter between compptr and output_buf, and
 * the declarations of inptr, wsptr, outptr, ctr and z1..z3 are not visible.
 * Confirm against the complete file before changing any arithmetic.
 */
830 jpeg_idct_4x4 (j_decompress_ptr cinfo
, jpeg_component_info
* compptr
,
832 JSAMPARRAY output_buf
, JDIMENSION output_col
)
834 INT32 tmp0
, tmp2
, tmp10
, tmp12
;
837 ISLOW_MULT_TYPE
* quantptr
;
840 JSAMPLE
*range_limit
= IDCT_range_limit(cinfo
);
842 int workspace
[4*4]; /* buffers data between passes */
845 /* Pass 1: process columns from input, store into work array. */
848 quantptr
= (ISLOW_MULT_TYPE
*) compptr
->dct_table
;
850 for (ctr
= 0; ctr
< 4; ctr
++, inptr
++, quantptr
++, wsptr
++) {
853 tmp0
= DEQUANTIZE(inptr
[DCTSIZE
*0], quantptr
[DCTSIZE
*0]);
854 tmp2
= DEQUANTIZE(inptr
[DCTSIZE
*2], quantptr
[DCTSIZE
*2]);
856 tmp10
= (tmp0
+ tmp2
) << PASS1_BITS
;
857 tmp12
= (tmp0
- tmp2
) << PASS1_BITS
;
860 /* Same rotation as in the even part of the 8x8 LL&M IDCT */
862 z2
= DEQUANTIZE(inptr
[DCTSIZE
*1], quantptr
[DCTSIZE
*1]);
863 z3
= DEQUANTIZE(inptr
[DCTSIZE
*3], quantptr
[DCTSIZE
*3]);
865 z1
= MULTIPLY(z2
+ z3
, FIX_0_541196100
); /* c6 */
866 /* Add fudge factor here for final descale. */
867 z1
+= ONE
<< (CONST_BITS
-PASS1_BITS
-1);
868 tmp0
= RIGHT_SHIFT(z1
+ MULTIPLY(z2
, FIX_0_765366865
), /* c2-c6 */
869 CONST_BITS
-PASS1_BITS
);
870 tmp2
= RIGHT_SHIFT(z1
- MULTIPLY(z3
, FIX_1_847759065
), /* c2+c6 */
871 CONST_BITS
-PASS1_BITS
);
873 /* Final output stage */
875 wsptr
[4*0] = (int) (tmp10
+ tmp0
);
876 wsptr
[4*3] = (int) (tmp10
- tmp0
);
877 wsptr
[4*1] = (int) (tmp12
+ tmp2
);
878 wsptr
[4*2] = (int) (tmp12
- tmp2
);
881 /* Pass 2: process 4 rows from work array, store into output array. */
884 for (ctr
= 0; ctr
< 4; ctr
++) {
885 outptr
= output_buf
[ctr
] + output_col
;
889 /* Add range center and fudge factor for final descale and range-limit. */
890 tmp0
= (INT32
) wsptr
[0] +
891 ((((INT32
) RANGE_CENTER
) << (PASS1_BITS
+3)) +
892 (ONE
<< (PASS1_BITS
+2)));
893 tmp2
= (INT32
) wsptr
[2];
895 tmp10
= (tmp0
+ tmp2
) << CONST_BITS
;
896 tmp12
= (tmp0
- tmp2
) << CONST_BITS
;
899 /* Same rotation as in the even part of the 8x8 LL&M IDCT */
901 z2
= (INT32
) wsptr
[1];
902 z3
= (INT32
) wsptr
[3];
904 z1
= MULTIPLY(z2
+ z3
, FIX_0_541196100
); /* c6 */
905 tmp0
= z1
+ MULTIPLY(z2
, FIX_0_765366865
); /* c2-c6 */
906 tmp2
= z1
- MULTIPLY(z3
, FIX_1_847759065
); /* c2+c6 */
908 /* Final output stage */
910 outptr
[0] = range_limit
[(int) RIGHT_SHIFT(tmp10
+ tmp0
,
911 CONST_BITS
+PASS1_BITS
+3)
913 outptr
[3] = range_limit
[(int) RIGHT_SHIFT(tmp10
- tmp0
,
914 CONST_BITS
+PASS1_BITS
+3)
916 outptr
[1] = range_limit
[(int) RIGHT_SHIFT(tmp12
+ tmp2
,
917 CONST_BITS
+PASS1_BITS
+3)
919 outptr
[2] = range_limit
[(int) RIGHT_SHIFT(tmp12
- tmp2
,
920 CONST_BITS
+PASS1_BITS
+3)
923 wsptr
+= 4; /* advance pointer to next row */
929 * Perform dequantization and inverse DCT on one block of coefficients,
930 * producing a reduced-size 3x3 output block.
932 * Optimized algorithm with 2 multiplications in the 1-D kernel.
933 * cK represents sqrt(2) * cos(K*pi/6).
/*
 * jpeg_idct_3x3: dequantize the first 3 rows and 3 columns of a coefficient
 * block and inverse-transform them into 3 output rows of 3 samples.
 *
 *   cinfo      - handed to IDCT_range_limit() to obtain the range_limit table.
 *   compptr    - its dct_table member is read as ISLOW_MULT_TYPE multipliers.
 *   output_buf - array of output rows; row ctr is written starting at
 *                output_buf[ctr] + output_col.
 *
 * Pass 1 runs ctr over 3 columns, advancing inptr, quantptr and wsptr
 * together, and stores 3 values per column with stride 3, shifted right by
 * CONST_BITS-PASS1_BITS.  Pass 2 consumes 3 values per row (wsptr += 3) and
 * shifts right by CONST_BITS+PASS1_BITS+3 before the range_limit[] lookup.
 * In both passes the middle output (index 1) is tmp2 = tmp0 - 2*tmp12, which
 * has no odd-part contribution.
 *
 * NOTE(review): this listing has gaps (the embedded line numbers skip).  The
 * return type, the braces, any parameter between compptr and output_buf, and
 * the declarations of inptr, wsptr, outptr and ctr are not visible.  In each
 * pass a line is also skipped right after tmp0 is loaded, so any scaling
 * applied to tmp0 there cannot be seen here.  Confirm against the complete
 * file before changing any arithmetic.
 */
937 jpeg_idct_3x3 (j_decompress_ptr cinfo
, jpeg_component_info
* compptr
,
939 JSAMPARRAY output_buf
, JDIMENSION output_col
)
941 INT32 tmp0
, tmp2
, tmp10
, tmp12
;
943 ISLOW_MULT_TYPE
* quantptr
;
946 JSAMPLE
*range_limit
= IDCT_range_limit(cinfo
);
948 int workspace
[3*3]; /* buffers data between passes */
951 /* Pass 1: process columns from input, store into work array. */
954 quantptr
= (ISLOW_MULT_TYPE
*) compptr
->dct_table
;
956 for (ctr
= 0; ctr
< 3; ctr
++, inptr
++, quantptr
++, wsptr
++) {
959 tmp0
= DEQUANTIZE(inptr
[DCTSIZE
*0], quantptr
[DCTSIZE
*0]);
961 /* Add fudge factor here for final descale. */
962 tmp0
+= ONE
<< (CONST_BITS
-PASS1_BITS
-1);
963 tmp2
= DEQUANTIZE(inptr
[DCTSIZE
*2], quantptr
[DCTSIZE
*2]);
964 tmp12
= MULTIPLY(tmp2
, FIX(0.707106781)); /* c2 */
965 tmp10
= tmp0
+ tmp12
;
966 tmp2
= tmp0
- tmp12
- tmp12
;
970 tmp12
= DEQUANTIZE(inptr
[DCTSIZE
*1], quantptr
[DCTSIZE
*1]);
971 tmp0
= MULTIPLY(tmp12
, FIX(1.224744871)); /* c1 */
973 /* Final output stage */
975 wsptr
[3*0] = (int) RIGHT_SHIFT(tmp10
+ tmp0
, CONST_BITS
-PASS1_BITS
);
976 wsptr
[3*2] = (int) RIGHT_SHIFT(tmp10
- tmp0
, CONST_BITS
-PASS1_BITS
);
977 wsptr
[3*1] = (int) RIGHT_SHIFT(tmp2
, CONST_BITS
-PASS1_BITS
);
980 /* Pass 2: process 3 rows from work array, store into output array. */
983 for (ctr
= 0; ctr
< 3; ctr
++) {
984 outptr
= output_buf
[ctr
] + output_col
;
988 /* Add range center and fudge factor for final descale and range-limit. */
989 tmp0
= (INT32
) wsptr
[0] +
990 ((((INT32
) RANGE_CENTER
) << (PASS1_BITS
+3)) +
991 (ONE
<< (PASS1_BITS
+2)));
993 tmp2
= (INT32
) wsptr
[2];
994 tmp12
= MULTIPLY(tmp2
, FIX(0.707106781)); /* c2 */
995 tmp10
= tmp0
+ tmp12
;
996 tmp2
= tmp0
- tmp12
- tmp12
;
1000 tmp12
= (INT32
) wsptr
[1];
1001 tmp0
= MULTIPLY(tmp12
, FIX(1.224744871)); /* c1 */
1003 /* Final output stage */
1005 outptr
[0] = range_limit
[(int) RIGHT_SHIFT(tmp10
+ tmp0
,
1006 CONST_BITS
+PASS1_BITS
+3)
1008 outptr
[2] = range_limit
[(int) RIGHT_SHIFT(tmp10
- tmp0
,
1009 CONST_BITS
+PASS1_BITS
+3)
1011 outptr
[1] = range_limit
[(int) RIGHT_SHIFT(tmp2
,
1012 CONST_BITS
+PASS1_BITS
+3)
1015 wsptr
+= 3; /* advance pointer to next row */
1021 * Perform dequantization and inverse DCT on one block of coefficients,
1022 * producing a reduced-size 2x2 output block.
1024 * Multiplication-less algorithm.
1028 jpeg_idct_2x2 (j_decompress_ptr cinfo
, jpeg_component_info
* compptr
,
1029 JCOEFPTR coef_block
,
1030 JSAMPARRAY output_buf
, JDIMENSION output_col
)
1032 DCTELEM tmp0
, tmp1
, tmp2
, tmp3
, tmp4
, tmp5
;
1033 ISLOW_MULT_TYPE
* quantptr
;
1035 JSAMPLE
*range_limit
= IDCT_range_limit(cinfo
);
1038 /* Pass 1: process columns from input. */
1040 quantptr
= (ISLOW_MULT_TYPE
*) compptr
->dct_table
;
1043 tmp4
= DEQUANTIZE(coef_block
[DCTSIZE
*0], quantptr
[DCTSIZE
*0]);
1044 tmp5
= DEQUANTIZE(coef_block
[DCTSIZE
*1], quantptr
[DCTSIZE
*1]);
1045 /* Add range center and fudge factor for final descale and range-limit. */
1046 tmp4
+= (((DCTELEM
) RANGE_CENTER
) << 3) + (1 << 2);
1052 tmp4
= DEQUANTIZE(coef_block
[DCTSIZE
*0+1], quantptr
[DCTSIZE
*0+1]);
1053 tmp5
= DEQUANTIZE(coef_block
[DCTSIZE
*1+1], quantptr
[DCTSIZE
*1+1]);
1058 /* Pass 2: process 2 rows, store into output array. */
1061 outptr
= output_buf
[0] + output_col
;
1063 outptr
[0] = range_limit
[(int) IRIGHT_SHIFT(tmp0
+ tmp1
, 3) & RANGE_MASK
];
1064 outptr
[1] = range_limit
[(int) IRIGHT_SHIFT(tmp0
- tmp1
, 3) & RANGE_MASK
];
1067 outptr
= output_buf
[1] + output_col
;
1069 outptr
[0] = range_limit
[(int) IRIGHT_SHIFT(tmp2
+ tmp3
, 3) & RANGE_MASK
];
1070 outptr
[1] = range_limit
[(int) IRIGHT_SHIFT(tmp2
- tmp3
, 3) & RANGE_MASK
];
1075 * Perform dequantization and inverse DCT on one block of coefficients,
1076 * producing a reduced-size 1x1 output block.
1078 * We hardly need an inverse DCT routine for this: just take the
1079 * average pixel value, which is one-eighth of the DC coefficient.
1083 jpeg_idct_1x1 (j_decompress_ptr cinfo
, jpeg_component_info
* compptr
,
1084 JCOEFPTR coef_block
,
1085 JSAMPARRAY output_buf
, JDIMENSION output_col
)
1088 ISLOW_MULT_TYPE
* quantptr
;
1089 JSAMPLE
*range_limit
= IDCT_range_limit(cinfo
);
1092 /* 1x1 is trivial: just take the DC coefficient divided by 8. */
1094 quantptr
= (ISLOW_MULT_TYPE
*) compptr
->dct_table
;
1096 dcval
= DEQUANTIZE(coef_block
[0], quantptr
[0]);
1097 /* Add range center and fudge factor for descale and range-limit. */
1098 dcval
+= (((DCTELEM
) RANGE_CENTER
) << 3) + (1 << 2);
1100 output_buf
[0][output_col
] =
1101 range_limit
[(int) IRIGHT_SHIFT(dcval
, 3) & RANGE_MASK
];
1106 * Perform dequantization and inverse DCT on one block of coefficients,
1107 * producing a 9x9 output block.
1109 * Optimized algorithm with 10 multiplications in the 1-D kernel.
1110 * cK represents sqrt(2) * cos(K*pi/18).
1114 jpeg_idct_9x9 (j_decompress_ptr cinfo
, jpeg_component_info
* compptr
,
1115 JCOEFPTR coef_block
,
1116 JSAMPARRAY output_buf
, JDIMENSION output_col
)
1118 INT32 tmp0
, tmp1
, tmp2
, tmp3
, tmp10
, tmp11
, tmp12
, tmp13
, tmp14
;
1119 INT32 z1
, z2
, z3
, z4
;
1121 ISLOW_MULT_TYPE
* quantptr
;
1124 JSAMPLE
*range_limit
= IDCT_range_limit(cinfo
);
1126 int workspace
[8*9]; /* buffers data between passes */
1129 /* Pass 1: process columns from input, store into work array. */
1132 quantptr
= (ISLOW_MULT_TYPE
*) compptr
->dct_table
;
1134 for (ctr
= 0; ctr
< 8; ctr
++, inptr
++, quantptr
++, wsptr
++) {
1137 tmp0
= DEQUANTIZE(inptr
[DCTSIZE
*0], quantptr
[DCTSIZE
*0]);
1138 tmp0
<<= CONST_BITS
;
1139 /* Add fudge factor here for final descale. */
1140 tmp0
+= ONE
<< (CONST_BITS
-PASS1_BITS
-1);
1142 z1
= DEQUANTIZE(inptr
[DCTSIZE
*2], quantptr
[DCTSIZE
*2]);
1143 z2
= DEQUANTIZE(inptr
[DCTSIZE
*4], quantptr
[DCTSIZE
*4]);
1144 z3
= DEQUANTIZE(inptr
[DCTSIZE
*6], quantptr
[DCTSIZE
*6]);
1146 tmp3
= MULTIPLY(z3
, FIX(0.707106781)); /* c6 */
1148 tmp2
= tmp0
- tmp3
- tmp3
;
1150 tmp0
= MULTIPLY(z1
- z2
, FIX(0.707106781)); /* c6 */
1151 tmp11
= tmp2
+ tmp0
;
1152 tmp14
= tmp2
- tmp0
- tmp0
;
1154 tmp0
= MULTIPLY(z1
+ z2
, FIX(1.328926049)); /* c2 */
1155 tmp2
= MULTIPLY(z1
, FIX(1.083350441)); /* c4 */
1156 tmp3
= MULTIPLY(z2
, FIX(0.245575608)); /* c8 */
1158 tmp10
= tmp1
+ tmp0
- tmp3
;
1159 tmp12
= tmp1
- tmp0
+ tmp2
;
1160 tmp13
= tmp1
- tmp2
+ tmp3
;
1164 z1
= DEQUANTIZE(inptr
[DCTSIZE
*1], quantptr
[DCTSIZE
*1]);
1165 z2
= DEQUANTIZE(inptr
[DCTSIZE
*3], quantptr
[DCTSIZE
*3]);
1166 z3
= DEQUANTIZE(inptr
[DCTSIZE
*5], quantptr
[DCTSIZE
*5]);
1167 z4
= DEQUANTIZE(inptr
[DCTSIZE
*7], quantptr
[DCTSIZE
*7]);
1169 z2
= MULTIPLY(z2
, - FIX(1.224744871)); /* -c3 */
1171 tmp2
= MULTIPLY(z1
+ z3
, FIX(0.909038955)); /* c5 */
1172 tmp3
= MULTIPLY(z1
+ z4
, FIX(0.483689525)); /* c7 */
1173 tmp0
= tmp2
+ tmp3
- z2
;
1174 tmp1
= MULTIPLY(z3
- z4
, FIX(1.392728481)); /* c1 */
1177 tmp1
= MULTIPLY(z1
- z3
- z4
, FIX(1.224744871)); /* c3 */
1179 /* Final output stage */
1181 wsptr
[8*0] = (int) RIGHT_SHIFT(tmp10
+ tmp0
, CONST_BITS
-PASS1_BITS
);
1182 wsptr
[8*8] = (int) RIGHT_SHIFT(tmp10
- tmp0
, CONST_BITS
-PASS1_BITS
);
1183 wsptr
[8*1] = (int) RIGHT_SHIFT(tmp11
+ tmp1
, CONST_BITS
-PASS1_BITS
);
1184 wsptr
[8*7] = (int) RIGHT_SHIFT(tmp11
- tmp1
, CONST_BITS
-PASS1_BITS
);
1185 wsptr
[8*2] = (int) RIGHT_SHIFT(tmp12
+ tmp2
, CONST_BITS
-PASS1_BITS
);
1186 wsptr
[8*6] = (int) RIGHT_SHIFT(tmp12
- tmp2
, CONST_BITS
-PASS1_BITS
);
1187 wsptr
[8*3] = (int) RIGHT_SHIFT(tmp13
+ tmp3
, CONST_BITS
-PASS1_BITS
);
1188 wsptr
[8*5] = (int) RIGHT_SHIFT(tmp13
- tmp3
, CONST_BITS
-PASS1_BITS
);
1189 wsptr
[8*4] = (int) RIGHT_SHIFT(tmp14
, CONST_BITS
-PASS1_BITS
);
1192 /* Pass 2: process 9 rows from work array, store into output array. */
1195 for (ctr
= 0; ctr
< 9; ctr
++) {
1196 outptr
= output_buf
[ctr
] + output_col
;
1200 /* Add range center and fudge factor for final descale and range-limit. */
1201 tmp0
= (INT32
) wsptr
[0] +
1202 ((((INT32
) RANGE_CENTER
) << (PASS1_BITS
+3)) +
1203 (ONE
<< (PASS1_BITS
+2)));
1204 tmp0
<<= CONST_BITS
;
1206 z1
= (INT32
) wsptr
[2];
1207 z2
= (INT32
) wsptr
[4];
1208 z3
= (INT32
) wsptr
[6];
1210 tmp3
= MULTIPLY(z3
, FIX(0.707106781)); /* c6 */
1212 tmp2
= tmp0
- tmp3
- tmp3
;
1214 tmp0
= MULTIPLY(z1
- z2
, FIX(0.707106781)); /* c6 */
1215 tmp11
= tmp2
+ tmp0
;
1216 tmp14
= tmp2
- tmp0
- tmp0
;
1218 tmp0
= MULTIPLY(z1
+ z2
, FIX(1.328926049)); /* c2 */
1219 tmp2
= MULTIPLY(z1
, FIX(1.083350441)); /* c4 */
1220 tmp3
= MULTIPLY(z2
, FIX(0.245575608)); /* c8 */
1222 tmp10
= tmp1
+ tmp0
- tmp3
;
1223 tmp12
= tmp1
- tmp0
+ tmp2
;
1224 tmp13
= tmp1
- tmp2
+ tmp3
;
1228 z1
= (INT32
) wsptr
[1];
1229 z2
= (INT32
) wsptr
[3];
1230 z3
= (INT32
) wsptr
[5];
1231 z4
= (INT32
) wsptr
[7];
1233 z2
= MULTIPLY(z2
, - FIX(1.224744871)); /* -c3 */
1235 tmp2
= MULTIPLY(z1
+ z3
, FIX(0.909038955)); /* c5 */
1236 tmp3
= MULTIPLY(z1
+ z4
, FIX(0.483689525)); /* c7 */
1237 tmp0
= tmp2
+ tmp3
- z2
;
1238 tmp1
= MULTIPLY(z3
- z4
, FIX(1.392728481)); /* c1 */
1241 tmp1
= MULTIPLY(z1
- z3
- z4
, FIX(1.224744871)); /* c3 */
1243 /* Final output stage */
1245 outptr
[0] = range_limit
[(int) RIGHT_SHIFT(tmp10
+ tmp0
,
1246 CONST_BITS
+PASS1_BITS
+3)
1248 outptr
[8] = range_limit
[(int) RIGHT_SHIFT(tmp10
- tmp0
,
1249 CONST_BITS
+PASS1_BITS
+3)
1251 outptr
[1] = range_limit
[(int) RIGHT_SHIFT(tmp11
+ tmp1
,
1252 CONST_BITS
+PASS1_BITS
+3)
1254 outptr
[7] = range_limit
[(int) RIGHT_SHIFT(tmp11
- tmp1
,
1255 CONST_BITS
+PASS1_BITS
+3)
1257 outptr
[2] = range_limit
[(int) RIGHT_SHIFT(tmp12
+ tmp2
,
1258 CONST_BITS
+PASS1_BITS
+3)
1260 outptr
[6] = range_limit
[(int) RIGHT_SHIFT(tmp12
- tmp2
,
1261 CONST_BITS
+PASS1_BITS
+3)
1263 outptr
[3] = range_limit
[(int) RIGHT_SHIFT(tmp13
+ tmp3
,
1264 CONST_BITS
+PASS1_BITS
+3)
1266 outptr
[5] = range_limit
[(int) RIGHT_SHIFT(tmp13
- tmp3
,
1267 CONST_BITS
+PASS1_BITS
+3)
1269 outptr
[4] = range_limit
[(int) RIGHT_SHIFT(tmp14
,
1270 CONST_BITS
+PASS1_BITS
+3)
1273 wsptr
+= 8; /* advance pointer to next row */
1279 * Perform dequantization and inverse DCT on one block of coefficients,
1280 * producing a 10x10 output block.
1282 * Optimized algorithm with 12 multiplications in the 1-D kernel.
1283 * cK represents sqrt(2) * cos(K*pi/20).
1287 jpeg_idct_10x10 (j_decompress_ptr cinfo
, jpeg_component_info
* compptr
,
1288 JCOEFPTR coef_block
,
1289 JSAMPARRAY output_buf
, JDIMENSION output_col
)
1291 INT32 tmp10
, tmp11
, tmp12
, tmp13
, tmp14
;
1292 INT32 tmp20
, tmp21
, tmp22
, tmp23
, tmp24
;
1293 INT32 z1
, z2
, z3
, z4
, z5
;
1295 ISLOW_MULT_TYPE
* quantptr
;
1298 JSAMPLE
*range_limit
= IDCT_range_limit(cinfo
);
1300 int workspace
[8*10]; /* buffers data between passes */
1303 /* Pass 1: process columns from input, store into work array. */
1306 quantptr
= (ISLOW_MULT_TYPE
*) compptr
->dct_table
;
1308 for (ctr
= 0; ctr
< 8; ctr
++, inptr
++, quantptr
++, wsptr
++) {
1311 z3
= DEQUANTIZE(inptr
[DCTSIZE
*0], quantptr
[DCTSIZE
*0]);
1313 /* Add fudge factor here for final descale. */
1314 z3
+= ONE
<< (CONST_BITS
-PASS1_BITS
-1);
1315 z4
= DEQUANTIZE(inptr
[DCTSIZE
*4], quantptr
[DCTSIZE
*4]);
1316 z1
= MULTIPLY(z4
, FIX(1.144122806)); /* c4 */
1317 z2
= MULTIPLY(z4
, FIX(0.437016024)); /* c8 */
1321 tmp22
= RIGHT_SHIFT(z3
- ((z1
- z2
) << 1), /* c0 = (c4-c8)*2 */
1322 CONST_BITS
-PASS1_BITS
);
1324 z2
= DEQUANTIZE(inptr
[DCTSIZE
*2], quantptr
[DCTSIZE
*2]);
1325 z3
= DEQUANTIZE(inptr
[DCTSIZE
*6], quantptr
[DCTSIZE
*6]);
1327 z1
= MULTIPLY(z2
+ z3
, FIX(0.831253876)); /* c6 */
1328 tmp12
= z1
+ MULTIPLY(z2
, FIX(0.513743148)); /* c2-c6 */
1329 tmp13
= z1
- MULTIPLY(z3
, FIX(2.176250899)); /* c2+c6 */
1331 tmp20
= tmp10
+ tmp12
;
1332 tmp24
= tmp10
- tmp12
;
1333 tmp21
= tmp11
+ tmp13
;
1334 tmp23
= tmp11
- tmp13
;
1338 z1
= DEQUANTIZE(inptr
[DCTSIZE
*1], quantptr
[DCTSIZE
*1]);
1339 z2
= DEQUANTIZE(inptr
[DCTSIZE
*3], quantptr
[DCTSIZE
*3]);
1340 z3
= DEQUANTIZE(inptr
[DCTSIZE
*5], quantptr
[DCTSIZE
*5]);
1341 z4
= DEQUANTIZE(inptr
[DCTSIZE
*7], quantptr
[DCTSIZE
*7]);
1346 tmp12
= MULTIPLY(tmp13
, FIX(0.309016994)); /* (c3-c7)/2 */
1347 z5
= z3
<< CONST_BITS
;
1349 z2
= MULTIPLY(tmp11
, FIX(0.951056516)); /* (c3+c7)/2 */
1352 tmp10
= MULTIPLY(z1
, FIX(1.396802247)) + z2
+ z4
; /* c1 */
1353 tmp14
= MULTIPLY(z1
, FIX(0.221231742)) - z2
+ z4
; /* c9 */
1355 z2
= MULTIPLY(tmp11
, FIX(0.587785252)); /* (c1-c9)/2 */
1356 z4
= z5
- tmp12
- (tmp13
<< (CONST_BITS
- 1));
1358 tmp12
= (z1
- tmp13
- z3
) << PASS1_BITS
;
1360 tmp11
= MULTIPLY(z1
, FIX(1.260073511)) - z2
- z4
; /* c3 */
1361 tmp13
= MULTIPLY(z1
, FIX(0.642039522)) - z2
+ z4
; /* c7 */
1363 /* Final output stage */
1365 wsptr
[8*0] = (int) RIGHT_SHIFT(tmp20
+ tmp10
, CONST_BITS
-PASS1_BITS
);
1366 wsptr
[8*9] = (int) RIGHT_SHIFT(tmp20
- tmp10
, CONST_BITS
-PASS1_BITS
);
1367 wsptr
[8*1] = (int) RIGHT_SHIFT(tmp21
+ tmp11
, CONST_BITS
-PASS1_BITS
);
1368 wsptr
[8*8] = (int) RIGHT_SHIFT(tmp21
- tmp11
, CONST_BITS
-PASS1_BITS
);
1369 wsptr
[8*2] = (int) (tmp22
+ tmp12
);
1370 wsptr
[8*7] = (int) (tmp22
- tmp12
);
1371 wsptr
[8*3] = (int) RIGHT_SHIFT(tmp23
+ tmp13
, CONST_BITS
-PASS1_BITS
);
1372 wsptr
[8*6] = (int) RIGHT_SHIFT(tmp23
- tmp13
, CONST_BITS
-PASS1_BITS
);
1373 wsptr
[8*4] = (int) RIGHT_SHIFT(tmp24
+ tmp14
, CONST_BITS
-PASS1_BITS
);
1374 wsptr
[8*5] = (int) RIGHT_SHIFT(tmp24
- tmp14
, CONST_BITS
-PASS1_BITS
);
1377 /* Pass 2: process 10 rows from work array, store into output array. */
1380 for (ctr
= 0; ctr
< 10; ctr
++) {
1381 outptr
= output_buf
[ctr
] + output_col
;
1385 /* Add range center and fudge factor for final descale and range-limit. */
1386 z3
= (INT32
) wsptr
[0] +
1387 ((((INT32
) RANGE_CENTER
) << (PASS1_BITS
+3)) +
1388 (ONE
<< (PASS1_BITS
+2)));
1390 z4
= (INT32
) wsptr
[4];
1391 z1
= MULTIPLY(z4
, FIX(1.144122806)); /* c4 */
1392 z2
= MULTIPLY(z4
, FIX(0.437016024)); /* c8 */
1396 tmp22
= z3
- ((z1
- z2
) << 1); /* c0 = (c4-c8)*2 */
1398 z2
= (INT32
) wsptr
[2];
1399 z3
= (INT32
) wsptr
[6];
1401 z1
= MULTIPLY(z2
+ z3
, FIX(0.831253876)); /* c6 */
1402 tmp12
= z1
+ MULTIPLY(z2
, FIX(0.513743148)); /* c2-c6 */
1403 tmp13
= z1
- MULTIPLY(z3
, FIX(2.176250899)); /* c2+c6 */
1405 tmp20
= tmp10
+ tmp12
;
1406 tmp24
= tmp10
- tmp12
;
1407 tmp21
= tmp11
+ tmp13
;
1408 tmp23
= tmp11
- tmp13
;
1412 z1
= (INT32
) wsptr
[1];
1413 z2
= (INT32
) wsptr
[3];
1414 z3
= (INT32
) wsptr
[5];
1416 z4
= (INT32
) wsptr
[7];
1421 tmp12
= MULTIPLY(tmp13
, FIX(0.309016994)); /* (c3-c7)/2 */
1423 z2
= MULTIPLY(tmp11
, FIX(0.951056516)); /* (c3+c7)/2 */
1426 tmp10
= MULTIPLY(z1
, FIX(1.396802247)) + z2
+ z4
; /* c1 */
1427 tmp14
= MULTIPLY(z1
, FIX(0.221231742)) - z2
+ z4
; /* c9 */
1429 z2
= MULTIPLY(tmp11
, FIX(0.587785252)); /* (c1-c9)/2 */
1430 z4
= z3
- tmp12
- (tmp13
<< (CONST_BITS
- 1));
1432 tmp12
= ((z1
- tmp13
) << CONST_BITS
) - z3
;
1434 tmp11
= MULTIPLY(z1
, FIX(1.260073511)) - z2
- z4
; /* c3 */
1435 tmp13
= MULTIPLY(z1
, FIX(0.642039522)) - z2
+ z4
; /* c7 */
1437 /* Final output stage */
1439 outptr
[0] = range_limit
[(int) RIGHT_SHIFT(tmp20
+ tmp10
,
1440 CONST_BITS
+PASS1_BITS
+3)
1442 outptr
[9] = range_limit
[(int) RIGHT_SHIFT(tmp20
- tmp10
,
1443 CONST_BITS
+PASS1_BITS
+3)
1445 outptr
[1] = range_limit
[(int) RIGHT_SHIFT(tmp21
+ tmp11
,
1446 CONST_BITS
+PASS1_BITS
+3)
1448 outptr
[8] = range_limit
[(int) RIGHT_SHIFT(tmp21
- tmp11
,
1449 CONST_BITS
+PASS1_BITS
+3)
1451 outptr
[2] = range_limit
[(int) RIGHT_SHIFT(tmp22
+ tmp12
,
1452 CONST_BITS
+PASS1_BITS
+3)
1454 outptr
[7] = range_limit
[(int) RIGHT_SHIFT(tmp22
- tmp12
,
1455 CONST_BITS
+PASS1_BITS
+3)
1457 outptr
[3] = range_limit
[(int) RIGHT_SHIFT(tmp23
+ tmp13
,
1458 CONST_BITS
+PASS1_BITS
+3)
1460 outptr
[6] = range_limit
[(int) RIGHT_SHIFT(tmp23
- tmp13
,
1461 CONST_BITS
+PASS1_BITS
+3)
1463 outptr
[4] = range_limit
[(int) RIGHT_SHIFT(tmp24
+ tmp14
,
1464 CONST_BITS
+PASS1_BITS
+3)
1466 outptr
[5] = range_limit
[(int) RIGHT_SHIFT(tmp24
- tmp14
,
1467 CONST_BITS
+PASS1_BITS
+3)
1470 wsptr
+= 8; /* advance pointer to next row */
1476 * Perform dequantization and inverse DCT on one block of coefficients,
1477 * producing an 11x11 output block.
1479 * Optimized algorithm with 24 multiplications in the 1-D kernel.
1480 * cK represents sqrt(2) * cos(K*pi/22).
1484 jpeg_idct_11x11 (j_decompress_ptr cinfo
, jpeg_component_info
* compptr
,
1485 JCOEFPTR coef_block
,
1486 JSAMPARRAY output_buf
, JDIMENSION output_col
)
1488 INT32 tmp10
, tmp11
, tmp12
, tmp13
, tmp14
;
1489 INT32 tmp20
, tmp21
, tmp22
, tmp23
, tmp24
, tmp25
;
1490 INT32 z1
, z2
, z3
, z4
;
1492 ISLOW_MULT_TYPE
* quantptr
;
1495 JSAMPLE
*range_limit
= IDCT_range_limit(cinfo
);
1497 int workspace
[8*11]; /* buffers data between passes */
1500 /* Pass 1: process columns from input, store into work array. */
1503 quantptr
= (ISLOW_MULT_TYPE
*) compptr
->dct_table
;
1505 for (ctr
= 0; ctr
< 8; ctr
++, inptr
++, quantptr
++, wsptr
++) {
1508 tmp10
= DEQUANTIZE(inptr
[DCTSIZE
*0], quantptr
[DCTSIZE
*0]);
1509 tmp10
<<= CONST_BITS
;
1510 /* Add fudge factor here for final descale. */
1511 tmp10
+= ONE
<< (CONST_BITS
-PASS1_BITS
-1);
1513 z1
= DEQUANTIZE(inptr
[DCTSIZE
*2], quantptr
[DCTSIZE
*2]);
1514 z2
= DEQUANTIZE(inptr
[DCTSIZE
*4], quantptr
[DCTSIZE
*4]);
1515 z3
= DEQUANTIZE(inptr
[DCTSIZE
*6], quantptr
[DCTSIZE
*6]);
1517 tmp20
= MULTIPLY(z2
- z3
, FIX(2.546640132)); /* c2+c4 */
1518 tmp23
= MULTIPLY(z2
- z1
, FIX(0.430815045)); /* c2-c6 */
1520 tmp24
= MULTIPLY(z4
, - FIX(1.155664402)); /* -(c2-c10) */
1522 tmp25
= tmp10
+ MULTIPLY(z4
, FIX(1.356927976)); /* c2 */
1523 tmp21
= tmp20
+ tmp23
+ tmp25
-
1524 MULTIPLY(z2
, FIX(1.821790775)); /* c2+c4+c10-c6 */
1525 tmp20
+= tmp25
+ MULTIPLY(z3
, FIX(2.115825087)); /* c4+c6 */
1526 tmp23
+= tmp25
- MULTIPLY(z1
, FIX(1.513598477)); /* c6+c8 */
1528 tmp22
= tmp24
- MULTIPLY(z3
, FIX(0.788749120)); /* c8+c10 */
1529 tmp24
+= MULTIPLY(z2
, FIX(1.944413522)) - /* c2+c8 */
1530 MULTIPLY(z1
, FIX(1.390975730)); /* c4+c10 */
1531 tmp25
= tmp10
- MULTIPLY(z4
, FIX(1.414213562)); /* c0 */
1535 z1
= DEQUANTIZE(inptr
[DCTSIZE
*1], quantptr
[DCTSIZE
*1]);
1536 z2
= DEQUANTIZE(inptr
[DCTSIZE
*3], quantptr
[DCTSIZE
*3]);
1537 z3
= DEQUANTIZE(inptr
[DCTSIZE
*5], quantptr
[DCTSIZE
*5]);
1538 z4
= DEQUANTIZE(inptr
[DCTSIZE
*7], quantptr
[DCTSIZE
*7]);
1541 tmp14
= MULTIPLY(tmp11
+ z3
+ z4
, FIX(0.398430003)); /* c9 */
1542 tmp11
= MULTIPLY(tmp11
, FIX(0.887983902)); /* c3-c9 */
1543 tmp12
= MULTIPLY(z1
+ z3
, FIX(0.670361295)); /* c5-c9 */
1544 tmp13
= tmp14
+ MULTIPLY(z1
+ z4
, FIX(0.366151574)); /* c7-c9 */
1545 tmp10
= tmp11
+ tmp12
+ tmp13
-
1546 MULTIPLY(z1
, FIX(0.923107866)); /* c7+c5+c3-c1-2*c9 */
1547 z1
= tmp14
- MULTIPLY(z2
+ z3
, FIX(1.163011579)); /* c7+c9 */
1548 tmp11
+= z1
+ MULTIPLY(z2
, FIX(2.073276588)); /* c1+c7+3*c9-c3 */
1549 tmp12
+= z1
- MULTIPLY(z3
, FIX(1.192193623)); /* c3+c5-c7-c9 */
1550 z1
= MULTIPLY(z2
+ z4
, - FIX(1.798248910)); /* -(c1+c9) */
1552 tmp13
+= z1
+ MULTIPLY(z4
, FIX(2.102458632)); /* c1+c5+c9-c7 */
1553 tmp14
+= MULTIPLY(z2
, - FIX(1.467221301)) + /* -(c5+c9) */
1554 MULTIPLY(z3
, FIX(1.001388905)) - /* c1-c9 */
1555 MULTIPLY(z4
, FIX(1.684843907)); /* c3+c9 */
1557 /* Final output stage */
1559 wsptr
[8*0] = (int) RIGHT_SHIFT(tmp20
+ tmp10
, CONST_BITS
-PASS1_BITS
);
1560 wsptr
[8*10] = (int) RIGHT_SHIFT(tmp20
- tmp10
, CONST_BITS
-PASS1_BITS
);
1561 wsptr
[8*1] = (int) RIGHT_SHIFT(tmp21
+ tmp11
, CONST_BITS
-PASS1_BITS
);
1562 wsptr
[8*9] = (int) RIGHT_SHIFT(tmp21
- tmp11
, CONST_BITS
-PASS1_BITS
);
1563 wsptr
[8*2] = (int) RIGHT_SHIFT(tmp22
+ tmp12
, CONST_BITS
-PASS1_BITS
);
1564 wsptr
[8*8] = (int) RIGHT_SHIFT(tmp22
- tmp12
, CONST_BITS
-PASS1_BITS
);
1565 wsptr
[8*3] = (int) RIGHT_SHIFT(tmp23
+ tmp13
, CONST_BITS
-PASS1_BITS
);
1566 wsptr
[8*7] = (int) RIGHT_SHIFT(tmp23
- tmp13
, CONST_BITS
-PASS1_BITS
);
1567 wsptr
[8*4] = (int) RIGHT_SHIFT(tmp24
+ tmp14
, CONST_BITS
-PASS1_BITS
);
1568 wsptr
[8*6] = (int) RIGHT_SHIFT(tmp24
- tmp14
, CONST_BITS
-PASS1_BITS
);
1569 wsptr
[8*5] = (int) RIGHT_SHIFT(tmp25
, CONST_BITS
-PASS1_BITS
);
1572 /* Pass 2: process 11 rows from work array, store into output array. */
1575 for (ctr
= 0; ctr
< 11; ctr
++) {
1576 outptr
= output_buf
[ctr
] + output_col
;
1580 /* Add range center and fudge factor for final descale and range-limit. */
1581 tmp10
= (INT32
) wsptr
[0] +
1582 ((((INT32
) RANGE_CENTER
) << (PASS1_BITS
+3)) +
1583 (ONE
<< (PASS1_BITS
+2)));
1584 tmp10
<<= CONST_BITS
;
1586 z1
= (INT32
) wsptr
[2];
1587 z2
= (INT32
) wsptr
[4];
1588 z3
= (INT32
) wsptr
[6];
1590 tmp20
= MULTIPLY(z2
- z3
, FIX(2.546640132)); /* c2+c4 */
1591 tmp23
= MULTIPLY(z2
- z1
, FIX(0.430815045)); /* c2-c6 */
1593 tmp24
= MULTIPLY(z4
, - FIX(1.155664402)); /* -(c2-c10) */
1595 tmp25
= tmp10
+ MULTIPLY(z4
, FIX(1.356927976)); /* c2 */
1596 tmp21
= tmp20
+ tmp23
+ tmp25
-
1597 MULTIPLY(z2
, FIX(1.821790775)); /* c2+c4+c10-c6 */
1598 tmp20
+= tmp25
+ MULTIPLY(z3
, FIX(2.115825087)); /* c4+c6 */
1599 tmp23
+= tmp25
- MULTIPLY(z1
, FIX(1.513598477)); /* c6+c8 */
1601 tmp22
= tmp24
- MULTIPLY(z3
, FIX(0.788749120)); /* c8+c10 */
1602 tmp24
+= MULTIPLY(z2
, FIX(1.944413522)) - /* c2+c8 */
1603 MULTIPLY(z1
, FIX(1.390975730)); /* c4+c10 */
1604 tmp25
= tmp10
- MULTIPLY(z4
, FIX(1.414213562)); /* c0 */
1608 z1
= (INT32
) wsptr
[1];
1609 z2
= (INT32
) wsptr
[3];
1610 z3
= (INT32
) wsptr
[5];
1611 z4
= (INT32
) wsptr
[7];
1614 tmp14
= MULTIPLY(tmp11
+ z3
+ z4
, FIX(0.398430003)); /* c9 */
1615 tmp11
= MULTIPLY(tmp11
, FIX(0.887983902)); /* c3-c9 */
1616 tmp12
= MULTIPLY(z1
+ z3
, FIX(0.670361295)); /* c5-c9 */
1617 tmp13
= tmp14
+ MULTIPLY(z1
+ z4
, FIX(0.366151574)); /* c7-c9 */
1618 tmp10
= tmp11
+ tmp12
+ tmp13
-
1619 MULTIPLY(z1
, FIX(0.923107866)); /* c7+c5+c3-c1-2*c9 */
1620 z1
= tmp14
- MULTIPLY(z2
+ z3
, FIX(1.163011579)); /* c7+c9 */
1621 tmp11
+= z1
+ MULTIPLY(z2
, FIX(2.073276588)); /* c1+c7+3*c9-c3 */
1622 tmp12
+= z1
- MULTIPLY(z3
, FIX(1.192193623)); /* c3+c5-c7-c9 */
1623 z1
= MULTIPLY(z2
+ z4
, - FIX(1.798248910)); /* -(c1+c9) */
1625 tmp13
+= z1
+ MULTIPLY(z4
, FIX(2.102458632)); /* c1+c5+c9-c7 */
1626 tmp14
+= MULTIPLY(z2
, - FIX(1.467221301)) + /* -(c5+c9) */
1627 MULTIPLY(z3
, FIX(1.001388905)) - /* c1-c9 */
1628 MULTIPLY(z4
, FIX(1.684843907)); /* c3+c9 */
1630 /* Final output stage */
1632 outptr
[0] = range_limit
[(int) RIGHT_SHIFT(tmp20
+ tmp10
,
1633 CONST_BITS
+PASS1_BITS
+3)
1635 outptr
[10] = range_limit
[(int) RIGHT_SHIFT(tmp20
- tmp10
,
1636 CONST_BITS
+PASS1_BITS
+3)
1638 outptr
[1] = range_limit
[(int) RIGHT_SHIFT(tmp21
+ tmp11
,
1639 CONST_BITS
+PASS1_BITS
+3)
1641 outptr
[9] = range_limit
[(int) RIGHT_SHIFT(tmp21
- tmp11
,
1642 CONST_BITS
+PASS1_BITS
+3)
1644 outptr
[2] = range_limit
[(int) RIGHT_SHIFT(tmp22
+ tmp12
,
1645 CONST_BITS
+PASS1_BITS
+3)
1647 outptr
[8] = range_limit
[(int) RIGHT_SHIFT(tmp22
- tmp12
,
1648 CONST_BITS
+PASS1_BITS
+3)
1650 outptr
[3] = range_limit
[(int) RIGHT_SHIFT(tmp23
+ tmp13
,
1651 CONST_BITS
+PASS1_BITS
+3)
1653 outptr
[7] = range_limit
[(int) RIGHT_SHIFT(tmp23
- tmp13
,
1654 CONST_BITS
+PASS1_BITS
+3)
1656 outptr
[4] = range_limit
[(int) RIGHT_SHIFT(tmp24
+ tmp14
,
1657 CONST_BITS
+PASS1_BITS
+3)
1659 outptr
[6] = range_limit
[(int) RIGHT_SHIFT(tmp24
- tmp14
,
1660 CONST_BITS
+PASS1_BITS
+3)
1662 outptr
[5] = range_limit
[(int) RIGHT_SHIFT(tmp25
,
1663 CONST_BITS
+PASS1_BITS
+3)
1666 wsptr
+= 8; /* advance pointer to next row */
1672 * Perform dequantization and inverse DCT on one block of coefficients,
1673 * producing a 12x12 output block.
1675 * Optimized algorithm with 15 multiplications in the 1-D kernel.
1676 * cK represents sqrt(2) * cos(K*pi/24).
1680 jpeg_idct_12x12 (j_decompress_ptr cinfo
, jpeg_component_info
* compptr
,
1681 JCOEFPTR coef_block
,
1682 JSAMPARRAY output_buf
, JDIMENSION output_col
)
1684 INT32 tmp10
, tmp11
, tmp12
, tmp13
, tmp14
, tmp15
;
1685 INT32 tmp20
, tmp21
, tmp22
, tmp23
, tmp24
, tmp25
;
1686 INT32 z1
, z2
, z3
, z4
;
1688 ISLOW_MULT_TYPE
* quantptr
;
1691 JSAMPLE
*range_limit
= IDCT_range_limit(cinfo
);
1693 int workspace
[8*12]; /* buffers data between passes */
1696 /* Pass 1: process columns from input, store into work array. */
1699 quantptr
= (ISLOW_MULT_TYPE
*) compptr
->dct_table
;
1701 for (ctr
= 0; ctr
< 8; ctr
++, inptr
++, quantptr
++, wsptr
++) {
1704 z3
= DEQUANTIZE(inptr
[DCTSIZE
*0], quantptr
[DCTSIZE
*0]);
1706 /* Add fudge factor here for final descale. */
1707 z3
+= ONE
<< (CONST_BITS
-PASS1_BITS
-1);
1709 z4
= DEQUANTIZE(inptr
[DCTSIZE
*4], quantptr
[DCTSIZE
*4]);
1710 z4
= MULTIPLY(z4
, FIX(1.224744871)); /* c4 */
1715 z1
= DEQUANTIZE(inptr
[DCTSIZE
*2], quantptr
[DCTSIZE
*2]);
1716 z4
= MULTIPLY(z1
, FIX(1.366025404)); /* c2 */
1718 z2
= DEQUANTIZE(inptr
[DCTSIZE
*6], quantptr
[DCTSIZE
*6]);
1728 tmp20
= tmp10
+ tmp12
;
1729 tmp25
= tmp10
- tmp12
;
1731 tmp12
= z4
- z1
- z2
;
1733 tmp22
= tmp11
+ tmp12
;
1734 tmp23
= tmp11
- tmp12
;
1738 z1
= DEQUANTIZE(inptr
[DCTSIZE
*1], quantptr
[DCTSIZE
*1]);
1739 z2
= DEQUANTIZE(inptr
[DCTSIZE
*3], quantptr
[DCTSIZE
*3]);
1740 z3
= DEQUANTIZE(inptr
[DCTSIZE
*5], quantptr
[DCTSIZE
*5]);
1741 z4
= DEQUANTIZE(inptr
[DCTSIZE
*7], quantptr
[DCTSIZE
*7]);
1743 tmp11
= MULTIPLY(z2
, FIX(1.306562965)); /* c3 */
1744 tmp14
= MULTIPLY(z2
, - FIX_0_541196100
); /* -c9 */
1747 tmp15
= MULTIPLY(tmp10
+ z4
, FIX(0.860918669)); /* c7 */
1748 tmp12
= tmp15
+ MULTIPLY(tmp10
, FIX(0.261052384)); /* c5-c7 */
1749 tmp10
= tmp12
+ tmp11
+ MULTIPLY(z1
, FIX(0.280143716)); /* c1-c5 */
1750 tmp13
= MULTIPLY(z3
+ z4
, - FIX(1.045510580)); /* -(c7+c11) */
1751 tmp12
+= tmp13
+ tmp14
- MULTIPLY(z3
, FIX(1.478575242)); /* c1+c5-c7-c11 */
1752 tmp13
+= tmp15
- tmp11
+ MULTIPLY(z4
, FIX(1.586706681)); /* c1+c11 */
1753 tmp15
+= tmp14
- MULTIPLY(z1
, FIX(0.676326758)) - /* c7-c11 */
1754 MULTIPLY(z4
, FIX(1.982889723)); /* c5+c7 */
1758 z3
= MULTIPLY(z1
+ z2
, FIX_0_541196100
); /* c9 */
1759 tmp11
= z3
+ MULTIPLY(z1
, FIX_0_765366865
); /* c3-c9 */
1760 tmp14
= z3
- MULTIPLY(z2
, FIX_1_847759065
); /* c3+c9 */
1762 /* Final output stage */
1764 wsptr
[8*0] = (int) RIGHT_SHIFT(tmp20
+ tmp10
, CONST_BITS
-PASS1_BITS
);
1765 wsptr
[8*11] = (int) RIGHT_SHIFT(tmp20
- tmp10
, CONST_BITS
-PASS1_BITS
);
1766 wsptr
[8*1] = (int) RIGHT_SHIFT(tmp21
+ tmp11
, CONST_BITS
-PASS1_BITS
);
1767 wsptr
[8*10] = (int) RIGHT_SHIFT(tmp21
- tmp11
, CONST_BITS
-PASS1_BITS
);
1768 wsptr
[8*2] = (int) RIGHT_SHIFT(tmp22
+ tmp12
, CONST_BITS
-PASS1_BITS
);
1769 wsptr
[8*9] = (int) RIGHT_SHIFT(tmp22
- tmp12
, CONST_BITS
-PASS1_BITS
);
1770 wsptr
[8*3] = (int) RIGHT_SHIFT(tmp23
+ tmp13
, CONST_BITS
-PASS1_BITS
);
1771 wsptr
[8*8] = (int) RIGHT_SHIFT(tmp23
- tmp13
, CONST_BITS
-PASS1_BITS
);
1772 wsptr
[8*4] = (int) RIGHT_SHIFT(tmp24
+ tmp14
, CONST_BITS
-PASS1_BITS
);
1773 wsptr
[8*7] = (int) RIGHT_SHIFT(tmp24
- tmp14
, CONST_BITS
-PASS1_BITS
);
1774 wsptr
[8*5] = (int) RIGHT_SHIFT(tmp25
+ tmp15
, CONST_BITS
-PASS1_BITS
);
1775 wsptr
[8*6] = (int) RIGHT_SHIFT(tmp25
- tmp15
, CONST_BITS
-PASS1_BITS
);
1778 /* Pass 2: process 12 rows from work array, store into output array. */
1781 for (ctr
= 0; ctr
< 12; ctr
++) {
1782 outptr
= output_buf
[ctr
] + output_col
;
1786 /* Add range center and fudge factor for final descale and range-limit. */
1787 z3
= (INT32
) wsptr
[0] +
1788 ((((INT32
) RANGE_CENTER
) << (PASS1_BITS
+3)) +
1789 (ONE
<< (PASS1_BITS
+2)));
1792 z4
= (INT32
) wsptr
[4];
1793 z4
= MULTIPLY(z4
, FIX(1.224744871)); /* c4 */
1798 z1
= (INT32
) wsptr
[2];
1799 z4
= MULTIPLY(z1
, FIX(1.366025404)); /* c2 */
1801 z2
= (INT32
) wsptr
[6];
1811 tmp20
= tmp10
+ tmp12
;
1812 tmp25
= tmp10
- tmp12
;
1814 tmp12
= z4
- z1
- z2
;
1816 tmp22
= tmp11
+ tmp12
;
1817 tmp23
= tmp11
- tmp12
;
1821 z1
= (INT32
) wsptr
[1];
1822 z2
= (INT32
) wsptr
[3];
1823 z3
= (INT32
) wsptr
[5];
1824 z4
= (INT32
) wsptr
[7];
1826 tmp11
= MULTIPLY(z2
, FIX(1.306562965)); /* c3 */
1827 tmp14
= MULTIPLY(z2
, - FIX_0_541196100
); /* -c9 */
1830 tmp15
= MULTIPLY(tmp10
+ z4
, FIX(0.860918669)); /* c7 */
1831 tmp12
= tmp15
+ MULTIPLY(tmp10
, FIX(0.261052384)); /* c5-c7 */
1832 tmp10
= tmp12
+ tmp11
+ MULTIPLY(z1
, FIX(0.280143716)); /* c1-c5 */
1833 tmp13
= MULTIPLY(z3
+ z4
, - FIX(1.045510580)); /* -(c7+c11) */
1834 tmp12
+= tmp13
+ tmp14
- MULTIPLY(z3
, FIX(1.478575242)); /* c1+c5-c7-c11 */
1835 tmp13
+= tmp15
- tmp11
+ MULTIPLY(z4
, FIX(1.586706681)); /* c1+c11 */
1836 tmp15
+= tmp14
- MULTIPLY(z1
, FIX(0.676326758)) - /* c7-c11 */
1837 MULTIPLY(z4
, FIX(1.982889723)); /* c5+c7 */
1841 z3
= MULTIPLY(z1
+ z2
, FIX_0_541196100
); /* c9 */
1842 tmp11
= z3
+ MULTIPLY(z1
, FIX_0_765366865
); /* c3-c9 */
1843 tmp14
= z3
- MULTIPLY(z2
, FIX_1_847759065
); /* c3+c9 */
1845 /* Final output stage */
1847 outptr
[0] = range_limit
[(int) RIGHT_SHIFT(tmp20
+ tmp10
,
1848 CONST_BITS
+PASS1_BITS
+3)
1850 outptr
[11] = range_limit
[(int) RIGHT_SHIFT(tmp20
- tmp10
,
1851 CONST_BITS
+PASS1_BITS
+3)
1853 outptr
[1] = range_limit
[(int) RIGHT_SHIFT(tmp21
+ tmp11
,
1854 CONST_BITS
+PASS1_BITS
+3)
1856 outptr
[10] = range_limit
[(int) RIGHT_SHIFT(tmp21
- tmp11
,
1857 CONST_BITS
+PASS1_BITS
+3)
1859 outptr
[2] = range_limit
[(int) RIGHT_SHIFT(tmp22
+ tmp12
,
1860 CONST_BITS
+PASS1_BITS
+3)
1862 outptr
[9] = range_limit
[(int) RIGHT_SHIFT(tmp22
- tmp12
,
1863 CONST_BITS
+PASS1_BITS
+3)
1865 outptr
[3] = range_limit
[(int) RIGHT_SHIFT(tmp23
+ tmp13
,
1866 CONST_BITS
+PASS1_BITS
+3)
1868 outptr
[8] = range_limit
[(int) RIGHT_SHIFT(tmp23
- tmp13
,
1869 CONST_BITS
+PASS1_BITS
+3)
1871 outptr
[4] = range_limit
[(int) RIGHT_SHIFT(tmp24
+ tmp14
,
1872 CONST_BITS
+PASS1_BITS
+3)
1874 outptr
[7] = range_limit
[(int) RIGHT_SHIFT(tmp24
- tmp14
,
1875 CONST_BITS
+PASS1_BITS
+3)
1877 outptr
[5] = range_limit
[(int) RIGHT_SHIFT(tmp25
+ tmp15
,
1878 CONST_BITS
+PASS1_BITS
+3)
1880 outptr
[6] = range_limit
[(int) RIGHT_SHIFT(tmp25
- tmp15
,
1881 CONST_BITS
+PASS1_BITS
+3)
1884 wsptr
+= 8; /* advance pointer to next row */
1890 * Perform dequantization and inverse DCT on one block of coefficients,
1891 * producing a 13x13 output block.
1893 * Optimized algorithm with 29 multiplications in the 1-D kernel.
1894 * cK represents sqrt(2) * cos(K*pi/26).
1898 jpeg_idct_13x13 (j_decompress_ptr cinfo
, jpeg_component_info
* compptr
,
1899 JCOEFPTR coef_block
,
1900 JSAMPARRAY output_buf
, JDIMENSION output_col
)
1902 INT32 tmp10
, tmp11
, tmp12
, tmp13
, tmp14
, tmp15
;
1903 INT32 tmp20
, tmp21
, tmp22
, tmp23
, tmp24
, tmp25
, tmp26
;
1904 INT32 z1
, z2
, z3
, z4
;
1906 ISLOW_MULT_TYPE
* quantptr
;
1909 JSAMPLE
*range_limit
= IDCT_range_limit(cinfo
);
1911 int workspace
[8*13]; /* buffers data between passes */
1914 /* Pass 1: process columns from input, store into work array. */
1917 quantptr
= (ISLOW_MULT_TYPE
*) compptr
->dct_table
;
1919 for (ctr
= 0; ctr
< 8; ctr
++, inptr
++, quantptr
++, wsptr
++) {
1922 z1
= DEQUANTIZE(inptr
[DCTSIZE
*0], quantptr
[DCTSIZE
*0]);
1924 /* Add fudge factor here for final descale. */
1925 z1
+= ONE
<< (CONST_BITS
-PASS1_BITS
-1);
1927 z2
= DEQUANTIZE(inptr
[DCTSIZE
*2], quantptr
[DCTSIZE
*2]);
1928 z3
= DEQUANTIZE(inptr
[DCTSIZE
*4], quantptr
[DCTSIZE
*4]);
1929 z4
= DEQUANTIZE(inptr
[DCTSIZE
*6], quantptr
[DCTSIZE
*6]);
1934 tmp12
= MULTIPLY(tmp10
, FIX(1.155388986)); /* (c4+c6)/2 */
1935 tmp13
= MULTIPLY(tmp11
, FIX(0.096834934)) + z1
; /* (c4-c6)/2 */
1937 tmp20
= MULTIPLY(z2
, FIX(1.373119086)) + tmp12
+ tmp13
; /* c2 */
1938 tmp22
= MULTIPLY(z2
, FIX(0.501487041)) - tmp12
+ tmp13
; /* c10 */
1940 tmp12
= MULTIPLY(tmp10
, FIX(0.316450131)); /* (c8-c12)/2 */
1941 tmp13
= MULTIPLY(tmp11
, FIX(0.486914739)) + z1
; /* (c8+c12)/2 */
1943 tmp21
= MULTIPLY(z2
, FIX(1.058554052)) - tmp12
+ tmp13
; /* c6 */
1944 tmp25
= MULTIPLY(z2
, - FIX(1.252223920)) + tmp12
+ tmp13
; /* c4 */
1946 tmp12
= MULTIPLY(tmp10
, FIX(0.435816023)); /* (c2-c10)/2 */
1947 tmp13
= MULTIPLY(tmp11
, FIX(0.937303064)) - z1
; /* (c2+c10)/2 */
1949 tmp23
= MULTIPLY(z2
, - FIX(0.170464608)) - tmp12
- tmp13
; /* c12 */
1950 tmp24
= MULTIPLY(z2
, - FIX(0.803364869)) + tmp12
- tmp13
; /* c8 */
1952 tmp26
= MULTIPLY(tmp11
- z2
, FIX(1.414213562)) + z1
; /* c0 */
1956 z1
= DEQUANTIZE(inptr
[DCTSIZE
*1], quantptr
[DCTSIZE
*1]);
1957 z2
= DEQUANTIZE(inptr
[DCTSIZE
*3], quantptr
[DCTSIZE
*3]);
1958 z3
= DEQUANTIZE(inptr
[DCTSIZE
*5], quantptr
[DCTSIZE
*5]);
1959 z4
= DEQUANTIZE(inptr
[DCTSIZE
*7], quantptr
[DCTSIZE
*7]);
1961 tmp11
= MULTIPLY(z1
+ z2
, FIX(1.322312651)); /* c3 */
1962 tmp12
= MULTIPLY(z1
+ z3
, FIX(1.163874945)); /* c5 */
1964 tmp13
= MULTIPLY(tmp15
, FIX(0.937797057)); /* c7 */
1965 tmp10
= tmp11
+ tmp12
+ tmp13
-
1966 MULTIPLY(z1
, FIX(2.020082300)); /* c7+c5+c3-c1 */
1967 tmp14
= MULTIPLY(z2
+ z3
, - FIX(0.338443458)); /* -c11 */
1968 tmp11
+= tmp14
+ MULTIPLY(z2
, FIX(0.837223564)); /* c5+c9+c11-c3 */
1969 tmp12
+= tmp14
- MULTIPLY(z3
, FIX(1.572116027)); /* c1+c5-c9-c11 */
1970 tmp14
= MULTIPLY(z2
+ z4
, - FIX(1.163874945)); /* -c5 */
1972 tmp13
+= tmp14
+ MULTIPLY(z4
, FIX(2.205608352)); /* c3+c5+c9-c7 */
1973 tmp14
= MULTIPLY(z3
+ z4
, - FIX(0.657217813)); /* -c9 */
1976 tmp15
= MULTIPLY(tmp15
, FIX(0.338443458)); /* c11 */
1977 tmp14
= tmp15
+ MULTIPLY(z1
, FIX(0.318774355)) - /* c9-c11 */
1978 MULTIPLY(z2
, FIX(0.466105296)); /* c1-c7 */
1979 z1
= MULTIPLY(z3
- z2
, FIX(0.937797057)); /* c7 */
1981 tmp15
+= z1
+ MULTIPLY(z3
, FIX(0.384515595)) - /* c3-c7 */
1982 MULTIPLY(z4
, FIX(1.742345811)); /* c1+c11 */
1984 /* Final output stage */
1986 wsptr
[8*0] = (int) RIGHT_SHIFT(tmp20
+ tmp10
, CONST_BITS
-PASS1_BITS
);
1987 wsptr
[8*12] = (int) RIGHT_SHIFT(tmp20
- tmp10
, CONST_BITS
-PASS1_BITS
);
1988 wsptr
[8*1] = (int) RIGHT_SHIFT(tmp21
+ tmp11
, CONST_BITS
-PASS1_BITS
);
1989 wsptr
[8*11] = (int) RIGHT_SHIFT(tmp21
- tmp11
, CONST_BITS
-PASS1_BITS
);
1990 wsptr
[8*2] = (int) RIGHT_SHIFT(tmp22
+ tmp12
, CONST_BITS
-PASS1_BITS
);
1991 wsptr
[8*10] = (int) RIGHT_SHIFT(tmp22
- tmp12
, CONST_BITS
-PASS1_BITS
);
1992 wsptr
[8*3] = (int) RIGHT_SHIFT(tmp23
+ tmp13
, CONST_BITS
-PASS1_BITS
);
1993 wsptr
[8*9] = (int) RIGHT_SHIFT(tmp23
- tmp13
, CONST_BITS
-PASS1_BITS
);
1994 wsptr
[8*4] = (int) RIGHT_SHIFT(tmp24
+ tmp14
, CONST_BITS
-PASS1_BITS
);
1995 wsptr
[8*8] = (int) RIGHT_SHIFT(tmp24
- tmp14
, CONST_BITS
-PASS1_BITS
);
1996 wsptr
[8*5] = (int) RIGHT_SHIFT(tmp25
+ tmp15
, CONST_BITS
-PASS1_BITS
);
1997 wsptr
[8*7] = (int) RIGHT_SHIFT(tmp25
- tmp15
, CONST_BITS
-PASS1_BITS
);
1998 wsptr
[8*6] = (int) RIGHT_SHIFT(tmp26
, CONST_BITS
-PASS1_BITS
);
2001 /* Pass 2: process 13 rows from work array, store into output array. */
2004 for (ctr
= 0; ctr
< 13; ctr
++) {
2005 outptr
= output_buf
[ctr
] + output_col
;
2009 /* Add range center and fudge factor for final descale and range-limit. */
2010 z1
= (INT32
) wsptr
[0] +
2011 ((((INT32
) RANGE_CENTER
) << (PASS1_BITS
+3)) +
2012 (ONE
<< (PASS1_BITS
+2)));
2015 z2
= (INT32
) wsptr
[2];
2016 z3
= (INT32
) wsptr
[4];
2017 z4
= (INT32
) wsptr
[6];
2022 tmp12
= MULTIPLY(tmp10
, FIX(1.155388986)); /* (c4+c6)/2 */
2023 tmp13
= MULTIPLY(tmp11
, FIX(0.096834934)) + z1
; /* (c4-c6)/2 */
2025 tmp20
= MULTIPLY(z2
, FIX(1.373119086)) + tmp12
+ tmp13
; /* c2 */
2026 tmp22
= MULTIPLY(z2
, FIX(0.501487041)) - tmp12
+ tmp13
; /* c10 */
2028 tmp12
= MULTIPLY(tmp10
, FIX(0.316450131)); /* (c8-c12)/2 */
2029 tmp13
= MULTIPLY(tmp11
, FIX(0.486914739)) + z1
; /* (c8+c12)/2 */
2031 tmp21
= MULTIPLY(z2
, FIX(1.058554052)) - tmp12
+ tmp13
; /* c6 */
2032 tmp25
= MULTIPLY(z2
, - FIX(1.252223920)) + tmp12
+ tmp13
; /* c4 */
2034 tmp12
= MULTIPLY(tmp10
, FIX(0.435816023)); /* (c2-c10)/2 */
2035 tmp13
= MULTIPLY(tmp11
, FIX(0.937303064)) - z1
; /* (c2+c10)/2 */
2037 tmp23
= MULTIPLY(z2
, - FIX(0.170464608)) - tmp12
- tmp13
; /* c12 */
2038 tmp24
= MULTIPLY(z2
, - FIX(0.803364869)) + tmp12
- tmp13
; /* c8 */
2040 tmp26
= MULTIPLY(tmp11
- z2
, FIX(1.414213562)) + z1
; /* c0 */
2044 z1
= (INT32
) wsptr
[1];
2045 z2
= (INT32
) wsptr
[3];
2046 z3
= (INT32
) wsptr
[5];
2047 z4
= (INT32
) wsptr
[7];
2049 tmp11
= MULTIPLY(z1
+ z2
, FIX(1.322312651)); /* c3 */
2050 tmp12
= MULTIPLY(z1
+ z3
, FIX(1.163874945)); /* c5 */
2052 tmp13
= MULTIPLY(tmp15
, FIX(0.937797057)); /* c7 */
2053 tmp10
= tmp11
+ tmp12
+ tmp13
-
2054 MULTIPLY(z1
, FIX(2.020082300)); /* c7+c5+c3-c1 */
2055 tmp14
= MULTIPLY(z2
+ z3
, - FIX(0.338443458)); /* -c11 */
2056 tmp11
+= tmp14
+ MULTIPLY(z2
, FIX(0.837223564)); /* c5+c9+c11-c3 */
2057 tmp12
+= tmp14
- MULTIPLY(z3
, FIX(1.572116027)); /* c1+c5-c9-c11 */
2058 tmp14
= MULTIPLY(z2
+ z4
, - FIX(1.163874945)); /* -c5 */
2060 tmp13
+= tmp14
+ MULTIPLY(z4
, FIX(2.205608352)); /* c3+c5+c9-c7 */
2061 tmp14
= MULTIPLY(z3
+ z4
, - FIX(0.657217813)); /* -c9 */
2064 tmp15
= MULTIPLY(tmp15
, FIX(0.338443458)); /* c11 */
2065 tmp14
= tmp15
+ MULTIPLY(z1
, FIX(0.318774355)) - /* c9-c11 */
2066 MULTIPLY(z2
, FIX(0.466105296)); /* c1-c7 */
2067 z1
= MULTIPLY(z3
- z2
, FIX(0.937797057)); /* c7 */
2069 tmp15
+= z1
+ MULTIPLY(z3
, FIX(0.384515595)) - /* c3-c7 */
2070 MULTIPLY(z4
, FIX(1.742345811)); /* c1+c11 */
2072 /* Final output stage */
2074 outptr
[0] = range_limit
[(int) RIGHT_SHIFT(tmp20
+ tmp10
,
2075 CONST_BITS
+PASS1_BITS
+3)
2077 outptr
[12] = range_limit
[(int) RIGHT_SHIFT(tmp20
- tmp10
,
2078 CONST_BITS
+PASS1_BITS
+3)
2080 outptr
[1] = range_limit
[(int) RIGHT_SHIFT(tmp21
+ tmp11
,
2081 CONST_BITS
+PASS1_BITS
+3)
2083 outptr
[11] = range_limit
[(int) RIGHT_SHIFT(tmp21
- tmp11
,
2084 CONST_BITS
+PASS1_BITS
+3)
2086 outptr
[2] = range_limit
[(int) RIGHT_SHIFT(tmp22
+ tmp12
,
2087 CONST_BITS
+PASS1_BITS
+3)
2089 outptr
[10] = range_limit
[(int) RIGHT_SHIFT(tmp22
- tmp12
,
2090 CONST_BITS
+PASS1_BITS
+3)
2092 outptr
[3] = range_limit
[(int) RIGHT_SHIFT(tmp23
+ tmp13
,
2093 CONST_BITS
+PASS1_BITS
+3)
2095 outptr
[9] = range_limit
[(int) RIGHT_SHIFT(tmp23
- tmp13
,
2096 CONST_BITS
+PASS1_BITS
+3)
2098 outptr
[4] = range_limit
[(int) RIGHT_SHIFT(tmp24
+ tmp14
,
2099 CONST_BITS
+PASS1_BITS
+3)
2101 outptr
[8] = range_limit
[(int) RIGHT_SHIFT(tmp24
- tmp14
,
2102 CONST_BITS
+PASS1_BITS
+3)
2104 outptr
[5] = range_limit
[(int) RIGHT_SHIFT(tmp25
+ tmp15
,
2105 CONST_BITS
+PASS1_BITS
+3)
2107 outptr
[7] = range_limit
[(int) RIGHT_SHIFT(tmp25
- tmp15
,
2108 CONST_BITS
+PASS1_BITS
+3)
2110 outptr
[6] = range_limit
[(int) RIGHT_SHIFT(tmp26
,
2111 CONST_BITS
+PASS1_BITS
+3)
2114 wsptr
+= 8; /* advance pointer to next row */
2120 * Perform dequantization and inverse DCT on one block of coefficients,
2121 * producing a 14x14 output block.
2123 * Optimized algorithm with 20 multiplications in the 1-D kernel.
2124 * cK represents sqrt(2) * cos(K*pi/28).
2128 jpeg_idct_14x14 (j_decompress_ptr cinfo
, jpeg_component_info
* compptr
,
2129 JCOEFPTR coef_block
,
2130 JSAMPARRAY output_buf
, JDIMENSION output_col
)
2132 INT32 tmp10
, tmp11
, tmp12
, tmp13
, tmp14
, tmp15
, tmp16
;
2133 INT32 tmp20
, tmp21
, tmp22
, tmp23
, tmp24
, tmp25
, tmp26
;
2134 INT32 z1
, z2
, z3
, z4
;
2136 ISLOW_MULT_TYPE
* quantptr
;
2139 JSAMPLE
*range_limit
= IDCT_range_limit(cinfo
);
2141 int workspace
[8*14]; /* buffers data between passes */
2144 /* Pass 1: process columns from input, store into work array. */
2147 quantptr
= (ISLOW_MULT_TYPE
*) compptr
->dct_table
;
2149 for (ctr
= 0; ctr
< 8; ctr
++, inptr
++, quantptr
++, wsptr
++) {
2152 z1
= DEQUANTIZE(inptr
[DCTSIZE
*0], quantptr
[DCTSIZE
*0]);
2154 /* Add fudge factor here for final descale. */
2155 z1
+= ONE
<< (CONST_BITS
-PASS1_BITS
-1);
2156 z4
= DEQUANTIZE(inptr
[DCTSIZE
*4], quantptr
[DCTSIZE
*4]);
2157 z2
= MULTIPLY(z4
, FIX(1.274162392)); /* c4 */
2158 z3
= MULTIPLY(z4
, FIX(0.314692123)); /* c12 */
2159 z4
= MULTIPLY(z4
, FIX(0.881747734)); /* c8 */
2165 tmp23
= RIGHT_SHIFT(z1
- ((z2
+ z3
- z4
) << 1), /* c0 = (c4+c12-c8)*2 */
2166 CONST_BITS
-PASS1_BITS
);
2168 z1
= DEQUANTIZE(inptr
[DCTSIZE
*2], quantptr
[DCTSIZE
*2]);
2169 z2
= DEQUANTIZE(inptr
[DCTSIZE
*6], quantptr
[DCTSIZE
*6]);
2171 z3
= MULTIPLY(z1
+ z2
, FIX(1.105676686)); /* c6 */
2173 tmp13
= z3
+ MULTIPLY(z1
, FIX(0.273079590)); /* c2-c6 */
2174 tmp14
= z3
- MULTIPLY(z2
, FIX(1.719280954)); /* c6+c10 */
2175 tmp15
= MULTIPLY(z1
, FIX(0.613604268)) - /* c10 */
2176 MULTIPLY(z2
, FIX(1.378756276)); /* c2 */
2178 tmp20
= tmp10
+ tmp13
;
2179 tmp26
= tmp10
- tmp13
;
2180 tmp21
= tmp11
+ tmp14
;
2181 tmp25
= tmp11
- tmp14
;
2182 tmp22
= tmp12
+ tmp15
;
2183 tmp24
= tmp12
- tmp15
;
2187 z1
= DEQUANTIZE(inptr
[DCTSIZE
*1], quantptr
[DCTSIZE
*1]);
2188 z2
= DEQUANTIZE(inptr
[DCTSIZE
*3], quantptr
[DCTSIZE
*3]);
2189 z3
= DEQUANTIZE(inptr
[DCTSIZE
*5], quantptr
[DCTSIZE
*5]);
2190 z4
= DEQUANTIZE(inptr
[DCTSIZE
*7], quantptr
[DCTSIZE
*7]);
2191 tmp13
= z4
<< CONST_BITS
;
2194 tmp11
= MULTIPLY(z1
+ z2
, FIX(1.334852607)); /* c3 */
2195 tmp12
= MULTIPLY(tmp14
, FIX(1.197448846)); /* c5 */
2196 tmp10
= tmp11
+ tmp12
+ tmp13
- MULTIPLY(z1
, FIX(1.126980169)); /* c3+c5-c1 */
2197 tmp14
= MULTIPLY(tmp14
, FIX(0.752406978)); /* c9 */
2198 tmp16
= tmp14
- MULTIPLY(z1
, FIX(1.061150426)); /* c9+c11-c13 */
2200 tmp15
= MULTIPLY(z1
, FIX(0.467085129)) - tmp13
; /* c11 */
2203 z4
= MULTIPLY(z2
+ z3
, - FIX(0.158341681)) - tmp13
; /* -c13 */
2204 tmp11
+= z4
- MULTIPLY(z2
, FIX(0.424103948)); /* c3-c9-c13 */
2205 tmp12
+= z4
- MULTIPLY(z3
, FIX(2.373959773)); /* c3+c5-c13 */
2206 z4
= MULTIPLY(z3
- z2
, FIX(1.405321284)); /* c1 */
2207 tmp14
+= z4
+ tmp13
- MULTIPLY(z3
, FIX(1.6906431334)); /* c1+c9-c11 */
2208 tmp15
+= z4
+ MULTIPLY(z2
, FIX(0.674957567)); /* c1+c11-c5 */
2210 tmp13
= (z1
- z3
) << PASS1_BITS
;
2212 /* Final output stage */
2214 wsptr
[8*0] = (int) RIGHT_SHIFT(tmp20
+ tmp10
, CONST_BITS
-PASS1_BITS
);
2215 wsptr
[8*13] = (int) RIGHT_SHIFT(tmp20
- tmp10
, CONST_BITS
-PASS1_BITS
);
2216 wsptr
[8*1] = (int) RIGHT_SHIFT(tmp21
+ tmp11
, CONST_BITS
-PASS1_BITS
);
2217 wsptr
[8*12] = (int) RIGHT_SHIFT(tmp21
- tmp11
, CONST_BITS
-PASS1_BITS
);
2218 wsptr
[8*2] = (int) RIGHT_SHIFT(tmp22
+ tmp12
, CONST_BITS
-PASS1_BITS
);
2219 wsptr
[8*11] = (int) RIGHT_SHIFT(tmp22
- tmp12
, CONST_BITS
-PASS1_BITS
);
2220 wsptr
[8*3] = (int) (tmp23
+ tmp13
);
2221 wsptr
[8*10] = (int) (tmp23
- tmp13
);
2222 wsptr
[8*4] = (int) RIGHT_SHIFT(tmp24
+ tmp14
, CONST_BITS
-PASS1_BITS
);
2223 wsptr
[8*9] = (int) RIGHT_SHIFT(tmp24
- tmp14
, CONST_BITS
-PASS1_BITS
);
2224 wsptr
[8*5] = (int) RIGHT_SHIFT(tmp25
+ tmp15
, CONST_BITS
-PASS1_BITS
);
2225 wsptr
[8*8] = (int) RIGHT_SHIFT(tmp25
- tmp15
, CONST_BITS
-PASS1_BITS
);
2226 wsptr
[8*6] = (int) RIGHT_SHIFT(tmp26
+ tmp16
, CONST_BITS
-PASS1_BITS
);
2227 wsptr
[8*7] = (int) RIGHT_SHIFT(tmp26
- tmp16
, CONST_BITS
-PASS1_BITS
);
2230 /* Pass 2: process 14 rows from work array, store into output array. */
2233 for (ctr
= 0; ctr
< 14; ctr
++) {
2234 outptr
= output_buf
[ctr
] + output_col
;
2238 /* Add range center and fudge factor for final descale and range-limit. */
2239 z1
= (INT32
) wsptr
[0] +
2240 ((((INT32
) RANGE_CENTER
) << (PASS1_BITS
+3)) +
2241 (ONE
<< (PASS1_BITS
+2)));
2243 z4
= (INT32
) wsptr
[4];
2244 z2
= MULTIPLY(z4
, FIX(1.274162392)); /* c4 */
2245 z3
= MULTIPLY(z4
, FIX(0.314692123)); /* c12 */
2246 z4
= MULTIPLY(z4
, FIX(0.881747734)); /* c8 */
2252 tmp23
= z1
- ((z2
+ z3
- z4
) << 1); /* c0 = (c4+c12-c8)*2 */
2254 z1
= (INT32
) wsptr
[2];
2255 z2
= (INT32
) wsptr
[6];
2257 z3
= MULTIPLY(z1
+ z2
, FIX(1.105676686)); /* c6 */
2259 tmp13
= z3
+ MULTIPLY(z1
, FIX(0.273079590)); /* c2-c6 */
2260 tmp14
= z3
- MULTIPLY(z2
, FIX(1.719280954)); /* c6+c10 */
2261 tmp15
= MULTIPLY(z1
, FIX(0.613604268)) - /* c10 */
2262 MULTIPLY(z2
, FIX(1.378756276)); /* c2 */
2264 tmp20
= tmp10
+ tmp13
;
2265 tmp26
= tmp10
- tmp13
;
2266 tmp21
= tmp11
+ tmp14
;
2267 tmp25
= tmp11
- tmp14
;
2268 tmp22
= tmp12
+ tmp15
;
2269 tmp24
= tmp12
- tmp15
;
2273 z1
= (INT32
) wsptr
[1];
2274 z2
= (INT32
) wsptr
[3];
2275 z3
= (INT32
) wsptr
[5];
2276 z4
= (INT32
) wsptr
[7];
2280 tmp11
= MULTIPLY(z1
+ z2
, FIX(1.334852607)); /* c3 */
2281 tmp12
= MULTIPLY(tmp14
, FIX(1.197448846)); /* c5 */
2282 tmp10
= tmp11
+ tmp12
+ z4
- MULTIPLY(z1
, FIX(1.126980169)); /* c3+c5-c1 */
2283 tmp14
= MULTIPLY(tmp14
, FIX(0.752406978)); /* c9 */
2284 tmp16
= tmp14
- MULTIPLY(z1
, FIX(1.061150426)); /* c9+c11-c13 */
2286 tmp15
= MULTIPLY(z1
, FIX(0.467085129)) - z4
; /* c11 */
2288 tmp13
= MULTIPLY(z2
+ z3
, - FIX(0.158341681)) - z4
; /* -c13 */
2289 tmp11
+= tmp13
- MULTIPLY(z2
, FIX(0.424103948)); /* c3-c9-c13 */
2290 tmp12
+= tmp13
- MULTIPLY(z3
, FIX(2.373959773)); /* c3+c5-c13 */
2291 tmp13
= MULTIPLY(z3
- z2
, FIX(1.405321284)); /* c1 */
2292 tmp14
+= tmp13
+ z4
- MULTIPLY(z3
, FIX(1.6906431334)); /* c1+c9-c11 */
2293 tmp15
+= tmp13
+ MULTIPLY(z2
, FIX(0.674957567)); /* c1+c11-c5 */
2295 tmp13
= ((z1
- z3
) << CONST_BITS
) + z4
;
2297 /* Final output stage */
2299 outptr
[0] = range_limit
[(int) RIGHT_SHIFT(tmp20
+ tmp10
,
2300 CONST_BITS
+PASS1_BITS
+3)
2302 outptr
[13] = range_limit
[(int) RIGHT_SHIFT(tmp20
- tmp10
,
2303 CONST_BITS
+PASS1_BITS
+3)
2305 outptr
[1] = range_limit
[(int) RIGHT_SHIFT(tmp21
+ tmp11
,
2306 CONST_BITS
+PASS1_BITS
+3)
2308 outptr
[12] = range_limit
[(int) RIGHT_SHIFT(tmp21
- tmp11
,
2309 CONST_BITS
+PASS1_BITS
+3)
2311 outptr
[2] = range_limit
[(int) RIGHT_SHIFT(tmp22
+ tmp12
,
2312 CONST_BITS
+PASS1_BITS
+3)
2314 outptr
[11] = range_limit
[(int) RIGHT_SHIFT(tmp22
- tmp12
,
2315 CONST_BITS
+PASS1_BITS
+3)
2317 outptr
[3] = range_limit
[(int) RIGHT_SHIFT(tmp23
+ tmp13
,
2318 CONST_BITS
+PASS1_BITS
+3)
2320 outptr
[10] = range_limit
[(int) RIGHT_SHIFT(tmp23
- tmp13
,
2321 CONST_BITS
+PASS1_BITS
+3)
2323 outptr
[4] = range_limit
[(int) RIGHT_SHIFT(tmp24
+ tmp14
,
2324 CONST_BITS
+PASS1_BITS
+3)
2326 outptr
[9] = range_limit
[(int) RIGHT_SHIFT(tmp24
- tmp14
,
2327 CONST_BITS
+PASS1_BITS
+3)
2329 outptr
[5] = range_limit
[(int) RIGHT_SHIFT(tmp25
+ tmp15
,
2330 CONST_BITS
+PASS1_BITS
+3)
2332 outptr
[8] = range_limit
[(int) RIGHT_SHIFT(tmp25
- tmp15
,
2333 CONST_BITS
+PASS1_BITS
+3)
2335 outptr
[6] = range_limit
[(int) RIGHT_SHIFT(tmp26
+ tmp16
,
2336 CONST_BITS
+PASS1_BITS
+3)
2338 outptr
[7] = range_limit
[(int) RIGHT_SHIFT(tmp26
- tmp16
,
2339 CONST_BITS
+PASS1_BITS
+3)
2342 wsptr
+= 8; /* advance pointer to next row */
2348 * Perform dequantization and inverse DCT on one block of coefficients,
2349 * producing a 15x15 output block.
2351 * Optimized algorithm with 22 multiplications in the 1-D kernel.
2352 * cK represents sqrt(2) * cos(K*pi/30).
2356 jpeg_idct_15x15 (j_decompress_ptr cinfo
, jpeg_component_info
* compptr
,
2357 JCOEFPTR coef_block
,
2358 JSAMPARRAY output_buf
, JDIMENSION output_col
)
2360 INT32 tmp10
, tmp11
, tmp12
, tmp13
, tmp14
, tmp15
, tmp16
;
2361 INT32 tmp20
, tmp21
, tmp22
, tmp23
, tmp24
, tmp25
, tmp26
, tmp27
;
2362 INT32 z1
, z2
, z3
, z4
;
2364 ISLOW_MULT_TYPE
* quantptr
;
2367 JSAMPLE
*range_limit
= IDCT_range_limit(cinfo
);
2369 int workspace
[8*15]; /* buffers data between passes */
2372 /* Pass 1: process columns from input, store into work array. */
2375 quantptr
= (ISLOW_MULT_TYPE
*) compptr
->dct_table
;
2377 for (ctr
= 0; ctr
< 8; ctr
++, inptr
++, quantptr
++, wsptr
++) {
2380 z1
= DEQUANTIZE(inptr
[DCTSIZE
*0], quantptr
[DCTSIZE
*0]);
2382 /* Add fudge factor here for final descale. */
2383 z1
+= ONE
<< (CONST_BITS
-PASS1_BITS
-1);
2385 z2
= DEQUANTIZE(inptr
[DCTSIZE
*2], quantptr
[DCTSIZE
*2]);
2386 z3
= DEQUANTIZE(inptr
[DCTSIZE
*4], quantptr
[DCTSIZE
*4]);
2387 z4
= DEQUANTIZE(inptr
[DCTSIZE
*6], quantptr
[DCTSIZE
*6]);
2389 tmp10
= MULTIPLY(z4
, FIX(0.437016024)); /* c12 */
2390 tmp11
= MULTIPLY(z4
, FIX(1.144122806)); /* c6 */
2394 z1
-= (tmp11
- tmp10
) << 1; /* c0 = (c6-c12)*2 */
2398 tmp10
= MULTIPLY(z3
, FIX(1.337628990)); /* (c2+c4)/2 */
2399 tmp11
= MULTIPLY(z4
, FIX(0.045680613)); /* (c2-c4)/2 */
2400 z2
= MULTIPLY(z2
, FIX(1.439773946)); /* c4+c14 */
2402 tmp20
= tmp13
+ tmp10
+ tmp11
;
2403 tmp23
= tmp12
- tmp10
+ tmp11
+ z2
;
2405 tmp10
= MULTIPLY(z3
, FIX(0.547059574)); /* (c8+c14)/2 */
2406 tmp11
= MULTIPLY(z4
, FIX(0.399234004)); /* (c8-c14)/2 */
2408 tmp25
= tmp13
- tmp10
- tmp11
;
2409 tmp26
= tmp12
+ tmp10
- tmp11
- z2
;
2411 tmp10
= MULTIPLY(z3
, FIX(0.790569415)); /* (c6+c12)/2 */
2412 tmp11
= MULTIPLY(z4
, FIX(0.353553391)); /* (c6-c12)/2 */
2414 tmp21
= tmp12
+ tmp10
+ tmp11
;
2415 tmp24
= tmp13
- tmp10
+ tmp11
;
2417 tmp22
= z1
+ tmp11
; /* c10 = c6-c12 */
2418 tmp27
= z1
- tmp11
- tmp11
; /* c0 = (c6-c12)*2 */
2422 z1
= DEQUANTIZE(inptr
[DCTSIZE
*1], quantptr
[DCTSIZE
*1]);
2423 z2
= DEQUANTIZE(inptr
[DCTSIZE
*3], quantptr
[DCTSIZE
*3]);
2424 z4
= DEQUANTIZE(inptr
[DCTSIZE
*5], quantptr
[DCTSIZE
*5]);
2425 z3
= MULTIPLY(z4
, FIX(1.224744871)); /* c5 */
2426 z4
= DEQUANTIZE(inptr
[DCTSIZE
*7], quantptr
[DCTSIZE
*7]);
2429 tmp15
= MULTIPLY(z1
+ tmp13
, FIX(0.831253876)); /* c9 */
2430 tmp11
= tmp15
+ MULTIPLY(z1
, FIX(0.513743148)); /* c3-c9 */
2431 tmp14
= tmp15
- MULTIPLY(tmp13
, FIX(2.176250899)); /* c3+c9 */
2433 tmp13
= MULTIPLY(z2
, - FIX(0.831253876)); /* -c9 */
2434 tmp15
= MULTIPLY(z2
, - FIX(1.344997024)); /* -c3 */
2436 tmp12
= z3
+ MULTIPLY(z2
, FIX(1.406466353)); /* c1 */
2438 tmp10
= tmp12
+ MULTIPLY(z4
, FIX(2.457431844)) - tmp15
; /* c1+c7 */
2439 tmp16
= tmp12
- MULTIPLY(z1
, FIX(1.112434820)) + tmp13
; /* c1-c13 */
2440 tmp12
= MULTIPLY(z2
, FIX(1.224744871)) - z3
; /* c5 */
2441 z2
= MULTIPLY(z1
+ z4
, FIX(0.575212477)); /* c11 */
2442 tmp13
+= z2
+ MULTIPLY(z1
, FIX(0.475753014)) - z3
; /* c7-c11 */
2443 tmp15
+= z2
- MULTIPLY(z4
, FIX(0.869244010)) + z3
; /* c11+c13 */
2445 /* Final output stage */
2447 wsptr
[8*0] = (int) RIGHT_SHIFT(tmp20
+ tmp10
, CONST_BITS
-PASS1_BITS
);
2448 wsptr
[8*14] = (int) RIGHT_SHIFT(tmp20
- tmp10
, CONST_BITS
-PASS1_BITS
);
2449 wsptr
[8*1] = (int) RIGHT_SHIFT(tmp21
+ tmp11
, CONST_BITS
-PASS1_BITS
);
2450 wsptr
[8*13] = (int) RIGHT_SHIFT(tmp21
- tmp11
, CONST_BITS
-PASS1_BITS
);
2451 wsptr
[8*2] = (int) RIGHT_SHIFT(tmp22
+ tmp12
, CONST_BITS
-PASS1_BITS
);
2452 wsptr
[8*12] = (int) RIGHT_SHIFT(tmp22
- tmp12
, CONST_BITS
-PASS1_BITS
);
2453 wsptr
[8*3] = (int) RIGHT_SHIFT(tmp23
+ tmp13
, CONST_BITS
-PASS1_BITS
);
2454 wsptr
[8*11] = (int) RIGHT_SHIFT(tmp23
- tmp13
, CONST_BITS
-PASS1_BITS
);
2455 wsptr
[8*4] = (int) RIGHT_SHIFT(tmp24
+ tmp14
, CONST_BITS
-PASS1_BITS
);
2456 wsptr
[8*10] = (int) RIGHT_SHIFT(tmp24
- tmp14
, CONST_BITS
-PASS1_BITS
);
2457 wsptr
[8*5] = (int) RIGHT_SHIFT(tmp25
+ tmp15
, CONST_BITS
-PASS1_BITS
);
2458 wsptr
[8*9] = (int) RIGHT_SHIFT(tmp25
- tmp15
, CONST_BITS
-PASS1_BITS
);
2459 wsptr
[8*6] = (int) RIGHT_SHIFT(tmp26
+ tmp16
, CONST_BITS
-PASS1_BITS
);
2460 wsptr
[8*8] = (int) RIGHT_SHIFT(tmp26
- tmp16
, CONST_BITS
-PASS1_BITS
);
2461 wsptr
[8*7] = (int) RIGHT_SHIFT(tmp27
, CONST_BITS
-PASS1_BITS
);
2464 /* Pass 2: process 15 rows from work array, store into output array. */
2467 for (ctr
= 0; ctr
< 15; ctr
++) {
2468 outptr
= output_buf
[ctr
] + output_col
;
2472 /* Add range center and fudge factor for final descale and range-limit. */
2473 z1
= (INT32
) wsptr
[0] +
2474 ((((INT32
) RANGE_CENTER
) << (PASS1_BITS
+3)) +
2475 (ONE
<< (PASS1_BITS
+2)));
2478 z2
= (INT32
) wsptr
[2];
2479 z3
= (INT32
) wsptr
[4];
2480 z4
= (INT32
) wsptr
[6];
2482 tmp10
= MULTIPLY(z4
, FIX(0.437016024)); /* c12 */
2483 tmp11
= MULTIPLY(z4
, FIX(1.144122806)); /* c6 */
2487 z1
-= (tmp11
- tmp10
) << 1; /* c0 = (c6-c12)*2 */
2491 tmp10
= MULTIPLY(z3
, FIX(1.337628990)); /* (c2+c4)/2 */
2492 tmp11
= MULTIPLY(z4
, FIX(0.045680613)); /* (c2-c4)/2 */
2493 z2
= MULTIPLY(z2
, FIX(1.439773946)); /* c4+c14 */
2495 tmp20
= tmp13
+ tmp10
+ tmp11
;
2496 tmp23
= tmp12
- tmp10
+ tmp11
+ z2
;
2498 tmp10
= MULTIPLY(z3
, FIX(0.547059574)); /* (c8+c14)/2 */
2499 tmp11
= MULTIPLY(z4
, FIX(0.399234004)); /* (c8-c14)/2 */
2501 tmp25
= tmp13
- tmp10
- tmp11
;
2502 tmp26
= tmp12
+ tmp10
- tmp11
- z2
;
2504 tmp10
= MULTIPLY(z3
, FIX(0.790569415)); /* (c6+c12)/2 */
2505 tmp11
= MULTIPLY(z4
, FIX(0.353553391)); /* (c6-c12)/2 */
2507 tmp21
= tmp12
+ tmp10
+ tmp11
;
2508 tmp24
= tmp13
- tmp10
+ tmp11
;
2510 tmp22
= z1
+ tmp11
; /* c10 = c6-c12 */
2511 tmp27
= z1
- tmp11
- tmp11
; /* c0 = (c6-c12)*2 */
2515 z1
= (INT32
) wsptr
[1];
2516 z2
= (INT32
) wsptr
[3];
2517 z4
= (INT32
) wsptr
[5];
2518 z3
= MULTIPLY(z4
, FIX(1.224744871)); /* c5 */
2519 z4
= (INT32
) wsptr
[7];
2522 tmp15
= MULTIPLY(z1
+ tmp13
, FIX(0.831253876)); /* c9 */
2523 tmp11
= tmp15
+ MULTIPLY(z1
, FIX(0.513743148)); /* c3-c9 */
2524 tmp14
= tmp15
- MULTIPLY(tmp13
, FIX(2.176250899)); /* c3+c9 */
2526 tmp13
= MULTIPLY(z2
, - FIX(0.831253876)); /* -c9 */
2527 tmp15
= MULTIPLY(z2
, - FIX(1.344997024)); /* -c3 */
2529 tmp12
= z3
+ MULTIPLY(z2
, FIX(1.406466353)); /* c1 */
2531 tmp10
= tmp12
+ MULTIPLY(z4
, FIX(2.457431844)) - tmp15
; /* c1+c7 */
2532 tmp16
= tmp12
- MULTIPLY(z1
, FIX(1.112434820)) + tmp13
; /* c1-c13 */
2533 tmp12
= MULTIPLY(z2
, FIX(1.224744871)) - z3
; /* c5 */
2534 z2
= MULTIPLY(z1
+ z4
, FIX(0.575212477)); /* c11 */
2535 tmp13
+= z2
+ MULTIPLY(z1
, FIX(0.475753014)) - z3
; /* c7-c11 */
2536 tmp15
+= z2
- MULTIPLY(z4
, FIX(0.869244010)) + z3
; /* c11+c13 */
2538 /* Final output stage */
2540 outptr
[0] = range_limit
[(int) RIGHT_SHIFT(tmp20
+ tmp10
,
2541 CONST_BITS
+PASS1_BITS
+3)
2543 outptr
[14] = range_limit
[(int) RIGHT_SHIFT(tmp20
- tmp10
,
2544 CONST_BITS
+PASS1_BITS
+3)
2546 outptr
[1] = range_limit
[(int) RIGHT_SHIFT(tmp21
+ tmp11
,
2547 CONST_BITS
+PASS1_BITS
+3)
2549 outptr
[13] = range_limit
[(int) RIGHT_SHIFT(tmp21
- tmp11
,
2550 CONST_BITS
+PASS1_BITS
+3)
2552 outptr
[2] = range_limit
[(int) RIGHT_SHIFT(tmp22
+ tmp12
,
2553 CONST_BITS
+PASS1_BITS
+3)
2555 outptr
[12] = range_limit
[(int) RIGHT_SHIFT(tmp22
- tmp12
,
2556 CONST_BITS
+PASS1_BITS
+3)
2558 outptr
[3] = range_limit
[(int) RIGHT_SHIFT(tmp23
+ tmp13
,
2559 CONST_BITS
+PASS1_BITS
+3)
2561 outptr
[11] = range_limit
[(int) RIGHT_SHIFT(tmp23
- tmp13
,
2562 CONST_BITS
+PASS1_BITS
+3)
2564 outptr
[4] = range_limit
[(int) RIGHT_SHIFT(tmp24
+ tmp14
,
2565 CONST_BITS
+PASS1_BITS
+3)
2567 outptr
[10] = range_limit
[(int) RIGHT_SHIFT(tmp24
- tmp14
,
2568 CONST_BITS
+PASS1_BITS
+3)
2570 outptr
[5] = range_limit
[(int) RIGHT_SHIFT(tmp25
+ tmp15
,
2571 CONST_BITS
+PASS1_BITS
+3)
2573 outptr
[9] = range_limit
[(int) RIGHT_SHIFT(tmp25
- tmp15
,
2574 CONST_BITS
+PASS1_BITS
+3)
2576 outptr
[6] = range_limit
[(int) RIGHT_SHIFT(tmp26
+ tmp16
,
2577 CONST_BITS
+PASS1_BITS
+3)
2579 outptr
[8] = range_limit
[(int) RIGHT_SHIFT(tmp26
- tmp16
,
2580 CONST_BITS
+PASS1_BITS
+3)
2582 outptr
[7] = range_limit
[(int) RIGHT_SHIFT(tmp27
,
2583 CONST_BITS
+PASS1_BITS
+3)
2586 wsptr
+= 8; /* advance pointer to next row */
2592 * Perform dequantization and inverse DCT on one block of coefficients,
2593 * producing a 16x16 output block.
2595 * Optimized algorithm with 28 multiplications in the 1-D kernel.
2596 * cK represents sqrt(2) * cos(K*pi/32).
2600 jpeg_idct_16x16 (j_decompress_ptr cinfo
, jpeg_component_info
* compptr
,
2601 JCOEFPTR coef_block
,
2602 JSAMPARRAY output_buf
, JDIMENSION output_col
)
2604 INT32 tmp0
, tmp1
, tmp2
, tmp3
, tmp10
, tmp11
, tmp12
, tmp13
;
2605 INT32 tmp20
, tmp21
, tmp22
, tmp23
, tmp24
, tmp25
, tmp26
, tmp27
;
2606 INT32 z1
, z2
, z3
, z4
;
2608 ISLOW_MULT_TYPE
* quantptr
;
2611 JSAMPLE
*range_limit
= IDCT_range_limit(cinfo
);
2613 int workspace
[8*16]; /* buffers data between passes */
2616 /* Pass 1: process columns from input, store into work array. */
2619 quantptr
= (ISLOW_MULT_TYPE
*) compptr
->dct_table
;
2621 for (ctr
= 0; ctr
< 8; ctr
++, inptr
++, quantptr
++, wsptr
++) {
2624 tmp0
= DEQUANTIZE(inptr
[DCTSIZE
*0], quantptr
[DCTSIZE
*0]);
2625 tmp0
<<= CONST_BITS
;
2626 /* Add fudge factor here for final descale. */
2627 tmp0
+= ONE
<< (CONST_BITS
-PASS1_BITS
-1);
2629 z1
= DEQUANTIZE(inptr
[DCTSIZE
*4], quantptr
[DCTSIZE
*4]);
2630 tmp1
= MULTIPLY(z1
, FIX(1.306562965)); /* c4[16] = c2[8] */
2631 tmp2
= MULTIPLY(z1
, FIX_0_541196100
); /* c12[16] = c6[8] */
2633 tmp10
= tmp0
+ tmp1
;
2634 tmp11
= tmp0
- tmp1
;
2635 tmp12
= tmp0
+ tmp2
;
2636 tmp13
= tmp0
- tmp2
;
2638 z1
= DEQUANTIZE(inptr
[DCTSIZE
*2], quantptr
[DCTSIZE
*2]);
2639 z2
= DEQUANTIZE(inptr
[DCTSIZE
*6], quantptr
[DCTSIZE
*6]);
2641 z4
= MULTIPLY(z3
, FIX(0.275899379)); /* c14[16] = c7[8] */
2642 z3
= MULTIPLY(z3
, FIX(1.387039845)); /* c2[16] = c1[8] */
2644 tmp0
= z3
+ MULTIPLY(z2
, FIX_2_562915447
); /* (c6+c2)[16] = (c3+c1)[8] */
2645 tmp1
= z4
+ MULTIPLY(z1
, FIX_0_899976223
); /* (c6-c14)[16] = (c3-c7)[8] */
2646 tmp2
= z3
- MULTIPLY(z1
, FIX(0.601344887)); /* (c2-c10)[16] = (c1-c5)[8] */
2647 tmp3
= z4
- MULTIPLY(z2
, FIX(0.509795579)); /* (c10-c14)[16] = (c5-c7)[8] */
2649 tmp20
= tmp10
+ tmp0
;
2650 tmp27
= tmp10
- tmp0
;
2651 tmp21
= tmp12
+ tmp1
;
2652 tmp26
= tmp12
- tmp1
;
2653 tmp22
= tmp13
+ tmp2
;
2654 tmp25
= tmp13
- tmp2
;
2655 tmp23
= tmp11
+ tmp3
;
2656 tmp24
= tmp11
- tmp3
;
2660 z1
= DEQUANTIZE(inptr
[DCTSIZE
*1], quantptr
[DCTSIZE
*1]);
2661 z2
= DEQUANTIZE(inptr
[DCTSIZE
*3], quantptr
[DCTSIZE
*3]);
2662 z3
= DEQUANTIZE(inptr
[DCTSIZE
*5], quantptr
[DCTSIZE
*5]);
2663 z4
= DEQUANTIZE(inptr
[DCTSIZE
*7], quantptr
[DCTSIZE
*7]);
2667 tmp1
= MULTIPLY(z1
+ z2
, FIX(1.353318001)); /* c3 */
2668 tmp2
= MULTIPLY(tmp11
, FIX(1.247225013)); /* c5 */
2669 tmp3
= MULTIPLY(z1
+ z4
, FIX(1.093201867)); /* c7 */
2670 tmp10
= MULTIPLY(z1
- z4
, FIX(0.897167586)); /* c9 */
2671 tmp11
= MULTIPLY(tmp11
, FIX(0.666655658)); /* c11 */
2672 tmp12
= MULTIPLY(z1
- z2
, FIX(0.410524528)); /* c13 */
2673 tmp0
= tmp1
+ tmp2
+ tmp3
-
2674 MULTIPLY(z1
, FIX(2.286341144)); /* c7+c5+c3-c1 */
2675 tmp13
= tmp10
+ tmp11
+ tmp12
-
2676 MULTIPLY(z1
, FIX(1.835730603)); /* c9+c11+c13-c15 */
2677 z1
= MULTIPLY(z2
+ z3
, FIX(0.138617169)); /* c15 */
2678 tmp1
+= z1
+ MULTIPLY(z2
, FIX(0.071888074)); /* c9+c11-c3-c15 */
2679 tmp2
+= z1
- MULTIPLY(z3
, FIX(1.125726048)); /* c5+c7+c15-c3 */
2680 z1
= MULTIPLY(z3
- z2
, FIX(1.407403738)); /* c1 */
2681 tmp11
+= z1
- MULTIPLY(z3
, FIX(0.766367282)); /* c1+c11-c9-c13 */
2682 tmp12
+= z1
+ MULTIPLY(z2
, FIX(1.971951411)); /* c1+c5+c13-c7 */
2684 z1
= MULTIPLY(z2
, - FIX(0.666655658)); /* -c11 */
2686 tmp3
+= z1
+ MULTIPLY(z4
, FIX(1.065388962)); /* c3+c11+c15-c7 */
2687 z2
= MULTIPLY(z2
, - FIX(1.247225013)); /* -c5 */
2688 tmp10
+= z2
+ MULTIPLY(z4
, FIX(3.141271809)); /* c1+c5+c9-c13 */
2690 z2
= MULTIPLY(z3
+ z4
, - FIX(1.353318001)); /* -c3 */
2693 z2
= MULTIPLY(z4
- z3
, FIX(0.410524528)); /* c13 */
2697 /* Final output stage */
2699 wsptr
[8*0] = (int) RIGHT_SHIFT(tmp20
+ tmp0
, CONST_BITS
-PASS1_BITS
);
2700 wsptr
[8*15] = (int) RIGHT_SHIFT(tmp20
- tmp0
, CONST_BITS
-PASS1_BITS
);
2701 wsptr
[8*1] = (int) RIGHT_SHIFT(tmp21
+ tmp1
, CONST_BITS
-PASS1_BITS
);
2702 wsptr
[8*14] = (int) RIGHT_SHIFT(tmp21
- tmp1
, CONST_BITS
-PASS1_BITS
);
2703 wsptr
[8*2] = (int) RIGHT_SHIFT(tmp22
+ tmp2
, CONST_BITS
-PASS1_BITS
);
2704 wsptr
[8*13] = (int) RIGHT_SHIFT(tmp22
- tmp2
, CONST_BITS
-PASS1_BITS
);
2705 wsptr
[8*3] = (int) RIGHT_SHIFT(tmp23
+ tmp3
, CONST_BITS
-PASS1_BITS
);
2706 wsptr
[8*12] = (int) RIGHT_SHIFT(tmp23
- tmp3
, CONST_BITS
-PASS1_BITS
);
2707 wsptr
[8*4] = (int) RIGHT_SHIFT(tmp24
+ tmp10
, CONST_BITS
-PASS1_BITS
);
2708 wsptr
[8*11] = (int) RIGHT_SHIFT(tmp24
- tmp10
, CONST_BITS
-PASS1_BITS
);
2709 wsptr
[8*5] = (int) RIGHT_SHIFT(tmp25
+ tmp11
, CONST_BITS
-PASS1_BITS
);
2710 wsptr
[8*10] = (int) RIGHT_SHIFT(tmp25
- tmp11
, CONST_BITS
-PASS1_BITS
);
2711 wsptr
[8*6] = (int) RIGHT_SHIFT(tmp26
+ tmp12
, CONST_BITS
-PASS1_BITS
);
2712 wsptr
[8*9] = (int) RIGHT_SHIFT(tmp26
- tmp12
, CONST_BITS
-PASS1_BITS
);
2713 wsptr
[8*7] = (int) RIGHT_SHIFT(tmp27
+ tmp13
, CONST_BITS
-PASS1_BITS
);
2714 wsptr
[8*8] = (int) RIGHT_SHIFT(tmp27
- tmp13
, CONST_BITS
-PASS1_BITS
);
2717 /* Pass 2: process 16 rows from work array, store into output array. */
2720 for (ctr
= 0; ctr
< 16; ctr
++) {
2721 outptr
= output_buf
[ctr
] + output_col
;
2725 /* Add range center and fudge factor for final descale and range-limit. */
2726 tmp0
= (INT32
) wsptr
[0] +
2727 ((((INT32
) RANGE_CENTER
) << (PASS1_BITS
+3)) +
2728 (ONE
<< (PASS1_BITS
+2)));
2729 tmp0
<<= CONST_BITS
;
2731 z1
= (INT32
) wsptr
[4];
2732 tmp1
= MULTIPLY(z1
, FIX(1.306562965)); /* c4[16] = c2[8] */
2733 tmp2
= MULTIPLY(z1
, FIX_0_541196100
); /* c12[16] = c6[8] */
2735 tmp10
= tmp0
+ tmp1
;
2736 tmp11
= tmp0
- tmp1
;
2737 tmp12
= tmp0
+ tmp2
;
2738 tmp13
= tmp0
- tmp2
;
2740 z1
= (INT32
) wsptr
[2];
2741 z2
= (INT32
) wsptr
[6];
2743 z4
= MULTIPLY(z3
, FIX(0.275899379)); /* c14[16] = c7[8] */
2744 z3
= MULTIPLY(z3
, FIX(1.387039845)); /* c2[16] = c1[8] */
2746 tmp0
= z3
+ MULTIPLY(z2
, FIX_2_562915447
); /* (c6+c2)[16] = (c3+c1)[8] */
2747 tmp1
= z4
+ MULTIPLY(z1
, FIX_0_899976223
); /* (c6-c14)[16] = (c3-c7)[8] */
2748 tmp2
= z3
- MULTIPLY(z1
, FIX(0.601344887)); /* (c2-c10)[16] = (c1-c5)[8] */
2749 tmp3
= z4
- MULTIPLY(z2
, FIX(0.509795579)); /* (c10-c14)[16] = (c5-c7)[8] */
2751 tmp20
= tmp10
+ tmp0
;
2752 tmp27
= tmp10
- tmp0
;
2753 tmp21
= tmp12
+ tmp1
;
2754 tmp26
= tmp12
- tmp1
;
2755 tmp22
= tmp13
+ tmp2
;
2756 tmp25
= tmp13
- tmp2
;
2757 tmp23
= tmp11
+ tmp3
;
2758 tmp24
= tmp11
- tmp3
;
2762 z1
= (INT32
) wsptr
[1];
2763 z2
= (INT32
) wsptr
[3];
2764 z3
= (INT32
) wsptr
[5];
2765 z4
= (INT32
) wsptr
[7];
2769 tmp1
= MULTIPLY(z1
+ z2
, FIX(1.353318001)); /* c3 */
2770 tmp2
= MULTIPLY(tmp11
, FIX(1.247225013)); /* c5 */
2771 tmp3
= MULTIPLY(z1
+ z4
, FIX(1.093201867)); /* c7 */
2772 tmp10
= MULTIPLY(z1
- z4
, FIX(0.897167586)); /* c9 */
2773 tmp11
= MULTIPLY(tmp11
, FIX(0.666655658)); /* c11 */
2774 tmp12
= MULTIPLY(z1
- z2
, FIX(0.410524528)); /* c13 */
2775 tmp0
= tmp1
+ tmp2
+ tmp3
-
2776 MULTIPLY(z1
, FIX(2.286341144)); /* c7+c5+c3-c1 */
2777 tmp13
= tmp10
+ tmp11
+ tmp12
-
2778 MULTIPLY(z1
, FIX(1.835730603)); /* c9+c11+c13-c15 */
2779 z1
= MULTIPLY(z2
+ z3
, FIX(0.138617169)); /* c15 */
2780 tmp1
+= z1
+ MULTIPLY(z2
, FIX(0.071888074)); /* c9+c11-c3-c15 */
2781 tmp2
+= z1
- MULTIPLY(z3
, FIX(1.125726048)); /* c5+c7+c15-c3 */
2782 z1
= MULTIPLY(z3
- z2
, FIX(1.407403738)); /* c1 */
2783 tmp11
+= z1
- MULTIPLY(z3
, FIX(0.766367282)); /* c1+c11-c9-c13 */
2784 tmp12
+= z1
+ MULTIPLY(z2
, FIX(1.971951411)); /* c1+c5+c13-c7 */
2786 z1
= MULTIPLY(z2
, - FIX(0.666655658)); /* -c11 */
2788 tmp3
+= z1
+ MULTIPLY(z4
, FIX(1.065388962)); /* c3+c11+c15-c7 */
2789 z2
= MULTIPLY(z2
, - FIX(1.247225013)); /* -c5 */
2790 tmp10
+= z2
+ MULTIPLY(z4
, FIX(3.141271809)); /* c1+c5+c9-c13 */
2792 z2
= MULTIPLY(z3
+ z4
, - FIX(1.353318001)); /* -c3 */
2795 z2
= MULTIPLY(z4
- z3
, FIX(0.410524528)); /* c13 */
2799 /* Final output stage */
2801 outptr
[0] = range_limit
[(int) RIGHT_SHIFT(tmp20
+ tmp0
,
2802 CONST_BITS
+PASS1_BITS
+3)
2804 outptr
[15] = range_limit
[(int) RIGHT_SHIFT(tmp20
- tmp0
,
2805 CONST_BITS
+PASS1_BITS
+3)
2807 outptr
[1] = range_limit
[(int) RIGHT_SHIFT(tmp21
+ tmp1
,
2808 CONST_BITS
+PASS1_BITS
+3)
2810 outptr
[14] = range_limit
[(int) RIGHT_SHIFT(tmp21
- tmp1
,
2811 CONST_BITS
+PASS1_BITS
+3)
2813 outptr
[2] = range_limit
[(int) RIGHT_SHIFT(tmp22
+ tmp2
,
2814 CONST_BITS
+PASS1_BITS
+3)
2816 outptr
[13] = range_limit
[(int) RIGHT_SHIFT(tmp22
- tmp2
,
2817 CONST_BITS
+PASS1_BITS
+3)
2819 outptr
[3] = range_limit
[(int) RIGHT_SHIFT(tmp23
+ tmp3
,
2820 CONST_BITS
+PASS1_BITS
+3)
2822 outptr
[12] = range_limit
[(int) RIGHT_SHIFT(tmp23
- tmp3
,
2823 CONST_BITS
+PASS1_BITS
+3)
2825 outptr
[4] = range_limit
[(int) RIGHT_SHIFT(tmp24
+ tmp10
,
2826 CONST_BITS
+PASS1_BITS
+3)
2828 outptr
[11] = range_limit
[(int) RIGHT_SHIFT(tmp24
- tmp10
,
2829 CONST_BITS
+PASS1_BITS
+3)
2831 outptr
[5] = range_limit
[(int) RIGHT_SHIFT(tmp25
+ tmp11
,
2832 CONST_BITS
+PASS1_BITS
+3)
2834 outptr
[10] = range_limit
[(int) RIGHT_SHIFT(tmp25
- tmp11
,
2835 CONST_BITS
+PASS1_BITS
+3)
2837 outptr
[6] = range_limit
[(int) RIGHT_SHIFT(tmp26
+ tmp12
,
2838 CONST_BITS
+PASS1_BITS
+3)
2840 outptr
[9] = range_limit
[(int) RIGHT_SHIFT(tmp26
- tmp12
,
2841 CONST_BITS
+PASS1_BITS
+3)
2843 outptr
[7] = range_limit
[(int) RIGHT_SHIFT(tmp27
+ tmp13
,
2844 CONST_BITS
+PASS1_BITS
+3)
2846 outptr
[8] = range_limit
[(int) RIGHT_SHIFT(tmp27
- tmp13
,
2847 CONST_BITS
+PASS1_BITS
+3)
2850 wsptr
+= 8; /* advance pointer to next row */
2856 * Perform dequantization and inverse DCT on one block of coefficients,
2857 * producing a 16x8 output block.
2859 * 8-point IDCT in pass 1 (columns), 16-point in pass 2 (rows).
2863 jpeg_idct_16x8 (j_decompress_ptr cinfo
, jpeg_component_info
* compptr
,
2864 JCOEFPTR coef_block
,
2865 JSAMPARRAY output_buf
, JDIMENSION output_col
)
2867 INT32 tmp0
, tmp1
, tmp2
, tmp3
, tmp10
, tmp11
, tmp12
, tmp13
;
2868 INT32 tmp20
, tmp21
, tmp22
, tmp23
, tmp24
, tmp25
, tmp26
, tmp27
;
2869 INT32 z1
, z2
, z3
, z4
;
2871 ISLOW_MULT_TYPE
* quantptr
;
2874 JSAMPLE
*range_limit
= IDCT_range_limit(cinfo
);
2876 int workspace
[8*8]; /* buffers data between passes */
2879 /* Pass 1: process columns from input, store into work array.
2880 * Note results are scaled up by sqrt(8) compared to a true IDCT;
2881 * furthermore, we scale the results by 2**PASS1_BITS.
2882 * 8-point IDCT kernel, cK represents sqrt(2) * cos(K*pi/16).
2886 quantptr
= (ISLOW_MULT_TYPE
*) compptr
->dct_table
;
2888 for (ctr
= DCTSIZE
; ctr
> 0; ctr
--) {
2889 /* Due to quantization, we will usually find that many of the input
2890 * coefficients are zero, especially the AC terms. We can exploit this
2891 * by short-circuiting the IDCT calculation for any column in which all
2892 * the AC terms are zero. In that case each output is equal to the
2893 * DC coefficient (with scale factor as needed).
2894 * With typical images and quantization tables, half or more of the
2895 * column DCT calculations can be simplified this way.
2898 if (inptr
[DCTSIZE
*1] == 0 && inptr
[DCTSIZE
*2] == 0 &&
2899 inptr
[DCTSIZE
*3] == 0 && inptr
[DCTSIZE
*4] == 0 &&
2900 inptr
[DCTSIZE
*5] == 0 && inptr
[DCTSIZE
*6] == 0 &&
2901 inptr
[DCTSIZE
*7] == 0) {
2902 /* AC terms all zero */
2903 int dcval
= DEQUANTIZE(inptr
[DCTSIZE
*0], quantptr
[DCTSIZE
*0]) << PASS1_BITS
;
2905 wsptr
[DCTSIZE
*0] = dcval
;
2906 wsptr
[DCTSIZE
*1] = dcval
;
2907 wsptr
[DCTSIZE
*2] = dcval
;
2908 wsptr
[DCTSIZE
*3] = dcval
;
2909 wsptr
[DCTSIZE
*4] = dcval
;
2910 wsptr
[DCTSIZE
*5] = dcval
;
2911 wsptr
[DCTSIZE
*6] = dcval
;
2912 wsptr
[DCTSIZE
*7] = dcval
;
2914 inptr
++; /* advance pointers to next column */
2920 /* Even part: reverse the even part of the forward DCT.
2921 * The rotator is c(-6).
2924 z2
= DEQUANTIZE(inptr
[DCTSIZE
*0], quantptr
[DCTSIZE
*0]);
2925 z3
= DEQUANTIZE(inptr
[DCTSIZE
*4], quantptr
[DCTSIZE
*4]);
2928 /* Add fudge factor here for final descale. */
2929 z2
+= ONE
<< (CONST_BITS
-PASS1_BITS
-1);
2934 z2
= DEQUANTIZE(inptr
[DCTSIZE
*2], quantptr
[DCTSIZE
*2]);
2935 z3
= DEQUANTIZE(inptr
[DCTSIZE
*6], quantptr
[DCTSIZE
*6]);
2937 z1
= MULTIPLY(z2
+ z3
, FIX_0_541196100
); /* c6 */
2938 tmp2
= z1
+ MULTIPLY(z2
, FIX_0_765366865
); /* c2-c6 */
2939 tmp3
= z1
- MULTIPLY(z3
, FIX_1_847759065
); /* c2+c6 */
2941 tmp10
= tmp0
+ tmp2
;
2942 tmp13
= tmp0
- tmp2
;
2943 tmp11
= tmp1
+ tmp3
;
2944 tmp12
= tmp1
- tmp3
;
2946 /* Odd part per figure 8; the matrix is unitary and hence its
2947 * transpose is its inverse. i0..i3 are y7,y5,y3,y1 respectively.
2950 tmp0
= DEQUANTIZE(inptr
[DCTSIZE
*7], quantptr
[DCTSIZE
*7]);
2951 tmp1
= DEQUANTIZE(inptr
[DCTSIZE
*5], quantptr
[DCTSIZE
*5]);
2952 tmp2
= DEQUANTIZE(inptr
[DCTSIZE
*3], quantptr
[DCTSIZE
*3]);
2953 tmp3
= DEQUANTIZE(inptr
[DCTSIZE
*1], quantptr
[DCTSIZE
*1]);
2958 z1
= MULTIPLY(z2
+ z3
, FIX_1_175875602
); /* c3 */
2959 z2
= MULTIPLY(z2
, - FIX_1_961570560
); /* -c3-c5 */
2960 z3
= MULTIPLY(z3
, - FIX_0_390180644
); /* -c3+c5 */
2964 z1
= MULTIPLY(tmp0
+ tmp3
, - FIX_0_899976223
); /* -c3+c7 */
2965 tmp0
= MULTIPLY(tmp0
, FIX_0_298631336
); /* -c1+c3+c5-c7 */
2966 tmp3
= MULTIPLY(tmp3
, FIX_1_501321110
); /* c1+c3-c5-c7 */
2970 z1
= MULTIPLY(tmp1
+ tmp2
, - FIX_2_562915447
); /* -c1-c3 */
2971 tmp1
= MULTIPLY(tmp1
, FIX_2_053119869
); /* c1+c3-c5+c7 */
2972 tmp2
= MULTIPLY(tmp2
, FIX_3_072711026
); /* c1+c3+c5-c7 */
2976 /* Final output stage: inputs are tmp10..tmp13, tmp0..tmp3 */
2978 wsptr
[DCTSIZE
*0] = (int) RIGHT_SHIFT(tmp10
+ tmp3
, CONST_BITS
-PASS1_BITS
);
2979 wsptr
[DCTSIZE
*7] = (int) RIGHT_SHIFT(tmp10
- tmp3
, CONST_BITS
-PASS1_BITS
);
2980 wsptr
[DCTSIZE
*1] = (int) RIGHT_SHIFT(tmp11
+ tmp2
, CONST_BITS
-PASS1_BITS
);
2981 wsptr
[DCTSIZE
*6] = (int) RIGHT_SHIFT(tmp11
- tmp2
, CONST_BITS
-PASS1_BITS
);
2982 wsptr
[DCTSIZE
*2] = (int) RIGHT_SHIFT(tmp12
+ tmp1
, CONST_BITS
-PASS1_BITS
);
2983 wsptr
[DCTSIZE
*5] = (int) RIGHT_SHIFT(tmp12
- tmp1
, CONST_BITS
-PASS1_BITS
);
2984 wsptr
[DCTSIZE
*3] = (int) RIGHT_SHIFT(tmp13
+ tmp0
, CONST_BITS
-PASS1_BITS
);
2985 wsptr
[DCTSIZE
*4] = (int) RIGHT_SHIFT(tmp13
- tmp0
, CONST_BITS
-PASS1_BITS
);
2987 inptr
++; /* advance pointers to next column */
2992 /* Pass 2: process 8 rows from work array, store into output array.
2993 * 16-point IDCT kernel, cK represents sqrt(2) * cos(K*pi/32).
2997 for (ctr
= 0; ctr
< 8; ctr
++) {
2998 outptr
= output_buf
[ctr
] + output_col
;
3002 /* Add range center and fudge factor for final descale and range-limit. */
3003 tmp0
= (INT32
) wsptr
[0] +
3004 ((((INT32
) RANGE_CENTER
) << (PASS1_BITS
+3)) +
3005 (ONE
<< (PASS1_BITS
+2)));
3006 tmp0
<<= CONST_BITS
;
3008 z1
= (INT32
) wsptr
[4];
3009 tmp1
= MULTIPLY(z1
, FIX(1.306562965)); /* c4[16] = c2[8] */
3010 tmp2
= MULTIPLY(z1
, FIX_0_541196100
); /* c12[16] = c6[8] */
3012 tmp10
= tmp0
+ tmp1
;
3013 tmp11
= tmp0
- tmp1
;
3014 tmp12
= tmp0
+ tmp2
;
3015 tmp13
= tmp0
- tmp2
;
3017 z1
= (INT32
) wsptr
[2];
3018 z2
= (INT32
) wsptr
[6];
3020 z4
= MULTIPLY(z3
, FIX(0.275899379)); /* c14[16] = c7[8] */
3021 z3
= MULTIPLY(z3
, FIX(1.387039845)); /* c2[16] = c1[8] */
3023 tmp0
= z3
+ MULTIPLY(z2
, FIX_2_562915447
); /* (c6+c2)[16] = (c3+c1)[8] */
3024 tmp1
= z4
+ MULTIPLY(z1
, FIX_0_899976223
); /* (c6-c14)[16] = (c3-c7)[8] */
3025 tmp2
= z3
- MULTIPLY(z1
, FIX(0.601344887)); /* (c2-c10)[16] = (c1-c5)[8] */
3026 tmp3
= z4
- MULTIPLY(z2
, FIX(0.509795579)); /* (c10-c14)[16] = (c5-c7)[8] */
3028 tmp20
= tmp10
+ tmp0
;
3029 tmp27
= tmp10
- tmp0
;
3030 tmp21
= tmp12
+ tmp1
;
3031 tmp26
= tmp12
- tmp1
;
3032 tmp22
= tmp13
+ tmp2
;
3033 tmp25
= tmp13
- tmp2
;
3034 tmp23
= tmp11
+ tmp3
;
3035 tmp24
= tmp11
- tmp3
;
3039 z1
= (INT32
) wsptr
[1];
3040 z2
= (INT32
) wsptr
[3];
3041 z3
= (INT32
) wsptr
[5];
3042 z4
= (INT32
) wsptr
[7];
3046 tmp1
= MULTIPLY(z1
+ z2
, FIX(1.353318001)); /* c3 */
3047 tmp2
= MULTIPLY(tmp11
, FIX(1.247225013)); /* c5 */
3048 tmp3
= MULTIPLY(z1
+ z4
, FIX(1.093201867)); /* c7 */
3049 tmp10
= MULTIPLY(z1
- z4
, FIX(0.897167586)); /* c9 */
3050 tmp11
= MULTIPLY(tmp11
, FIX(0.666655658)); /* c11 */
3051 tmp12
= MULTIPLY(z1
- z2
, FIX(0.410524528)); /* c13 */
3052 tmp0
= tmp1
+ tmp2
+ tmp3
-
3053 MULTIPLY(z1
, FIX(2.286341144)); /* c7+c5+c3-c1 */
3054 tmp13
= tmp10
+ tmp11
+ tmp12
-
3055 MULTIPLY(z1
, FIX(1.835730603)); /* c9+c11+c13-c15 */
3056 z1
= MULTIPLY(z2
+ z3
, FIX(0.138617169)); /* c15 */
3057 tmp1
+= z1
+ MULTIPLY(z2
, FIX(0.071888074)); /* c9+c11-c3-c15 */
3058 tmp2
+= z1
- MULTIPLY(z3
, FIX(1.125726048)); /* c5+c7+c15-c3 */
3059 z1
= MULTIPLY(z3
- z2
, FIX(1.407403738)); /* c1 */
3060 tmp11
+= z1
- MULTIPLY(z3
, FIX(0.766367282)); /* c1+c11-c9-c13 */
3061 tmp12
+= z1
+ MULTIPLY(z2
, FIX(1.971951411)); /* c1+c5+c13-c7 */
3063 z1
= MULTIPLY(z2
, - FIX(0.666655658)); /* -c11 */
3065 tmp3
+= z1
+ MULTIPLY(z4
, FIX(1.065388962)); /* c3+c11+c15-c7 */
3066 z2
= MULTIPLY(z2
, - FIX(1.247225013)); /* -c5 */
3067 tmp10
+= z2
+ MULTIPLY(z4
, FIX(3.141271809)); /* c1+c5+c9-c13 */
3069 z2
= MULTIPLY(z3
+ z4
, - FIX(1.353318001)); /* -c3 */
3072 z2
= MULTIPLY(z4
- z3
, FIX(0.410524528)); /* c13 */
3076 /* Final output stage */
3078 outptr
[0] = range_limit
[(int) RIGHT_SHIFT(tmp20
+ tmp0
,
3079 CONST_BITS
+PASS1_BITS
+3)
3081 outptr
[15] = range_limit
[(int) RIGHT_SHIFT(tmp20
- tmp0
,
3082 CONST_BITS
+PASS1_BITS
+3)
3084 outptr
[1] = range_limit
[(int) RIGHT_SHIFT(tmp21
+ tmp1
,
3085 CONST_BITS
+PASS1_BITS
+3)
3087 outptr
[14] = range_limit
[(int) RIGHT_SHIFT(tmp21
- tmp1
,
3088 CONST_BITS
+PASS1_BITS
+3)
3090 outptr
[2] = range_limit
[(int) RIGHT_SHIFT(tmp22
+ tmp2
,
3091 CONST_BITS
+PASS1_BITS
+3)
3093 outptr
[13] = range_limit
[(int) RIGHT_SHIFT(tmp22
- tmp2
,
3094 CONST_BITS
+PASS1_BITS
+3)
3096 outptr
[3] = range_limit
[(int) RIGHT_SHIFT(tmp23
+ tmp3
,
3097 CONST_BITS
+PASS1_BITS
+3)
3099 outptr
[12] = range_limit
[(int) RIGHT_SHIFT(tmp23
- tmp3
,
3100 CONST_BITS
+PASS1_BITS
+3)
3102 outptr
[4] = range_limit
[(int) RIGHT_SHIFT(tmp24
+ tmp10
,
3103 CONST_BITS
+PASS1_BITS
+3)
3105 outptr
[11] = range_limit
[(int) RIGHT_SHIFT(tmp24
- tmp10
,
3106 CONST_BITS
+PASS1_BITS
+3)
3108 outptr
[5] = range_limit
[(int) RIGHT_SHIFT(tmp25
+ tmp11
,
3109 CONST_BITS
+PASS1_BITS
+3)
3111 outptr
[10] = range_limit
[(int) RIGHT_SHIFT(tmp25
- tmp11
,
3112 CONST_BITS
+PASS1_BITS
+3)
3114 outptr
[6] = range_limit
[(int) RIGHT_SHIFT(tmp26
+ tmp12
,
3115 CONST_BITS
+PASS1_BITS
+3)
3117 outptr
[9] = range_limit
[(int) RIGHT_SHIFT(tmp26
- tmp12
,
3118 CONST_BITS
+PASS1_BITS
+3)
3120 outptr
[7] = range_limit
[(int) RIGHT_SHIFT(tmp27
+ tmp13
,
3121 CONST_BITS
+PASS1_BITS
+3)
3123 outptr
[8] = range_limit
[(int) RIGHT_SHIFT(tmp27
- tmp13
,
3124 CONST_BITS
+PASS1_BITS
+3)
3127 wsptr
+= 8; /* advance pointer to next row */
3133 * Perform dequantization and inverse DCT on one block of coefficients,
3134 * producing a 14x7 output block.
3136 * 7-point IDCT in pass 1 (columns), 14-point in pass 2 (rows).
3140 jpeg_idct_14x7 (j_decompress_ptr cinfo
, jpeg_component_info
* compptr
,
3141 JCOEFPTR coef_block
,
3142 JSAMPARRAY output_buf
, JDIMENSION output_col
)
3144 INT32 tmp10
, tmp11
, tmp12
, tmp13
, tmp14
, tmp15
, tmp16
;
3145 INT32 tmp20
, tmp21
, tmp22
, tmp23
, tmp24
, tmp25
, tmp26
;
3146 INT32 z1
, z2
, z3
, z4
;
3148 ISLOW_MULT_TYPE
* quantptr
;
3151 JSAMPLE
*range_limit
= IDCT_range_limit(cinfo
);
3153 int workspace
[8*7]; /* buffers data between passes */
3156 /* Pass 1: process columns from input, store into work array.
3157 * 7-point IDCT kernel, cK represents sqrt(2) * cos(K*pi/14).
3161 quantptr
= (ISLOW_MULT_TYPE
*) compptr
->dct_table
;
3163 for (ctr
= 0; ctr
< 8; ctr
++, inptr
++, quantptr
++, wsptr
++) {
3166 tmp23
= DEQUANTIZE(inptr
[DCTSIZE
*0], quantptr
[DCTSIZE
*0]);
3167 tmp23
<<= CONST_BITS
;
3168 /* Add fudge factor here for final descale. */
3169 tmp23
+= ONE
<< (CONST_BITS
-PASS1_BITS
-1);
3171 z1
= DEQUANTIZE(inptr
[DCTSIZE
*2], quantptr
[DCTSIZE
*2]);
3172 z2
= DEQUANTIZE(inptr
[DCTSIZE
*4], quantptr
[DCTSIZE
*4]);
3173 z3
= DEQUANTIZE(inptr
[DCTSIZE
*6], quantptr
[DCTSIZE
*6]);
3175 tmp20
= MULTIPLY(z2
- z3
, FIX(0.881747734)); /* c4 */
3176 tmp22
= MULTIPLY(z1
- z2
, FIX(0.314692123)); /* c6 */
3177 tmp21
= tmp20
+ tmp22
+ tmp23
- MULTIPLY(z2
, FIX(1.841218003)); /* c2+c4-c6 */
3180 tmp10
= MULTIPLY(tmp10
, FIX(1.274162392)) + tmp23
; /* c2 */
3181 tmp20
+= tmp10
- MULTIPLY(z3
, FIX(0.077722536)); /* c2-c4-c6 */
3182 tmp22
+= tmp10
- MULTIPLY(z1
, FIX(2.470602249)); /* c2+c4+c6 */
3183 tmp23
+= MULTIPLY(z2
, FIX(1.414213562)); /* c0 */
3187 z1
= DEQUANTIZE(inptr
[DCTSIZE
*1], quantptr
[DCTSIZE
*1]);
3188 z2
= DEQUANTIZE(inptr
[DCTSIZE
*3], quantptr
[DCTSIZE
*3]);
3189 z3
= DEQUANTIZE(inptr
[DCTSIZE
*5], quantptr
[DCTSIZE
*5]);
3191 tmp11
= MULTIPLY(z1
+ z2
, FIX(0.935414347)); /* (c3+c1-c5)/2 */
3192 tmp12
= MULTIPLY(z1
- z2
, FIX(0.170262339)); /* (c3+c5-c1)/2 */
3193 tmp10
= tmp11
- tmp12
;
3195 tmp12
= MULTIPLY(z2
+ z3
, - FIX(1.378756276)); /* -c1 */
3197 z2
= MULTIPLY(z1
+ z3
, FIX(0.613604268)); /* c5 */
3199 tmp12
+= z2
+ MULTIPLY(z3
, FIX(1.870828693)); /* c3+c1-c5 */
3201 /* Final output stage */
3203 wsptr
[8*0] = (int) RIGHT_SHIFT(tmp20
+ tmp10
, CONST_BITS
-PASS1_BITS
);
3204 wsptr
[8*6] = (int) RIGHT_SHIFT(tmp20
- tmp10
, CONST_BITS
-PASS1_BITS
);
3205 wsptr
[8*1] = (int) RIGHT_SHIFT(tmp21
+ tmp11
, CONST_BITS
-PASS1_BITS
);
3206 wsptr
[8*5] = (int) RIGHT_SHIFT(tmp21
- tmp11
, CONST_BITS
-PASS1_BITS
);
3207 wsptr
[8*2] = (int) RIGHT_SHIFT(tmp22
+ tmp12
, CONST_BITS
-PASS1_BITS
);
3208 wsptr
[8*4] = (int) RIGHT_SHIFT(tmp22
- tmp12
, CONST_BITS
-PASS1_BITS
);
3209 wsptr
[8*3] = (int) RIGHT_SHIFT(tmp23
, CONST_BITS
-PASS1_BITS
);
3212 /* Pass 2: process 7 rows from work array, store into output array.
3213 * 14-point IDCT kernel, cK represents sqrt(2) * cos(K*pi/28).
3217 for (ctr
= 0; ctr
< 7; ctr
++) {
3218 outptr
= output_buf
[ctr
] + output_col
;
3222 /* Add range center and fudge factor for final descale and range-limit. */
3223 z1
= (INT32
) wsptr
[0] +
3224 ((((INT32
) RANGE_CENTER
) << (PASS1_BITS
+3)) +
3225 (ONE
<< (PASS1_BITS
+2)));
3227 z4
= (INT32
) wsptr
[4];
3228 z2
= MULTIPLY(z4
, FIX(1.274162392)); /* c4 */
3229 z3
= MULTIPLY(z4
, FIX(0.314692123)); /* c12 */
3230 z4
= MULTIPLY(z4
, FIX(0.881747734)); /* c8 */
3236 tmp23
= z1
- ((z2
+ z3
- z4
) << 1); /* c0 = (c4+c12-c8)*2 */
3238 z1
= (INT32
) wsptr
[2];
3239 z2
= (INT32
) wsptr
[6];
3241 z3
= MULTIPLY(z1
+ z2
, FIX(1.105676686)); /* c6 */
3243 tmp13
= z3
+ MULTIPLY(z1
, FIX(0.273079590)); /* c2-c6 */
3244 tmp14
= z3
- MULTIPLY(z2
, FIX(1.719280954)); /* c6+c10 */
3245 tmp15
= MULTIPLY(z1
, FIX(0.613604268)) - /* c10 */
3246 MULTIPLY(z2
, FIX(1.378756276)); /* c2 */
3248 tmp20
= tmp10
+ tmp13
;
3249 tmp26
= tmp10
- tmp13
;
3250 tmp21
= tmp11
+ tmp14
;
3251 tmp25
= tmp11
- tmp14
;
3252 tmp22
= tmp12
+ tmp15
;
3253 tmp24
= tmp12
- tmp15
;
3257 z1
= (INT32
) wsptr
[1];
3258 z2
= (INT32
) wsptr
[3];
3259 z3
= (INT32
) wsptr
[5];
3260 z4
= (INT32
) wsptr
[7];
3264 tmp11
= MULTIPLY(z1
+ z2
, FIX(1.334852607)); /* c3 */
3265 tmp12
= MULTIPLY(tmp14
, FIX(1.197448846)); /* c5 */
3266 tmp10
= tmp11
+ tmp12
+ z4
- MULTIPLY(z1
, FIX(1.126980169)); /* c3+c5-c1 */
3267 tmp14
= MULTIPLY(tmp14
, FIX(0.752406978)); /* c9 */
3268 tmp16
= tmp14
- MULTIPLY(z1
, FIX(1.061150426)); /* c9+c11-c13 */
3270 tmp15
= MULTIPLY(z1
, FIX(0.467085129)) - z4
; /* c11 */
3272 tmp13
= MULTIPLY(z2
+ z3
, - FIX(0.158341681)) - z4
; /* -c13 */
3273 tmp11
+= tmp13
- MULTIPLY(z2
, FIX(0.424103948)); /* c3-c9-c13 */
3274 tmp12
+= tmp13
- MULTIPLY(z3
, FIX(2.373959773)); /* c3+c5-c13 */
3275 tmp13
= MULTIPLY(z3
- z2
, FIX(1.405321284)); /* c1 */
3276 tmp14
+= tmp13
+ z4
- MULTIPLY(z3
, FIX(1.6906431334)); /* c1+c9-c11 */
3277 tmp15
+= tmp13
+ MULTIPLY(z2
, FIX(0.674957567)); /* c1+c11-c5 */
3279 tmp13
= ((z1
- z3
) << CONST_BITS
) + z4
;
3281 /* Final output stage */
3283 outptr
[0] = range_limit
[(int) RIGHT_SHIFT(tmp20
+ tmp10
,
3284 CONST_BITS
+PASS1_BITS
+3)
3286 outptr
[13] = range_limit
[(int) RIGHT_SHIFT(tmp20
- tmp10
,
3287 CONST_BITS
+PASS1_BITS
+3)
3289 outptr
[1] = range_limit
[(int) RIGHT_SHIFT(tmp21
+ tmp11
,
3290 CONST_BITS
+PASS1_BITS
+3)
3292 outptr
[12] = range_limit
[(int) RIGHT_SHIFT(tmp21
- tmp11
,
3293 CONST_BITS
+PASS1_BITS
+3)
3295 outptr
[2] = range_limit
[(int) RIGHT_SHIFT(tmp22
+ tmp12
,
3296 CONST_BITS
+PASS1_BITS
+3)
3298 outptr
[11] = range_limit
[(int) RIGHT_SHIFT(tmp22
- tmp12
,
3299 CONST_BITS
+PASS1_BITS
+3)
3301 outptr
[3] = range_limit
[(int) RIGHT_SHIFT(tmp23
+ tmp13
,
3302 CONST_BITS
+PASS1_BITS
+3)
3304 outptr
[10] = range_limit
[(int) RIGHT_SHIFT(tmp23
- tmp13
,
3305 CONST_BITS
+PASS1_BITS
+3)
3307 outptr
[4] = range_limit
[(int) RIGHT_SHIFT(tmp24
+ tmp14
,
3308 CONST_BITS
+PASS1_BITS
+3)
3310 outptr
[9] = range_limit
[(int) RIGHT_SHIFT(tmp24
- tmp14
,
3311 CONST_BITS
+PASS1_BITS
+3)
3313 outptr
[5] = range_limit
[(int) RIGHT_SHIFT(tmp25
+ tmp15
,
3314 CONST_BITS
+PASS1_BITS
+3)
3316 outptr
[8] = range_limit
[(int) RIGHT_SHIFT(tmp25
- tmp15
,
3317 CONST_BITS
+PASS1_BITS
+3)
3319 outptr
[6] = range_limit
[(int) RIGHT_SHIFT(tmp26
+ tmp16
,
3320 CONST_BITS
+PASS1_BITS
+3)
3322 outptr
[7] = range_limit
[(int) RIGHT_SHIFT(tmp26
- tmp16
,
3323 CONST_BITS
+PASS1_BITS
+3)
3326 wsptr
+= 8; /* advance pointer to next row */
3332 * Perform dequantization and inverse DCT on one block of coefficients,
3333 * producing a 12x6 output block.
3335 * 6-point IDCT in pass 1 (columns), 12-point in pass 2 (rows).
3339 jpeg_idct_12x6 (j_decompress_ptr cinfo
, jpeg_component_info
* compptr
,
3340 JCOEFPTR coef_block
,
3341 JSAMPARRAY output_buf
, JDIMENSION output_col
)
3343 INT32 tmp10
, tmp11
, tmp12
, tmp13
, tmp14
, tmp15
;
3344 INT32 tmp20
, tmp21
, tmp22
, tmp23
, tmp24
, tmp25
;
3345 INT32 z1
, z2
, z3
, z4
;
3347 ISLOW_MULT_TYPE
* quantptr
;
3350 JSAMPLE
*range_limit
= IDCT_range_limit(cinfo
);
3352 int workspace
[8*6]; /* buffers data between passes */
3355 /* Pass 1: process columns from input, store into work array.
3356 * 6-point IDCT kernel, cK represents sqrt(2) * cos(K*pi/12).
3360 quantptr
= (ISLOW_MULT_TYPE
*) compptr
->dct_table
;
3362 for (ctr
= 0; ctr
< 8; ctr
++, inptr
++, quantptr
++, wsptr
++) {
3365 tmp10
= DEQUANTIZE(inptr
[DCTSIZE
*0], quantptr
[DCTSIZE
*0]);
3366 tmp10
<<= CONST_BITS
;
3367 /* Add fudge factor here for final descale. */
3368 tmp10
+= ONE
<< (CONST_BITS
-PASS1_BITS
-1);
3369 tmp12
= DEQUANTIZE(inptr
[DCTSIZE
*4], quantptr
[DCTSIZE
*4]);
3370 tmp20
= MULTIPLY(tmp12
, FIX(0.707106781)); /* c4 */
3371 tmp11
= tmp10
+ tmp20
;
3372 tmp21
= RIGHT_SHIFT(tmp10
- tmp20
- tmp20
, CONST_BITS
-PASS1_BITS
);
3373 tmp20
= DEQUANTIZE(inptr
[DCTSIZE
*2], quantptr
[DCTSIZE
*2]);
3374 tmp10
= MULTIPLY(tmp20
, FIX(1.224744871)); /* c2 */
3375 tmp20
= tmp11
+ tmp10
;
3376 tmp22
= tmp11
- tmp10
;
3380 z1
= DEQUANTIZE(inptr
[DCTSIZE
*1], quantptr
[DCTSIZE
*1]);
3381 z2
= DEQUANTIZE(inptr
[DCTSIZE
*3], quantptr
[DCTSIZE
*3]);
3382 z3
= DEQUANTIZE(inptr
[DCTSIZE
*5], quantptr
[DCTSIZE
*5]);
3383 tmp11
= MULTIPLY(z1
+ z3
, FIX(0.366025404)); /* c5 */
3384 tmp10
= tmp11
+ ((z1
+ z2
) << CONST_BITS
);
3385 tmp12
= tmp11
+ ((z3
- z2
) << CONST_BITS
);
3386 tmp11
= (z1
- z2
- z3
) << PASS1_BITS
;
3388 /* Final output stage */
3390 wsptr
[8*0] = (int) RIGHT_SHIFT(tmp20
+ tmp10
, CONST_BITS
-PASS1_BITS
);
3391 wsptr
[8*5] = (int) RIGHT_SHIFT(tmp20
- tmp10
, CONST_BITS
-PASS1_BITS
);
3392 wsptr
[8*1] = (int) (tmp21
+ tmp11
);
3393 wsptr
[8*4] = (int) (tmp21
- tmp11
);
3394 wsptr
[8*2] = (int) RIGHT_SHIFT(tmp22
+ tmp12
, CONST_BITS
-PASS1_BITS
);
3395 wsptr
[8*3] = (int) RIGHT_SHIFT(tmp22
- tmp12
, CONST_BITS
-PASS1_BITS
);
3398 /* Pass 2: process 6 rows from work array, store into output array.
3399 * 12-point IDCT kernel, cK represents sqrt(2) * cos(K*pi/24).
3403 for (ctr
= 0; ctr
< 6; ctr
++) {
3404 outptr
= output_buf
[ctr
] + output_col
;
3408 /* Add range center and fudge factor for final descale and range-limit. */
3409 z3
= (INT32
) wsptr
[0] +
3410 ((((INT32
) RANGE_CENTER
) << (PASS1_BITS
+3)) +
3411 (ONE
<< (PASS1_BITS
+2)));
3414 z4
= (INT32
) wsptr
[4];
3415 z4
= MULTIPLY(z4
, FIX(1.224744871)); /* c4 */
3420 z1
= (INT32
) wsptr
[2];
3421 z4
= MULTIPLY(z1
, FIX(1.366025404)); /* c2 */
3423 z2
= (INT32
) wsptr
[6];
3433 tmp20
= tmp10
+ tmp12
;
3434 tmp25
= tmp10
- tmp12
;
3436 tmp12
= z4
- z1
- z2
;
3438 tmp22
= tmp11
+ tmp12
;
3439 tmp23
= tmp11
- tmp12
;
3443 z1
= (INT32
) wsptr
[1];
3444 z2
= (INT32
) wsptr
[3];
3445 z3
= (INT32
) wsptr
[5];
3446 z4
= (INT32
) wsptr
[7];
3448 tmp11
= MULTIPLY(z2
, FIX(1.306562965)); /* c3 */
3449 tmp14
= MULTIPLY(z2
, - FIX_0_541196100
); /* -c9 */
3452 tmp15
= MULTIPLY(tmp10
+ z4
, FIX(0.860918669)); /* c7 */
3453 tmp12
= tmp15
+ MULTIPLY(tmp10
, FIX(0.261052384)); /* c5-c7 */
3454 tmp10
= tmp12
+ tmp11
+ MULTIPLY(z1
, FIX(0.280143716)); /* c1-c5 */
3455 tmp13
= MULTIPLY(z3
+ z4
, - FIX(1.045510580)); /* -(c7+c11) */
3456 tmp12
+= tmp13
+ tmp14
- MULTIPLY(z3
, FIX(1.478575242)); /* c1+c5-c7-c11 */
3457 tmp13
+= tmp15
- tmp11
+ MULTIPLY(z4
, FIX(1.586706681)); /* c1+c11 */
3458 tmp15
+= tmp14
- MULTIPLY(z1
, FIX(0.676326758)) - /* c7-c11 */
3459 MULTIPLY(z4
, FIX(1.982889723)); /* c5+c7 */
3463 z3
= MULTIPLY(z1
+ z2
, FIX_0_541196100
); /* c9 */
3464 tmp11
= z3
+ MULTIPLY(z1
, FIX_0_765366865
); /* c3-c9 */
3465 tmp14
= z3
- MULTIPLY(z2
, FIX_1_847759065
); /* c3+c9 */
3467 /* Final output stage */
3469 outptr
[0] = range_limit
[(int) RIGHT_SHIFT(tmp20
+ tmp10
,
3470 CONST_BITS
+PASS1_BITS
+3)
3472 outptr
[11] = range_limit
[(int) RIGHT_SHIFT(tmp20
- tmp10
,
3473 CONST_BITS
+PASS1_BITS
+3)
3475 outptr
[1] = range_limit
[(int) RIGHT_SHIFT(tmp21
+ tmp11
,
3476 CONST_BITS
+PASS1_BITS
+3)
3478 outptr
[10] = range_limit
[(int) RIGHT_SHIFT(tmp21
- tmp11
,
3479 CONST_BITS
+PASS1_BITS
+3)
3481 outptr
[2] = range_limit
[(int) RIGHT_SHIFT(tmp22
+ tmp12
,
3482 CONST_BITS
+PASS1_BITS
+3)
3484 outptr
[9] = range_limit
[(int) RIGHT_SHIFT(tmp22
- tmp12
,
3485 CONST_BITS
+PASS1_BITS
+3)
3487 outptr
[3] = range_limit
[(int) RIGHT_SHIFT(tmp23
+ tmp13
,
3488 CONST_BITS
+PASS1_BITS
+3)
3490 outptr
[8] = range_limit
[(int) RIGHT_SHIFT(tmp23
- tmp13
,
3491 CONST_BITS
+PASS1_BITS
+3)
3493 outptr
[4] = range_limit
[(int) RIGHT_SHIFT(tmp24
+ tmp14
,
3494 CONST_BITS
+PASS1_BITS
+3)
3496 outptr
[7] = range_limit
[(int) RIGHT_SHIFT(tmp24
- tmp14
,
3497 CONST_BITS
+PASS1_BITS
+3)
3499 outptr
[5] = range_limit
[(int) RIGHT_SHIFT(tmp25
+ tmp15
,
3500 CONST_BITS
+PASS1_BITS
+3)
3502 outptr
[6] = range_limit
[(int) RIGHT_SHIFT(tmp25
- tmp15
,
3503 CONST_BITS
+PASS1_BITS
+3)
3506 wsptr
+= 8; /* advance pointer to next row */
3512 * Perform dequantization and inverse DCT on one block of coefficients,
3513 * producing a 10x5 output block.
3515 * 5-point IDCT in pass 1 (columns), 10-point in pass 2 (rows).
3519 jpeg_idct_10x5 (j_decompress_ptr cinfo
, jpeg_component_info
* compptr
,
3520 JCOEFPTR coef_block
,
3521 JSAMPARRAY output_buf
, JDIMENSION output_col
)
3523 INT32 tmp10
, tmp11
, tmp12
, tmp13
, tmp14
;
3524 INT32 tmp20
, tmp21
, tmp22
, tmp23
, tmp24
;
3525 INT32 z1
, z2
, z3
, z4
;
3527 ISLOW_MULT_TYPE
* quantptr
;
3530 JSAMPLE
*range_limit
= IDCT_range_limit(cinfo
);
3532 int workspace
[8*5]; /* buffers data between passes */
3535 /* Pass 1: process columns from input, store into work array.
3536 * 5-point IDCT kernel, cK represents sqrt(2) * cos(K*pi/10).
3540 quantptr
= (ISLOW_MULT_TYPE
*) compptr
->dct_table
;
3542 for (ctr
= 0; ctr
< 8; ctr
++, inptr
++, quantptr
++, wsptr
++) {
3545 tmp12
= DEQUANTIZE(inptr
[DCTSIZE
*0], quantptr
[DCTSIZE
*0]);
3546 tmp12
<<= CONST_BITS
;
3547 /* Add fudge factor here for final descale. */
3548 tmp12
+= ONE
<< (CONST_BITS
-PASS1_BITS
-1);
3549 tmp13
= DEQUANTIZE(inptr
[DCTSIZE
*2], quantptr
[DCTSIZE
*2]);
3550 tmp14
= DEQUANTIZE(inptr
[DCTSIZE
*4], quantptr
[DCTSIZE
*4]);
3551 z1
= MULTIPLY(tmp13
+ tmp14
, FIX(0.790569415)); /* (c2+c4)/2 */
3552 z2
= MULTIPLY(tmp13
- tmp14
, FIX(0.353553391)); /* (c2-c4)/2 */
3560 z2
= DEQUANTIZE(inptr
[DCTSIZE
*1], quantptr
[DCTSIZE
*1]);
3561 z3
= DEQUANTIZE(inptr
[DCTSIZE
*3], quantptr
[DCTSIZE
*3]);
3563 z1
= MULTIPLY(z2
+ z3
, FIX(0.831253876)); /* c3 */
3564 tmp13
= z1
+ MULTIPLY(z2
, FIX(0.513743148)); /* c1-c3 */
3565 tmp14
= z1
- MULTIPLY(z3
, FIX(2.176250899)); /* c1+c3 */
3567 /* Final output stage */
3569 wsptr
[8*0] = (int) RIGHT_SHIFT(tmp10
+ tmp13
, CONST_BITS
-PASS1_BITS
);
3570 wsptr
[8*4] = (int) RIGHT_SHIFT(tmp10
- tmp13
, CONST_BITS
-PASS1_BITS
);
3571 wsptr
[8*1] = (int) RIGHT_SHIFT(tmp11
+ tmp14
, CONST_BITS
-PASS1_BITS
);
3572 wsptr
[8*3] = (int) RIGHT_SHIFT(tmp11
- tmp14
, CONST_BITS
-PASS1_BITS
);
3573 wsptr
[8*2] = (int) RIGHT_SHIFT(tmp12
, CONST_BITS
-PASS1_BITS
);
3576 /* Pass 2: process 5 rows from work array, store into output array.
3577 * 10-point IDCT kernel, cK represents sqrt(2) * cos(K*pi/20).
3581 for (ctr
= 0; ctr
< 5; ctr
++) {
3582 outptr
= output_buf
[ctr
] + output_col
;
3586 /* Add range center and fudge factor for final descale and range-limit. */
3587 z3
= (INT32
) wsptr
[0] +
3588 ((((INT32
) RANGE_CENTER
) << (PASS1_BITS
+3)) +
3589 (ONE
<< (PASS1_BITS
+2)));
3591 z4
= (INT32
) wsptr
[4];
3592 z1
= MULTIPLY(z4
, FIX(1.144122806)); /* c4 */
3593 z2
= MULTIPLY(z4
, FIX(0.437016024)); /* c8 */
3597 tmp22
= z3
- ((z1
- z2
) << 1); /* c0 = (c4-c8)*2 */
3599 z2
= (INT32
) wsptr
[2];
3600 z3
= (INT32
) wsptr
[6];
3602 z1
= MULTIPLY(z2
+ z3
, FIX(0.831253876)); /* c6 */
3603 tmp12
= z1
+ MULTIPLY(z2
, FIX(0.513743148)); /* c2-c6 */
3604 tmp13
= z1
- MULTIPLY(z3
, FIX(2.176250899)); /* c2+c6 */
3606 tmp20
= tmp10
+ tmp12
;
3607 tmp24
= tmp10
- tmp12
;
3608 tmp21
= tmp11
+ tmp13
;
3609 tmp23
= tmp11
- tmp13
;
3613 z1
= (INT32
) wsptr
[1];
3614 z2
= (INT32
) wsptr
[3];
3615 z3
= (INT32
) wsptr
[5];
3617 z4
= (INT32
) wsptr
[7];
3622 tmp12
= MULTIPLY(tmp13
, FIX(0.309016994)); /* (c3-c7)/2 */
3624 z2
= MULTIPLY(tmp11
, FIX(0.951056516)); /* (c3+c7)/2 */
3627 tmp10
= MULTIPLY(z1
, FIX(1.396802247)) + z2
+ z4
; /* c1 */
3628 tmp14
= MULTIPLY(z1
, FIX(0.221231742)) - z2
+ z4
; /* c9 */
3630 z2
= MULTIPLY(tmp11
, FIX(0.587785252)); /* (c1-c9)/2 */
3631 z4
= z3
- tmp12
- (tmp13
<< (CONST_BITS
- 1));
3633 tmp12
= ((z1
- tmp13
) << CONST_BITS
) - z3
;
3635 tmp11
= MULTIPLY(z1
, FIX(1.260073511)) - z2
- z4
; /* c3 */
3636 tmp13
= MULTIPLY(z1
, FIX(0.642039522)) - z2
+ z4
; /* c7 */
3638 /* Final output stage */
3640 outptr
[0] = range_limit
[(int) RIGHT_SHIFT(tmp20
+ tmp10
,
3641 CONST_BITS
+PASS1_BITS
+3)
3643 outptr
[9] = range_limit
[(int) RIGHT_SHIFT(tmp20
- tmp10
,
3644 CONST_BITS
+PASS1_BITS
+3)
3646 outptr
[1] = range_limit
[(int) RIGHT_SHIFT(tmp21
+ tmp11
,
3647 CONST_BITS
+PASS1_BITS
+3)
3649 outptr
[8] = range_limit
[(int) RIGHT_SHIFT(tmp21
- tmp11
,
3650 CONST_BITS
+PASS1_BITS
+3)
3652 outptr
[2] = range_limit
[(int) RIGHT_SHIFT(tmp22
+ tmp12
,
3653 CONST_BITS
+PASS1_BITS
+3)
3655 outptr
[7] = range_limit
[(int) RIGHT_SHIFT(tmp22
- tmp12
,
3656 CONST_BITS
+PASS1_BITS
+3)
3658 outptr
[3] = range_limit
[(int) RIGHT_SHIFT(tmp23
+ tmp13
,
3659 CONST_BITS
+PASS1_BITS
+3)
3661 outptr
[6] = range_limit
[(int) RIGHT_SHIFT(tmp23
- tmp13
,
3662 CONST_BITS
+PASS1_BITS
+3)
3664 outptr
[4] = range_limit
[(int) RIGHT_SHIFT(tmp24
+ tmp14
,
3665 CONST_BITS
+PASS1_BITS
+3)
3667 outptr
[5] = range_limit
[(int) RIGHT_SHIFT(tmp24
- tmp14
,
3668 CONST_BITS
+PASS1_BITS
+3)
3671 wsptr
+= 8; /* advance pointer to next row */
3677 * Perform dequantization and inverse DCT on one block of coefficients,
3678 * producing an 8x4 output block.
3680 * 4-point IDCT in pass 1 (columns), 8-point in pass 2 (rows).
3684 jpeg_idct_8x4 (j_decompress_ptr cinfo
, jpeg_component_info
* compptr
,
3685 JCOEFPTR coef_block
,
3686 JSAMPARRAY output_buf
, JDIMENSION output_col
)
3688 INT32 tmp0
, tmp1
, tmp2
, tmp3
;
3689 INT32 tmp10
, tmp11
, tmp12
, tmp13
;
3692 ISLOW_MULT_TYPE
* quantptr
;
3695 JSAMPLE
*range_limit
= IDCT_range_limit(cinfo
);
3697 int workspace
[8*4]; /* buffers data between passes */
3700 /* Pass 1: process columns from input, store into work array.
3701 * 4-point IDCT kernel,
3702 * cK represents sqrt(2) * cos(K*pi/16) [refers to 8-point IDCT].
3706 quantptr
= (ISLOW_MULT_TYPE
*) compptr
->dct_table
;
3708 for (ctr
= 0; ctr
< 8; ctr
++, inptr
++, quantptr
++, wsptr
++) {
3711 tmp0
= DEQUANTIZE(inptr
[DCTSIZE
*0], quantptr
[DCTSIZE
*0]);
3712 tmp2
= DEQUANTIZE(inptr
[DCTSIZE
*2], quantptr
[DCTSIZE
*2]);
3714 tmp10
= (tmp0
+ tmp2
) << PASS1_BITS
;
3715 tmp12
= (tmp0
- tmp2
) << PASS1_BITS
;
3718 /* Same rotation as in the even part of the 8x8 LL&M IDCT */
3720 z2
= DEQUANTIZE(inptr
[DCTSIZE
*1], quantptr
[DCTSIZE
*1]);
3721 z3
= DEQUANTIZE(inptr
[DCTSIZE
*3], quantptr
[DCTSIZE
*3]);
3723 z1
= MULTIPLY(z2
+ z3
, FIX_0_541196100
); /* c6 */
3724 /* Add fudge factor here for final descale. */
3725 z1
+= ONE
<< (CONST_BITS
-PASS1_BITS
-1);
3726 tmp0
= RIGHT_SHIFT(z1
+ MULTIPLY(z2
, FIX_0_765366865
), /* c2-c6 */
3727 CONST_BITS
-PASS1_BITS
);
3728 tmp2
= RIGHT_SHIFT(z1
- MULTIPLY(z3
, FIX_1_847759065
), /* c2+c6 */
3729 CONST_BITS
-PASS1_BITS
);
3731 /* Final output stage */
3733 wsptr
[8*0] = (int) (tmp10
+ tmp0
);
3734 wsptr
[8*3] = (int) (tmp10
- tmp0
);
3735 wsptr
[8*1] = (int) (tmp12
+ tmp2
);
3736 wsptr
[8*2] = (int) (tmp12
- tmp2
);
3739 /* Pass 2: process rows from work array, store into output array.
3740 * Note that we must descale the results by a factor of 8 == 2**3,
3741 * and also undo the PASS1_BITS scaling.
3742 * 8-point IDCT kernel, cK represents sqrt(2) * cos(K*pi/16).
3746 for (ctr
= 0; ctr
< 4; ctr
++) {
3747 outptr
= output_buf
[ctr
] + output_col
;
3749 /* Even part: reverse the even part of the forward DCT.
3750 * The rotator is c(-6).
3753 /* Add range center and fudge factor for final descale and range-limit. */
3754 z2
= (INT32
) wsptr
[0] +
3755 ((((INT32
) RANGE_CENTER
) << (PASS1_BITS
+3)) +
3756 (ONE
<< (PASS1_BITS
+2)));
3757 z3
= (INT32
) wsptr
[4];
3759 tmp0
= (z2
+ z3
) << CONST_BITS
;
3760 tmp1
= (z2
- z3
) << CONST_BITS
;
3762 z2
= (INT32
) wsptr
[2];
3763 z3
= (INT32
) wsptr
[6];
3765 z1
= MULTIPLY(z2
+ z3
, FIX_0_541196100
); /* c6 */
3766 tmp2
= z1
+ MULTIPLY(z2
, FIX_0_765366865
); /* c2-c6 */
3767 tmp3
= z1
- MULTIPLY(z3
, FIX_1_847759065
); /* c2+c6 */
3769 tmp10
= tmp0
+ tmp2
;
3770 tmp13
= tmp0
- tmp2
;
3771 tmp11
= tmp1
+ tmp3
;
3772 tmp12
= tmp1
- tmp3
;
3774 /* Odd part per figure 8; the matrix is unitary and hence its
3775 * transpose is its inverse. i0..i3 are y7,y5,y3,y1 respectively.
3778 tmp0
= (INT32
) wsptr
[7];
3779 tmp1
= (INT32
) wsptr
[5];
3780 tmp2
= (INT32
) wsptr
[3];
3781 tmp3
= (INT32
) wsptr
[1];
3786 z1
= MULTIPLY(z2
+ z3
, FIX_1_175875602
); /* c3 */
3787 z2
= MULTIPLY(z2
, - FIX_1_961570560
); /* -c3-c5 */
3788 z3
= MULTIPLY(z3
, - FIX_0_390180644
); /* -c3+c5 */
3792 z1
= MULTIPLY(tmp0
+ tmp3
, - FIX_0_899976223
); /* -c3+c7 */
3793 tmp0
= MULTIPLY(tmp0
, FIX_0_298631336
); /* -c1+c3+c5-c7 */
3794 tmp3
= MULTIPLY(tmp3
, FIX_1_501321110
); /* c1+c3-c5-c7 */
3798 z1
= MULTIPLY(tmp1
+ tmp2
, - FIX_2_562915447
); /* -c1-c3 */
3799 tmp1
= MULTIPLY(tmp1
, FIX_2_053119869
); /* c1+c3-c5+c7 */
3800 tmp2
= MULTIPLY(tmp2
, FIX_3_072711026
); /* c1+c3+c5-c7 */
3804 /* Final output stage: inputs are tmp10..tmp13, tmp0..tmp3 */
3806 outptr
[0] = range_limit
[(int) RIGHT_SHIFT(tmp10
+ tmp3
,
3807 CONST_BITS
+PASS1_BITS
+3)
3809 outptr
[7] = range_limit
[(int) RIGHT_SHIFT(tmp10
- tmp3
,
3810 CONST_BITS
+PASS1_BITS
+3)
3812 outptr
[1] = range_limit
[(int) RIGHT_SHIFT(tmp11
+ tmp2
,
3813 CONST_BITS
+PASS1_BITS
+3)
3815 outptr
[6] = range_limit
[(int) RIGHT_SHIFT(tmp11
- tmp2
,
3816 CONST_BITS
+PASS1_BITS
+3)
3818 outptr
[2] = range_limit
[(int) RIGHT_SHIFT(tmp12
+ tmp1
,
3819 CONST_BITS
+PASS1_BITS
+3)
3821 outptr
[5] = range_limit
[(int) RIGHT_SHIFT(tmp12
- tmp1
,
3822 CONST_BITS
+PASS1_BITS
+3)
3824 outptr
[3] = range_limit
[(int) RIGHT_SHIFT(tmp13
+ tmp0
,
3825 CONST_BITS
+PASS1_BITS
+3)
3827 outptr
[4] = range_limit
[(int) RIGHT_SHIFT(tmp13
- tmp0
,
3828 CONST_BITS
+PASS1_BITS
+3)
3831 wsptr
+= DCTSIZE
; /* advance pointer to next row */
3837 * Perform dequantization and inverse DCT on one block of coefficients,
3838 * producing a 6x3 output block.
3840 * 3-point IDCT in pass 1 (columns), 6-point in pass 2 (rows).
3844 jpeg_idct_6x3 (j_decompress_ptr cinfo
, jpeg_component_info
* compptr
,
3845 JCOEFPTR coef_block
,
3846 JSAMPARRAY output_buf
, JDIMENSION output_col
)
3848 INT32 tmp0
, tmp1
, tmp2
, tmp10
, tmp11
, tmp12
;
3851 ISLOW_MULT_TYPE
* quantptr
;
3854 JSAMPLE
*range_limit
= IDCT_range_limit(cinfo
);
3856 int workspace
[6*3]; /* buffers data between passes */
3859 /* Pass 1: process columns from input, store into work array.
3860 * 3-point IDCT kernel, cK represents sqrt(2) * cos(K*pi/6).
3864 quantptr
= (ISLOW_MULT_TYPE
*) compptr
->dct_table
;
3866 for (ctr
= 0; ctr
< 6; ctr
++, inptr
++, quantptr
++, wsptr
++) {
3869 tmp0
= DEQUANTIZE(inptr
[DCTSIZE
*0], quantptr
[DCTSIZE
*0]);
3870 tmp0
<<= CONST_BITS
;
3871 /* Add fudge factor here for final descale. */
3872 tmp0
+= ONE
<< (CONST_BITS
-PASS1_BITS
-1);
3873 tmp2
= DEQUANTIZE(inptr
[DCTSIZE
*2], quantptr
[DCTSIZE
*2]);
3874 tmp12
= MULTIPLY(tmp2
, FIX(0.707106781)); /* c2 */
3875 tmp10
= tmp0
+ tmp12
;
3876 tmp2
= tmp0
- tmp12
- tmp12
;
3880 tmp12
= DEQUANTIZE(inptr
[DCTSIZE
*1], quantptr
[DCTSIZE
*1]);
3881 tmp0
= MULTIPLY(tmp12
, FIX(1.224744871)); /* c1 */
3883 /* Final output stage */
3885 wsptr
[6*0] = (int) RIGHT_SHIFT(tmp10
+ tmp0
, CONST_BITS
-PASS1_BITS
);
3886 wsptr
[6*2] = (int) RIGHT_SHIFT(tmp10
- tmp0
, CONST_BITS
-PASS1_BITS
);
3887 wsptr
[6*1] = (int) RIGHT_SHIFT(tmp2
, CONST_BITS
-PASS1_BITS
);
3890 /* Pass 2: process 3 rows from work array, store into output array.
3891 * 6-point IDCT kernel, cK represents sqrt(2) * cos(K*pi/12).
3895 for (ctr
= 0; ctr
< 3; ctr
++) {
3896 outptr
= output_buf
[ctr
] + output_col
;
3900 /* Add range center and fudge factor for final descale and range-limit. */
3901 tmp0
= (INT32
) wsptr
[0] +
3902 ((((INT32
) RANGE_CENTER
) << (PASS1_BITS
+3)) +
3903 (ONE
<< (PASS1_BITS
+2)));
3904 tmp0
<<= CONST_BITS
;
3905 tmp2
= (INT32
) wsptr
[4];
3906 tmp10
= MULTIPLY(tmp2
, FIX(0.707106781)); /* c4 */
3907 tmp1
= tmp0
+ tmp10
;
3908 tmp11
= tmp0
- tmp10
- tmp10
;
3909 tmp10
= (INT32
) wsptr
[2];
3910 tmp0
= MULTIPLY(tmp10
, FIX(1.224744871)); /* c2 */
3911 tmp10
= tmp1
+ tmp0
;
3912 tmp12
= tmp1
- tmp0
;
3916 z1
= (INT32
) wsptr
[1];
3917 z2
= (INT32
) wsptr
[3];
3918 z3
= (INT32
) wsptr
[5];
3919 tmp1
= MULTIPLY(z1
+ z3
, FIX(0.366025404)); /* c5 */
3920 tmp0
= tmp1
+ ((z1
+ z2
) << CONST_BITS
);
3921 tmp2
= tmp1
+ ((z3
- z2
) << CONST_BITS
);
3922 tmp1
= (z1
- z2
- z3
) << CONST_BITS
;
3924 /* Final output stage */
3926 outptr
[0] = range_limit
[(int) RIGHT_SHIFT(tmp10
+ tmp0
,
3927 CONST_BITS
+PASS1_BITS
+3)
3929 outptr
[5] = range_limit
[(int) RIGHT_SHIFT(tmp10
- tmp0
,
3930 CONST_BITS
+PASS1_BITS
+3)
3932 outptr
[1] = range_limit
[(int) RIGHT_SHIFT(tmp11
+ tmp1
,
3933 CONST_BITS
+PASS1_BITS
+3)
3935 outptr
[4] = range_limit
[(int) RIGHT_SHIFT(tmp11
- tmp1
,
3936 CONST_BITS
+PASS1_BITS
+3)
3938 outptr
[2] = range_limit
[(int) RIGHT_SHIFT(tmp12
+ tmp2
,
3939 CONST_BITS
+PASS1_BITS
+3)
3941 outptr
[3] = range_limit
[(int) RIGHT_SHIFT(tmp12
- tmp2
,
3942 CONST_BITS
+PASS1_BITS
+3)
3945 wsptr
+= 6; /* advance pointer to next row */
3951 * Perform dequantization and inverse DCT on one block of coefficients,
3952 * producing a 4x2 output block.
3954 * 2-point IDCT in pass 1 (columns), 4-point in pass 2 (rows).
3958 jpeg_idct_4x2 (j_decompress_ptr cinfo
, jpeg_component_info
* compptr
,
3959 JCOEFPTR coef_block
,
3960 JSAMPARRAY output_buf
, JDIMENSION output_col
)
3962 INT32 tmp0
, tmp2
, tmp10
, tmp12
;
3965 ISLOW_MULT_TYPE
* quantptr
;
3968 JSAMPLE
*range_limit
= IDCT_range_limit(cinfo
);
3970 INT32 workspace
[4*2]; /* buffers data between passes */
3973 /* Pass 1: process columns from input, store into work array. */
3976 quantptr
= (ISLOW_MULT_TYPE
*) compptr
->dct_table
;
3978 for (ctr
= 0; ctr
< 4; ctr
++, inptr
++, quantptr
++, wsptr
++) {
3981 tmp10
= DEQUANTIZE(inptr
[DCTSIZE
*0], quantptr
[DCTSIZE
*0]);
3985 tmp0
= DEQUANTIZE(inptr
[DCTSIZE
*1], quantptr
[DCTSIZE
*1]);
3987 /* Final output stage */
3989 wsptr
[4*0] = tmp10
+ tmp0
;
3990 wsptr
[4*1] = tmp10
- tmp0
;
3993 /* Pass 2: process 2 rows from work array, store into output array.
3994 * 4-point IDCT kernel,
3995 * cK represents sqrt(2) * cos(K*pi/16) [refers to 8-point IDCT].
3999 for (ctr
= 0; ctr
< 2; ctr
++) {
4000 outptr
= output_buf
[ctr
] + output_col
;
4004 /* Add range center and fudge factor for final descale and range-limit. */
4005 tmp0
= wsptr
[0] + ((((INT32
) RANGE_CENTER
) << 3) + (ONE
<< 2));
4008 tmp10
= (tmp0
+ tmp2
) << CONST_BITS
;
4009 tmp12
= (tmp0
- tmp2
) << CONST_BITS
;
4012 /* Same rotation as in the even part of the 8x8 LL&M IDCT */
4017 z1
= MULTIPLY(z2
+ z3
, FIX_0_541196100
); /* c6 */
4018 tmp0
= z1
+ MULTIPLY(z2
, FIX_0_765366865
); /* c2-c6 */
4019 tmp2
= z1
- MULTIPLY(z3
, FIX_1_847759065
); /* c2+c6 */
4021 /* Final output stage */
4023 outptr
[0] = range_limit
[(int) RIGHT_SHIFT(tmp10
+ tmp0
,
4026 outptr
[3] = range_limit
[(int) RIGHT_SHIFT(tmp10
- tmp0
,
4029 outptr
[1] = range_limit
[(int) RIGHT_SHIFT(tmp12
+ tmp2
,
4032 outptr
[2] = range_limit
[(int) RIGHT_SHIFT(tmp12
- tmp2
,
4036 wsptr
+= 4; /* advance pointer to next row */
4042 * Perform dequantization and inverse DCT on one block of coefficients,
4043 * producing a 2x1 output block.
4045 * 1-point IDCT in pass 1 (columns), 2-point in pass 2 (rows).
4049 jpeg_idct_2x1 (j_decompress_ptr cinfo
, jpeg_component_info
* compptr
,
4050 JCOEFPTR coef_block
,
4051 JSAMPARRAY output_buf
, JDIMENSION output_col
)
4054 ISLOW_MULT_TYPE
* quantptr
;
4056 JSAMPLE
*range_limit
= IDCT_range_limit(cinfo
);
4059 /* Pass 1: empty. */
4061 /* Pass 2: process 1 row from input, store into output array. */
4063 quantptr
= (ISLOW_MULT_TYPE
*) compptr
->dct_table
;
4064 outptr
= output_buf
[0] + output_col
;
4068 tmp0
= DEQUANTIZE(coef_block
[0], quantptr
[0]);
4069 /* Add range center and fudge factor for final descale and range-limit. */
4070 tmp0
+= (((DCTELEM
) RANGE_CENTER
) << 3) + (1 << 2);
4074 tmp1
= DEQUANTIZE(coef_block
[1], quantptr
[1]);
4076 /* Final output stage */
4078 outptr
[0] = range_limit
[(int) IRIGHT_SHIFT(tmp0
+ tmp1
, 3) & RANGE_MASK
];
4079 outptr
[1] = range_limit
[(int) IRIGHT_SHIFT(tmp0
- tmp1
, 3) & RANGE_MASK
];
4084 * Perform dequantization and inverse DCT on one block of coefficients,
4085 * producing an 8x16 output block.
4087 * 16-point IDCT in pass 1 (columns), 8-point in pass 2 (rows).
4091 jpeg_idct_8x16 (j_decompress_ptr cinfo
, jpeg_component_info
* compptr
,
4092 JCOEFPTR coef_block
,
4093 JSAMPARRAY output_buf
, JDIMENSION output_col
)
4095 INT32 tmp0
, tmp1
, tmp2
, tmp3
, tmp10
, tmp11
, tmp12
, tmp13
;
4096 INT32 tmp20
, tmp21
, tmp22
, tmp23
, tmp24
, tmp25
, tmp26
, tmp27
;
4097 INT32 z1
, z2
, z3
, z4
;
4099 ISLOW_MULT_TYPE
* quantptr
;
4102 JSAMPLE
*range_limit
= IDCT_range_limit(cinfo
);
4104 int workspace
[8*16]; /* buffers data between passes */
4107 /* Pass 1: process columns from input, store into work array.
4108 * 16-point IDCT kernel, cK represents sqrt(2) * cos(K*pi/32).
4112 quantptr
= (ISLOW_MULT_TYPE
*) compptr
->dct_table
;
4114 for (ctr
= 0; ctr
< 8; ctr
++, inptr
++, quantptr
++, wsptr
++) {
4117 tmp0
= DEQUANTIZE(inptr
[DCTSIZE
*0], quantptr
[DCTSIZE
*0]);
4118 tmp0
<<= CONST_BITS
;
4119 /* Add fudge factor here for final descale. */
4120 tmp0
+= ONE
<< (CONST_BITS
-PASS1_BITS
-1);
4122 z1
= DEQUANTIZE(inptr
[DCTSIZE
*4], quantptr
[DCTSIZE
*4]);
4123 tmp1
= MULTIPLY(z1
, FIX(1.306562965)); /* c4[16] = c2[8] */
4124 tmp2
= MULTIPLY(z1
, FIX_0_541196100
); /* c12[16] = c6[8] */
4126 tmp10
= tmp0
+ tmp1
;
4127 tmp11
= tmp0
- tmp1
;
4128 tmp12
= tmp0
+ tmp2
;
4129 tmp13
= tmp0
- tmp2
;
4131 z1
= DEQUANTIZE(inptr
[DCTSIZE
*2], quantptr
[DCTSIZE
*2]);
4132 z2
= DEQUANTIZE(inptr
[DCTSIZE
*6], quantptr
[DCTSIZE
*6]);
4134 z4
= MULTIPLY(z3
, FIX(0.275899379)); /* c14[16] = c7[8] */
4135 z3
= MULTIPLY(z3
, FIX(1.387039845)); /* c2[16] = c1[8] */
4137 tmp0
= z3
+ MULTIPLY(z2
, FIX_2_562915447
); /* (c6+c2)[16] = (c3+c1)[8] */
4138 tmp1
= z4
+ MULTIPLY(z1
, FIX_0_899976223
); /* (c6-c14)[16] = (c3-c7)[8] */
4139 tmp2
= z3
- MULTIPLY(z1
, FIX(0.601344887)); /* (c2-c10)[16] = (c1-c5)[8] */
4140 tmp3
= z4
- MULTIPLY(z2
, FIX(0.509795579)); /* (c10-c14)[16] = (c5-c7)[8] */
4142 tmp20
= tmp10
+ tmp0
;
4143 tmp27
= tmp10
- tmp0
;
4144 tmp21
= tmp12
+ tmp1
;
4145 tmp26
= tmp12
- tmp1
;
4146 tmp22
= tmp13
+ tmp2
;
4147 tmp25
= tmp13
- tmp2
;
4148 tmp23
= tmp11
+ tmp3
;
4149 tmp24
= tmp11
- tmp3
;
4153 z1
= DEQUANTIZE(inptr
[DCTSIZE
*1], quantptr
[DCTSIZE
*1]);
4154 z2
= DEQUANTIZE(inptr
[DCTSIZE
*3], quantptr
[DCTSIZE
*3]);
4155 z3
= DEQUANTIZE(inptr
[DCTSIZE
*5], quantptr
[DCTSIZE
*5]);
4156 z4
= DEQUANTIZE(inptr
[DCTSIZE
*7], quantptr
[DCTSIZE
*7]);
4160 tmp1
= MULTIPLY(z1
+ z2
, FIX(1.353318001)); /* c3 */
4161 tmp2
= MULTIPLY(tmp11
, FIX(1.247225013)); /* c5 */
4162 tmp3
= MULTIPLY(z1
+ z4
, FIX(1.093201867)); /* c7 */
4163 tmp10
= MULTIPLY(z1
- z4
, FIX(0.897167586)); /* c9 */
4164 tmp11
= MULTIPLY(tmp11
, FIX(0.666655658)); /* c11 */
4165 tmp12
= MULTIPLY(z1
- z2
, FIX(0.410524528)); /* c13 */
4166 tmp0
= tmp1
+ tmp2
+ tmp3
-
4167 MULTIPLY(z1
, FIX(2.286341144)); /* c7+c5+c3-c1 */
4168 tmp13
= tmp10
+ tmp11
+ tmp12
-
4169 MULTIPLY(z1
, FIX(1.835730603)); /* c9+c11+c13-c15 */
4170 z1
= MULTIPLY(z2
+ z3
, FIX(0.138617169)); /* c15 */
4171 tmp1
+= z1
+ MULTIPLY(z2
, FIX(0.071888074)); /* c9+c11-c3-c15 */
4172 tmp2
+= z1
- MULTIPLY(z3
, FIX(1.125726048)); /* c5+c7+c15-c3 */
4173 z1
= MULTIPLY(z3
- z2
, FIX(1.407403738)); /* c1 */
4174 tmp11
+= z1
- MULTIPLY(z3
, FIX(0.766367282)); /* c1+c11-c9-c13 */
4175 tmp12
+= z1
+ MULTIPLY(z2
, FIX(1.971951411)); /* c1+c5+c13-c7 */
4177 z1
= MULTIPLY(z2
, - FIX(0.666655658)); /* -c11 */
4179 tmp3
+= z1
+ MULTIPLY(z4
, FIX(1.065388962)); /* c3+c11+c15-c7 */
4180 z2
= MULTIPLY(z2
, - FIX(1.247225013)); /* -c5 */
4181 tmp10
+= z2
+ MULTIPLY(z4
, FIX(3.141271809)); /* c1+c5+c9-c13 */
4183 z2
= MULTIPLY(z3
+ z4
, - FIX(1.353318001)); /* -c3 */
4186 z2
= MULTIPLY(z4
- z3
, FIX(0.410524528)); /* c13 */
4190 /* Final output stage */
4192 wsptr
[8*0] = (int) RIGHT_SHIFT(tmp20
+ tmp0
, CONST_BITS
-PASS1_BITS
);
4193 wsptr
[8*15] = (int) RIGHT_SHIFT(tmp20
- tmp0
, CONST_BITS
-PASS1_BITS
);
4194 wsptr
[8*1] = (int) RIGHT_SHIFT(tmp21
+ tmp1
, CONST_BITS
-PASS1_BITS
);
4195 wsptr
[8*14] = (int) RIGHT_SHIFT(tmp21
- tmp1
, CONST_BITS
-PASS1_BITS
);
4196 wsptr
[8*2] = (int) RIGHT_SHIFT(tmp22
+ tmp2
, CONST_BITS
-PASS1_BITS
);
4197 wsptr
[8*13] = (int) RIGHT_SHIFT(tmp22
- tmp2
, CONST_BITS
-PASS1_BITS
);
4198 wsptr
[8*3] = (int) RIGHT_SHIFT(tmp23
+ tmp3
, CONST_BITS
-PASS1_BITS
);
4199 wsptr
[8*12] = (int) RIGHT_SHIFT(tmp23
- tmp3
, CONST_BITS
-PASS1_BITS
);
4200 wsptr
[8*4] = (int) RIGHT_SHIFT(tmp24
+ tmp10
, CONST_BITS
-PASS1_BITS
);
4201 wsptr
[8*11] = (int) RIGHT_SHIFT(tmp24
- tmp10
, CONST_BITS
-PASS1_BITS
);
4202 wsptr
[8*5] = (int) RIGHT_SHIFT(tmp25
+ tmp11
, CONST_BITS
-PASS1_BITS
);
4203 wsptr
[8*10] = (int) RIGHT_SHIFT(tmp25
- tmp11
, CONST_BITS
-PASS1_BITS
);
4204 wsptr
[8*6] = (int) RIGHT_SHIFT(tmp26
+ tmp12
, CONST_BITS
-PASS1_BITS
);
4205 wsptr
[8*9] = (int) RIGHT_SHIFT(tmp26
- tmp12
, CONST_BITS
-PASS1_BITS
);
4206 wsptr
[8*7] = (int) RIGHT_SHIFT(tmp27
+ tmp13
, CONST_BITS
-PASS1_BITS
);
4207 wsptr
[8*8] = (int) RIGHT_SHIFT(tmp27
- tmp13
, CONST_BITS
-PASS1_BITS
);
4210 /* Pass 2: process rows from work array, store into output array.
4211 * Note that we must descale the results by a factor of 8 == 2**3,
4212 * and also undo the PASS1_BITS scaling.
4213 * 8-point IDCT kernel, cK represents sqrt(2) * cos(K*pi/16).
4217 for (ctr
= 0; ctr
< 16; ctr
++) {
4218 outptr
= output_buf
[ctr
] + output_col
;
4220 /* Even part: reverse the even part of the forward DCT.
4221 * The rotator is c(-6).
4224 /* Add range center and fudge factor for final descale and range-limit. */
4225 z2
= (INT32
) wsptr
[0] +
4226 ((((INT32
) RANGE_CENTER
) << (PASS1_BITS
+3)) +
4227 (ONE
<< (PASS1_BITS
+2)));
4228 z3
= (INT32
) wsptr
[4];
4230 tmp0
= (z2
+ z3
) << CONST_BITS
;
4231 tmp1
= (z2
- z3
) << CONST_BITS
;
4233 z2
= (INT32
) wsptr
[2];
4234 z3
= (INT32
) wsptr
[6];
4236 z1
= MULTIPLY(z2
+ z3
, FIX_0_541196100
); /* c6 */
4237 tmp2
= z1
+ MULTIPLY(z2
, FIX_0_765366865
); /* c2-c6 */
4238 tmp3
= z1
- MULTIPLY(z3
, FIX_1_847759065
); /* c2+c6 */
4240 tmp10
= tmp0
+ tmp2
;
4241 tmp13
= tmp0
- tmp2
;
4242 tmp11
= tmp1
+ tmp3
;
4243 tmp12
= tmp1
- tmp3
;
4245 /* Odd part per figure 8; the matrix is unitary and hence its
4246 * transpose is its inverse. i0..i3 are y7,y5,y3,y1 respectively.
4249 tmp0
= (INT32
) wsptr
[7];
4250 tmp1
= (INT32
) wsptr
[5];
4251 tmp2
= (INT32
) wsptr
[3];
4252 tmp3
= (INT32
) wsptr
[1];
4257 z1
= MULTIPLY(z2
+ z3
, FIX_1_175875602
); /* c3 */
4258 z2
= MULTIPLY(z2
, - FIX_1_961570560
); /* -c3-c5 */
4259 z3
= MULTIPLY(z3
, - FIX_0_390180644
); /* -c3+c5 */
4263 z1
= MULTIPLY(tmp0
+ tmp3
, - FIX_0_899976223
); /* -c3+c7 */
4264 tmp0
= MULTIPLY(tmp0
, FIX_0_298631336
); /* -c1+c3+c5-c7 */
4265 tmp3
= MULTIPLY(tmp3
, FIX_1_501321110
); /* c1+c3-c5-c7 */
4269 z1
= MULTIPLY(tmp1
+ tmp2
, - FIX_2_562915447
); /* -c1-c3 */
4270 tmp1
= MULTIPLY(tmp1
, FIX_2_053119869
); /* c1+c3-c5+c7 */
4271 tmp2
= MULTIPLY(tmp2
, FIX_3_072711026
); /* c1+c3+c5-c7 */
4275 /* Final output stage: inputs are tmp10..tmp13, tmp0..tmp3 */
4277 outptr
[0] = range_limit
[(int) RIGHT_SHIFT(tmp10
+ tmp3
,
4278 CONST_BITS
+PASS1_BITS
+3)
4280 outptr
[7] = range_limit
[(int) RIGHT_SHIFT(tmp10
- tmp3
,
4281 CONST_BITS
+PASS1_BITS
+3)
4283 outptr
[1] = range_limit
[(int) RIGHT_SHIFT(tmp11
+ tmp2
,
4284 CONST_BITS
+PASS1_BITS
+3)
4286 outptr
[6] = range_limit
[(int) RIGHT_SHIFT(tmp11
- tmp2
,
4287 CONST_BITS
+PASS1_BITS
+3)
4289 outptr
[2] = range_limit
[(int) RIGHT_SHIFT(tmp12
+ tmp1
,
4290 CONST_BITS
+PASS1_BITS
+3)
4292 outptr
[5] = range_limit
[(int) RIGHT_SHIFT(tmp12
- tmp1
,
4293 CONST_BITS
+PASS1_BITS
+3)
4295 outptr
[3] = range_limit
[(int) RIGHT_SHIFT(tmp13
+ tmp0
,
4296 CONST_BITS
+PASS1_BITS
+3)
4298 outptr
[4] = range_limit
[(int) RIGHT_SHIFT(tmp13
- tmp0
,
4299 CONST_BITS
+PASS1_BITS
+3)
4302 wsptr
+= DCTSIZE
; /* advance pointer to next row */
4308 * Perform dequantization and inverse DCT on one block of coefficients,
4309 * producing a 7x14 output block.
4311 * 14-point IDCT in pass 1 (columns), 7-point in pass 2 (rows).
4315 jpeg_idct_7x14 (j_decompress_ptr cinfo
, jpeg_component_info
* compptr
,
4316 JCOEFPTR coef_block
,
4317 JSAMPARRAY output_buf
, JDIMENSION output_col
)
4319 INT32 tmp10
, tmp11
, tmp12
, tmp13
, tmp14
, tmp15
, tmp16
;
4320 INT32 tmp20
, tmp21
, tmp22
, tmp23
, tmp24
, tmp25
, tmp26
;
4321 INT32 z1
, z2
, z3
, z4
;
4323 ISLOW_MULT_TYPE
* quantptr
;
4326 JSAMPLE
*range_limit
= IDCT_range_limit(cinfo
);
4328 int workspace
[7*14]; /* buffers data between passes */
4331 /* Pass 1: process columns from input, store into work array.
4332 * 14-point IDCT kernel, cK represents sqrt(2) * cos(K*pi/28).
4336 quantptr
= (ISLOW_MULT_TYPE
*) compptr
->dct_table
;
4338 for (ctr
= 0; ctr
< 7; ctr
++, inptr
++, quantptr
++, wsptr
++) {
4341 z1
= DEQUANTIZE(inptr
[DCTSIZE
*0], quantptr
[DCTSIZE
*0]);
4343 /* Add fudge factor here for final descale. */
4344 z1
+= ONE
<< (CONST_BITS
-PASS1_BITS
-1);
4345 z4
= DEQUANTIZE(inptr
[DCTSIZE
*4], quantptr
[DCTSIZE
*4]);
4346 z2
= MULTIPLY(z4
, FIX(1.274162392)); /* c4 */
4347 z3
= MULTIPLY(z4
, FIX(0.314692123)); /* c12 */
4348 z4
= MULTIPLY(z4
, FIX(0.881747734)); /* c8 */
4354 tmp23
= RIGHT_SHIFT(z1
- ((z2
+ z3
- z4
) << 1), /* c0 = (c4+c12-c8)*2 */
4355 CONST_BITS
-PASS1_BITS
);
4357 z1
= DEQUANTIZE(inptr
[DCTSIZE
*2], quantptr
[DCTSIZE
*2]);
4358 z2
= DEQUANTIZE(inptr
[DCTSIZE
*6], quantptr
[DCTSIZE
*6]);
4360 z3
= MULTIPLY(z1
+ z2
, FIX(1.105676686)); /* c6 */
4362 tmp13
= z3
+ MULTIPLY(z1
, FIX(0.273079590)); /* c2-c6 */
4363 tmp14
= z3
- MULTIPLY(z2
, FIX(1.719280954)); /* c6+c10 */
4364 tmp15
= MULTIPLY(z1
, FIX(0.613604268)) - /* c10 */
4365 MULTIPLY(z2
, FIX(1.378756276)); /* c2 */
4367 tmp20
= tmp10
+ tmp13
;
4368 tmp26
= tmp10
- tmp13
;
4369 tmp21
= tmp11
+ tmp14
;
4370 tmp25
= tmp11
- tmp14
;
4371 tmp22
= tmp12
+ tmp15
;
4372 tmp24
= tmp12
- tmp15
;
4376 z1
= DEQUANTIZE(inptr
[DCTSIZE
*1], quantptr
[DCTSIZE
*1]);
4377 z2
= DEQUANTIZE(inptr
[DCTSIZE
*3], quantptr
[DCTSIZE
*3]);
4378 z3
= DEQUANTIZE(inptr
[DCTSIZE
*5], quantptr
[DCTSIZE
*5]);
4379 z4
= DEQUANTIZE(inptr
[DCTSIZE
*7], quantptr
[DCTSIZE
*7]);
4380 tmp13
= z4
<< CONST_BITS
;
4383 tmp11
= MULTIPLY(z1
+ z2
, FIX(1.334852607)); /* c3 */
4384 tmp12
= MULTIPLY(tmp14
, FIX(1.197448846)); /* c5 */
4385 tmp10
= tmp11
+ tmp12
+ tmp13
- MULTIPLY(z1
, FIX(1.126980169)); /* c3+c5-c1 */
4386 tmp14
= MULTIPLY(tmp14
, FIX(0.752406978)); /* c9 */
4387 tmp16
= tmp14
- MULTIPLY(z1
, FIX(1.061150426)); /* c9+c11-c13 */
4389 tmp15
= MULTIPLY(z1
, FIX(0.467085129)) - tmp13
; /* c11 */
4392 z4
= MULTIPLY(z2
+ z3
, - FIX(0.158341681)) - tmp13
; /* -c13 */
4393 tmp11
+= z4
- MULTIPLY(z2
, FIX(0.424103948)); /* c3-c9-c13 */
4394 tmp12
+= z4
- MULTIPLY(z3
, FIX(2.373959773)); /* c3+c5-c13 */
4395 z4
= MULTIPLY(z3
- z2
, FIX(1.405321284)); /* c1 */
4396 tmp14
+= z4
+ tmp13
- MULTIPLY(z3
, FIX(1.6906431334)); /* c1+c9-c11 */
4397 tmp15
+= z4
+ MULTIPLY(z2
, FIX(0.674957567)); /* c1+c11-c5 */
4399 tmp13
= (z1
- z3
) << PASS1_BITS
;
4401 /* Final output stage */
4403 wsptr
[7*0] = (int) RIGHT_SHIFT(tmp20
+ tmp10
, CONST_BITS
-PASS1_BITS
);
4404 wsptr
[7*13] = (int) RIGHT_SHIFT(tmp20
- tmp10
, CONST_BITS
-PASS1_BITS
);
4405 wsptr
[7*1] = (int) RIGHT_SHIFT(tmp21
+ tmp11
, CONST_BITS
-PASS1_BITS
);
4406 wsptr
[7*12] = (int) RIGHT_SHIFT(tmp21
- tmp11
, CONST_BITS
-PASS1_BITS
);
4407 wsptr
[7*2] = (int) RIGHT_SHIFT(tmp22
+ tmp12
, CONST_BITS
-PASS1_BITS
);
4408 wsptr
[7*11] = (int) RIGHT_SHIFT(tmp22
- tmp12
, CONST_BITS
-PASS1_BITS
);
4409 wsptr
[7*3] = (int) (tmp23
+ tmp13
);
4410 wsptr
[7*10] = (int) (tmp23
- tmp13
);
4411 wsptr
[7*4] = (int) RIGHT_SHIFT(tmp24
+ tmp14
, CONST_BITS
-PASS1_BITS
);
4412 wsptr
[7*9] = (int) RIGHT_SHIFT(tmp24
- tmp14
, CONST_BITS
-PASS1_BITS
);
4413 wsptr
[7*5] = (int) RIGHT_SHIFT(tmp25
+ tmp15
, CONST_BITS
-PASS1_BITS
);
4414 wsptr
[7*8] = (int) RIGHT_SHIFT(tmp25
- tmp15
, CONST_BITS
-PASS1_BITS
);
4415 wsptr
[7*6] = (int) RIGHT_SHIFT(tmp26
+ tmp16
, CONST_BITS
-PASS1_BITS
);
4416 wsptr
[7*7] = (int) RIGHT_SHIFT(tmp26
- tmp16
, CONST_BITS
-PASS1_BITS
);
4419 /* Pass 2: process 14 rows from work array, store into output array.
4420 * 7-point IDCT kernel, cK represents sqrt(2) * cos(K*pi/14).
4424 for (ctr
= 0; ctr
< 14; ctr
++) {
4425 outptr
= output_buf
[ctr
] + output_col
;
4429 /* Add range center and fudge factor for final descale and range-limit. */
4430 tmp23
= (INT32
) wsptr
[0] +
4431 ((((INT32
) RANGE_CENTER
) << (PASS1_BITS
+3)) +
4432 (ONE
<< (PASS1_BITS
+2)));
4433 tmp23
<<= CONST_BITS
;
4435 z1
= (INT32
) wsptr
[2];
4436 z2
= (INT32
) wsptr
[4];
4437 z3
= (INT32
) wsptr
[6];
4439 tmp20
= MULTIPLY(z2
- z3
, FIX(0.881747734)); /* c4 */
4440 tmp22
= MULTIPLY(z1
- z2
, FIX(0.314692123)); /* c6 */
4441 tmp21
= tmp20
+ tmp22
+ tmp23
- MULTIPLY(z2
, FIX(1.841218003)); /* c2+c4-c6 */
4444 tmp10
= MULTIPLY(tmp10
, FIX(1.274162392)) + tmp23
; /* c2 */
4445 tmp20
+= tmp10
- MULTIPLY(z3
, FIX(0.077722536)); /* c2-c4-c6 */
4446 tmp22
+= tmp10
- MULTIPLY(z1
, FIX(2.470602249)); /* c2+c4+c6 */
4447 tmp23
+= MULTIPLY(z2
, FIX(1.414213562)); /* c0 */
4451 z1
= (INT32
) wsptr
[1];
4452 z2
= (INT32
) wsptr
[3];
4453 z3
= (INT32
) wsptr
[5];
4455 tmp11
= MULTIPLY(z1
+ z2
, FIX(0.935414347)); /* (c3+c1-c5)/2 */
4456 tmp12
= MULTIPLY(z1
- z2
, FIX(0.170262339)); /* (c3+c5-c1)/2 */
4457 tmp10
= tmp11
- tmp12
;
4459 tmp12
= MULTIPLY(z2
+ z3
, - FIX(1.378756276)); /* -c1 */
4461 z2
= MULTIPLY(z1
+ z3
, FIX(0.613604268)); /* c5 */
4463 tmp12
+= z2
+ MULTIPLY(z3
, FIX(1.870828693)); /* c3+c1-c5 */
4465 /* Final output stage */
4467 outptr
[0] = range_limit
[(int) RIGHT_SHIFT(tmp20
+ tmp10
,
4468 CONST_BITS
+PASS1_BITS
+3)
4470 outptr
[6] = range_limit
[(int) RIGHT_SHIFT(tmp20
- tmp10
,
4471 CONST_BITS
+PASS1_BITS
+3)
4473 outptr
[1] = range_limit
[(int) RIGHT_SHIFT(tmp21
+ tmp11
,
4474 CONST_BITS
+PASS1_BITS
+3)
4476 outptr
[5] = range_limit
[(int) RIGHT_SHIFT(tmp21
- tmp11
,
4477 CONST_BITS
+PASS1_BITS
+3)
4479 outptr
[2] = range_limit
[(int) RIGHT_SHIFT(tmp22
+ tmp12
,
4480 CONST_BITS
+PASS1_BITS
+3)
4482 outptr
[4] = range_limit
[(int) RIGHT_SHIFT(tmp22
- tmp12
,
4483 CONST_BITS
+PASS1_BITS
+3)
4485 outptr
[3] = range_limit
[(int) RIGHT_SHIFT(tmp23
,
4486 CONST_BITS
+PASS1_BITS
+3)
4489 wsptr
+= 7; /* advance pointer to next row */
4495 * Perform dequantization and inverse DCT on one block of coefficients,
4496 * producing a 6x12 output block.
4498 * 12-point IDCT in pass 1 (columns), 6-point in pass 2 (rows).
4502 jpeg_idct_6x12 (j_decompress_ptr cinfo
, jpeg_component_info
* compptr
,
4503 JCOEFPTR coef_block
,
4504 JSAMPARRAY output_buf
, JDIMENSION output_col
)
4506 INT32 tmp10
, tmp11
, tmp12
, tmp13
, tmp14
, tmp15
;
4507 INT32 tmp20
, tmp21
, tmp22
, tmp23
, tmp24
, tmp25
;
4508 INT32 z1
, z2
, z3
, z4
;
4510 ISLOW_MULT_TYPE
* quantptr
;
4513 JSAMPLE
*range_limit
= IDCT_range_limit(cinfo
);
4515 int workspace
[6*12]; /* buffers data between passes */
4518 /* Pass 1: process columns from input, store into work array.
4519 * 12-point IDCT kernel, cK represents sqrt(2) * cos(K*pi/24).
4523 quantptr
= (ISLOW_MULT_TYPE
*) compptr
->dct_table
;
4525 for (ctr
= 0; ctr
< 6; ctr
++, inptr
++, quantptr
++, wsptr
++) {
4528 z3
= DEQUANTIZE(inptr
[DCTSIZE
*0], quantptr
[DCTSIZE
*0]);
4530 /* Add fudge factor here for final descale. */
4531 z3
+= ONE
<< (CONST_BITS
-PASS1_BITS
-1);
4533 z4
= DEQUANTIZE(inptr
[DCTSIZE
*4], quantptr
[DCTSIZE
*4]);
4534 z4
= MULTIPLY(z4
, FIX(1.224744871)); /* c4 */
4539 z1
= DEQUANTIZE(inptr
[DCTSIZE
*2], quantptr
[DCTSIZE
*2]);
4540 z4
= MULTIPLY(z1
, FIX(1.366025404)); /* c2 */
4542 z2
= DEQUANTIZE(inptr
[DCTSIZE
*6], quantptr
[DCTSIZE
*6]);
4552 tmp20
= tmp10
+ tmp12
;
4553 tmp25
= tmp10
- tmp12
;
4555 tmp12
= z4
- z1
- z2
;
4557 tmp22
= tmp11
+ tmp12
;
4558 tmp23
= tmp11
- tmp12
;
4562 z1
= DEQUANTIZE(inptr
[DCTSIZE
*1], quantptr
[DCTSIZE
*1]);
4563 z2
= DEQUANTIZE(inptr
[DCTSIZE
*3], quantptr
[DCTSIZE
*3]);
4564 z3
= DEQUANTIZE(inptr
[DCTSIZE
*5], quantptr
[DCTSIZE
*5]);
4565 z4
= DEQUANTIZE(inptr
[DCTSIZE
*7], quantptr
[DCTSIZE
*7]);
4567 tmp11
= MULTIPLY(z2
, FIX(1.306562965)); /* c3 */
4568 tmp14
= MULTIPLY(z2
, - FIX_0_541196100
); /* -c9 */
4571 tmp15
= MULTIPLY(tmp10
+ z4
, FIX(0.860918669)); /* c7 */
4572 tmp12
= tmp15
+ MULTIPLY(tmp10
, FIX(0.261052384)); /* c5-c7 */
4573 tmp10
= tmp12
+ tmp11
+ MULTIPLY(z1
, FIX(0.280143716)); /* c1-c5 */
4574 tmp13
= MULTIPLY(z3
+ z4
, - FIX(1.045510580)); /* -(c7+c11) */
4575 tmp12
+= tmp13
+ tmp14
- MULTIPLY(z3
, FIX(1.478575242)); /* c1+c5-c7-c11 */
4576 tmp13
+= tmp15
- tmp11
+ MULTIPLY(z4
, FIX(1.586706681)); /* c1+c11 */
4577 tmp15
+= tmp14
- MULTIPLY(z1
, FIX(0.676326758)) - /* c7-c11 */
4578 MULTIPLY(z4
, FIX(1.982889723)); /* c5+c7 */
4582 z3
= MULTIPLY(z1
+ z2
, FIX_0_541196100
); /* c9 */
4583 tmp11
= z3
+ MULTIPLY(z1
, FIX_0_765366865
); /* c3-c9 */
4584 tmp14
= z3
- MULTIPLY(z2
, FIX_1_847759065
); /* c3+c9 */
4586 /* Final output stage */
4588 wsptr
[6*0] = (int) RIGHT_SHIFT(tmp20
+ tmp10
, CONST_BITS
-PASS1_BITS
);
4589 wsptr
[6*11] = (int) RIGHT_SHIFT(tmp20
- tmp10
, CONST_BITS
-PASS1_BITS
);
4590 wsptr
[6*1] = (int) RIGHT_SHIFT(tmp21
+ tmp11
, CONST_BITS
-PASS1_BITS
);
4591 wsptr
[6*10] = (int) RIGHT_SHIFT(tmp21
- tmp11
, CONST_BITS
-PASS1_BITS
);
4592 wsptr
[6*2] = (int) RIGHT_SHIFT(tmp22
+ tmp12
, CONST_BITS
-PASS1_BITS
);
4593 wsptr
[6*9] = (int) RIGHT_SHIFT(tmp22
- tmp12
, CONST_BITS
-PASS1_BITS
);
4594 wsptr
[6*3] = (int) RIGHT_SHIFT(tmp23
+ tmp13
, CONST_BITS
-PASS1_BITS
);
4595 wsptr
[6*8] = (int) RIGHT_SHIFT(tmp23
- tmp13
, CONST_BITS
-PASS1_BITS
);
4596 wsptr
[6*4] = (int) RIGHT_SHIFT(tmp24
+ tmp14
, CONST_BITS
-PASS1_BITS
);
4597 wsptr
[6*7] = (int) RIGHT_SHIFT(tmp24
- tmp14
, CONST_BITS
-PASS1_BITS
);
4598 wsptr
[6*5] = (int) RIGHT_SHIFT(tmp25
+ tmp15
, CONST_BITS
-PASS1_BITS
);
4599 wsptr
[6*6] = (int) RIGHT_SHIFT(tmp25
- tmp15
, CONST_BITS
-PASS1_BITS
);
4602 /* Pass 2: process 12 rows from work array, store into output array.
4603 * 6-point IDCT kernel, cK represents sqrt(2) * cos(K*pi/12).
4607 for (ctr
= 0; ctr
< 12; ctr
++) {
4608 outptr
= output_buf
[ctr
] + output_col
;
4612 /* Add range center and fudge factor for final descale and range-limit. */
4613 tmp10
= (INT32
) wsptr
[0] +
4614 ((((INT32
) RANGE_CENTER
) << (PASS1_BITS
+3)) +
4615 (ONE
<< (PASS1_BITS
+2)));
4616 tmp10
<<= CONST_BITS
;
4617 tmp12
= (INT32
) wsptr
[4];
4618 tmp20
= MULTIPLY(tmp12
, FIX(0.707106781)); /* c4 */
4619 tmp11
= tmp10
+ tmp20
;
4620 tmp21
= tmp10
- tmp20
- tmp20
;
4621 tmp20
= (INT32
) wsptr
[2];
4622 tmp10
= MULTIPLY(tmp20
, FIX(1.224744871)); /* c2 */
4623 tmp20
= tmp11
+ tmp10
;
4624 tmp22
= tmp11
- tmp10
;
4628 z1
= (INT32
) wsptr
[1];
4629 z2
= (INT32
) wsptr
[3];
4630 z3
= (INT32
) wsptr
[5];
4631 tmp11
= MULTIPLY(z1
+ z3
, FIX(0.366025404)); /* c5 */
4632 tmp10
= tmp11
+ ((z1
+ z2
) << CONST_BITS
);
4633 tmp12
= tmp11
+ ((z3
- z2
) << CONST_BITS
);
4634 tmp11
= (z1
- z2
- z3
) << CONST_BITS
;
4636 /* Final output stage */
4638 outptr
[0] = range_limit
[(int) RIGHT_SHIFT(tmp20
+ tmp10
,
4639 CONST_BITS
+PASS1_BITS
+3)
4641 outptr
[5] = range_limit
[(int) RIGHT_SHIFT(tmp20
- tmp10
,
4642 CONST_BITS
+PASS1_BITS
+3)
4644 outptr
[1] = range_limit
[(int) RIGHT_SHIFT(tmp21
+ tmp11
,
4645 CONST_BITS
+PASS1_BITS
+3)
4647 outptr
[4] = range_limit
[(int) RIGHT_SHIFT(tmp21
- tmp11
,
4648 CONST_BITS
+PASS1_BITS
+3)
4650 outptr
[2] = range_limit
[(int) RIGHT_SHIFT(tmp22
+ tmp12
,
4651 CONST_BITS
+PASS1_BITS
+3)
4653 outptr
[3] = range_limit
[(int) RIGHT_SHIFT(tmp22
- tmp12
,
4654 CONST_BITS
+PASS1_BITS
+3)
4657 wsptr
+= 6; /* advance pointer to next row */
4663 * Perform dequantization and inverse DCT on one block of coefficients,
4664 * producing a 5x10 output block.
4666 * 10-point IDCT in pass 1 (columns), 5-point in pass 2 (rows).
4670 jpeg_idct_5x10 (j_decompress_ptr cinfo
, jpeg_component_info
* compptr
,
4671 JCOEFPTR coef_block
,
4672 JSAMPARRAY output_buf
, JDIMENSION output_col
)
4674 INT32 tmp10
, tmp11
, tmp12
, tmp13
, tmp14
;
4675 INT32 tmp20
, tmp21
, tmp22
, tmp23
, tmp24
;
4676 INT32 z1
, z2
, z3
, z4
, z5
;
4678 ISLOW_MULT_TYPE
* quantptr
;
4681 JSAMPLE
*range_limit
= IDCT_range_limit(cinfo
);
4683 int workspace
[5*10]; /* buffers data between passes */
4686 /* Pass 1: process columns from input, store into work array.
4687 * 10-point IDCT kernel, cK represents sqrt(2) * cos(K*pi/20).
4691 quantptr
= (ISLOW_MULT_TYPE
*) compptr
->dct_table
;
4693 for (ctr
= 0; ctr
< 5; ctr
++, inptr
++, quantptr
++, wsptr
++) {
4696 z3
= DEQUANTIZE(inptr
[DCTSIZE
*0], quantptr
[DCTSIZE
*0]);
4698 /* Add fudge factor here for final descale. */
4699 z3
+= ONE
<< (CONST_BITS
-PASS1_BITS
-1);
4700 z4
= DEQUANTIZE(inptr
[DCTSIZE
*4], quantptr
[DCTSIZE
*4]);
4701 z1
= MULTIPLY(z4
, FIX(1.144122806)); /* c4 */
4702 z2
= MULTIPLY(z4
, FIX(0.437016024)); /* c8 */
4706 tmp22
= RIGHT_SHIFT(z3
- ((z1
- z2
) << 1), /* c0 = (c4-c8)*2 */
4707 CONST_BITS
-PASS1_BITS
);
4709 z2
= DEQUANTIZE(inptr
[DCTSIZE
*2], quantptr
[DCTSIZE
*2]);
4710 z3
= DEQUANTIZE(inptr
[DCTSIZE
*6], quantptr
[DCTSIZE
*6]);
4712 z1
= MULTIPLY(z2
+ z3
, FIX(0.831253876)); /* c6 */
4713 tmp12
= z1
+ MULTIPLY(z2
, FIX(0.513743148)); /* c2-c6 */
4714 tmp13
= z1
- MULTIPLY(z3
, FIX(2.176250899)); /* c2+c6 */
4716 tmp20
= tmp10
+ tmp12
;
4717 tmp24
= tmp10
- tmp12
;
4718 tmp21
= tmp11
+ tmp13
;
4719 tmp23
= tmp11
- tmp13
;
4723 z1
= DEQUANTIZE(inptr
[DCTSIZE
*1], quantptr
[DCTSIZE
*1]);
4724 z2
= DEQUANTIZE(inptr
[DCTSIZE
*3], quantptr
[DCTSIZE
*3]);
4725 z3
= DEQUANTIZE(inptr
[DCTSIZE
*5], quantptr
[DCTSIZE
*5]);
4726 z4
= DEQUANTIZE(inptr
[DCTSIZE
*7], quantptr
[DCTSIZE
*7]);
4731 tmp12
= MULTIPLY(tmp13
, FIX(0.309016994)); /* (c3-c7)/2 */
4732 z5
= z3
<< CONST_BITS
;
4734 z2
= MULTIPLY(tmp11
, FIX(0.951056516)); /* (c3+c7)/2 */
4737 tmp10
= MULTIPLY(z1
, FIX(1.396802247)) + z2
+ z4
; /* c1 */
4738 tmp14
= MULTIPLY(z1
, FIX(0.221231742)) - z2
+ z4
; /* c9 */
4740 z2
= MULTIPLY(tmp11
, FIX(0.587785252)); /* (c1-c9)/2 */
4741 z4
= z5
- tmp12
- (tmp13
<< (CONST_BITS
- 1));
4743 tmp12
= (z1
- tmp13
- z3
) << PASS1_BITS
;
4745 tmp11
= MULTIPLY(z1
, FIX(1.260073511)) - z2
- z4
; /* c3 */
4746 tmp13
= MULTIPLY(z1
, FIX(0.642039522)) - z2
+ z4
; /* c7 */
4748 /* Final output stage */
4750 wsptr
[5*0] = (int) RIGHT_SHIFT(tmp20
+ tmp10
, CONST_BITS
-PASS1_BITS
);
4751 wsptr
[5*9] = (int) RIGHT_SHIFT(tmp20
- tmp10
, CONST_BITS
-PASS1_BITS
);
4752 wsptr
[5*1] = (int) RIGHT_SHIFT(tmp21
+ tmp11
, CONST_BITS
-PASS1_BITS
);
4753 wsptr
[5*8] = (int) RIGHT_SHIFT(tmp21
- tmp11
, CONST_BITS
-PASS1_BITS
);
4754 wsptr
[5*2] = (int) (tmp22
+ tmp12
);
4755 wsptr
[5*7] = (int) (tmp22
- tmp12
);
4756 wsptr
[5*3] = (int) RIGHT_SHIFT(tmp23
+ tmp13
, CONST_BITS
-PASS1_BITS
);
4757 wsptr
[5*6] = (int) RIGHT_SHIFT(tmp23
- tmp13
, CONST_BITS
-PASS1_BITS
);
4758 wsptr
[5*4] = (int) RIGHT_SHIFT(tmp24
+ tmp14
, CONST_BITS
-PASS1_BITS
);
4759 wsptr
[5*5] = (int) RIGHT_SHIFT(tmp24
- tmp14
, CONST_BITS
-PASS1_BITS
);
4762 /* Pass 2: process 10 rows from work array, store into output array.
4763 * 5-point IDCT kernel, cK represents sqrt(2) * cos(K*pi/10).
4767 for (ctr
= 0; ctr
< 10; ctr
++) {
4768 outptr
= output_buf
[ctr
] + output_col
;
4772 /* Add range center and fudge factor for final descale and range-limit. */
4773 tmp12
= (INT32
) wsptr
[0] +
4774 ((((INT32
) RANGE_CENTER
) << (PASS1_BITS
+3)) +
4775 (ONE
<< (PASS1_BITS
+2)));
4776 tmp12
<<= CONST_BITS
;
4777 tmp13
= (INT32
) wsptr
[2];
4778 tmp14
= (INT32
) wsptr
[4];
4779 z1
= MULTIPLY(tmp13
+ tmp14
, FIX(0.790569415)); /* (c2+c4)/2 */
4780 z2
= MULTIPLY(tmp13
- tmp14
, FIX(0.353553391)); /* (c2-c4)/2 */
4788 z2
= (INT32
) wsptr
[1];
4789 z3
= (INT32
) wsptr
[3];
4791 z1
= MULTIPLY(z2
+ z3
, FIX(0.831253876)); /* c3 */
4792 tmp13
= z1
+ MULTIPLY(z2
, FIX(0.513743148)); /* c1-c3 */
4793 tmp14
= z1
- MULTIPLY(z3
, FIX(2.176250899)); /* c1+c3 */
4795 /* Final output stage */
4797 outptr
[0] = range_limit
[(int) RIGHT_SHIFT(tmp10
+ tmp13
,
4798 CONST_BITS
+PASS1_BITS
+3)
4800 outptr
[4] = range_limit
[(int) RIGHT_SHIFT(tmp10
- tmp13
,
4801 CONST_BITS
+PASS1_BITS
+3)
4803 outptr
[1] = range_limit
[(int) RIGHT_SHIFT(tmp11
+ tmp14
,
4804 CONST_BITS
+PASS1_BITS
+3)
4806 outptr
[3] = range_limit
[(int) RIGHT_SHIFT(tmp11
- tmp14
,
4807 CONST_BITS
+PASS1_BITS
+3)
4809 outptr
[2] = range_limit
[(int) RIGHT_SHIFT(tmp12
,
4810 CONST_BITS
+PASS1_BITS
+3)
4813 wsptr
+= 5; /* advance pointer to next row */
4819 * Perform dequantization and inverse DCT on one block of coefficients,
4820 * producing a 4x8 output block.
4822 * 8-point IDCT in pass 1 (columns), 4-point in pass 2 (rows).
4826 jpeg_idct_4x8 (j_decompress_ptr cinfo
, jpeg_component_info
* compptr
,
4827 JCOEFPTR coef_block
,
4828 JSAMPARRAY output_buf
, JDIMENSION output_col
)
4830 INT32 tmp0
, tmp1
, tmp2
, tmp3
;
4831 INT32 tmp10
, tmp11
, tmp12
, tmp13
;
4834 ISLOW_MULT_TYPE
* quantptr
;
4837 JSAMPLE
*range_limit
= IDCT_range_limit(cinfo
);
4839 int workspace
[4*8]; /* buffers data between passes */
4842 /* Pass 1: process columns from input, store into work array.
4843 * Note results are scaled up by sqrt(8) compared to a true IDCT;
4844 * furthermore, we scale the results by 2**PASS1_BITS.
4845 * 8-point IDCT kernel, cK represents sqrt(2) * cos(K*pi/16).
4849 quantptr
= (ISLOW_MULT_TYPE
*) compptr
->dct_table
;
4851 for (ctr
= 4; ctr
> 0; ctr
--) {
4852 /* Due to quantization, we will usually find that many of the input
4853 * coefficients are zero, especially the AC terms. We can exploit this
4854 * by short-circuiting the IDCT calculation for any column in which all
4855 * the AC terms are zero. In that case each output is equal to the
4856 * DC coefficient (with scale factor as needed).
4857 * With typical images and quantization tables, half or more of the
4858 * column DCT calculations can be simplified this way.
4861 if (inptr
[DCTSIZE
*1] == 0 && inptr
[DCTSIZE
*2] == 0 &&
4862 inptr
[DCTSIZE
*3] == 0 && inptr
[DCTSIZE
*4] == 0 &&
4863 inptr
[DCTSIZE
*5] == 0 && inptr
[DCTSIZE
*6] == 0 &&
4864 inptr
[DCTSIZE
*7] == 0) {
4865 /* AC terms all zero */
4866 int dcval
= DEQUANTIZE(inptr
[DCTSIZE
*0], quantptr
[DCTSIZE
*0]) << PASS1_BITS
;
4877 inptr
++; /* advance pointers to next column */
4883 /* Even part: reverse the even part of the forward DCT.
4884 * The rotator is c(-6).
4887 z2
= DEQUANTIZE(inptr
[DCTSIZE
*0], quantptr
[DCTSIZE
*0]);
4888 z3
= DEQUANTIZE(inptr
[DCTSIZE
*4], quantptr
[DCTSIZE
*4]);
4891 /* Add fudge factor here for final descale. */
4892 z2
+= ONE
<< (CONST_BITS
-PASS1_BITS
-1);
4897 z2
= DEQUANTIZE(inptr
[DCTSIZE
*2], quantptr
[DCTSIZE
*2]);
4898 z3
= DEQUANTIZE(inptr
[DCTSIZE
*6], quantptr
[DCTSIZE
*6]);
4900 z1
= MULTIPLY(z2
+ z3
, FIX_0_541196100
); /* c6 */
4901 tmp2
= z1
+ MULTIPLY(z2
, FIX_0_765366865
); /* c2-c6 */
4902 tmp3
= z1
- MULTIPLY(z3
, FIX_1_847759065
); /* c2+c6 */
4904 tmp10
= tmp0
+ tmp2
;
4905 tmp13
= tmp0
- tmp2
;
4906 tmp11
= tmp1
+ tmp3
;
4907 tmp12
= tmp1
- tmp3
;
4909 /* Odd part per figure 8; the matrix is unitary and hence its
4910 * transpose is its inverse. i0..i3 are y7,y5,y3,y1 respectively.
4913 tmp0
= DEQUANTIZE(inptr
[DCTSIZE
*7], quantptr
[DCTSIZE
*7]);
4914 tmp1
= DEQUANTIZE(inptr
[DCTSIZE
*5], quantptr
[DCTSIZE
*5]);
4915 tmp2
= DEQUANTIZE(inptr
[DCTSIZE
*3], quantptr
[DCTSIZE
*3]);
4916 tmp3
= DEQUANTIZE(inptr
[DCTSIZE
*1], quantptr
[DCTSIZE
*1]);
4921 z1
= MULTIPLY(z2
+ z3
, FIX_1_175875602
); /* c3 */
4922 z2
= MULTIPLY(z2
, - FIX_1_961570560
); /* -c3-c5 */
4923 z3
= MULTIPLY(z3
, - FIX_0_390180644
); /* -c3+c5 */
4927 z1
= MULTIPLY(tmp0
+ tmp3
, - FIX_0_899976223
); /* -c3+c7 */
4928 tmp0
= MULTIPLY(tmp0
, FIX_0_298631336
); /* -c1+c3+c5-c7 */
4929 tmp3
= MULTIPLY(tmp3
, FIX_1_501321110
); /* c1+c3-c5-c7 */
4933 z1
= MULTIPLY(tmp1
+ tmp2
, - FIX_2_562915447
); /* -c1-c3 */
4934 tmp1
= MULTIPLY(tmp1
, FIX_2_053119869
); /* c1+c3-c5+c7 */
4935 tmp2
= MULTIPLY(tmp2
, FIX_3_072711026
); /* c1+c3+c5-c7 */
4939 /* Final output stage: inputs are tmp10..tmp13, tmp0..tmp3 */
4941 wsptr
[4*0] = (int) RIGHT_SHIFT(tmp10
+ tmp3
, CONST_BITS
-PASS1_BITS
);
4942 wsptr
[4*7] = (int) RIGHT_SHIFT(tmp10
- tmp3
, CONST_BITS
-PASS1_BITS
);
4943 wsptr
[4*1] = (int) RIGHT_SHIFT(tmp11
+ tmp2
, CONST_BITS
-PASS1_BITS
);
4944 wsptr
[4*6] = (int) RIGHT_SHIFT(tmp11
- tmp2
, CONST_BITS
-PASS1_BITS
);
4945 wsptr
[4*2] = (int) RIGHT_SHIFT(tmp12
+ tmp1
, CONST_BITS
-PASS1_BITS
);
4946 wsptr
[4*5] = (int) RIGHT_SHIFT(tmp12
- tmp1
, CONST_BITS
-PASS1_BITS
);
4947 wsptr
[4*3] = (int) RIGHT_SHIFT(tmp13
+ tmp0
, CONST_BITS
-PASS1_BITS
);
4948 wsptr
[4*4] = (int) RIGHT_SHIFT(tmp13
- tmp0
, CONST_BITS
-PASS1_BITS
);
4950 inptr
++; /* advance pointers to next column */
4955 /* Pass 2: process 8 rows from work array, store into output array.
4956 * 4-point IDCT kernel,
4957 * cK represents sqrt(2) * cos(K*pi/16) [refers to 8-point IDCT].
4961 for (ctr
= 0; ctr
< 8; ctr
++) {
4962 outptr
= output_buf
[ctr
] + output_col
;
4966 /* Add range center and fudge factor for final descale and range-limit. */
4967 tmp0
= (INT32
) wsptr
[0] +
4968 ((((INT32
) RANGE_CENTER
) << (PASS1_BITS
+3)) +
4969 (ONE
<< (PASS1_BITS
+2)));
4970 tmp2
= (INT32
) wsptr
[2];
4972 tmp10
= (tmp0
+ tmp2
) << CONST_BITS
;
4973 tmp12
= (tmp0
- tmp2
) << CONST_BITS
;
4976 /* Same rotation as in the even part of the 8x8 LL&M IDCT */
4978 z2
= (INT32
) wsptr
[1];
4979 z3
= (INT32
) wsptr
[3];
4981 z1
= MULTIPLY(z2
+ z3
, FIX_0_541196100
); /* c6 */
4982 tmp0
= z1
+ MULTIPLY(z2
, FIX_0_765366865
); /* c2-c6 */
4983 tmp2
= z1
- MULTIPLY(z3
, FIX_1_847759065
); /* c2+c6 */
4985 /* Final output stage */
4987 outptr
[0] = range_limit
[(int) RIGHT_SHIFT(tmp10
+ tmp0
,
4988 CONST_BITS
+PASS1_BITS
+3)
4990 outptr
[3] = range_limit
[(int) RIGHT_SHIFT(tmp10
- tmp0
,
4991 CONST_BITS
+PASS1_BITS
+3)
4993 outptr
[1] = range_limit
[(int) RIGHT_SHIFT(tmp12
+ tmp2
,
4994 CONST_BITS
+PASS1_BITS
+3)
4996 outptr
[2] = range_limit
[(int) RIGHT_SHIFT(tmp12
- tmp2
,
4997 CONST_BITS
+PASS1_BITS
+3)
5000 wsptr
+= 4; /* advance pointer to next row */
5006 * Perform dequantization and inverse DCT on one block of coefficients,
5007 * producing a 3x6 output block.
5009 * 6-point IDCT in pass 1 (columns), 3-point in pass 2 (rows).
5013 jpeg_idct_3x6 (j_decompress_ptr cinfo
, jpeg_component_info
* compptr
,
5014 JCOEFPTR coef_block
,
5015 JSAMPARRAY output_buf
, JDIMENSION output_col
)
5017 INT32 tmp0
, tmp1
, tmp2
, tmp10
, tmp11
, tmp12
;
5020 ISLOW_MULT_TYPE
* quantptr
;
5023 JSAMPLE
*range_limit
= IDCT_range_limit(cinfo
);
5025 int workspace
[3*6]; /* buffers data between passes */
5028 /* Pass 1: process columns from input, store into work array.
5029 * 6-point IDCT kernel, cK represents sqrt(2) * cos(K*pi/12).
5033 quantptr
= (ISLOW_MULT_TYPE
*) compptr
->dct_table
;
5035 for (ctr
= 0; ctr
< 3; ctr
++, inptr
++, quantptr
++, wsptr
++) {
5038 tmp0
= DEQUANTIZE(inptr
[DCTSIZE
*0], quantptr
[DCTSIZE
*0]);
5039 tmp0
<<= CONST_BITS
;
5040 /* Add fudge factor here for final descale. */
5041 tmp0
+= ONE
<< (CONST_BITS
-PASS1_BITS
-1);
5042 tmp2
= DEQUANTIZE(inptr
[DCTSIZE
*4], quantptr
[DCTSIZE
*4]);
5043 tmp10
= MULTIPLY(tmp2
, FIX(0.707106781)); /* c4 */
5044 tmp1
= tmp0
+ tmp10
;
5045 tmp11
= RIGHT_SHIFT(tmp0
- tmp10
- tmp10
, CONST_BITS
-PASS1_BITS
);
5046 tmp10
= DEQUANTIZE(inptr
[DCTSIZE
*2], quantptr
[DCTSIZE
*2]);
5047 tmp0
= MULTIPLY(tmp10
, FIX(1.224744871)); /* c2 */
5048 tmp10
= tmp1
+ tmp0
;
5049 tmp12
= tmp1
- tmp0
;
5053 z1
= DEQUANTIZE(inptr
[DCTSIZE
*1], quantptr
[DCTSIZE
*1]);
5054 z2
= DEQUANTIZE(inptr
[DCTSIZE
*3], quantptr
[DCTSIZE
*3]);
5055 z3
= DEQUANTIZE(inptr
[DCTSIZE
*5], quantptr
[DCTSIZE
*5]);
5056 tmp1
= MULTIPLY(z1
+ z3
, FIX(0.366025404)); /* c5 */
5057 tmp0
= tmp1
+ ((z1
+ z2
) << CONST_BITS
);
5058 tmp2
= tmp1
+ ((z3
- z2
) << CONST_BITS
);
5059 tmp1
= (z1
- z2
- z3
) << PASS1_BITS
;
5061 /* Final output stage */
5063 wsptr
[3*0] = (int) RIGHT_SHIFT(tmp10
+ tmp0
, CONST_BITS
-PASS1_BITS
);
5064 wsptr
[3*5] = (int) RIGHT_SHIFT(tmp10
- tmp0
, CONST_BITS
-PASS1_BITS
);
5065 wsptr
[3*1] = (int) (tmp11
+ tmp1
);
5066 wsptr
[3*4] = (int) (tmp11
- tmp1
);
5067 wsptr
[3*2] = (int) RIGHT_SHIFT(tmp12
+ tmp2
, CONST_BITS
-PASS1_BITS
);
5068 wsptr
[3*3] = (int) RIGHT_SHIFT(tmp12
- tmp2
, CONST_BITS
-PASS1_BITS
);
5071 /* Pass 2: process 6 rows from work array, store into output array.
5072 * 3-point IDCT kernel, cK represents sqrt(2) * cos(K*pi/6).
5076 for (ctr
= 0; ctr
< 6; ctr
++) {
5077 outptr
= output_buf
[ctr
] + output_col
;
5081 /* Add range center and fudge factor for final descale and range-limit. */
5082 tmp0
= (INT32
) wsptr
[0] +
5083 ((((INT32
) RANGE_CENTER
) << (PASS1_BITS
+3)) +
5084 (ONE
<< (PASS1_BITS
+2)));
5085 tmp0
<<= CONST_BITS
;
5086 tmp2
= (INT32
) wsptr
[2];
5087 tmp12
= MULTIPLY(tmp2
, FIX(0.707106781)); /* c2 */
5088 tmp10
= tmp0
+ tmp12
;
5089 tmp2
= tmp0
- tmp12
- tmp12
;
5093 tmp12
= (INT32
) wsptr
[1];
5094 tmp0
= MULTIPLY(tmp12
, FIX(1.224744871)); /* c1 */
5096 /* Final output stage */
5098 outptr
[0] = range_limit
[(int) RIGHT_SHIFT(tmp10
+ tmp0
,
5099 CONST_BITS
+PASS1_BITS
+3)
5101 outptr
[2] = range_limit
[(int) RIGHT_SHIFT(tmp10
- tmp0
,
5102 CONST_BITS
+PASS1_BITS
+3)
5104 outptr
[1] = range_limit
[(int) RIGHT_SHIFT(tmp2
,
5105 CONST_BITS
+PASS1_BITS
+3)
5108 wsptr
+= 3; /* advance pointer to next row */
5114 * Perform dequantization and inverse DCT on one block of coefficients,
5115 * producing a 2x4 output block.
5117 * 4-point IDCT in pass 1 (columns), 2-point in pass 2 (rows).
5121 jpeg_idct_2x4 (j_decompress_ptr cinfo
, jpeg_component_info
* compptr
,
5122 JCOEFPTR coef_block
,
5123 JSAMPARRAY output_buf
, JDIMENSION output_col
)
5125 INT32 tmp0
, tmp2
, tmp10
, tmp12
;
5128 ISLOW_MULT_TYPE
* quantptr
;
5131 JSAMPLE
*range_limit
= IDCT_range_limit(cinfo
);
5133 INT32 workspace
[2*4]; /* buffers data between passes */
5136 /* Pass 1: process columns from input, store into work array.
5137 * 4-point IDCT kernel,
5138 * cK represents sqrt(2) * cos(K*pi/16) [refers to 8-point IDCT].
5142 quantptr
= (ISLOW_MULT_TYPE
*) compptr
->dct_table
;
5144 for (ctr
= 0; ctr
< 2; ctr
++, inptr
++, quantptr
++, wsptr
++) {
5147 tmp0
= DEQUANTIZE(inptr
[DCTSIZE
*0], quantptr
[DCTSIZE
*0]);
5148 tmp2
= DEQUANTIZE(inptr
[DCTSIZE
*2], quantptr
[DCTSIZE
*2]);
5150 tmp10
= (tmp0
+ tmp2
) << CONST_BITS
;
5151 tmp12
= (tmp0
- tmp2
) << CONST_BITS
;
5154 /* Same rotation as in the even part of the 8x8 LL&M IDCT */
5156 z2
= DEQUANTIZE(inptr
[DCTSIZE
*1], quantptr
[DCTSIZE
*1]);
5157 z3
= DEQUANTIZE(inptr
[DCTSIZE
*3], quantptr
[DCTSIZE
*3]);
5159 z1
= MULTIPLY(z2
+ z3
, FIX_0_541196100
); /* c6 */
5160 tmp0
= z1
+ MULTIPLY(z2
, FIX_0_765366865
); /* c2-c6 */
5161 tmp2
= z1
- MULTIPLY(z3
, FIX_1_847759065
); /* c2+c6 */
5163 /* Final output stage */
5165 wsptr
[2*0] = tmp10
+ tmp0
;
5166 wsptr
[2*3] = tmp10
- tmp0
;
5167 wsptr
[2*1] = tmp12
+ tmp2
;
5168 wsptr
[2*2] = tmp12
- tmp2
;
5171 /* Pass 2: process 4 rows from work array, store into output array. */
5174 for (ctr
= 0; ctr
< 4; ctr
++) {
5175 outptr
= output_buf
[ctr
] + output_col
;
5179 /* Add range center and fudge factor for final descale and range-limit. */
5181 ((((INT32
) RANGE_CENTER
) << (CONST_BITS
+3)) +
5182 (ONE
<< (CONST_BITS
+2)));
5188 /* Final output stage */
5190 outptr
[0] = range_limit
[(int) RIGHT_SHIFT(tmp10
+ tmp0
, CONST_BITS
+3)
5192 outptr
[1] = range_limit
[(int) RIGHT_SHIFT(tmp10
- tmp0
, CONST_BITS
+3)
5195 wsptr
+= 2; /* advance pointer to next row */
5201 * Perform dequantization and inverse DCT on one block of coefficients,
5202 * producing a 1x2 output block.
5204 * 2-point IDCT in pass 1 (columns), 1-point in pass 2 (rows).
5208 jpeg_idct_1x2 (j_decompress_ptr cinfo
, jpeg_component_info
* compptr
,
5209 JCOEFPTR coef_block
,
5210 JSAMPARRAY output_buf
, JDIMENSION output_col
)
5213 ISLOW_MULT_TYPE
* quantptr
;
5214 JSAMPLE
*range_limit
= IDCT_range_limit(cinfo
);
5217 /* Process 1 column from input, store into output array. */
5219 quantptr
= (ISLOW_MULT_TYPE
*) compptr
->dct_table
;
5223 tmp0
= DEQUANTIZE(coef_block
[DCTSIZE
*0], quantptr
[DCTSIZE
*0]);
5224 /* Add range center and fudge factor for final descale and range-limit. */
5225 tmp0
+= (((DCTELEM
) RANGE_CENTER
) << 3) + (1 << 2);
5229 tmp1
= DEQUANTIZE(coef_block
[DCTSIZE
*1], quantptr
[DCTSIZE
*1]);
5231 /* Final output stage */
5233 output_buf
[0][output_col
] =
5234 range_limit
[(int) IRIGHT_SHIFT(tmp0
+ tmp1
, 3) & RANGE_MASK
];
5235 output_buf
[1][output_col
] =
5236 range_limit
[(int) IRIGHT_SHIFT(tmp0
- tmp1
, 3) & RANGE_MASK
];
5239 #endif /* IDCT_SCALING_SUPPORTED */
5240 #endif /* DCT_ISLOW_SUPPORTED */