winex11.drv: Release window data before calling sync_window_cursor().
[wine.git] / libs / jpeg / jidctint.c
blobe30ec8c8a4666e7380dc6314242861e8d03c995e
1 /*
2 * jidctint.c
4 * Copyright (C) 1991-1998, Thomas G. Lane.
5 * Modification developed 2002-2018 by Guido Vollbeding.
6 * This file is part of the Independent JPEG Group's software.
7 * For conditions of distribution and use, see the accompanying README file.
9 * This file contains a slow-but-accurate integer implementation of the
10 * inverse DCT (Discrete Cosine Transform). In the IJG code, this routine
11 * must also perform dequantization of the input coefficients.
13 * A 2-D IDCT can be done by 1-D IDCT on each column followed by 1-D IDCT
14 * on each row (or vice versa, but it's more convenient to emit a row at
15 * a time). Direct algorithms are also available, but they are much more
16 * complex and seem not to be any faster when reduced to code.
18 * This implementation is based on an algorithm described in
19 * C. Loeffler, A. Ligtenberg and G. Moschytz, "Practical Fast 1-D DCT
20 * Algorithms with 11 Multiplications", Proc. Int'l. Conf. on Acoustics,
21 * Speech, and Signal Processing 1989 (ICASSP '89), pp. 988-991.
22 * The primary algorithm described there uses 11 multiplies and 29 adds.
23 * We use their alternate method with 12 multiplies and 32 adds.
24 * The advantage of this method is that no data path contains more than one
25 * multiplication; this allows a very simple and accurate implementation in
26 * scaled fixed-point arithmetic, with a minimal number of shifts.
28 * We also provide IDCT routines with various output sample block sizes for
29 * direct resolution reduction or enlargement and for direct resolving the
30 * common 2x1 and 1x2 subsampling cases without additional resampling: NxN
31 * (N=1...16), 2NxN, and Nx2N (N=1...8) pixels for one 8x8 input DCT block.
33 * For N<8 we simply take the corresponding low-frequency coefficients of
34 * the 8x8 input DCT block and apply an NxN point IDCT on the sub-block
35 * to yield the downscaled outputs.
36 * This can be seen as direct low-pass downsampling from the DCT domain
37 * point of view rather than the usual spatial domain point of view,
38 * yielding significant computational savings and results at least
39 * as good as common bilinear (averaging) spatial downsampling.
41 * For N>8 we apply a partial NxN IDCT on the 8 input coefficients as
42 * lower frequencies and higher frequencies assumed to be zero.
43 * It turns out that the computational effort is similar to the 8x8 IDCT
44 * regarding the output size.
45 * Furthermore, the scaling and descaling is the same for all IDCT sizes.
47 * CAUTION: We rely on the FIX() macro except for the N=1,2,4,8 cases
48 * since there would be too many additional constants to pre-calculate.
51 #define JPEG_INTERNALS
52 #include "jinclude.h"
53 #include "jpeglib.h"
54 #include "jdct.h" /* Private declarations for DCT subsystem */
56 #ifdef DCT_ISLOW_SUPPORTED
60 * This module is specialized to the case DCTSIZE = 8.
63 #if DCTSIZE != 8
64 Sorry, this code only copes with 8x8 DCT blocks. /* deliberate syntax err */
65 #endif
/*
 * The poop on this scaling stuff is as follows:
 *
 * Each 1-D IDCT step produces outputs which are a factor of sqrt(N)
 * larger than the true IDCT outputs. The final outputs are therefore
 * a factor of N larger than desired; since N=8 this can be cured by
 * a simple right shift at the end of the algorithm. The advantage of
 * this arrangement is that we save two multiplications per 1-D IDCT,
 * because the y0 and y4 inputs need not be divided by sqrt(N).
 *
 * We have to do addition and subtraction of the integer inputs, which
 * is no problem, and multiplication by fractional constants, which is
 * a problem to do in integer arithmetic. We multiply all the constants
 * by CONST_SCALE and convert them to integer constants (thus retaining
 * CONST_BITS bits of precision in the constants). After doing a
 * multiplication we have to divide the product by CONST_SCALE, with proper
 * rounding, to produce the correct output. This division can be done
 * cheaply as a right shift of CONST_BITS bits. We postpone shifting
 * as long as possible so that partial sums can be added together with
 * full fractional precision.
 *
 * The outputs of the first pass are scaled up by PASS1_BITS bits so that
 * they are represented to better-than-integral precision. These outputs
 * require BITS_IN_JSAMPLE + PASS1_BITS + 3 bits; this fits in a 16-bit word
 * with the recommended scaling. (To scale up 12-bit sample data further, an
 * intermediate INT32 array would be needed.)
 *
 * To avoid overflow of the 32-bit intermediate results in pass 2, we must
 * have BITS_IN_JSAMPLE + CONST_BITS + PASS1_BITS <= 26. Error analysis
 * shows that the values given below are the most effective.
 */

/* CONST_BITS: fractional bits retained in the fixed-point constants.
 * PASS1_BITS: extra fractional bits carried in the pass-1 outputs.
 * Both branches keep CONST_BITS at 13; only PASS1_BITS differs.
 */
#if BITS_IN_JSAMPLE == 8
#define CONST_BITS 13
#define PASS1_BITS 2
#else
#define CONST_BITS 13
#define PASS1_BITS 1 /* lose a little precision to avoid overflow */
#endif
/* Some C compilers fail to reduce "FIX(constant)" at compile time, thus
 * causing a lot of useless floating-point operations at run time.
 * To get around this we use the following pre-calculated constants.
 * If you change CONST_BITS you may want to add appropriate values.
 * (With a reasonable C compiler, you can just rely on the FIX() macro...)
 *
 * Each FIX_a_bcdefghij name encodes the real value a.bcdefghij; the
 * pre-calculated table applies only when CONST_BITS is 13, otherwise the
 * FIX() macro is used to derive the same constants.
 */

#if CONST_BITS == 13
#define FIX_0_298631336 ((INT32) 2446) /* FIX(0.298631336) */
#define FIX_0_390180644 ((INT32) 3196) /* FIX(0.390180644) */
#define FIX_0_541196100 ((INT32) 4433) /* FIX(0.541196100) */
#define FIX_0_765366865 ((INT32) 6270) /* FIX(0.765366865) */
#define FIX_0_899976223 ((INT32) 7373) /* FIX(0.899976223) */
#define FIX_1_175875602 ((INT32) 9633) /* FIX(1.175875602) */
#define FIX_1_501321110 ((INT32) 12299) /* FIX(1.501321110) */
#define FIX_1_847759065 ((INT32) 15137) /* FIX(1.847759065) */
#define FIX_1_961570560 ((INT32) 16069) /* FIX(1.961570560) */
#define FIX_2_053119869 ((INT32) 16819) /* FIX(2.053119869) */
#define FIX_2_562915447 ((INT32) 20995) /* FIX(2.562915447) */
#define FIX_3_072711026 ((INT32) 25172) /* FIX(3.072711026) */
#else
#define FIX_0_298631336 FIX(0.298631336)
#define FIX_0_390180644 FIX(0.390180644)
#define FIX_0_541196100 FIX(0.541196100)
#define FIX_0_765366865 FIX(0.765366865)
#define FIX_0_899976223 FIX(0.899976223)
#define FIX_1_175875602 FIX(1.175875602)
#define FIX_1_501321110 FIX(1.501321110)
#define FIX_1_847759065 FIX(1.847759065)
#define FIX_1_961570560 FIX(1.961570560)
#define FIX_2_053119869 FIX(2.053119869)
#define FIX_2_562915447 FIX(2.562915447)
#define FIX_3_072711026 FIX(3.072711026)
#endif
/* Multiply an INT32 variable by an INT32 constant to yield an INT32 result.
 * For 8-bit samples with the recommended scaling, all the variable
 * and constant values involved are no more than 16 bits wide, so a
 * 16x16->32 bit multiply can be used instead of a full 32x32 multiply.
 * For 12-bit samples, a full 32-bit multiplication will be needed.
 *
 * The 8-bit case delegates to MULTIPLY16C16; the other case expands to
 * a plain C multiplication of the two (parenthesized) arguments.
 */

#if BITS_IN_JSAMPLE == 8
#define MULTIPLY(var,const) MULTIPLY16C16(var,const)
#else
#define MULTIPLY(var,const) ((var) * (const))
#endif
/* Dequantize a coefficient by multiplying it by the multiplier-table
 * entry; produce an int result. In this module, both inputs and result
 * are 16 bits or less, so either int or short multiply will work.
 *
 * The coefficient is cast to ISLOW_MULT_TYPE before the multiplication;
 * the quantization value is used as given.
 */

#define DEQUANTIZE(coef,quantval) (((ISLOW_MULT_TYPE) (coef)) * (quantval))
167 * Perform dequantization and inverse DCT on one block of coefficients.
169 * Optimized algorithm with 12 multiplications in the 1-D kernel.
170 * cK represents sqrt(2) * cos(K*pi/16).
173 GLOBAL(void)
174 jpeg_idct_islow (j_decompress_ptr cinfo, jpeg_component_info * compptr,
175 JCOEFPTR coef_block,
176 JSAMPARRAY output_buf, JDIMENSION output_col)
178 INT32 tmp0, tmp1, tmp2, tmp3;
179 INT32 tmp10, tmp11, tmp12, tmp13;
180 INT32 z1, z2, z3;
181 JCOEFPTR inptr;
182 ISLOW_MULT_TYPE * quantptr;
183 int * wsptr;
184 JSAMPROW outptr;
185 JSAMPLE *range_limit = IDCT_range_limit(cinfo);
186 int ctr;
187 int workspace[DCTSIZE2]; /* buffers data between passes */
188 SHIFT_TEMPS
190 /* Pass 1: process columns from input, store into work array.
191 * Note results are scaled up by sqrt(8) compared to a true IDCT;
192 * furthermore, we scale the results by 2**PASS1_BITS.
195 inptr = coef_block;
196 quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table;
197 wsptr = workspace;
198 for (ctr = DCTSIZE; ctr > 0; ctr--) {
199 /* Due to quantization, we will usually find that many of the input
200 * coefficients are zero, especially the AC terms. We can exploit this
201 * by short-circuiting the IDCT calculation for any column in which all
202 * the AC terms are zero. In that case each output is equal to the
203 * DC coefficient (with scale factor as needed).
204 * With typical images and quantization tables, half or more of the
205 * column DCT calculations can be simplified this way.
208 if (inptr[DCTSIZE*1] == 0 && inptr[DCTSIZE*2] == 0 &&
209 inptr[DCTSIZE*3] == 0 && inptr[DCTSIZE*4] == 0 &&
210 inptr[DCTSIZE*5] == 0 && inptr[DCTSIZE*6] == 0 &&
211 inptr[DCTSIZE*7] == 0) {
212 /* AC terms all zero */
213 int dcval = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]) << PASS1_BITS;
215 wsptr[DCTSIZE*0] = dcval;
216 wsptr[DCTSIZE*1] = dcval;
217 wsptr[DCTSIZE*2] = dcval;
218 wsptr[DCTSIZE*3] = dcval;
219 wsptr[DCTSIZE*4] = dcval;
220 wsptr[DCTSIZE*5] = dcval;
221 wsptr[DCTSIZE*6] = dcval;
222 wsptr[DCTSIZE*7] = dcval;
224 inptr++; /* advance pointers to next column */
225 quantptr++;
226 wsptr++;
227 continue;
230 /* Even part: reverse the even part of the forward DCT.
231 * The rotator is c(-6).
234 z2 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]);
235 z3 = DEQUANTIZE(inptr[DCTSIZE*4], quantptr[DCTSIZE*4]);
236 z2 <<= CONST_BITS;
237 z3 <<= CONST_BITS;
238 /* Add fudge factor here for final descale. */
239 z2 += ONE << (CONST_BITS-PASS1_BITS-1);
241 tmp0 = z2 + z3;
242 tmp1 = z2 - z3;
244 z2 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]);
245 z3 = DEQUANTIZE(inptr[DCTSIZE*6], quantptr[DCTSIZE*6]);
247 z1 = MULTIPLY(z2 + z3, FIX_0_541196100); /* c6 */
248 tmp2 = z1 + MULTIPLY(z2, FIX_0_765366865); /* c2-c6 */
249 tmp3 = z1 - MULTIPLY(z3, FIX_1_847759065); /* c2+c6 */
251 tmp10 = tmp0 + tmp2;
252 tmp13 = tmp0 - tmp2;
253 tmp11 = tmp1 + tmp3;
254 tmp12 = tmp1 - tmp3;
256 /* Odd part per figure 8; the matrix is unitary and hence its
257 * transpose is its inverse. i0..i3 are y7,y5,y3,y1 respectively.
260 tmp0 = DEQUANTIZE(inptr[DCTSIZE*7], quantptr[DCTSIZE*7]);
261 tmp1 = DEQUANTIZE(inptr[DCTSIZE*5], quantptr[DCTSIZE*5]);
262 tmp2 = DEQUANTIZE(inptr[DCTSIZE*3], quantptr[DCTSIZE*3]);
263 tmp3 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]);
265 z2 = tmp0 + tmp2;
266 z3 = tmp1 + tmp3;
268 z1 = MULTIPLY(z2 + z3, FIX_1_175875602); /* c3 */
269 z2 = MULTIPLY(z2, - FIX_1_961570560); /* -c3-c5 */
270 z3 = MULTIPLY(z3, - FIX_0_390180644); /* -c3+c5 */
271 z2 += z1;
272 z3 += z1;
274 z1 = MULTIPLY(tmp0 + tmp3, - FIX_0_899976223); /* -c3+c7 */
275 tmp0 = MULTIPLY(tmp0, FIX_0_298631336); /* -c1+c3+c5-c7 */
276 tmp3 = MULTIPLY(tmp3, FIX_1_501321110); /* c1+c3-c5-c7 */
277 tmp0 += z1 + z2;
278 tmp3 += z1 + z3;
280 z1 = MULTIPLY(tmp1 + tmp2, - FIX_2_562915447); /* -c1-c3 */
281 tmp1 = MULTIPLY(tmp1, FIX_2_053119869); /* c1+c3-c5+c7 */
282 tmp2 = MULTIPLY(tmp2, FIX_3_072711026); /* c1+c3+c5-c7 */
283 tmp1 += z1 + z3;
284 tmp2 += z1 + z2;
286 /* Final output stage: inputs are tmp10..tmp13, tmp0..tmp3 */
288 wsptr[DCTSIZE*0] = (int) RIGHT_SHIFT(tmp10 + tmp3, CONST_BITS-PASS1_BITS);
289 wsptr[DCTSIZE*7] = (int) RIGHT_SHIFT(tmp10 - tmp3, CONST_BITS-PASS1_BITS);
290 wsptr[DCTSIZE*1] = (int) RIGHT_SHIFT(tmp11 + tmp2, CONST_BITS-PASS1_BITS);
291 wsptr[DCTSIZE*6] = (int) RIGHT_SHIFT(tmp11 - tmp2, CONST_BITS-PASS1_BITS);
292 wsptr[DCTSIZE*2] = (int) RIGHT_SHIFT(tmp12 + tmp1, CONST_BITS-PASS1_BITS);
293 wsptr[DCTSIZE*5] = (int) RIGHT_SHIFT(tmp12 - tmp1, CONST_BITS-PASS1_BITS);
294 wsptr[DCTSIZE*3] = (int) RIGHT_SHIFT(tmp13 + tmp0, CONST_BITS-PASS1_BITS);
295 wsptr[DCTSIZE*4] = (int) RIGHT_SHIFT(tmp13 - tmp0, CONST_BITS-PASS1_BITS);
297 inptr++; /* advance pointers to next column */
298 quantptr++;
299 wsptr++;
302 /* Pass 2: process rows from work array, store into output array.
303 * Note that we must descale the results by a factor of 8 == 2**3,
304 * and also undo the PASS1_BITS scaling.
307 wsptr = workspace;
308 for (ctr = 0; ctr < DCTSIZE; ctr++) {
309 outptr = output_buf[ctr] + output_col;
311 /* Add range center and fudge factor for final descale and range-limit. */
312 z2 = (INT32) wsptr[0] +
313 ((((INT32) RANGE_CENTER) << (PASS1_BITS+3)) +
314 (ONE << (PASS1_BITS+2)));
316 /* Rows of zeroes can be exploited in the same way as we did with columns.
317 * However, the column calculation has created many nonzero AC terms, so
318 * the simplification applies less often (typically 5% to 10% of the time).
319 * On machines with very fast multiplication, it's possible that the
320 * test takes more time than it's worth. In that case this section
321 * may be commented out.
324 #ifndef NO_ZERO_ROW_TEST
325 if (wsptr[1] == 0 && wsptr[2] == 0 && wsptr[3] == 0 && wsptr[4] == 0 &&
326 wsptr[5] == 0 && wsptr[6] == 0 && wsptr[7] == 0) {
327 /* AC terms all zero */
328 JSAMPLE dcval = range_limit[(int) RIGHT_SHIFT(z2, PASS1_BITS+3)
329 & RANGE_MASK];
331 outptr[0] = dcval;
332 outptr[1] = dcval;
333 outptr[2] = dcval;
334 outptr[3] = dcval;
335 outptr[4] = dcval;
336 outptr[5] = dcval;
337 outptr[6] = dcval;
338 outptr[7] = dcval;
340 wsptr += DCTSIZE; /* advance pointer to next row */
341 continue;
343 #endif
345 /* Even part: reverse the even part of the forward DCT.
346 * The rotator is c(-6).
349 z3 = (INT32) wsptr[4];
351 tmp0 = (z2 + z3) << CONST_BITS;
352 tmp1 = (z2 - z3) << CONST_BITS;
354 z2 = (INT32) wsptr[2];
355 z3 = (INT32) wsptr[6];
357 z1 = MULTIPLY(z2 + z3, FIX_0_541196100); /* c6 */
358 tmp2 = z1 + MULTIPLY(z2, FIX_0_765366865); /* c2-c6 */
359 tmp3 = z1 - MULTIPLY(z3, FIX_1_847759065); /* c2+c6 */
361 tmp10 = tmp0 + tmp2;
362 tmp13 = tmp0 - tmp2;
363 tmp11 = tmp1 + tmp3;
364 tmp12 = tmp1 - tmp3;
366 /* Odd part per figure 8; the matrix is unitary and hence its
367 * transpose is its inverse. i0..i3 are y7,y5,y3,y1 respectively.
370 tmp0 = (INT32) wsptr[7];
371 tmp1 = (INT32) wsptr[5];
372 tmp2 = (INT32) wsptr[3];
373 tmp3 = (INT32) wsptr[1];
375 z2 = tmp0 + tmp2;
376 z3 = tmp1 + tmp3;
378 z1 = MULTIPLY(z2 + z3, FIX_1_175875602); /* c3 */
379 z2 = MULTIPLY(z2, - FIX_1_961570560); /* -c3-c5 */
380 z3 = MULTIPLY(z3, - FIX_0_390180644); /* -c3+c5 */
381 z2 += z1;
382 z3 += z1;
384 z1 = MULTIPLY(tmp0 + tmp3, - FIX_0_899976223); /* -c3+c7 */
385 tmp0 = MULTIPLY(tmp0, FIX_0_298631336); /* -c1+c3+c5-c7 */
386 tmp3 = MULTIPLY(tmp3, FIX_1_501321110); /* c1+c3-c5-c7 */
387 tmp0 += z1 + z2;
388 tmp3 += z1 + z3;
390 z1 = MULTIPLY(tmp1 + tmp2, - FIX_2_562915447); /* -c1-c3 */
391 tmp1 = MULTIPLY(tmp1, FIX_2_053119869); /* c1+c3-c5+c7 */
392 tmp2 = MULTIPLY(tmp2, FIX_3_072711026); /* c1+c3+c5-c7 */
393 tmp1 += z1 + z3;
394 tmp2 += z1 + z2;
396 /* Final output stage: inputs are tmp10..tmp13, tmp0..tmp3 */
398 outptr[0] = range_limit[(int) RIGHT_SHIFT(tmp10 + tmp3,
399 CONST_BITS+PASS1_BITS+3)
400 & RANGE_MASK];
401 outptr[7] = range_limit[(int) RIGHT_SHIFT(tmp10 - tmp3,
402 CONST_BITS+PASS1_BITS+3)
403 & RANGE_MASK];
404 outptr[1] = range_limit[(int) RIGHT_SHIFT(tmp11 + tmp2,
405 CONST_BITS+PASS1_BITS+3)
406 & RANGE_MASK];
407 outptr[6] = range_limit[(int) RIGHT_SHIFT(tmp11 - tmp2,
408 CONST_BITS+PASS1_BITS+3)
409 & RANGE_MASK];
410 outptr[2] = range_limit[(int) RIGHT_SHIFT(tmp12 + tmp1,
411 CONST_BITS+PASS1_BITS+3)
412 & RANGE_MASK];
413 outptr[5] = range_limit[(int) RIGHT_SHIFT(tmp12 - tmp1,
414 CONST_BITS+PASS1_BITS+3)
415 & RANGE_MASK];
416 outptr[3] = range_limit[(int) RIGHT_SHIFT(tmp13 + tmp0,
417 CONST_BITS+PASS1_BITS+3)
418 & RANGE_MASK];
419 outptr[4] = range_limit[(int) RIGHT_SHIFT(tmp13 - tmp0,
420 CONST_BITS+PASS1_BITS+3)
421 & RANGE_MASK];
423 wsptr += DCTSIZE; /* advance pointer to next row */
427 #ifdef IDCT_SCALING_SUPPORTED
431 * Perform dequantization and inverse DCT on one block of coefficients,
432 * producing a reduced-size 7x7 output block.
434 * Optimized algorithm with 12 multiplications in the 1-D kernel.
435 * cK represents sqrt(2) * cos(K*pi/14).
438 GLOBAL(void)
439 jpeg_idct_7x7 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
440 JCOEFPTR coef_block,
441 JSAMPARRAY output_buf, JDIMENSION output_col)
443 INT32 tmp0, tmp1, tmp2, tmp10, tmp11, tmp12, tmp13;
444 INT32 z1, z2, z3;
445 JCOEFPTR inptr;
446 ISLOW_MULT_TYPE * quantptr;
447 int * wsptr;
448 JSAMPROW outptr;
449 JSAMPLE *range_limit = IDCT_range_limit(cinfo);
450 int ctr;
451 int workspace[7*7]; /* buffers data between passes */
452 SHIFT_TEMPS
454 /* Pass 1: process columns from input, store into work array. */
456 inptr = coef_block;
457 quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table;
458 wsptr = workspace;
459 for (ctr = 0; ctr < 7; ctr++, inptr++, quantptr++, wsptr++) {
460 /* Even part */
462 tmp13 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]);
463 tmp13 <<= CONST_BITS;
464 /* Add fudge factor here for final descale. */
465 tmp13 += ONE << (CONST_BITS-PASS1_BITS-1);
467 z1 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]);
468 z2 = DEQUANTIZE(inptr[DCTSIZE*4], quantptr[DCTSIZE*4]);
469 z3 = DEQUANTIZE(inptr[DCTSIZE*6], quantptr[DCTSIZE*6]);
471 tmp10 = MULTIPLY(z2 - z3, FIX(0.881747734)); /* c4 */
472 tmp12 = MULTIPLY(z1 - z2, FIX(0.314692123)); /* c6 */
473 tmp11 = tmp10 + tmp12 + tmp13 - MULTIPLY(z2, FIX(1.841218003)); /* c2+c4-c6 */
474 tmp0 = z1 + z3;
475 z2 -= tmp0;
476 tmp0 = MULTIPLY(tmp0, FIX(1.274162392)) + tmp13; /* c2 */
477 tmp10 += tmp0 - MULTIPLY(z3, FIX(0.077722536)); /* c2-c4-c6 */
478 tmp12 += tmp0 - MULTIPLY(z1, FIX(2.470602249)); /* c2+c4+c6 */
479 tmp13 += MULTIPLY(z2, FIX(1.414213562)); /* c0 */
481 /* Odd part */
483 z1 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]);
484 z2 = DEQUANTIZE(inptr[DCTSIZE*3], quantptr[DCTSIZE*3]);
485 z3 = DEQUANTIZE(inptr[DCTSIZE*5], quantptr[DCTSIZE*5]);
487 tmp1 = MULTIPLY(z1 + z2, FIX(0.935414347)); /* (c3+c1-c5)/2 */
488 tmp2 = MULTIPLY(z1 - z2, FIX(0.170262339)); /* (c3+c5-c1)/2 */
489 tmp0 = tmp1 - tmp2;
490 tmp1 += tmp2;
491 tmp2 = MULTIPLY(z2 + z3, - FIX(1.378756276)); /* -c1 */
492 tmp1 += tmp2;
493 z2 = MULTIPLY(z1 + z3, FIX(0.613604268)); /* c5 */
494 tmp0 += z2;
495 tmp2 += z2 + MULTIPLY(z3, FIX(1.870828693)); /* c3+c1-c5 */
497 /* Final output stage */
499 wsptr[7*0] = (int) RIGHT_SHIFT(tmp10 + tmp0, CONST_BITS-PASS1_BITS);
500 wsptr[7*6] = (int) RIGHT_SHIFT(tmp10 - tmp0, CONST_BITS-PASS1_BITS);
501 wsptr[7*1] = (int) RIGHT_SHIFT(tmp11 + tmp1, CONST_BITS-PASS1_BITS);
502 wsptr[7*5] = (int) RIGHT_SHIFT(tmp11 - tmp1, CONST_BITS-PASS1_BITS);
503 wsptr[7*2] = (int) RIGHT_SHIFT(tmp12 + tmp2, CONST_BITS-PASS1_BITS);
504 wsptr[7*4] = (int) RIGHT_SHIFT(tmp12 - tmp2, CONST_BITS-PASS1_BITS);
505 wsptr[7*3] = (int) RIGHT_SHIFT(tmp13, CONST_BITS-PASS1_BITS);
508 /* Pass 2: process 7 rows from work array, store into output array. */
510 wsptr = workspace;
511 for (ctr = 0; ctr < 7; ctr++) {
512 outptr = output_buf[ctr] + output_col;
514 /* Even part */
516 /* Add range center and fudge factor for final descale and range-limit. */
517 tmp13 = (INT32) wsptr[0] +
518 ((((INT32) RANGE_CENTER) << (PASS1_BITS+3)) +
519 (ONE << (PASS1_BITS+2)));
520 tmp13 <<= CONST_BITS;
522 z1 = (INT32) wsptr[2];
523 z2 = (INT32) wsptr[4];
524 z3 = (INT32) wsptr[6];
526 tmp10 = MULTIPLY(z2 - z3, FIX(0.881747734)); /* c4 */
527 tmp12 = MULTIPLY(z1 - z2, FIX(0.314692123)); /* c6 */
528 tmp11 = tmp10 + tmp12 + tmp13 - MULTIPLY(z2, FIX(1.841218003)); /* c2+c4-c6 */
529 tmp0 = z1 + z3;
530 z2 -= tmp0;
531 tmp0 = MULTIPLY(tmp0, FIX(1.274162392)) + tmp13; /* c2 */
532 tmp10 += tmp0 - MULTIPLY(z3, FIX(0.077722536)); /* c2-c4-c6 */
533 tmp12 += tmp0 - MULTIPLY(z1, FIX(2.470602249)); /* c2+c4+c6 */
534 tmp13 += MULTIPLY(z2, FIX(1.414213562)); /* c0 */
536 /* Odd part */
538 z1 = (INT32) wsptr[1];
539 z2 = (INT32) wsptr[3];
540 z3 = (INT32) wsptr[5];
542 tmp1 = MULTIPLY(z1 + z2, FIX(0.935414347)); /* (c3+c1-c5)/2 */
543 tmp2 = MULTIPLY(z1 - z2, FIX(0.170262339)); /* (c3+c5-c1)/2 */
544 tmp0 = tmp1 - tmp2;
545 tmp1 += tmp2;
546 tmp2 = MULTIPLY(z2 + z3, - FIX(1.378756276)); /* -c1 */
547 tmp1 += tmp2;
548 z2 = MULTIPLY(z1 + z3, FIX(0.613604268)); /* c5 */
549 tmp0 += z2;
550 tmp2 += z2 + MULTIPLY(z3, FIX(1.870828693)); /* c3+c1-c5 */
552 /* Final output stage */
554 outptr[0] = range_limit[(int) RIGHT_SHIFT(tmp10 + tmp0,
555 CONST_BITS+PASS1_BITS+3)
556 & RANGE_MASK];
557 outptr[6] = range_limit[(int) RIGHT_SHIFT(tmp10 - tmp0,
558 CONST_BITS+PASS1_BITS+3)
559 & RANGE_MASK];
560 outptr[1] = range_limit[(int) RIGHT_SHIFT(tmp11 + tmp1,
561 CONST_BITS+PASS1_BITS+3)
562 & RANGE_MASK];
563 outptr[5] = range_limit[(int) RIGHT_SHIFT(tmp11 - tmp1,
564 CONST_BITS+PASS1_BITS+3)
565 & RANGE_MASK];
566 outptr[2] = range_limit[(int) RIGHT_SHIFT(tmp12 + tmp2,
567 CONST_BITS+PASS1_BITS+3)
568 & RANGE_MASK];
569 outptr[4] = range_limit[(int) RIGHT_SHIFT(tmp12 - tmp2,
570 CONST_BITS+PASS1_BITS+3)
571 & RANGE_MASK];
572 outptr[3] = range_limit[(int) RIGHT_SHIFT(tmp13,
573 CONST_BITS+PASS1_BITS+3)
574 & RANGE_MASK];
576 wsptr += 7; /* advance pointer to next row */
582 * Perform dequantization and inverse DCT on one block of coefficients,
583 * producing a reduced-size 6x6 output block.
585 * Optimized algorithm with 3 multiplications in the 1-D kernel.
586 * cK represents sqrt(2) * cos(K*pi/12).
589 GLOBAL(void)
590 jpeg_idct_6x6 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
591 JCOEFPTR coef_block,
592 JSAMPARRAY output_buf, JDIMENSION output_col)
594 INT32 tmp0, tmp1, tmp2, tmp10, tmp11, tmp12;
595 INT32 z1, z2, z3;
596 JCOEFPTR inptr;
597 ISLOW_MULT_TYPE * quantptr;
598 int * wsptr;
599 JSAMPROW outptr;
600 JSAMPLE *range_limit = IDCT_range_limit(cinfo);
601 int ctr;
602 int workspace[6*6]; /* buffers data between passes */
603 SHIFT_TEMPS
605 /* Pass 1: process columns from input, store into work array. */
607 inptr = coef_block;
608 quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table;
609 wsptr = workspace;
610 for (ctr = 0; ctr < 6; ctr++, inptr++, quantptr++, wsptr++) {
611 /* Even part */
613 tmp0 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]);
614 tmp0 <<= CONST_BITS;
615 /* Add fudge factor here for final descale. */
616 tmp0 += ONE << (CONST_BITS-PASS1_BITS-1);
617 tmp2 = DEQUANTIZE(inptr[DCTSIZE*4], quantptr[DCTSIZE*4]);
618 tmp10 = MULTIPLY(tmp2, FIX(0.707106781)); /* c4 */
619 tmp1 = tmp0 + tmp10;
620 tmp11 = RIGHT_SHIFT(tmp0 - tmp10 - tmp10, CONST_BITS-PASS1_BITS);
621 tmp10 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]);
622 tmp0 = MULTIPLY(tmp10, FIX(1.224744871)); /* c2 */
623 tmp10 = tmp1 + tmp0;
624 tmp12 = tmp1 - tmp0;
626 /* Odd part */
628 z1 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]);
629 z2 = DEQUANTIZE(inptr[DCTSIZE*3], quantptr[DCTSIZE*3]);
630 z3 = DEQUANTIZE(inptr[DCTSIZE*5], quantptr[DCTSIZE*5]);
631 tmp1 = MULTIPLY(z1 + z3, FIX(0.366025404)); /* c5 */
632 tmp0 = tmp1 + ((z1 + z2) << CONST_BITS);
633 tmp2 = tmp1 + ((z3 - z2) << CONST_BITS);
634 tmp1 = (z1 - z2 - z3) << PASS1_BITS;
636 /* Final output stage */
638 wsptr[6*0] = (int) RIGHT_SHIFT(tmp10 + tmp0, CONST_BITS-PASS1_BITS);
639 wsptr[6*5] = (int) RIGHT_SHIFT(tmp10 - tmp0, CONST_BITS-PASS1_BITS);
640 wsptr[6*1] = (int) (tmp11 + tmp1);
641 wsptr[6*4] = (int) (tmp11 - tmp1);
642 wsptr[6*2] = (int) RIGHT_SHIFT(tmp12 + tmp2, CONST_BITS-PASS1_BITS);
643 wsptr[6*3] = (int) RIGHT_SHIFT(tmp12 - tmp2, CONST_BITS-PASS1_BITS);
646 /* Pass 2: process 6 rows from work array, store into output array. */
648 wsptr = workspace;
649 for (ctr = 0; ctr < 6; ctr++) {
650 outptr = output_buf[ctr] + output_col;
652 /* Even part */
654 /* Add range center and fudge factor for final descale and range-limit. */
655 tmp0 = (INT32) wsptr[0] +
656 ((((INT32) RANGE_CENTER) << (PASS1_BITS+3)) +
657 (ONE << (PASS1_BITS+2)));
658 tmp0 <<= CONST_BITS;
659 tmp2 = (INT32) wsptr[4];
660 tmp10 = MULTIPLY(tmp2, FIX(0.707106781)); /* c4 */
661 tmp1 = tmp0 + tmp10;
662 tmp11 = tmp0 - tmp10 - tmp10;
663 tmp10 = (INT32) wsptr[2];
664 tmp0 = MULTIPLY(tmp10, FIX(1.224744871)); /* c2 */
665 tmp10 = tmp1 + tmp0;
666 tmp12 = tmp1 - tmp0;
668 /* Odd part */
670 z1 = (INT32) wsptr[1];
671 z2 = (INT32) wsptr[3];
672 z3 = (INT32) wsptr[5];
673 tmp1 = MULTIPLY(z1 + z3, FIX(0.366025404)); /* c5 */
674 tmp0 = tmp1 + ((z1 + z2) << CONST_BITS);
675 tmp2 = tmp1 + ((z3 - z2) << CONST_BITS);
676 tmp1 = (z1 - z2 - z3) << CONST_BITS;
678 /* Final output stage */
680 outptr[0] = range_limit[(int) RIGHT_SHIFT(tmp10 + tmp0,
681 CONST_BITS+PASS1_BITS+3)
682 & RANGE_MASK];
683 outptr[5] = range_limit[(int) RIGHT_SHIFT(tmp10 - tmp0,
684 CONST_BITS+PASS1_BITS+3)
685 & RANGE_MASK];
686 outptr[1] = range_limit[(int) RIGHT_SHIFT(tmp11 + tmp1,
687 CONST_BITS+PASS1_BITS+3)
688 & RANGE_MASK];
689 outptr[4] = range_limit[(int) RIGHT_SHIFT(tmp11 - tmp1,
690 CONST_BITS+PASS1_BITS+3)
691 & RANGE_MASK];
692 outptr[2] = range_limit[(int) RIGHT_SHIFT(tmp12 + tmp2,
693 CONST_BITS+PASS1_BITS+3)
694 & RANGE_MASK];
695 outptr[3] = range_limit[(int) RIGHT_SHIFT(tmp12 - tmp2,
696 CONST_BITS+PASS1_BITS+3)
697 & RANGE_MASK];
699 wsptr += 6; /* advance pointer to next row */
705 * Perform dequantization and inverse DCT on one block of coefficients,
706 * producing a reduced-size 5x5 output block.
708 * Optimized algorithm with 5 multiplications in the 1-D kernel.
709 * cK represents sqrt(2) * cos(K*pi/10).
712 GLOBAL(void)
713 jpeg_idct_5x5 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
714 JCOEFPTR coef_block,
715 JSAMPARRAY output_buf, JDIMENSION output_col)
717 INT32 tmp0, tmp1, tmp10, tmp11, tmp12;
718 INT32 z1, z2, z3;
719 JCOEFPTR inptr;
720 ISLOW_MULT_TYPE * quantptr;
721 int * wsptr;
722 JSAMPROW outptr;
723 JSAMPLE *range_limit = IDCT_range_limit(cinfo);
724 int ctr;
725 int workspace[5*5]; /* buffers data between passes */
726 SHIFT_TEMPS
728 /* Pass 1: process columns from input, store into work array. */
730 inptr = coef_block;
731 quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table;
732 wsptr = workspace;
733 for (ctr = 0; ctr < 5; ctr++, inptr++, quantptr++, wsptr++) {
734 /* Even part */
736 tmp12 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]);
737 tmp12 <<= CONST_BITS;
738 /* Add fudge factor here for final descale. */
739 tmp12 += ONE << (CONST_BITS-PASS1_BITS-1);
740 tmp0 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]);
741 tmp1 = DEQUANTIZE(inptr[DCTSIZE*4], quantptr[DCTSIZE*4]);
742 z1 = MULTIPLY(tmp0 + tmp1, FIX(0.790569415)); /* (c2+c4)/2 */
743 z2 = MULTIPLY(tmp0 - tmp1, FIX(0.353553391)); /* (c2-c4)/2 */
744 z3 = tmp12 + z2;
745 tmp10 = z3 + z1;
746 tmp11 = z3 - z1;
747 tmp12 -= z2 << 2;
749 /* Odd part */
751 z2 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]);
752 z3 = DEQUANTIZE(inptr[DCTSIZE*3], quantptr[DCTSIZE*3]);
754 z1 = MULTIPLY(z2 + z3, FIX(0.831253876)); /* c3 */
755 tmp0 = z1 + MULTIPLY(z2, FIX(0.513743148)); /* c1-c3 */
756 tmp1 = z1 - MULTIPLY(z3, FIX(2.176250899)); /* c1+c3 */
758 /* Final output stage */
760 wsptr[5*0] = (int) RIGHT_SHIFT(tmp10 + tmp0, CONST_BITS-PASS1_BITS);
761 wsptr[5*4] = (int) RIGHT_SHIFT(tmp10 - tmp0, CONST_BITS-PASS1_BITS);
762 wsptr[5*1] = (int) RIGHT_SHIFT(tmp11 + tmp1, CONST_BITS-PASS1_BITS);
763 wsptr[5*3] = (int) RIGHT_SHIFT(tmp11 - tmp1, CONST_BITS-PASS1_BITS);
764 wsptr[5*2] = (int) RIGHT_SHIFT(tmp12, CONST_BITS-PASS1_BITS);
767 /* Pass 2: process 5 rows from work array, store into output array. */
769 wsptr = workspace;
770 for (ctr = 0; ctr < 5; ctr++) {
771 outptr = output_buf[ctr] + output_col;
773 /* Even part */
775 /* Add range center and fudge factor for final descale and range-limit. */
776 tmp12 = (INT32) wsptr[0] +
777 ((((INT32) RANGE_CENTER) << (PASS1_BITS+3)) +
778 (ONE << (PASS1_BITS+2)));
779 tmp12 <<= CONST_BITS;
780 tmp0 = (INT32) wsptr[2];
781 tmp1 = (INT32) wsptr[4];
782 z1 = MULTIPLY(tmp0 + tmp1, FIX(0.790569415)); /* (c2+c4)/2 */
783 z2 = MULTIPLY(tmp0 - tmp1, FIX(0.353553391)); /* (c2-c4)/2 */
784 z3 = tmp12 + z2;
785 tmp10 = z3 + z1;
786 tmp11 = z3 - z1;
787 tmp12 -= z2 << 2;
789 /* Odd part */
791 z2 = (INT32) wsptr[1];
792 z3 = (INT32) wsptr[3];
794 z1 = MULTIPLY(z2 + z3, FIX(0.831253876)); /* c3 */
795 tmp0 = z1 + MULTIPLY(z2, FIX(0.513743148)); /* c1-c3 */
796 tmp1 = z1 - MULTIPLY(z3, FIX(2.176250899)); /* c1+c3 */
798 /* Final output stage */
800 outptr[0] = range_limit[(int) RIGHT_SHIFT(tmp10 + tmp0,
801 CONST_BITS+PASS1_BITS+3)
802 & RANGE_MASK];
803 outptr[4] = range_limit[(int) RIGHT_SHIFT(tmp10 - tmp0,
804 CONST_BITS+PASS1_BITS+3)
805 & RANGE_MASK];
806 outptr[1] = range_limit[(int) RIGHT_SHIFT(tmp11 + tmp1,
807 CONST_BITS+PASS1_BITS+3)
808 & RANGE_MASK];
809 outptr[3] = range_limit[(int) RIGHT_SHIFT(tmp11 - tmp1,
810 CONST_BITS+PASS1_BITS+3)
811 & RANGE_MASK];
812 outptr[2] = range_limit[(int) RIGHT_SHIFT(tmp12,
813 CONST_BITS+PASS1_BITS+3)
814 & RANGE_MASK];
816 wsptr += 5; /* advance pointer to next row */
822 * Perform dequantization and inverse DCT on one block of coefficients,
823 * producing a reduced-size 4x4 output block.
825 * Optimized algorithm with 3 multiplications in the 1-D kernel.
826 * cK represents sqrt(2) * cos(K*pi/16) [refers to 8-point IDCT].
829 GLOBAL(void)
830 jpeg_idct_4x4 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
831 JCOEFPTR coef_block,
832 JSAMPARRAY output_buf, JDIMENSION output_col)
834 INT32 tmp0, tmp2, tmp10, tmp12;
835 INT32 z1, z2, z3;
836 JCOEFPTR inptr;
837 ISLOW_MULT_TYPE * quantptr;
838 int * wsptr;
839 JSAMPROW outptr;
840 JSAMPLE *range_limit = IDCT_range_limit(cinfo);
841 int ctr;
842 int workspace[4*4]; /* buffers data between passes */
843 SHIFT_TEMPS
845 /* Pass 1: process columns from input, store into work array. */
847 inptr = coef_block;
848 quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table;
849 wsptr = workspace;
850 for (ctr = 0; ctr < 4; ctr++, inptr++, quantptr++, wsptr++) {
851 /* Even part */
853 tmp0 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]);
854 tmp2 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]);
856 tmp10 = (tmp0 + tmp2) << PASS1_BITS;
857 tmp12 = (tmp0 - tmp2) << PASS1_BITS;
859 /* Odd part */
860 /* Same rotation as in the even part of the 8x8 LL&M IDCT */
862 z2 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]);
863 z3 = DEQUANTIZE(inptr[DCTSIZE*3], quantptr[DCTSIZE*3]);
865 z1 = MULTIPLY(z2 + z3, FIX_0_541196100); /* c6 */
866 /* Add fudge factor here for final descale. */
867 z1 += ONE << (CONST_BITS-PASS1_BITS-1);
868 tmp0 = RIGHT_SHIFT(z1 + MULTIPLY(z2, FIX_0_765366865), /* c2-c6 */
869 CONST_BITS-PASS1_BITS);
870 tmp2 = RIGHT_SHIFT(z1 - MULTIPLY(z3, FIX_1_847759065), /* c2+c6 */
871 CONST_BITS-PASS1_BITS);
873 /* Final output stage */
875 wsptr[4*0] = (int) (tmp10 + tmp0);
876 wsptr[4*3] = (int) (tmp10 - tmp0);
877 wsptr[4*1] = (int) (tmp12 + tmp2);
878 wsptr[4*2] = (int) (tmp12 - tmp2);
881 /* Pass 2: process 4 rows from work array, store into output array. */
883 wsptr = workspace;
884 for (ctr = 0; ctr < 4; ctr++) {
885 outptr = output_buf[ctr] + output_col;
887 /* Even part */
889 /* Add range center and fudge factor for final descale and range-limit. */
890 tmp0 = (INT32) wsptr[0] +
891 ((((INT32) RANGE_CENTER) << (PASS1_BITS+3)) +
892 (ONE << (PASS1_BITS+2)));
893 tmp2 = (INT32) wsptr[2];
895 tmp10 = (tmp0 + tmp2) << CONST_BITS;
896 tmp12 = (tmp0 - tmp2) << CONST_BITS;
898 /* Odd part */
899 /* Same rotation as in the even part of the 8x8 LL&M IDCT */
901 z2 = (INT32) wsptr[1];
902 z3 = (INT32) wsptr[3];
904 z1 = MULTIPLY(z2 + z3, FIX_0_541196100); /* c6 */
905 tmp0 = z1 + MULTIPLY(z2, FIX_0_765366865); /* c2-c6 */
906 tmp2 = z1 - MULTIPLY(z3, FIX_1_847759065); /* c2+c6 */
908 /* Final output stage */
910 outptr[0] = range_limit[(int) RIGHT_SHIFT(tmp10 + tmp0,
911 CONST_BITS+PASS1_BITS+3)
912 & RANGE_MASK];
913 outptr[3] = range_limit[(int) RIGHT_SHIFT(tmp10 - tmp0,
914 CONST_BITS+PASS1_BITS+3)
915 & RANGE_MASK];
916 outptr[1] = range_limit[(int) RIGHT_SHIFT(tmp12 + tmp2,
917 CONST_BITS+PASS1_BITS+3)
918 & RANGE_MASK];
919 outptr[2] = range_limit[(int) RIGHT_SHIFT(tmp12 - tmp2,
920 CONST_BITS+PASS1_BITS+3)
921 & RANGE_MASK];
923 wsptr += 4; /* advance pointer to next row */
929 * Perform dequantization and inverse DCT on one block of coefficients,
930 * producing a reduced-size 3x3 output block.
932 * Optimized algorithm with 2 multiplications in the 1-D kernel.
933 * cK represents sqrt(2) * cos(K*pi/6).
936 GLOBAL(void)
937 jpeg_idct_3x3 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
938 JCOEFPTR coef_block,
939 JSAMPARRAY output_buf, JDIMENSION output_col)
941 INT32 tmp0, tmp2, tmp10, tmp12;
942 JCOEFPTR inptr;
943 ISLOW_MULT_TYPE * quantptr;
944 int * wsptr;
945 JSAMPROW outptr;
946 JSAMPLE *range_limit = IDCT_range_limit(cinfo);
947 int ctr;
948 int workspace[3*3]; /* buffers data between passes */
949 SHIFT_TEMPS
951 /* Pass 1: process columns from input, store into work array. */
953 inptr = coef_block;
954 quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table;
955 wsptr = workspace;
956 for (ctr = 0; ctr < 3; ctr++, inptr++, quantptr++, wsptr++) {
957 /* Even part */
959 tmp0 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]);
960 tmp0 <<= CONST_BITS;
961 /* Add fudge factor here for final descale. */
962 tmp0 += ONE << (CONST_BITS-PASS1_BITS-1);
963 tmp2 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]);
964 tmp12 = MULTIPLY(tmp2, FIX(0.707106781)); /* c2 */
965 tmp10 = tmp0 + tmp12;
966 tmp2 = tmp0 - tmp12 - tmp12;
968 /* Odd part */
970 tmp12 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]);
971 tmp0 = MULTIPLY(tmp12, FIX(1.224744871)); /* c1 */
973 /* Final output stage */
975 wsptr[3*0] = (int) RIGHT_SHIFT(tmp10 + tmp0, CONST_BITS-PASS1_BITS);
976 wsptr[3*2] = (int) RIGHT_SHIFT(tmp10 - tmp0, CONST_BITS-PASS1_BITS);
977 wsptr[3*1] = (int) RIGHT_SHIFT(tmp2, CONST_BITS-PASS1_BITS);
980 /* Pass 2: process 3 rows from work array, store into output array. */
982 wsptr = workspace;
983 for (ctr = 0; ctr < 3; ctr++) {
984 outptr = output_buf[ctr] + output_col;
986 /* Even part */
988 /* Add range center and fudge factor for final descale and range-limit. */
989 tmp0 = (INT32) wsptr[0] +
990 ((((INT32) RANGE_CENTER) << (PASS1_BITS+3)) +
991 (ONE << (PASS1_BITS+2)));
992 tmp0 <<= CONST_BITS;
993 tmp2 = (INT32) wsptr[2];
994 tmp12 = MULTIPLY(tmp2, FIX(0.707106781)); /* c2 */
995 tmp10 = tmp0 + tmp12;
996 tmp2 = tmp0 - tmp12 - tmp12;
998 /* Odd part */
1000 tmp12 = (INT32) wsptr[1];
1001 tmp0 = MULTIPLY(tmp12, FIX(1.224744871)); /* c1 */
1003 /* Final output stage */
1005 outptr[0] = range_limit[(int) RIGHT_SHIFT(tmp10 + tmp0,
1006 CONST_BITS+PASS1_BITS+3)
1007 & RANGE_MASK];
1008 outptr[2] = range_limit[(int) RIGHT_SHIFT(tmp10 - tmp0,
1009 CONST_BITS+PASS1_BITS+3)
1010 & RANGE_MASK];
1011 outptr[1] = range_limit[(int) RIGHT_SHIFT(tmp2,
1012 CONST_BITS+PASS1_BITS+3)
1013 & RANGE_MASK];
1015 wsptr += 3; /* advance pointer to next row */
1021 * Perform dequantization and inverse DCT on one block of coefficients,
1022 * producing a reduced-size 2x2 output block.
1024 * Multiplication-less algorithm.
1027 GLOBAL(void)
1028 jpeg_idct_2x2 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
1029 JCOEFPTR coef_block,
1030 JSAMPARRAY output_buf, JDIMENSION output_col)
1032 DCTELEM tmp0, tmp1, tmp2, tmp3, tmp4, tmp5;
1033 ISLOW_MULT_TYPE * quantptr;
1034 JSAMPROW outptr;
1035 JSAMPLE *range_limit = IDCT_range_limit(cinfo);
1036 ISHIFT_TEMPS
1038 /* Pass 1: process columns from input. */
1040 quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table;
1042 /* Column 0 */
1043 tmp4 = DEQUANTIZE(coef_block[DCTSIZE*0], quantptr[DCTSIZE*0]);
1044 tmp5 = DEQUANTIZE(coef_block[DCTSIZE*1], quantptr[DCTSIZE*1]);
1045 /* Add range center and fudge factor for final descale and range-limit. */
1046 tmp4 += (((DCTELEM) RANGE_CENTER) << 3) + (1 << 2);
1048 tmp0 = tmp4 + tmp5;
1049 tmp2 = tmp4 - tmp5;
1051 /* Column 1 */
1052 tmp4 = DEQUANTIZE(coef_block[DCTSIZE*0+1], quantptr[DCTSIZE*0+1]);
1053 tmp5 = DEQUANTIZE(coef_block[DCTSIZE*1+1], quantptr[DCTSIZE*1+1]);
1055 tmp1 = tmp4 + tmp5;
1056 tmp3 = tmp4 - tmp5;
1058 /* Pass 2: process 2 rows, store into output array. */
1060 /* Row 0 */
1061 outptr = output_buf[0] + output_col;
1063 outptr[0] = range_limit[(int) IRIGHT_SHIFT(tmp0 + tmp1, 3) & RANGE_MASK];
1064 outptr[1] = range_limit[(int) IRIGHT_SHIFT(tmp0 - tmp1, 3) & RANGE_MASK];
1066 /* Row 1 */
1067 outptr = output_buf[1] + output_col;
1069 outptr[0] = range_limit[(int) IRIGHT_SHIFT(tmp2 + tmp3, 3) & RANGE_MASK];
1070 outptr[1] = range_limit[(int) IRIGHT_SHIFT(tmp2 - tmp3, 3) & RANGE_MASK];
1075 * Perform dequantization and inverse DCT on one block of coefficients,
1076 * producing a reduced-size 1x1 output block.
1078 * We hardly need an inverse DCT routine for this: just take the
1079 * average pixel value, which is one-eighth of the DC coefficient.
1082 GLOBAL(void)
1083 jpeg_idct_1x1 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
1084 JCOEFPTR coef_block,
1085 JSAMPARRAY output_buf, JDIMENSION output_col)
1087 DCTELEM dcval;
1088 ISLOW_MULT_TYPE * quantptr;
1089 JSAMPLE *range_limit = IDCT_range_limit(cinfo);
1090 ISHIFT_TEMPS
1092 /* 1x1 is trivial: just take the DC coefficient divided by 8. */
1094 quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table;
1096 dcval = DEQUANTIZE(coef_block[0], quantptr[0]);
1097 /* Add range center and fudge factor for descale and range-limit. */
1098 dcval += (((DCTELEM) RANGE_CENTER) << 3) + (1 << 2);
1100 output_buf[0][output_col] =
1101 range_limit[(int) IRIGHT_SHIFT(dcval, 3) & RANGE_MASK];
1106 * Perform dequantization and inverse DCT on one block of coefficients,
1107 * producing a 9x9 output block.
1109 * Optimized algorithm with 10 multiplications in the 1-D kernel.
1110 * cK represents sqrt(2) * cos(K*pi/18).
1113 GLOBAL(void)
1114 jpeg_idct_9x9 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
1115 JCOEFPTR coef_block,
1116 JSAMPARRAY output_buf, JDIMENSION output_col)
1118 INT32 tmp0, tmp1, tmp2, tmp3, tmp10, tmp11, tmp12, tmp13, tmp14;
1119 INT32 z1, z2, z3, z4;
1120 JCOEFPTR inptr;
1121 ISLOW_MULT_TYPE * quantptr;
1122 int * wsptr;
1123 JSAMPROW outptr;
1124 JSAMPLE *range_limit = IDCT_range_limit(cinfo);
1125 int ctr;
1126 int workspace[8*9]; /* buffers data between passes */
1127 SHIFT_TEMPS
1129 /* Pass 1: process columns from input, store into work array. */
1131 inptr = coef_block;
1132 quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table;
1133 wsptr = workspace;
1134 for (ctr = 0; ctr < 8; ctr++, inptr++, quantptr++, wsptr++) {
1135 /* Even part */
1137 tmp0 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]);
1138 tmp0 <<= CONST_BITS;
1139 /* Add fudge factor here for final descale. */
1140 tmp0 += ONE << (CONST_BITS-PASS1_BITS-1);
1142 z1 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]);
1143 z2 = DEQUANTIZE(inptr[DCTSIZE*4], quantptr[DCTSIZE*4]);
1144 z3 = DEQUANTIZE(inptr[DCTSIZE*6], quantptr[DCTSIZE*6]);
1146 tmp3 = MULTIPLY(z3, FIX(0.707106781)); /* c6 */
1147 tmp1 = tmp0 + tmp3;
1148 tmp2 = tmp0 - tmp3 - tmp3;
1150 tmp0 = MULTIPLY(z1 - z2, FIX(0.707106781)); /* c6 */
1151 tmp11 = tmp2 + tmp0;
1152 tmp14 = tmp2 - tmp0 - tmp0;
1154 tmp0 = MULTIPLY(z1 + z2, FIX(1.328926049)); /* c2 */
1155 tmp2 = MULTIPLY(z1, FIX(1.083350441)); /* c4 */
1156 tmp3 = MULTIPLY(z2, FIX(0.245575608)); /* c8 */
1158 tmp10 = tmp1 + tmp0 - tmp3;
1159 tmp12 = tmp1 - tmp0 + tmp2;
1160 tmp13 = tmp1 - tmp2 + tmp3;
1162 /* Odd part */
1164 z1 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]);
1165 z2 = DEQUANTIZE(inptr[DCTSIZE*3], quantptr[DCTSIZE*3]);
1166 z3 = DEQUANTIZE(inptr[DCTSIZE*5], quantptr[DCTSIZE*5]);
1167 z4 = DEQUANTIZE(inptr[DCTSIZE*7], quantptr[DCTSIZE*7]);
1169 z2 = MULTIPLY(z2, - FIX(1.224744871)); /* -c3 */
1171 tmp2 = MULTIPLY(z1 + z3, FIX(0.909038955)); /* c5 */
1172 tmp3 = MULTIPLY(z1 + z4, FIX(0.483689525)); /* c7 */
1173 tmp0 = tmp2 + tmp3 - z2;
1174 tmp1 = MULTIPLY(z3 - z4, FIX(1.392728481)); /* c1 */
1175 tmp2 += z2 - tmp1;
1176 tmp3 += z2 + tmp1;
1177 tmp1 = MULTIPLY(z1 - z3 - z4, FIX(1.224744871)); /* c3 */
1179 /* Final output stage */
1181 wsptr[8*0] = (int) RIGHT_SHIFT(tmp10 + tmp0, CONST_BITS-PASS1_BITS);
1182 wsptr[8*8] = (int) RIGHT_SHIFT(tmp10 - tmp0, CONST_BITS-PASS1_BITS);
1183 wsptr[8*1] = (int) RIGHT_SHIFT(tmp11 + tmp1, CONST_BITS-PASS1_BITS);
1184 wsptr[8*7] = (int) RIGHT_SHIFT(tmp11 - tmp1, CONST_BITS-PASS1_BITS);
1185 wsptr[8*2] = (int) RIGHT_SHIFT(tmp12 + tmp2, CONST_BITS-PASS1_BITS);
1186 wsptr[8*6] = (int) RIGHT_SHIFT(tmp12 - tmp2, CONST_BITS-PASS1_BITS);
1187 wsptr[8*3] = (int) RIGHT_SHIFT(tmp13 + tmp3, CONST_BITS-PASS1_BITS);
1188 wsptr[8*5] = (int) RIGHT_SHIFT(tmp13 - tmp3, CONST_BITS-PASS1_BITS);
1189 wsptr[8*4] = (int) RIGHT_SHIFT(tmp14, CONST_BITS-PASS1_BITS);
1192 /* Pass 2: process 9 rows from work array, store into output array. */
1194 wsptr = workspace;
1195 for (ctr = 0; ctr < 9; ctr++) {
1196 outptr = output_buf[ctr] + output_col;
1198 /* Even part */
1200 /* Add range center and fudge factor for final descale and range-limit. */
1201 tmp0 = (INT32) wsptr[0] +
1202 ((((INT32) RANGE_CENTER) << (PASS1_BITS+3)) +
1203 (ONE << (PASS1_BITS+2)));
1204 tmp0 <<= CONST_BITS;
1206 z1 = (INT32) wsptr[2];
1207 z2 = (INT32) wsptr[4];
1208 z3 = (INT32) wsptr[6];
1210 tmp3 = MULTIPLY(z3, FIX(0.707106781)); /* c6 */
1211 tmp1 = tmp0 + tmp3;
1212 tmp2 = tmp0 - tmp3 - tmp3;
1214 tmp0 = MULTIPLY(z1 - z2, FIX(0.707106781)); /* c6 */
1215 tmp11 = tmp2 + tmp0;
1216 tmp14 = tmp2 - tmp0 - tmp0;
1218 tmp0 = MULTIPLY(z1 + z2, FIX(1.328926049)); /* c2 */
1219 tmp2 = MULTIPLY(z1, FIX(1.083350441)); /* c4 */
1220 tmp3 = MULTIPLY(z2, FIX(0.245575608)); /* c8 */
1222 tmp10 = tmp1 + tmp0 - tmp3;
1223 tmp12 = tmp1 - tmp0 + tmp2;
1224 tmp13 = tmp1 - tmp2 + tmp3;
1226 /* Odd part */
1228 z1 = (INT32) wsptr[1];
1229 z2 = (INT32) wsptr[3];
1230 z3 = (INT32) wsptr[5];
1231 z4 = (INT32) wsptr[7];
1233 z2 = MULTIPLY(z2, - FIX(1.224744871)); /* -c3 */
1235 tmp2 = MULTIPLY(z1 + z3, FIX(0.909038955)); /* c5 */
1236 tmp3 = MULTIPLY(z1 + z4, FIX(0.483689525)); /* c7 */
1237 tmp0 = tmp2 + tmp3 - z2;
1238 tmp1 = MULTIPLY(z3 - z4, FIX(1.392728481)); /* c1 */
1239 tmp2 += z2 - tmp1;
1240 tmp3 += z2 + tmp1;
1241 tmp1 = MULTIPLY(z1 - z3 - z4, FIX(1.224744871)); /* c3 */
1243 /* Final output stage */
1245 outptr[0] = range_limit[(int) RIGHT_SHIFT(tmp10 + tmp0,
1246 CONST_BITS+PASS1_BITS+3)
1247 & RANGE_MASK];
1248 outptr[8] = range_limit[(int) RIGHT_SHIFT(tmp10 - tmp0,
1249 CONST_BITS+PASS1_BITS+3)
1250 & RANGE_MASK];
1251 outptr[1] = range_limit[(int) RIGHT_SHIFT(tmp11 + tmp1,
1252 CONST_BITS+PASS1_BITS+3)
1253 & RANGE_MASK];
1254 outptr[7] = range_limit[(int) RIGHT_SHIFT(tmp11 - tmp1,
1255 CONST_BITS+PASS1_BITS+3)
1256 & RANGE_MASK];
1257 outptr[2] = range_limit[(int) RIGHT_SHIFT(tmp12 + tmp2,
1258 CONST_BITS+PASS1_BITS+3)
1259 & RANGE_MASK];
1260 outptr[6] = range_limit[(int) RIGHT_SHIFT(tmp12 - tmp2,
1261 CONST_BITS+PASS1_BITS+3)
1262 & RANGE_MASK];
1263 outptr[3] = range_limit[(int) RIGHT_SHIFT(tmp13 + tmp3,
1264 CONST_BITS+PASS1_BITS+3)
1265 & RANGE_MASK];
1266 outptr[5] = range_limit[(int) RIGHT_SHIFT(tmp13 - tmp3,
1267 CONST_BITS+PASS1_BITS+3)
1268 & RANGE_MASK];
1269 outptr[4] = range_limit[(int) RIGHT_SHIFT(tmp14,
1270 CONST_BITS+PASS1_BITS+3)
1271 & RANGE_MASK];
1273 wsptr += 8; /* advance pointer to next row */
1279 * Perform dequantization and inverse DCT on one block of coefficients,
1280 * producing a 10x10 output block.
1282 * Optimized algorithm with 12 multiplications in the 1-D kernel.
1283 * cK represents sqrt(2) * cos(K*pi/20).
1286 GLOBAL(void)
1287 jpeg_idct_10x10 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
1288 JCOEFPTR coef_block,
1289 JSAMPARRAY output_buf, JDIMENSION output_col)
1291 INT32 tmp10, tmp11, tmp12, tmp13, tmp14;
1292 INT32 tmp20, tmp21, tmp22, tmp23, tmp24;
1293 INT32 z1, z2, z3, z4, z5;
1294 JCOEFPTR inptr;
1295 ISLOW_MULT_TYPE * quantptr;
1296 int * wsptr;
1297 JSAMPROW outptr;
1298 JSAMPLE *range_limit = IDCT_range_limit(cinfo);
1299 int ctr;
1300 int workspace[8*10]; /* buffers data between passes */
1301 SHIFT_TEMPS
1303 /* Pass 1: process columns from input, store into work array. */
1305 inptr = coef_block;
1306 quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table;
1307 wsptr = workspace;
1308 for (ctr = 0; ctr < 8; ctr++, inptr++, quantptr++, wsptr++) {
1309 /* Even part */
1311 z3 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]);
1312 z3 <<= CONST_BITS;
1313 /* Add fudge factor here for final descale. */
1314 z3 += ONE << (CONST_BITS-PASS1_BITS-1);
1315 z4 = DEQUANTIZE(inptr[DCTSIZE*4], quantptr[DCTSIZE*4]);
1316 z1 = MULTIPLY(z4, FIX(1.144122806)); /* c4 */
1317 z2 = MULTIPLY(z4, FIX(0.437016024)); /* c8 */
1318 tmp10 = z3 + z1;
1319 tmp11 = z3 - z2;
1321 tmp22 = RIGHT_SHIFT(z3 - ((z1 - z2) << 1), /* c0 = (c4-c8)*2 */
1322 CONST_BITS-PASS1_BITS);
1324 z2 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]);
1325 z3 = DEQUANTIZE(inptr[DCTSIZE*6], quantptr[DCTSIZE*6]);
1327 z1 = MULTIPLY(z2 + z3, FIX(0.831253876)); /* c6 */
1328 tmp12 = z1 + MULTIPLY(z2, FIX(0.513743148)); /* c2-c6 */
1329 tmp13 = z1 - MULTIPLY(z3, FIX(2.176250899)); /* c2+c6 */
1331 tmp20 = tmp10 + tmp12;
1332 tmp24 = tmp10 - tmp12;
1333 tmp21 = tmp11 + tmp13;
1334 tmp23 = tmp11 - tmp13;
1336 /* Odd part */
1338 z1 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]);
1339 z2 = DEQUANTIZE(inptr[DCTSIZE*3], quantptr[DCTSIZE*3]);
1340 z3 = DEQUANTIZE(inptr[DCTSIZE*5], quantptr[DCTSIZE*5]);
1341 z4 = DEQUANTIZE(inptr[DCTSIZE*7], quantptr[DCTSIZE*7]);
1343 tmp11 = z2 + z4;
1344 tmp13 = z2 - z4;
1346 tmp12 = MULTIPLY(tmp13, FIX(0.309016994)); /* (c3-c7)/2 */
1347 z5 = z3 << CONST_BITS;
1349 z2 = MULTIPLY(tmp11, FIX(0.951056516)); /* (c3+c7)/2 */
1350 z4 = z5 + tmp12;
1352 tmp10 = MULTIPLY(z1, FIX(1.396802247)) + z2 + z4; /* c1 */
1353 tmp14 = MULTIPLY(z1, FIX(0.221231742)) - z2 + z4; /* c9 */
1355 z2 = MULTIPLY(tmp11, FIX(0.587785252)); /* (c1-c9)/2 */
1356 z4 = z5 - tmp12 - (tmp13 << (CONST_BITS - 1));
1358 tmp12 = (z1 - tmp13 - z3) << PASS1_BITS;
1360 tmp11 = MULTIPLY(z1, FIX(1.260073511)) - z2 - z4; /* c3 */
1361 tmp13 = MULTIPLY(z1, FIX(0.642039522)) - z2 + z4; /* c7 */
1363 /* Final output stage */
1365 wsptr[8*0] = (int) RIGHT_SHIFT(tmp20 + tmp10, CONST_BITS-PASS1_BITS);
1366 wsptr[8*9] = (int) RIGHT_SHIFT(tmp20 - tmp10, CONST_BITS-PASS1_BITS);
1367 wsptr[8*1] = (int) RIGHT_SHIFT(tmp21 + tmp11, CONST_BITS-PASS1_BITS);
1368 wsptr[8*8] = (int) RIGHT_SHIFT(tmp21 - tmp11, CONST_BITS-PASS1_BITS);
1369 wsptr[8*2] = (int) (tmp22 + tmp12);
1370 wsptr[8*7] = (int) (tmp22 - tmp12);
1371 wsptr[8*3] = (int) RIGHT_SHIFT(tmp23 + tmp13, CONST_BITS-PASS1_BITS);
1372 wsptr[8*6] = (int) RIGHT_SHIFT(tmp23 - tmp13, CONST_BITS-PASS1_BITS);
1373 wsptr[8*4] = (int) RIGHT_SHIFT(tmp24 + tmp14, CONST_BITS-PASS1_BITS);
1374 wsptr[8*5] = (int) RIGHT_SHIFT(tmp24 - tmp14, CONST_BITS-PASS1_BITS);
1377 /* Pass 2: process 10 rows from work array, store into output array. */
1379 wsptr = workspace;
1380 for (ctr = 0; ctr < 10; ctr++) {
1381 outptr = output_buf[ctr] + output_col;
1383 /* Even part */
1385 /* Add range center and fudge factor for final descale and range-limit. */
1386 z3 = (INT32) wsptr[0] +
1387 ((((INT32) RANGE_CENTER) << (PASS1_BITS+3)) +
1388 (ONE << (PASS1_BITS+2)));
1389 z3 <<= CONST_BITS;
1390 z4 = (INT32) wsptr[4];
1391 z1 = MULTIPLY(z4, FIX(1.144122806)); /* c4 */
1392 z2 = MULTIPLY(z4, FIX(0.437016024)); /* c8 */
1393 tmp10 = z3 + z1;
1394 tmp11 = z3 - z2;
1396 tmp22 = z3 - ((z1 - z2) << 1); /* c0 = (c4-c8)*2 */
1398 z2 = (INT32) wsptr[2];
1399 z3 = (INT32) wsptr[6];
1401 z1 = MULTIPLY(z2 + z3, FIX(0.831253876)); /* c6 */
1402 tmp12 = z1 + MULTIPLY(z2, FIX(0.513743148)); /* c2-c6 */
1403 tmp13 = z1 - MULTIPLY(z3, FIX(2.176250899)); /* c2+c6 */
1405 tmp20 = tmp10 + tmp12;
1406 tmp24 = tmp10 - tmp12;
1407 tmp21 = tmp11 + tmp13;
1408 tmp23 = tmp11 - tmp13;
1410 /* Odd part */
1412 z1 = (INT32) wsptr[1];
1413 z2 = (INT32) wsptr[3];
1414 z3 = (INT32) wsptr[5];
1415 z3 <<= CONST_BITS;
1416 z4 = (INT32) wsptr[7];
1418 tmp11 = z2 + z4;
1419 tmp13 = z2 - z4;
1421 tmp12 = MULTIPLY(tmp13, FIX(0.309016994)); /* (c3-c7)/2 */
1423 z2 = MULTIPLY(tmp11, FIX(0.951056516)); /* (c3+c7)/2 */
1424 z4 = z3 + tmp12;
1426 tmp10 = MULTIPLY(z1, FIX(1.396802247)) + z2 + z4; /* c1 */
1427 tmp14 = MULTIPLY(z1, FIX(0.221231742)) - z2 + z4; /* c9 */
1429 z2 = MULTIPLY(tmp11, FIX(0.587785252)); /* (c1-c9)/2 */
1430 z4 = z3 - tmp12 - (tmp13 << (CONST_BITS - 1));
1432 tmp12 = ((z1 - tmp13) << CONST_BITS) - z3;
1434 tmp11 = MULTIPLY(z1, FIX(1.260073511)) - z2 - z4; /* c3 */
1435 tmp13 = MULTIPLY(z1, FIX(0.642039522)) - z2 + z4; /* c7 */
1437 /* Final output stage */
1439 outptr[0] = range_limit[(int) RIGHT_SHIFT(tmp20 + tmp10,
1440 CONST_BITS+PASS1_BITS+3)
1441 & RANGE_MASK];
1442 outptr[9] = range_limit[(int) RIGHT_SHIFT(tmp20 - tmp10,
1443 CONST_BITS+PASS1_BITS+3)
1444 & RANGE_MASK];
1445 outptr[1] = range_limit[(int) RIGHT_SHIFT(tmp21 + tmp11,
1446 CONST_BITS+PASS1_BITS+3)
1447 & RANGE_MASK];
1448 outptr[8] = range_limit[(int) RIGHT_SHIFT(tmp21 - tmp11,
1449 CONST_BITS+PASS1_BITS+3)
1450 & RANGE_MASK];
1451 outptr[2] = range_limit[(int) RIGHT_SHIFT(tmp22 + tmp12,
1452 CONST_BITS+PASS1_BITS+3)
1453 & RANGE_MASK];
1454 outptr[7] = range_limit[(int) RIGHT_SHIFT(tmp22 - tmp12,
1455 CONST_BITS+PASS1_BITS+3)
1456 & RANGE_MASK];
1457 outptr[3] = range_limit[(int) RIGHT_SHIFT(tmp23 + tmp13,
1458 CONST_BITS+PASS1_BITS+3)
1459 & RANGE_MASK];
1460 outptr[6] = range_limit[(int) RIGHT_SHIFT(tmp23 - tmp13,
1461 CONST_BITS+PASS1_BITS+3)
1462 & RANGE_MASK];
1463 outptr[4] = range_limit[(int) RIGHT_SHIFT(tmp24 + tmp14,
1464 CONST_BITS+PASS1_BITS+3)
1465 & RANGE_MASK];
1466 outptr[5] = range_limit[(int) RIGHT_SHIFT(tmp24 - tmp14,
1467 CONST_BITS+PASS1_BITS+3)
1468 & RANGE_MASK];
1470 wsptr += 8; /* advance pointer to next row */
1476 * Perform dequantization and inverse DCT on one block of coefficients,
1477 * producing an 11x11 output block.
1479 * Optimized algorithm with 24 multiplications in the 1-D kernel.
1480 * cK represents sqrt(2) * cos(K*pi/22).
1483 GLOBAL(void)
1484 jpeg_idct_11x11 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
1485 JCOEFPTR coef_block,
1486 JSAMPARRAY output_buf, JDIMENSION output_col)
1488 INT32 tmp10, tmp11, tmp12, tmp13, tmp14;
1489 INT32 tmp20, tmp21, tmp22, tmp23, tmp24, tmp25;
1490 INT32 z1, z2, z3, z4;
1491 JCOEFPTR inptr;
1492 ISLOW_MULT_TYPE * quantptr;
1493 int * wsptr;
1494 JSAMPROW outptr;
1495 JSAMPLE *range_limit = IDCT_range_limit(cinfo);
1496 int ctr;
1497 int workspace[8*11]; /* buffers data between passes */
1498 SHIFT_TEMPS
1500 /* Pass 1: process columns from input, store into work array. */
1502 inptr = coef_block;
1503 quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table;
1504 wsptr = workspace;
1505 for (ctr = 0; ctr < 8; ctr++, inptr++, quantptr++, wsptr++) {
1506 /* Even part */
1508 tmp10 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]);
1509 tmp10 <<= CONST_BITS;
1510 /* Add fudge factor here for final descale. */
1511 tmp10 += ONE << (CONST_BITS-PASS1_BITS-1);
1513 z1 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]);
1514 z2 = DEQUANTIZE(inptr[DCTSIZE*4], quantptr[DCTSIZE*4]);
1515 z3 = DEQUANTIZE(inptr[DCTSIZE*6], quantptr[DCTSIZE*6]);
1517 tmp20 = MULTIPLY(z2 - z3, FIX(2.546640132)); /* c2+c4 */
1518 tmp23 = MULTIPLY(z2 - z1, FIX(0.430815045)); /* c2-c6 */
1519 z4 = z1 + z3;
1520 tmp24 = MULTIPLY(z4, - FIX(1.155664402)); /* -(c2-c10) */
1521 z4 -= z2;
1522 tmp25 = tmp10 + MULTIPLY(z4, FIX(1.356927976)); /* c2 */
1523 tmp21 = tmp20 + tmp23 + tmp25 -
1524 MULTIPLY(z2, FIX(1.821790775)); /* c2+c4+c10-c6 */
1525 tmp20 += tmp25 + MULTIPLY(z3, FIX(2.115825087)); /* c4+c6 */
1526 tmp23 += tmp25 - MULTIPLY(z1, FIX(1.513598477)); /* c6+c8 */
1527 tmp24 += tmp25;
1528 tmp22 = tmp24 - MULTIPLY(z3, FIX(0.788749120)); /* c8+c10 */
1529 tmp24 += MULTIPLY(z2, FIX(1.944413522)) - /* c2+c8 */
1530 MULTIPLY(z1, FIX(1.390975730)); /* c4+c10 */
1531 tmp25 = tmp10 - MULTIPLY(z4, FIX(1.414213562)); /* c0 */
1533 /* Odd part */
1535 z1 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]);
1536 z2 = DEQUANTIZE(inptr[DCTSIZE*3], quantptr[DCTSIZE*3]);
1537 z3 = DEQUANTIZE(inptr[DCTSIZE*5], quantptr[DCTSIZE*5]);
1538 z4 = DEQUANTIZE(inptr[DCTSIZE*7], quantptr[DCTSIZE*7]);
1540 tmp11 = z1 + z2;
1541 tmp14 = MULTIPLY(tmp11 + z3 + z4, FIX(0.398430003)); /* c9 */
1542 tmp11 = MULTIPLY(tmp11, FIX(0.887983902)); /* c3-c9 */
1543 tmp12 = MULTIPLY(z1 + z3, FIX(0.670361295)); /* c5-c9 */
1544 tmp13 = tmp14 + MULTIPLY(z1 + z4, FIX(0.366151574)); /* c7-c9 */
1545 tmp10 = tmp11 + tmp12 + tmp13 -
1546 MULTIPLY(z1, FIX(0.923107866)); /* c7+c5+c3-c1-2*c9 */
1547 z1 = tmp14 - MULTIPLY(z2 + z3, FIX(1.163011579)); /* c7+c9 */
1548 tmp11 += z1 + MULTIPLY(z2, FIX(2.073276588)); /* c1+c7+3*c9-c3 */
1549 tmp12 += z1 - MULTIPLY(z3, FIX(1.192193623)); /* c3+c5-c7-c9 */
1550 z1 = MULTIPLY(z2 + z4, - FIX(1.798248910)); /* -(c1+c9) */
1551 tmp11 += z1;
1552 tmp13 += z1 + MULTIPLY(z4, FIX(2.102458632)); /* c1+c5+c9-c7 */
1553 tmp14 += MULTIPLY(z2, - FIX(1.467221301)) + /* -(c5+c9) */
1554 MULTIPLY(z3, FIX(1.001388905)) - /* c1-c9 */
1555 MULTIPLY(z4, FIX(1.684843907)); /* c3+c9 */
1557 /* Final output stage */
1559 wsptr[8*0] = (int) RIGHT_SHIFT(tmp20 + tmp10, CONST_BITS-PASS1_BITS);
1560 wsptr[8*10] = (int) RIGHT_SHIFT(tmp20 - tmp10, CONST_BITS-PASS1_BITS);
1561 wsptr[8*1] = (int) RIGHT_SHIFT(tmp21 + tmp11, CONST_BITS-PASS1_BITS);
1562 wsptr[8*9] = (int) RIGHT_SHIFT(tmp21 - tmp11, CONST_BITS-PASS1_BITS);
1563 wsptr[8*2] = (int) RIGHT_SHIFT(tmp22 + tmp12, CONST_BITS-PASS1_BITS);
1564 wsptr[8*8] = (int) RIGHT_SHIFT(tmp22 - tmp12, CONST_BITS-PASS1_BITS);
1565 wsptr[8*3] = (int) RIGHT_SHIFT(tmp23 + tmp13, CONST_BITS-PASS1_BITS);
1566 wsptr[8*7] = (int) RIGHT_SHIFT(tmp23 - tmp13, CONST_BITS-PASS1_BITS);
1567 wsptr[8*4] = (int) RIGHT_SHIFT(tmp24 + tmp14, CONST_BITS-PASS1_BITS);
1568 wsptr[8*6] = (int) RIGHT_SHIFT(tmp24 - tmp14, CONST_BITS-PASS1_BITS);
1569 wsptr[8*5] = (int) RIGHT_SHIFT(tmp25, CONST_BITS-PASS1_BITS);
1572 /* Pass 2: process 11 rows from work array, store into output array. */
1574 wsptr = workspace;
1575 for (ctr = 0; ctr < 11; ctr++) {
1576 outptr = output_buf[ctr] + output_col;
1578 /* Even part */
1580 /* Add range center and fudge factor for final descale and range-limit. */
1581 tmp10 = (INT32) wsptr[0] +
1582 ((((INT32) RANGE_CENTER) << (PASS1_BITS+3)) +
1583 (ONE << (PASS1_BITS+2)));
1584 tmp10 <<= CONST_BITS;
1586 z1 = (INT32) wsptr[2];
1587 z2 = (INT32) wsptr[4];
1588 z3 = (INT32) wsptr[6];
1590 tmp20 = MULTIPLY(z2 - z3, FIX(2.546640132)); /* c2+c4 */
1591 tmp23 = MULTIPLY(z2 - z1, FIX(0.430815045)); /* c2-c6 */
1592 z4 = z1 + z3;
1593 tmp24 = MULTIPLY(z4, - FIX(1.155664402)); /* -(c2-c10) */
1594 z4 -= z2;
1595 tmp25 = tmp10 + MULTIPLY(z4, FIX(1.356927976)); /* c2 */
1596 tmp21 = tmp20 + tmp23 + tmp25 -
1597 MULTIPLY(z2, FIX(1.821790775)); /* c2+c4+c10-c6 */
1598 tmp20 += tmp25 + MULTIPLY(z3, FIX(2.115825087)); /* c4+c6 */
1599 tmp23 += tmp25 - MULTIPLY(z1, FIX(1.513598477)); /* c6+c8 */
1600 tmp24 += tmp25;
1601 tmp22 = tmp24 - MULTIPLY(z3, FIX(0.788749120)); /* c8+c10 */
1602 tmp24 += MULTIPLY(z2, FIX(1.944413522)) - /* c2+c8 */
1603 MULTIPLY(z1, FIX(1.390975730)); /* c4+c10 */
1604 tmp25 = tmp10 - MULTIPLY(z4, FIX(1.414213562)); /* c0 */
1606 /* Odd part */
1608 z1 = (INT32) wsptr[1];
1609 z2 = (INT32) wsptr[3];
1610 z3 = (INT32) wsptr[5];
1611 z4 = (INT32) wsptr[7];
1613 tmp11 = z1 + z2;
1614 tmp14 = MULTIPLY(tmp11 + z3 + z4, FIX(0.398430003)); /* c9 */
1615 tmp11 = MULTIPLY(tmp11, FIX(0.887983902)); /* c3-c9 */
1616 tmp12 = MULTIPLY(z1 + z3, FIX(0.670361295)); /* c5-c9 */
1617 tmp13 = tmp14 + MULTIPLY(z1 + z4, FIX(0.366151574)); /* c7-c9 */
1618 tmp10 = tmp11 + tmp12 + tmp13 -
1619 MULTIPLY(z1, FIX(0.923107866)); /* c7+c5+c3-c1-2*c9 */
1620 z1 = tmp14 - MULTIPLY(z2 + z3, FIX(1.163011579)); /* c7+c9 */
1621 tmp11 += z1 + MULTIPLY(z2, FIX(2.073276588)); /* c1+c7+3*c9-c3 */
1622 tmp12 += z1 - MULTIPLY(z3, FIX(1.192193623)); /* c3+c5-c7-c9 */
1623 z1 = MULTIPLY(z2 + z4, - FIX(1.798248910)); /* -(c1+c9) */
1624 tmp11 += z1;
1625 tmp13 += z1 + MULTIPLY(z4, FIX(2.102458632)); /* c1+c5+c9-c7 */
1626 tmp14 += MULTIPLY(z2, - FIX(1.467221301)) + /* -(c5+c9) */
1627 MULTIPLY(z3, FIX(1.001388905)) - /* c1-c9 */
1628 MULTIPLY(z4, FIX(1.684843907)); /* c3+c9 */
1630 /* Final output stage */
1632 outptr[0] = range_limit[(int) RIGHT_SHIFT(tmp20 + tmp10,
1633 CONST_BITS+PASS1_BITS+3)
1634 & RANGE_MASK];
1635 outptr[10] = range_limit[(int) RIGHT_SHIFT(tmp20 - tmp10,
1636 CONST_BITS+PASS1_BITS+3)
1637 & RANGE_MASK];
1638 outptr[1] = range_limit[(int) RIGHT_SHIFT(tmp21 + tmp11,
1639 CONST_BITS+PASS1_BITS+3)
1640 & RANGE_MASK];
1641 outptr[9] = range_limit[(int) RIGHT_SHIFT(tmp21 - tmp11,
1642 CONST_BITS+PASS1_BITS+3)
1643 & RANGE_MASK];
1644 outptr[2] = range_limit[(int) RIGHT_SHIFT(tmp22 + tmp12,
1645 CONST_BITS+PASS1_BITS+3)
1646 & RANGE_MASK];
1647 outptr[8] = range_limit[(int) RIGHT_SHIFT(tmp22 - tmp12,
1648 CONST_BITS+PASS1_BITS+3)
1649 & RANGE_MASK];
1650 outptr[3] = range_limit[(int) RIGHT_SHIFT(tmp23 + tmp13,
1651 CONST_BITS+PASS1_BITS+3)
1652 & RANGE_MASK];
1653 outptr[7] = range_limit[(int) RIGHT_SHIFT(tmp23 - tmp13,
1654 CONST_BITS+PASS1_BITS+3)
1655 & RANGE_MASK];
1656 outptr[4] = range_limit[(int) RIGHT_SHIFT(tmp24 + tmp14,
1657 CONST_BITS+PASS1_BITS+3)
1658 & RANGE_MASK];
1659 outptr[6] = range_limit[(int) RIGHT_SHIFT(tmp24 - tmp14,
1660 CONST_BITS+PASS1_BITS+3)
1661 & RANGE_MASK];
1662 outptr[5] = range_limit[(int) RIGHT_SHIFT(tmp25,
1663 CONST_BITS+PASS1_BITS+3)
1664 & RANGE_MASK];
1666 wsptr += 8; /* advance pointer to next row */
1672 * Perform dequantization and inverse DCT on one block of coefficients,
1673 * producing a 12x12 output block.
1675 * Optimized algorithm with 15 multiplications in the 1-D kernel.
1676 * cK represents sqrt(2) * cos(K*pi/24).
1679 GLOBAL(void)
1680 jpeg_idct_12x12 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
1681 JCOEFPTR coef_block,
1682 JSAMPARRAY output_buf, JDIMENSION output_col)
1684 INT32 tmp10, tmp11, tmp12, tmp13, tmp14, tmp15;
1685 INT32 tmp20, tmp21, tmp22, tmp23, tmp24, tmp25;
1686 INT32 z1, z2, z3, z4;
1687 JCOEFPTR inptr;
1688 ISLOW_MULT_TYPE * quantptr;
1689 int * wsptr;
1690 JSAMPROW outptr;
1691 JSAMPLE *range_limit = IDCT_range_limit(cinfo);
1692 int ctr;
1693 int workspace[8*12]; /* buffers data between passes */
1694 SHIFT_TEMPS
1696 /* Pass 1: process columns from input, store into work array. */
1698 inptr = coef_block;
1699 quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table;
1700 wsptr = workspace;
1701 for (ctr = 0; ctr < 8; ctr++, inptr++, quantptr++, wsptr++) {
1702 /* Even part */
1704 z3 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]);
1705 z3 <<= CONST_BITS;
1706 /* Add fudge factor here for final descale. */
1707 z3 += ONE << (CONST_BITS-PASS1_BITS-1);
1709 z4 = DEQUANTIZE(inptr[DCTSIZE*4], quantptr[DCTSIZE*4]);
1710 z4 = MULTIPLY(z4, FIX(1.224744871)); /* c4 */
1712 tmp10 = z3 + z4;
1713 tmp11 = z3 - z4;
1715 z1 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]);
1716 z4 = MULTIPLY(z1, FIX(1.366025404)); /* c2 */
1717 z1 <<= CONST_BITS;
1718 z2 = DEQUANTIZE(inptr[DCTSIZE*6], quantptr[DCTSIZE*6]);
1719 z2 <<= CONST_BITS;
1721 tmp12 = z1 - z2;
1723 tmp21 = z3 + tmp12;
1724 tmp24 = z3 - tmp12;
1726 tmp12 = z4 + z2;
1728 tmp20 = tmp10 + tmp12;
1729 tmp25 = tmp10 - tmp12;
1731 tmp12 = z4 - z1 - z2;
1733 tmp22 = tmp11 + tmp12;
1734 tmp23 = tmp11 - tmp12;
1736 /* Odd part */
1738 z1 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]);
1739 z2 = DEQUANTIZE(inptr[DCTSIZE*3], quantptr[DCTSIZE*3]);
1740 z3 = DEQUANTIZE(inptr[DCTSIZE*5], quantptr[DCTSIZE*5]);
1741 z4 = DEQUANTIZE(inptr[DCTSIZE*7], quantptr[DCTSIZE*7]);
1743 tmp11 = MULTIPLY(z2, FIX(1.306562965)); /* c3 */
1744 tmp14 = MULTIPLY(z2, - FIX_0_541196100); /* -c9 */
1746 tmp10 = z1 + z3;
1747 tmp15 = MULTIPLY(tmp10 + z4, FIX(0.860918669)); /* c7 */
1748 tmp12 = tmp15 + MULTIPLY(tmp10, FIX(0.261052384)); /* c5-c7 */
1749 tmp10 = tmp12 + tmp11 + MULTIPLY(z1, FIX(0.280143716)); /* c1-c5 */
1750 tmp13 = MULTIPLY(z3 + z4, - FIX(1.045510580)); /* -(c7+c11) */
1751 tmp12 += tmp13 + tmp14 - MULTIPLY(z3, FIX(1.478575242)); /* c1+c5-c7-c11 */
1752 tmp13 += tmp15 - tmp11 + MULTIPLY(z4, FIX(1.586706681)); /* c1+c11 */
1753 tmp15 += tmp14 - MULTIPLY(z1, FIX(0.676326758)) - /* c7-c11 */
1754 MULTIPLY(z4, FIX(1.982889723)); /* c5+c7 */
1756 z1 -= z4;
1757 z2 -= z3;
1758 z3 = MULTIPLY(z1 + z2, FIX_0_541196100); /* c9 */
1759 tmp11 = z3 + MULTIPLY(z1, FIX_0_765366865); /* c3-c9 */
1760 tmp14 = z3 - MULTIPLY(z2, FIX_1_847759065); /* c3+c9 */
1762 /* Final output stage */
1764 wsptr[8*0] = (int) RIGHT_SHIFT(tmp20 + tmp10, CONST_BITS-PASS1_BITS);
1765 wsptr[8*11] = (int) RIGHT_SHIFT(tmp20 - tmp10, CONST_BITS-PASS1_BITS);
1766 wsptr[8*1] = (int) RIGHT_SHIFT(tmp21 + tmp11, CONST_BITS-PASS1_BITS);
1767 wsptr[8*10] = (int) RIGHT_SHIFT(tmp21 - tmp11, CONST_BITS-PASS1_BITS);
1768 wsptr[8*2] = (int) RIGHT_SHIFT(tmp22 + tmp12, CONST_BITS-PASS1_BITS);
1769 wsptr[8*9] = (int) RIGHT_SHIFT(tmp22 - tmp12, CONST_BITS-PASS1_BITS);
1770 wsptr[8*3] = (int) RIGHT_SHIFT(tmp23 + tmp13, CONST_BITS-PASS1_BITS);
1771 wsptr[8*8] = (int) RIGHT_SHIFT(tmp23 - tmp13, CONST_BITS-PASS1_BITS);
1772 wsptr[8*4] = (int) RIGHT_SHIFT(tmp24 + tmp14, CONST_BITS-PASS1_BITS);
1773 wsptr[8*7] = (int) RIGHT_SHIFT(tmp24 - tmp14, CONST_BITS-PASS1_BITS);
1774 wsptr[8*5] = (int) RIGHT_SHIFT(tmp25 + tmp15, CONST_BITS-PASS1_BITS);
1775 wsptr[8*6] = (int) RIGHT_SHIFT(tmp25 - tmp15, CONST_BITS-PASS1_BITS);
1778 /* Pass 2: process 12 rows from work array, store into output array. */
1780 wsptr = workspace;
1781 for (ctr = 0; ctr < 12; ctr++) {
1782 outptr = output_buf[ctr] + output_col;
1784 /* Even part */
1786 /* Add range center and fudge factor for final descale and range-limit. */
1787 z3 = (INT32) wsptr[0] +
1788 ((((INT32) RANGE_CENTER) << (PASS1_BITS+3)) +
1789 (ONE << (PASS1_BITS+2)));
1790 z3 <<= CONST_BITS;
1792 z4 = (INT32) wsptr[4];
1793 z4 = MULTIPLY(z4, FIX(1.224744871)); /* c4 */
1795 tmp10 = z3 + z4;
1796 tmp11 = z3 - z4;
1798 z1 = (INT32) wsptr[2];
1799 z4 = MULTIPLY(z1, FIX(1.366025404)); /* c2 */
1800 z1 <<= CONST_BITS;
1801 z2 = (INT32) wsptr[6];
1802 z2 <<= CONST_BITS;
1804 tmp12 = z1 - z2;
1806 tmp21 = z3 + tmp12;
1807 tmp24 = z3 - tmp12;
1809 tmp12 = z4 + z2;
1811 tmp20 = tmp10 + tmp12;
1812 tmp25 = tmp10 - tmp12;
1814 tmp12 = z4 - z1 - z2;
1816 tmp22 = tmp11 + tmp12;
1817 tmp23 = tmp11 - tmp12;
1819 /* Odd part */
1821 z1 = (INT32) wsptr[1];
1822 z2 = (INT32) wsptr[3];
1823 z3 = (INT32) wsptr[5];
1824 z4 = (INT32) wsptr[7];
1826 tmp11 = MULTIPLY(z2, FIX(1.306562965)); /* c3 */
1827 tmp14 = MULTIPLY(z2, - FIX_0_541196100); /* -c9 */
1829 tmp10 = z1 + z3;
1830 tmp15 = MULTIPLY(tmp10 + z4, FIX(0.860918669)); /* c7 */
1831 tmp12 = tmp15 + MULTIPLY(tmp10, FIX(0.261052384)); /* c5-c7 */
1832 tmp10 = tmp12 + tmp11 + MULTIPLY(z1, FIX(0.280143716)); /* c1-c5 */
1833 tmp13 = MULTIPLY(z3 + z4, - FIX(1.045510580)); /* -(c7+c11) */
1834 tmp12 += tmp13 + tmp14 - MULTIPLY(z3, FIX(1.478575242)); /* c1+c5-c7-c11 */
1835 tmp13 += tmp15 - tmp11 + MULTIPLY(z4, FIX(1.586706681)); /* c1+c11 */
1836 tmp15 += tmp14 - MULTIPLY(z1, FIX(0.676326758)) - /* c7-c11 */
1837 MULTIPLY(z4, FIX(1.982889723)); /* c5+c7 */
1839 z1 -= z4;
1840 z2 -= z3;
1841 z3 = MULTIPLY(z1 + z2, FIX_0_541196100); /* c9 */
1842 tmp11 = z3 + MULTIPLY(z1, FIX_0_765366865); /* c3-c9 */
1843 tmp14 = z3 - MULTIPLY(z2, FIX_1_847759065); /* c3+c9 */
1845 /* Final output stage */
1847 outptr[0] = range_limit[(int) RIGHT_SHIFT(tmp20 + tmp10,
1848 CONST_BITS+PASS1_BITS+3)
1849 & RANGE_MASK];
1850 outptr[11] = range_limit[(int) RIGHT_SHIFT(tmp20 - tmp10,
1851 CONST_BITS+PASS1_BITS+3)
1852 & RANGE_MASK];
1853 outptr[1] = range_limit[(int) RIGHT_SHIFT(tmp21 + tmp11,
1854 CONST_BITS+PASS1_BITS+3)
1855 & RANGE_MASK];
1856 outptr[10] = range_limit[(int) RIGHT_SHIFT(tmp21 - tmp11,
1857 CONST_BITS+PASS1_BITS+3)
1858 & RANGE_MASK];
1859 outptr[2] = range_limit[(int) RIGHT_SHIFT(tmp22 + tmp12,
1860 CONST_BITS+PASS1_BITS+3)
1861 & RANGE_MASK];
1862 outptr[9] = range_limit[(int) RIGHT_SHIFT(tmp22 - tmp12,
1863 CONST_BITS+PASS1_BITS+3)
1864 & RANGE_MASK];
1865 outptr[3] = range_limit[(int) RIGHT_SHIFT(tmp23 + tmp13,
1866 CONST_BITS+PASS1_BITS+3)
1867 & RANGE_MASK];
1868 outptr[8] = range_limit[(int) RIGHT_SHIFT(tmp23 - tmp13,
1869 CONST_BITS+PASS1_BITS+3)
1870 & RANGE_MASK];
1871 outptr[4] = range_limit[(int) RIGHT_SHIFT(tmp24 + tmp14,
1872 CONST_BITS+PASS1_BITS+3)
1873 & RANGE_MASK];
1874 outptr[7] = range_limit[(int) RIGHT_SHIFT(tmp24 - tmp14,
1875 CONST_BITS+PASS1_BITS+3)
1876 & RANGE_MASK];
1877 outptr[5] = range_limit[(int) RIGHT_SHIFT(tmp25 + tmp15,
1878 CONST_BITS+PASS1_BITS+3)
1879 & RANGE_MASK];
1880 outptr[6] = range_limit[(int) RIGHT_SHIFT(tmp25 - tmp15,
1881 CONST_BITS+PASS1_BITS+3)
1882 & RANGE_MASK];
1884 wsptr += 8; /* advance pointer to next row */
1890 * Perform dequantization and inverse DCT on one block of coefficients,
1891 * producing a 13x13 output block.
1893 * Optimized algorithm with 29 multiplications in the 1-D kernel.
1894 * cK represents sqrt(2) * cos(K*pi/26).
/*
 * jpeg_idct_13x13: two-pass separable inverse DCT producing 13 rows of
 * 13 samples from 8 columns of 8 input coefficients.
 *
 * cinfo      - handed to IDCT_range_limit() to obtain the range_limit table.
 * compptr    - its dct_table member, cast to ISLOW_MULT_TYPE pointer, supplies
 *              the per-coefficient multipliers given to DEQUANTIZE().
 * coef_block - input coefficients; entry k of column ctr is read at
 *              inptr[DCTSIZE*k] for k = 0..7.
 * output_buf, output_col - row ctr is written at output_buf[ctr] + output_col,
 *              13 samples per row, for 13 rows.
 *
 * Pass 1 turns each input column into 13 intermediate values stored in
 * workspace[] (8 ints per row, 13 rows).  Pass 2 turns each workspace row of
 * 8 values into 13 output samples looked up through range_limit[].
 *
 * NOTE(review): DEQUANTIZE, MULTIPLY, FIX, RIGHT_SHIFT, ONE, CONST_BITS,
 * PASS1_BITS, RANGE_CENTER and RANGE_MASK are defined outside this block; the
 * remarks below assume their usual fixed-point meaning - confirm there.
 * NOTE(review): the leading decimal number on every line, and the absence of
 * brace-only and comment-delimiter lines, look like residue from a web source
 * viewer - confirm against the upstream file before building.
 */
1897 GLOBAL(void)
1898 jpeg_idct_13x13 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
1899 JCOEFPTR coef_block,
1900 JSAMPARRAY output_buf, JDIMENSION output_col)
1902 INT32 tmp10, tmp11, tmp12, tmp13, tmp14, tmp15;
1903 INT32 tmp20, tmp21, tmp22, tmp23, tmp24, tmp25, tmp26;
1904 INT32 z1, z2, z3, z4;
1905 JCOEFPTR inptr;
1906 ISLOW_MULT_TYPE * quantptr;
1907 int * wsptr;
1908 JSAMPROW outptr;
1909 JSAMPLE *range_limit = IDCT_range_limit(cinfo);
1910 int ctr;
1911 int workspace[8*13]; /* buffers data between passes */
1912 SHIFT_TEMPS
1914 /* Pass 1: process columns from input, store into work array. */
/* inptr, quantptr and wsptr all advance by one per iteration, so each
 * iteration handles one column; wsptr[8*k] addresses row k of that column. */
1916 inptr = coef_block;
1917 quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table;
1918 wsptr = workspace;
1919 for (ctr = 0; ctr < 8; ctr++, inptr++, quantptr++, wsptr++) {
1920 /* Even part */
/* z1 holds the DC term shifted up by CONST_BITS.  The term added below is
 * half of ONE << (CONST_BITS-PASS1_BITS), which is the amount shifted out in
 * the output stage, so that shift rounds instead of truncating. */
1922 z1 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]);
1923 z1 <<= CONST_BITS;
1924 /* Add fudge factor here for final descale. */
1925 z1 += ONE << (CONST_BITS-PASS1_BITS-1);
1927 z2 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]);
1928 z3 = DEQUANTIZE(inptr[DCTSIZE*4], quantptr[DCTSIZE*4]);
1929 z4 = DEQUANTIZE(inptr[DCTSIZE*6], quantptr[DCTSIZE*6]);
/* tmp10 and tmp11 are the sum and difference of coefficients 4 and 6; tmp12
 * and tmp13 are scratch values recomputed for each pair of even sums. */
1931 tmp10 = z3 + z4;
1932 tmp11 = z3 - z4;
1934 tmp12 = MULTIPLY(tmp10, FIX(1.155388986)); /* (c4+c6)/2 */
1935 tmp13 = MULTIPLY(tmp11, FIX(0.096834934)) + z1; /* (c4-c6)/2 */
1937 tmp20 = MULTIPLY(z2, FIX(1.373119086)) + tmp12 + tmp13; /* c2 */
1938 tmp22 = MULTIPLY(z2, FIX(0.501487041)) - tmp12 + tmp13; /* c10 */
1940 tmp12 = MULTIPLY(tmp10, FIX(0.316450131)); /* (c8-c12)/2 */
1941 tmp13 = MULTIPLY(tmp11, FIX(0.486914739)) + z1; /* (c8+c12)/2 */
1943 tmp21 = MULTIPLY(z2, FIX(1.058554052)) - tmp12 + tmp13; /* c6 */
1944 tmp25 = MULTIPLY(z2, - FIX(1.252223920)) + tmp12 + tmp13; /* c4 */
/* In this pair z1 is subtracted into tmp13 and tmp13 is then subtracted, so
 * z1 still enters tmp23 and tmp24 with a positive sign. */
1946 tmp12 = MULTIPLY(tmp10, FIX(0.435816023)); /* (c2-c10)/2 */
1947 tmp13 = MULTIPLY(tmp11, FIX(0.937303064)) - z1; /* (c2+c10)/2 */
1949 tmp23 = MULTIPLY(z2, - FIX(0.170464608)) - tmp12 - tmp13; /* c12 */
1950 tmp24 = MULTIPLY(z2, - FIX(0.803364869)) + tmp12 - tmp13; /* c8 */
1952 tmp26 = MULTIPLY(tmp11 - z2, FIX(1.414213562)) + z1; /* c0 */
1954 /* Odd part */
/* z1..z4 are reloaded with coefficients 1, 3, 5 and 7; tmp10..tmp15 are
 * reused from here on as the six odd-part sums.  z1 is overwritten with a
 * shared product further down, after its last use as coefficient 1. */
1956 z1 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]);
1957 z2 = DEQUANTIZE(inptr[DCTSIZE*3], quantptr[DCTSIZE*3]);
1958 z3 = DEQUANTIZE(inptr[DCTSIZE*5], quantptr[DCTSIZE*5]);
1959 z4 = DEQUANTIZE(inptr[DCTSIZE*7], quantptr[DCTSIZE*7]);
1961 tmp11 = MULTIPLY(z1 + z2, FIX(1.322312651)); /* c3 */
1962 tmp12 = MULTIPLY(z1 + z3, FIX(1.163874945)); /* c5 */
1963 tmp15 = z1 + z4;
1964 tmp13 = MULTIPLY(tmp15, FIX(0.937797057)); /* c7 */
1965 tmp10 = tmp11 + tmp12 + tmp13 -
1966 MULTIPLY(z1, FIX(2.020082300)); /* c7+c5+c3-c1 */
1967 tmp14 = MULTIPLY(z2 + z3, - FIX(0.338443458)); /* -c11 */
1968 tmp11 += tmp14 + MULTIPLY(z2, FIX(0.837223564)); /* c5+c9+c11-c3 */
1969 tmp12 += tmp14 - MULTIPLY(z3, FIX(1.572116027)); /* c1+c5-c9-c11 */
1970 tmp14 = MULTIPLY(z2 + z4, - FIX(1.163874945)); /* -c5 */
1971 tmp11 += tmp14;
1972 tmp13 += tmp14 + MULTIPLY(z4, FIX(2.205608352)); /* c3+c5+c9-c7 */
1973 tmp14 = MULTIPLY(z3 + z4, - FIX(0.657217813)); /* -c9 */
1974 tmp12 += tmp14;
1975 tmp13 += tmp14;
1976 tmp15 = MULTIPLY(tmp15, FIX(0.338443458)); /* c11 */
1977 tmp14 = tmp15 + MULTIPLY(z1, FIX(0.318774355)) - /* c9-c11 */
1978 MULTIPLY(z2, FIX(0.466105296)); /* c1-c7 */
1979 z1 = MULTIPLY(z3 - z2, FIX(0.937797057)); /* c7 */
1980 tmp14 += z1;
1981 tmp15 += z1 + MULTIPLY(z3, FIX(0.384515595)) - /* c3-c7 */
1982 MULTIPLY(z4, FIX(1.742345811)); /* c1+c11 */
1984 /* Final output stage */
/* Rows k and 12-k share the even sum tmp2k and differ only in the sign of
 * the odd sum tmp1k; the center row 6 takes the even sum tmp26 alone. */
1986 wsptr[8*0] = (int) RIGHT_SHIFT(tmp20 + tmp10, CONST_BITS-PASS1_BITS);
1987 wsptr[8*12] = (int) RIGHT_SHIFT(tmp20 - tmp10, CONST_BITS-PASS1_BITS);
1988 wsptr[8*1] = (int) RIGHT_SHIFT(tmp21 + tmp11, CONST_BITS-PASS1_BITS);
1989 wsptr[8*11] = (int) RIGHT_SHIFT(tmp21 - tmp11, CONST_BITS-PASS1_BITS);
1990 wsptr[8*2] = (int) RIGHT_SHIFT(tmp22 + tmp12, CONST_BITS-PASS1_BITS);
1991 wsptr[8*10] = (int) RIGHT_SHIFT(tmp22 - tmp12, CONST_BITS-PASS1_BITS);
1992 wsptr[8*3] = (int) RIGHT_SHIFT(tmp23 + tmp13, CONST_BITS-PASS1_BITS);
1993 wsptr[8*9] = (int) RIGHT_SHIFT(tmp23 - tmp13, CONST_BITS-PASS1_BITS);
1994 wsptr[8*4] = (int) RIGHT_SHIFT(tmp24 + tmp14, CONST_BITS-PASS1_BITS);
1995 wsptr[8*8] = (int) RIGHT_SHIFT(tmp24 - tmp14, CONST_BITS-PASS1_BITS);
1996 wsptr[8*5] = (int) RIGHT_SHIFT(tmp25 + tmp15, CONST_BITS-PASS1_BITS);
1997 wsptr[8*7] = (int) RIGHT_SHIFT(tmp25 - tmp15, CONST_BITS-PASS1_BITS);
1998 wsptr[8*6] = (int) RIGHT_SHIFT(tmp26, CONST_BITS-PASS1_BITS);
2001 /* Pass 2: process 13 rows from work array, store into output array. */
/* The arithmetic repeats pass 1 with wsptr[0..7] as the eight inputs; wsptr
 * moves forward by 8 after each row. */
2003 wsptr = workspace;
2004 for (ctr = 0; ctr < 13; ctr++) {
2005 outptr = output_buf[ctr] + output_col;
2007 /* Even part */
/* z1 carries the DC term plus the range-center and rounding offsets.  Since
 * z1 enters every even sum once, each of the 13 outputs receives them. */
2009 /* Add range center and fudge factor for final descale and range-limit. */
2010 z1 = (INT32) wsptr[0] +
2011 ((((INT32) RANGE_CENTER) << (PASS1_BITS+3)) +
2012 (ONE << (PASS1_BITS+2)));
2013 z1 <<= CONST_BITS;
2015 z2 = (INT32) wsptr[2];
2016 z3 = (INT32) wsptr[4];
2017 z4 = (INT32) wsptr[6];
2019 tmp10 = z3 + z4;
2020 tmp11 = z3 - z4;
2022 tmp12 = MULTIPLY(tmp10, FIX(1.155388986)); /* (c4+c6)/2 */
2023 tmp13 = MULTIPLY(tmp11, FIX(0.096834934)) + z1; /* (c4-c6)/2 */
2025 tmp20 = MULTIPLY(z2, FIX(1.373119086)) + tmp12 + tmp13; /* c2 */
2026 tmp22 = MULTIPLY(z2, FIX(0.501487041)) - tmp12 + tmp13; /* c10 */
2028 tmp12 = MULTIPLY(tmp10, FIX(0.316450131)); /* (c8-c12)/2 */
2029 tmp13 = MULTIPLY(tmp11, FIX(0.486914739)) + z1; /* (c8+c12)/2 */
2031 tmp21 = MULTIPLY(z2, FIX(1.058554052)) - tmp12 + tmp13; /* c6 */
2032 tmp25 = MULTIPLY(z2, - FIX(1.252223920)) + tmp12 + tmp13; /* c4 */
2034 tmp12 = MULTIPLY(tmp10, FIX(0.435816023)); /* (c2-c10)/2 */
2035 tmp13 = MULTIPLY(tmp11, FIX(0.937303064)) - z1; /* (c2+c10)/2 */
2037 tmp23 = MULTIPLY(z2, - FIX(0.170464608)) - tmp12 - tmp13; /* c12 */
2038 tmp24 = MULTIPLY(z2, - FIX(0.803364869)) + tmp12 - tmp13; /* c8 */
2040 tmp26 = MULTIPLY(tmp11 - z2, FIX(1.414213562)) + z1; /* c0 */
2042 /* Odd part */
2044 z1 = (INT32) wsptr[1];
2045 z2 = (INT32) wsptr[3];
2046 z3 = (INT32) wsptr[5];
2047 z4 = (INT32) wsptr[7];
2049 tmp11 = MULTIPLY(z1 + z2, FIX(1.322312651)); /* c3 */
2050 tmp12 = MULTIPLY(z1 + z3, FIX(1.163874945)); /* c5 */
2051 tmp15 = z1 + z4;
2052 tmp13 = MULTIPLY(tmp15, FIX(0.937797057)); /* c7 */
2053 tmp10 = tmp11 + tmp12 + tmp13 -
2054 MULTIPLY(z1, FIX(2.020082300)); /* c7+c5+c3-c1 */
2055 tmp14 = MULTIPLY(z2 + z3, - FIX(0.338443458)); /* -c11 */
2056 tmp11 += tmp14 + MULTIPLY(z2, FIX(0.837223564)); /* c5+c9+c11-c3 */
2057 tmp12 += tmp14 - MULTIPLY(z3, FIX(1.572116027)); /* c1+c5-c9-c11 */
2058 tmp14 = MULTIPLY(z2 + z4, - FIX(1.163874945)); /* -c5 */
2059 tmp11 += tmp14;
2060 tmp13 += tmp14 + MULTIPLY(z4, FIX(2.205608352)); /* c3+c5+c9-c7 */
2061 tmp14 = MULTIPLY(z3 + z4, - FIX(0.657217813)); /* -c9 */
2062 tmp12 += tmp14;
2063 tmp13 += tmp14;
2064 tmp15 = MULTIPLY(tmp15, FIX(0.338443458)); /* c11 */
2065 tmp14 = tmp15 + MULTIPLY(z1, FIX(0.318774355)) - /* c9-c11 */
2066 MULTIPLY(z2, FIX(0.466105296)); /* c1-c7 */
2067 z1 = MULTIPLY(z3 - z2, FIX(0.937797057)); /* c7 */
2068 tmp14 += z1;
2069 tmp15 += z1 + MULTIPLY(z3, FIX(0.384515595)) - /* c3-c7 */
2070 MULTIPLY(z4, FIX(1.742345811)); /* c1+c11 */
2072 /* Final output stage */
/* Each sum is shifted right by CONST_BITS+PASS1_BITS+3 and masked with
 * RANGE_MASK before it indexes range_limit[].  Columns k and 12-k mirror each
 * other as in pass 1; column 6 takes the even sum tmp26 alone. */
2074 outptr[0] = range_limit[(int) RIGHT_SHIFT(tmp20 + tmp10,
2075 CONST_BITS+PASS1_BITS+3)
2076 & RANGE_MASK];
2077 outptr[12] = range_limit[(int) RIGHT_SHIFT(tmp20 - tmp10,
2078 CONST_BITS+PASS1_BITS+3)
2079 & RANGE_MASK];
2080 outptr[1] = range_limit[(int) RIGHT_SHIFT(tmp21 + tmp11,
2081 CONST_BITS+PASS1_BITS+3)
2082 & RANGE_MASK];
2083 outptr[11] = range_limit[(int) RIGHT_SHIFT(tmp21 - tmp11,
2084 CONST_BITS+PASS1_BITS+3)
2085 & RANGE_MASK];
2086 outptr[2] = range_limit[(int) RIGHT_SHIFT(tmp22 + tmp12,
2087 CONST_BITS+PASS1_BITS+3)
2088 & RANGE_MASK];
2089 outptr[10] = range_limit[(int) RIGHT_SHIFT(tmp22 - tmp12,
2090 CONST_BITS+PASS1_BITS+3)
2091 & RANGE_MASK];
2092 outptr[3] = range_limit[(int) RIGHT_SHIFT(tmp23 + tmp13,
2093 CONST_BITS+PASS1_BITS+3)
2094 & RANGE_MASK];
2095 outptr[9] = range_limit[(int) RIGHT_SHIFT(tmp23 - tmp13,
2096 CONST_BITS+PASS1_BITS+3)
2097 & RANGE_MASK];
2098 outptr[4] = range_limit[(int) RIGHT_SHIFT(tmp24 + tmp14,
2099 CONST_BITS+PASS1_BITS+3)
2100 & RANGE_MASK];
2101 outptr[8] = range_limit[(int) RIGHT_SHIFT(tmp24 - tmp14,
2102 CONST_BITS+PASS1_BITS+3)
2103 & RANGE_MASK];
2104 outptr[5] = range_limit[(int) RIGHT_SHIFT(tmp25 + tmp15,
2105 CONST_BITS+PASS1_BITS+3)
2106 & RANGE_MASK];
2107 outptr[7] = range_limit[(int) RIGHT_SHIFT(tmp25 - tmp15,
2108 CONST_BITS+PASS1_BITS+3)
2109 & RANGE_MASK];
2110 outptr[6] = range_limit[(int) RIGHT_SHIFT(tmp26,
2111 CONST_BITS+PASS1_BITS+3)
2112 & RANGE_MASK];
2114 wsptr += 8; /* advance pointer to next row */
2120 * Perform dequantization and inverse DCT on one block of coefficients,
2121 * producing a 14x14 output block.
2123 * Optimized algorithm with 20 multiplications in the 1-D kernel.
2124 * cK represents sqrt(2) * cos(K*pi/28).
/*
 * jpeg_idct_14x14: two-pass separable inverse DCT producing 14 rows of
 * 14 samples from 8 columns of 8 input coefficients.
 *
 * cinfo      - handed to IDCT_range_limit() to obtain the range_limit table.
 * compptr    - its dct_table member, cast to ISLOW_MULT_TYPE pointer, supplies
 *              the per-coefficient multipliers given to DEQUANTIZE().
 * coef_block - input coefficients; entry k of column ctr is read at
 *              inptr[DCTSIZE*k] for k = 0..7.
 * output_buf, output_col - row ctr is written at output_buf[ctr] + output_col,
 *              14 samples per row, for 14 rows.
 *
 * Pass 1 turns each input column into 14 intermediate values stored in
 * workspace[] (8 ints per row, 14 rows).  Pass 2 turns each workspace row of
 * 8 values into 14 output samples looked up through range_limit[].
 *
 * NOTE(review): DEQUANTIZE, MULTIPLY, FIX, RIGHT_SHIFT, ONE, CONST_BITS,
 * PASS1_BITS, RANGE_CENTER and RANGE_MASK are defined outside this block; the
 * remarks below assume their usual fixed-point meaning - confirm there.
 * NOTE(review): the leading decimal number on every line, and the absence of
 * brace-only and comment-delimiter lines, look like residue from a web source
 * viewer - confirm against the upstream file before building.
 */
2127 GLOBAL(void)
2128 jpeg_idct_14x14 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
2129 JCOEFPTR coef_block,
2130 JSAMPARRAY output_buf, JDIMENSION output_col)
2132 INT32 tmp10, tmp11, tmp12, tmp13, tmp14, tmp15, tmp16;
2133 INT32 tmp20, tmp21, tmp22, tmp23, tmp24, tmp25, tmp26;
2134 INT32 z1, z2, z3, z4;
2135 JCOEFPTR inptr;
2136 ISLOW_MULT_TYPE * quantptr;
2137 int * wsptr;
2138 JSAMPROW outptr;
2139 JSAMPLE *range_limit = IDCT_range_limit(cinfo);
2140 int ctr;
2141 int workspace[8*14]; /* buffers data between passes */
2142 SHIFT_TEMPS
2144 /* Pass 1: process columns from input, store into work array. */
/* inptr, quantptr and wsptr all advance by one per iteration, so each
 * iteration handles one column; wsptr[8*k] addresses row k of that column. */
2146 inptr = coef_block;
2147 quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table;
2148 wsptr = workspace;
2149 for (ctr = 0; ctr < 8; ctr++, inptr++, quantptr++, wsptr++) {
2150 /* Even part */
/* z1 holds the DC term shifted up by CONST_BITS.  The term added below is
 * half of ONE << (CONST_BITS-PASS1_BITS), which is the amount shifted out in
 * the output stage, so that shift rounds instead of truncating. */
2152 z1 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]);
2153 z1 <<= CONST_BITS;
2154 /* Add fudge factor here for final descale. */
2155 z1 += ONE << (CONST_BITS-PASS1_BITS-1);
2156 z4 = DEQUANTIZE(inptr[DCTSIZE*4], quantptr[DCTSIZE*4]);
2157 z2 = MULTIPLY(z4, FIX(1.274162392)); /* c4 */
2158 z3 = MULTIPLY(z4, FIX(0.314692123)); /* c12 */
2159 z4 = MULTIPLY(z4, FIX(0.881747734)); /* c8 */
2161 tmp10 = z1 + z2;
2162 tmp11 = z1 + z3;
2163 tmp12 = z1 - z4;
/* Unlike the other even sums, tmp23 is descaled right here.  Its odd-part
 * partner tmp13 is built further down already at PASS1_BITS scale, so rows 3
 * and 10 are stored without another shift. */
2165 tmp23 = RIGHT_SHIFT(z1 - ((z2 + z3 - z4) << 1), /* c0 = (c4+c12-c8)*2 */
2166 CONST_BITS-PASS1_BITS);
2168 z1 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]);
2169 z2 = DEQUANTIZE(inptr[DCTSIZE*6], quantptr[DCTSIZE*6]);
2171 z3 = MULTIPLY(z1 + z2, FIX(1.105676686)); /* c6 */
2173 tmp13 = z3 + MULTIPLY(z1, FIX(0.273079590)); /* c2-c6 */
2174 tmp14 = z3 - MULTIPLY(z2, FIX(1.719280954)); /* c6+c10 */
2175 tmp15 = MULTIPLY(z1, FIX(0.613604268)) - /* c10 */
2176 MULTIPLY(z2, FIX(1.378756276)); /* c2 */
2178 tmp20 = tmp10 + tmp13;
2179 tmp26 = tmp10 - tmp13;
2180 tmp21 = tmp11 + tmp14;
2181 tmp25 = tmp11 - tmp14;
2182 tmp22 = tmp12 + tmp15;
2183 tmp24 = tmp12 - tmp15;
2185 /* Odd part */
/* z1..z4 are reloaded with coefficients 1, 3, 5 and 7.  tmp13 temporarily
 * holds coefficient 7 shifted up by CONST_BITS so it can be added to the
 * MULTIPLY products directly; it is replaced by the real odd sum for rows 3
 * and 10 at the end of this section. */
2187 z1 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]);
2188 z2 = DEQUANTIZE(inptr[DCTSIZE*3], quantptr[DCTSIZE*3]);
2189 z3 = DEQUANTIZE(inptr[DCTSIZE*5], quantptr[DCTSIZE*5]);
2190 z4 = DEQUANTIZE(inptr[DCTSIZE*7], quantptr[DCTSIZE*7]);
2191 tmp13 = z4 << CONST_BITS;
2193 tmp14 = z1 + z3;
2194 tmp11 = MULTIPLY(z1 + z2, FIX(1.334852607)); /* c3 */
2195 tmp12 = MULTIPLY(tmp14, FIX(1.197448846)); /* c5 */
2196 tmp10 = tmp11 + tmp12 + tmp13 - MULTIPLY(z1, FIX(1.126980169)); /* c3+c5-c1 */
2197 tmp14 = MULTIPLY(tmp14, FIX(0.752406978)); /* c9 */
2198 tmp16 = tmp14 - MULTIPLY(z1, FIX(1.061150426)); /* c9+c11-c13 */
/* From here z1 is a running combination: after the next update it is
 * (coefficient 1 - coefficient 3), and coefficient 7 is added a few lines
 * later, just before z4 is overwritten with a product. */
2199 z1 -= z2;
2200 tmp15 = MULTIPLY(z1, FIX(0.467085129)) - tmp13; /* c11 */
2201 tmp16 += tmp15;
2202 z1 += z4;
2203 z4 = MULTIPLY(z2 + z3, - FIX(0.158341681)) - tmp13; /* -c13 */
2204 tmp11 += z4 - MULTIPLY(z2, FIX(0.424103948)); /* c3-c9-c13 */
2205 tmp12 += z4 - MULTIPLY(z3, FIX(2.373959773)); /* c3+c5-c13 */
2206 z4 = MULTIPLY(z3 - z2, FIX(1.405321284)); /* c1 */
2207 tmp14 += z4 + tmp13 - MULTIPLY(z3, FIX(1.6906431334)); /* c1+c9-c11 */
2208 tmp15 += z4 + MULTIPLY(z2, FIX(0.674957567)); /* c1+c11-c5 */
/* tmp13 becomes (coef1 - coef3 - coef5 + coef7) scaled by PASS1_BITS only,
 * matching the already-descaled tmp23. */
2210 tmp13 = (z1 - z3) << PASS1_BITS;
2212 /* Final output stage */
/* Rows k and 13-k share the even sum tmp2k and differ only in the sign of
 * the odd sum tmp1k.  Rows 3 and 10 skip RIGHT_SHIFT because both operands
 * were brought to the output scale above. */
2214 wsptr[8*0] = (int) RIGHT_SHIFT(tmp20 + tmp10, CONST_BITS-PASS1_BITS);
2215 wsptr[8*13] = (int) RIGHT_SHIFT(tmp20 - tmp10, CONST_BITS-PASS1_BITS);
2216 wsptr[8*1] = (int) RIGHT_SHIFT(tmp21 + tmp11, CONST_BITS-PASS1_BITS);
2217 wsptr[8*12] = (int) RIGHT_SHIFT(tmp21 - tmp11, CONST_BITS-PASS1_BITS);
2218 wsptr[8*2] = (int) RIGHT_SHIFT(tmp22 + tmp12, CONST_BITS-PASS1_BITS);
2219 wsptr[8*11] = (int) RIGHT_SHIFT(tmp22 - tmp12, CONST_BITS-PASS1_BITS);
2220 wsptr[8*3] = (int) (tmp23 + tmp13);
2221 wsptr[8*10] = (int) (tmp23 - tmp13);
2222 wsptr[8*4] = (int) RIGHT_SHIFT(tmp24 + tmp14, CONST_BITS-PASS1_BITS);
2223 wsptr[8*9] = (int) RIGHT_SHIFT(tmp24 - tmp14, CONST_BITS-PASS1_BITS);
2224 wsptr[8*5] = (int) RIGHT_SHIFT(tmp25 + tmp15, CONST_BITS-PASS1_BITS);
2225 wsptr[8*8] = (int) RIGHT_SHIFT(tmp25 - tmp15, CONST_BITS-PASS1_BITS);
2226 wsptr[8*6] = (int) RIGHT_SHIFT(tmp26 + tmp16, CONST_BITS-PASS1_BITS);
2227 wsptr[8*7] = (int) RIGHT_SHIFT(tmp26 - tmp16, CONST_BITS-PASS1_BITS);
2230 /* Pass 2: process 14 rows from work array, store into output array. */
/* Same butterfly as pass 1 with wsptr[0..7] as the eight inputs; wsptr moves
 * forward by 8 after each row. */
2232 wsptr = workspace;
2233 for (ctr = 0; ctr < 14; ctr++) {
2234 outptr = output_buf[ctr] + output_col;
2236 /* Even part */
/* z1 carries the DC term plus the range-center and rounding offsets.  Since
 * z1 enters every even sum once, each of the 14 outputs receives them. */
2238 /* Add range center and fudge factor for final descale and range-limit. */
2239 z1 = (INT32) wsptr[0] +
2240 ((((INT32) RANGE_CENTER) << (PASS1_BITS+3)) +
2241 (ONE << (PASS1_BITS+2)));
2242 z1 <<= CONST_BITS;
2243 z4 = (INT32) wsptr[4];
2244 z2 = MULTIPLY(z4, FIX(1.274162392)); /* c4 */
2245 z3 = MULTIPLY(z4, FIX(0.314692123)); /* c12 */
2246 z4 = MULTIPLY(z4, FIX(0.881747734)); /* c8 */
2248 tmp10 = z1 + z2;
2249 tmp11 = z1 + z3;
2250 tmp12 = z1 - z4;
/* In this pass tmp23 stays at full scale and is shifted together with the
 * other sums in the output stage. */
2252 tmp23 = z1 - ((z2 + z3 - z4) << 1); /* c0 = (c4+c12-c8)*2 */
2254 z1 = (INT32) wsptr[2];
2255 z2 = (INT32) wsptr[6];
2257 z3 = MULTIPLY(z1 + z2, FIX(1.105676686)); /* c6 */
2259 tmp13 = z3 + MULTIPLY(z1, FIX(0.273079590)); /* c2-c6 */
2260 tmp14 = z3 - MULTIPLY(z2, FIX(1.719280954)); /* c6+c10 */
2261 tmp15 = MULTIPLY(z1, FIX(0.613604268)) - /* c10 */
2262 MULTIPLY(z2, FIX(1.378756276)); /* c2 */
2264 tmp20 = tmp10 + tmp13;
2265 tmp26 = tmp10 - tmp13;
2266 tmp21 = tmp11 + tmp14;
2267 tmp25 = tmp11 - tmp14;
2268 tmp22 = tmp12 + tmp15;
2269 tmp24 = tmp12 - tmp15;
2271 /* Odd part */
/* Here z4 itself is shifted up by CONST_BITS and used wherever pass 1 used
 * its temporary tmp13; tmp13 is free to hold shared products instead. */
2273 z1 = (INT32) wsptr[1];
2274 z2 = (INT32) wsptr[3];
2275 z3 = (INT32) wsptr[5];
2276 z4 = (INT32) wsptr[7];
2277 z4 <<= CONST_BITS;
2279 tmp14 = z1 + z3;
2280 tmp11 = MULTIPLY(z1 + z2, FIX(1.334852607)); /* c3 */
2281 tmp12 = MULTIPLY(tmp14, FIX(1.197448846)); /* c5 */
2282 tmp10 = tmp11 + tmp12 + z4 - MULTIPLY(z1, FIX(1.126980169)); /* c3+c5-c1 */
2283 tmp14 = MULTIPLY(tmp14, FIX(0.752406978)); /* c9 */
2284 tmp16 = tmp14 - MULTIPLY(z1, FIX(1.061150426)); /* c9+c11-c13 */
2285 z1 -= z2;
2286 tmp15 = MULTIPLY(z1, FIX(0.467085129)) - z4; /* c11 */
2287 tmp16 += tmp15;
2288 tmp13 = MULTIPLY(z2 + z3, - FIX(0.158341681)) - z4; /* -c13 */
2289 tmp11 += tmp13 - MULTIPLY(z2, FIX(0.424103948)); /* c3-c9-c13 */
2290 tmp12 += tmp13 - MULTIPLY(z3, FIX(2.373959773)); /* c3+c5-c13 */
2291 tmp13 = MULTIPLY(z3 - z2, FIX(1.405321284)); /* c1 */
2292 tmp14 += tmp13 + z4 - MULTIPLY(z3, FIX(1.6906431334)); /* c1+c9-c11 */
2293 tmp15 += tmp13 + MULTIPLY(z2, FIX(0.674957567)); /* c1+c11-c5 */
/* Same combination as pass 1 (in1 - in3 - in5 + in7), kept at CONST_BITS
 * scale because z4 was pre-shifted above. */
2295 tmp13 = ((z1 - z3) << CONST_BITS) + z4;
2297 /* Final output stage */
/* Each sum is shifted right by CONST_BITS+PASS1_BITS+3 and masked with
 * RANGE_MASK before it indexes range_limit[].  Columns k and 13-k mirror each
 * other as even sum plus or minus odd sum. */
2299 outptr[0] = range_limit[(int) RIGHT_SHIFT(tmp20 + tmp10,
2300 CONST_BITS+PASS1_BITS+3)
2301 & RANGE_MASK];
2302 outptr[13] = range_limit[(int) RIGHT_SHIFT(tmp20 - tmp10,
2303 CONST_BITS+PASS1_BITS+3)
2304 & RANGE_MASK];
2305 outptr[1] = range_limit[(int) RIGHT_SHIFT(tmp21 + tmp11,
2306 CONST_BITS+PASS1_BITS+3)
2307 & RANGE_MASK];
2308 outptr[12] = range_limit[(int) RIGHT_SHIFT(tmp21 - tmp11,
2309 CONST_BITS+PASS1_BITS+3)
2310 & RANGE_MASK];
2311 outptr[2] = range_limit[(int) RIGHT_SHIFT(tmp22 + tmp12,
2312 CONST_BITS+PASS1_BITS+3)
2313 & RANGE_MASK];
2314 outptr[11] = range_limit[(int) RIGHT_SHIFT(tmp22 - tmp12,
2315 CONST_BITS+PASS1_BITS+3)
2316 & RANGE_MASK];
2317 outptr[3] = range_limit[(int) RIGHT_SHIFT(tmp23 + tmp13,
2318 CONST_BITS+PASS1_BITS+3)
2319 & RANGE_MASK];
2320 outptr[10] = range_limit[(int) RIGHT_SHIFT(tmp23 - tmp13,
2321 CONST_BITS+PASS1_BITS+3)
2322 & RANGE_MASK];
2323 outptr[4] = range_limit[(int) RIGHT_SHIFT(tmp24 + tmp14,
2324 CONST_BITS+PASS1_BITS+3)
2325 & RANGE_MASK];
2326 outptr[9] = range_limit[(int) RIGHT_SHIFT(tmp24 - tmp14,
2327 CONST_BITS+PASS1_BITS+3)
2328 & RANGE_MASK];
2329 outptr[5] = range_limit[(int) RIGHT_SHIFT(tmp25 + tmp15,
2330 CONST_BITS+PASS1_BITS+3)
2331 & RANGE_MASK];
2332 outptr[8] = range_limit[(int) RIGHT_SHIFT(tmp25 - tmp15,
2333 CONST_BITS+PASS1_BITS+3)
2334 & RANGE_MASK];
2335 outptr[6] = range_limit[(int) RIGHT_SHIFT(tmp26 + tmp16,
2336 CONST_BITS+PASS1_BITS+3)
2337 & RANGE_MASK];
2338 outptr[7] = range_limit[(int) RIGHT_SHIFT(tmp26 - tmp16,
2339 CONST_BITS+PASS1_BITS+3)
2340 & RANGE_MASK];
2342 wsptr += 8; /* advance pointer to next row */
2348 * Perform dequantization and inverse DCT on one block of coefficients,
2349 * producing a 15x15 output block.
2351 * Optimized algorithm with 22 multiplications in the 1-D kernel.
2352 * cK represents sqrt(2) * cos(K*pi/30).
/*
 * jpeg_idct_15x15: two-pass separable inverse DCT producing 15 rows of
 * 15 samples from 8 columns of 8 input coefficients.
 *
 * cinfo      - handed to IDCT_range_limit() to obtain the range_limit table.
 * compptr    - its dct_table member, cast to ISLOW_MULT_TYPE pointer, supplies
 *              the per-coefficient multipliers given to DEQUANTIZE().
 * coef_block - input coefficients; entry k of column ctr is read at
 *              inptr[DCTSIZE*k] for k = 0..7.
 * output_buf, output_col - row ctr is written at output_buf[ctr] + output_col,
 *              15 samples per row, for 15 rows.
 *
 * Pass 1 turns each input column into 15 intermediate values stored in
 * workspace[] (8 ints per row, 15 rows).  Pass 2 turns each workspace row of
 * 8 values into 15 output samples looked up through range_limit[].
 *
 * NOTE(review): DEQUANTIZE, MULTIPLY, FIX, RIGHT_SHIFT, ONE, CONST_BITS,
 * PASS1_BITS, RANGE_CENTER and RANGE_MASK are defined outside this block; the
 * remarks below assume their usual fixed-point meaning - confirm there.
 * NOTE(review): the leading decimal number on every line, and the absence of
 * brace-only and comment-delimiter lines, look like residue from a web source
 * viewer - confirm against the upstream file before building.
 */
2355 GLOBAL(void)
2356 jpeg_idct_15x15 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
2357 JCOEFPTR coef_block,
2358 JSAMPARRAY output_buf, JDIMENSION output_col)
2360 INT32 tmp10, tmp11, tmp12, tmp13, tmp14, tmp15, tmp16;
2361 INT32 tmp20, tmp21, tmp22, tmp23, tmp24, tmp25, tmp26, tmp27;
2362 INT32 z1, z2, z3, z4;
2363 JCOEFPTR inptr;
2364 ISLOW_MULT_TYPE * quantptr;
2365 int * wsptr;
2366 JSAMPROW outptr;
2367 JSAMPLE *range_limit = IDCT_range_limit(cinfo);
2368 int ctr;
2369 int workspace[8*15]; /* buffers data between passes */
2370 SHIFT_TEMPS
2372 /* Pass 1: process columns from input, store into work array. */
/* inptr, quantptr and wsptr all advance by one per iteration, so each
 * iteration handles one column; wsptr[8*k] addresses row k of that column. */
2374 inptr = coef_block;
2375 quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table;
2376 wsptr = workspace;
2377 for (ctr = 0; ctr < 8; ctr++, inptr++, quantptr++, wsptr++) {
2378 /* Even part */
/* z1 holds the DC term shifted up by CONST_BITS.  The term added below is
 * half of ONE << (CONST_BITS-PASS1_BITS), which is the amount shifted out in
 * the output stage, so that shift rounds instead of truncating. */
2380 z1 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]);
2381 z1 <<= CONST_BITS;
2382 /* Add fudge factor here for final descale. */
2383 z1 += ONE << (CONST_BITS-PASS1_BITS-1);
2385 z2 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]);
2386 z3 = DEQUANTIZE(inptr[DCTSIZE*4], quantptr[DCTSIZE*4]);
2387 z4 = DEQUANTIZE(inptr[DCTSIZE*6], quantptr[DCTSIZE*6]);
/* Coefficient 6 is folded into three DC-based bases: tmp12, tmp13 and the
 * updated z1.  tmp10 and tmp11 are scratch and are overwritten below. */
2389 tmp10 = MULTIPLY(z4, FIX(0.437016024)); /* c12 */
2390 tmp11 = MULTIPLY(z4, FIX(1.144122806)); /* c6 */
2392 tmp12 = z1 - tmp10;
2393 tmp13 = z1 + tmp11;
2394 z1 -= (tmp11 - tmp10) << 1; /* c0 = (c6-c12)*2 */
/* z4 and z3 now become the difference and sum of coefficients 2 and 4; z2
 * keeps coefficient 2 until it is replaced by its own product. */
2396 z4 = z2 - z3;
2397 z3 += z2;
2398 tmp10 = MULTIPLY(z3, FIX(1.337628990)); /* (c2+c4)/2 */
2399 tmp11 = MULTIPLY(z4, FIX(0.045680613)); /* (c2-c4)/2 */
2400 z2 = MULTIPLY(z2, FIX(1.439773946)); /* c4+c14 */
2402 tmp20 = tmp13 + tmp10 + tmp11;
2403 tmp23 = tmp12 - tmp10 + tmp11 + z2;
2405 tmp10 = MULTIPLY(z3, FIX(0.547059574)); /* (c8+c14)/2 */
2406 tmp11 = MULTIPLY(z4, FIX(0.399234004)); /* (c8-c14)/2 */
2408 tmp25 = tmp13 - tmp10 - tmp11;
2409 tmp26 = tmp12 + tmp10 - tmp11 - z2;
2411 tmp10 = MULTIPLY(z3, FIX(0.790569415)); /* (c6+c12)/2 */
2412 tmp11 = MULTIPLY(z4, FIX(0.353553391)); /* (c6-c12)/2 */
2414 tmp21 = tmp12 + tmp10 + tmp11;
2415 tmp24 = tmp13 - tmp10 + tmp11;
/* tmp11 is doubled in place, so tmp22 adds twice and tmp27 subtracts four
 * times the product computed just above. */
2416 tmp11 += tmp11;
2417 tmp22 = z1 + tmp11; /* c10 = c6-c12 */
2418 tmp27 = z1 - tmp11 - tmp11; /* c0 = (c6-c12)*2 */
2420 /* Odd part */
/* z4 is used twice: first as coefficient 5, whose only use is the product
 * kept in z3, then reloaded with coefficient 7. */
2422 z1 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]);
2423 z2 = DEQUANTIZE(inptr[DCTSIZE*3], quantptr[DCTSIZE*3]);
2424 z4 = DEQUANTIZE(inptr[DCTSIZE*5], quantptr[DCTSIZE*5]);
2425 z3 = MULTIPLY(z4, FIX(1.224744871)); /* c5 */
2426 z4 = DEQUANTIZE(inptr[DCTSIZE*7], quantptr[DCTSIZE*7]);
/* tmp13 and tmp15 serve as scratch for tmp11 and tmp14 first and are then
 * reassigned to start their own odd sums. */
2428 tmp13 = z2 - z4;
2429 tmp15 = MULTIPLY(z1 + tmp13, FIX(0.831253876)); /* c9 */
2430 tmp11 = tmp15 + MULTIPLY(z1, FIX(0.513743148)); /* c3-c9 */
2431 tmp14 = tmp15 - MULTIPLY(tmp13, FIX(2.176250899)); /* c3+c9 */
2433 tmp13 = MULTIPLY(z2, - FIX(0.831253876)); /* -c9 */
2434 tmp15 = MULTIPLY(z2, - FIX(1.344997024)); /* -c3 */
2435 z2 = z1 - z4;
2436 tmp12 = z3 + MULTIPLY(z2, FIX(1.406466353)); /* c1 */
/* tmp12 is a shared base for tmp10 and tmp16 and is only afterwards given
 * its final value for rows 2 and 12. */
2438 tmp10 = tmp12 + MULTIPLY(z4, FIX(2.457431844)) - tmp15; /* c1+c7 */
2439 tmp16 = tmp12 - MULTIPLY(z1, FIX(1.112434820)) + tmp13; /* c1-c13 */
2440 tmp12 = MULTIPLY(z2, FIX(1.224744871)) - z3; /* c5 */
2441 z2 = MULTIPLY(z1 + z4, FIX(0.575212477)); /* c11 */
2442 tmp13 += z2 + MULTIPLY(z1, FIX(0.475753014)) - z3; /* c7-c11 */
2443 tmp15 += z2 - MULTIPLY(z4, FIX(0.869244010)) + z3; /* c11+c13 */
2445 /* Final output stage */
/* Rows k and 14-k share the even sum tmp2k and differ only in the sign of
 * the odd sum tmp1k; the center row 7 takes the even sum tmp27 alone. */
2447 wsptr[8*0] = (int) RIGHT_SHIFT(tmp20 + tmp10, CONST_BITS-PASS1_BITS);
2448 wsptr[8*14] = (int) RIGHT_SHIFT(tmp20 - tmp10, CONST_BITS-PASS1_BITS);
2449 wsptr[8*1] = (int) RIGHT_SHIFT(tmp21 + tmp11, CONST_BITS-PASS1_BITS);
2450 wsptr[8*13] = (int) RIGHT_SHIFT(tmp21 - tmp11, CONST_BITS-PASS1_BITS);
2451 wsptr[8*2] = (int) RIGHT_SHIFT(tmp22 + tmp12, CONST_BITS-PASS1_BITS);
2452 wsptr[8*12] = (int) RIGHT_SHIFT(tmp22 - tmp12, CONST_BITS-PASS1_BITS);
2453 wsptr[8*3] = (int) RIGHT_SHIFT(tmp23 + tmp13, CONST_BITS-PASS1_BITS);
2454 wsptr[8*11] = (int) RIGHT_SHIFT(tmp23 - tmp13, CONST_BITS-PASS1_BITS);
2455 wsptr[8*4] = (int) RIGHT_SHIFT(tmp24 + tmp14, CONST_BITS-PASS1_BITS);
2456 wsptr[8*10] = (int) RIGHT_SHIFT(tmp24 - tmp14, CONST_BITS-PASS1_BITS);
2457 wsptr[8*5] = (int) RIGHT_SHIFT(tmp25 + tmp15, CONST_BITS-PASS1_BITS);
2458 wsptr[8*9] = (int) RIGHT_SHIFT(tmp25 - tmp15, CONST_BITS-PASS1_BITS);
2459 wsptr[8*6] = (int) RIGHT_SHIFT(tmp26 + tmp16, CONST_BITS-PASS1_BITS);
2460 wsptr[8*8] = (int) RIGHT_SHIFT(tmp26 - tmp16, CONST_BITS-PASS1_BITS);
2461 wsptr[8*7] = (int) RIGHT_SHIFT(tmp27, CONST_BITS-PASS1_BITS);
2464 /* Pass 2: process 15 rows from work array, store into output array. */
/* The arithmetic repeats pass 1 with wsptr[0..7] as the eight inputs; wsptr
 * moves forward by 8 after each row. */
2466 wsptr = workspace;
2467 for (ctr = 0; ctr < 15; ctr++) {
2468 outptr = output_buf[ctr] + output_col;
2470 /* Even part */
/* z1 carries the DC term plus the range-center and rounding offsets.  Since
 * z1 enters every even sum once, each of the 15 outputs receives them. */
2472 /* Add range center and fudge factor for final descale and range-limit. */
2473 z1 = (INT32) wsptr[0] +
2474 ((((INT32) RANGE_CENTER) << (PASS1_BITS+3)) +
2475 (ONE << (PASS1_BITS+2)));
2476 z1 <<= CONST_BITS;
2478 z2 = (INT32) wsptr[2];
2479 z3 = (INT32) wsptr[4];
2480 z4 = (INT32) wsptr[6];
2482 tmp10 = MULTIPLY(z4, FIX(0.437016024)); /* c12 */
2483 tmp11 = MULTIPLY(z4, FIX(1.144122806)); /* c6 */
2485 tmp12 = z1 - tmp10;
2486 tmp13 = z1 + tmp11;
2487 z1 -= (tmp11 - tmp10) << 1; /* c0 = (c6-c12)*2 */
2489 z4 = z2 - z3;
2490 z3 += z2;
2491 tmp10 = MULTIPLY(z3, FIX(1.337628990)); /* (c2+c4)/2 */
2492 tmp11 = MULTIPLY(z4, FIX(0.045680613)); /* (c2-c4)/2 */
2493 z2 = MULTIPLY(z2, FIX(1.439773946)); /* c4+c14 */
2495 tmp20 = tmp13 + tmp10 + tmp11;
2496 tmp23 = tmp12 - tmp10 + tmp11 + z2;
2498 tmp10 = MULTIPLY(z3, FIX(0.547059574)); /* (c8+c14)/2 */
2499 tmp11 = MULTIPLY(z4, FIX(0.399234004)); /* (c8-c14)/2 */
2501 tmp25 = tmp13 - tmp10 - tmp11;
2502 tmp26 = tmp12 + tmp10 - tmp11 - z2;
2504 tmp10 = MULTIPLY(z3, FIX(0.790569415)); /* (c6+c12)/2 */
2505 tmp11 = MULTIPLY(z4, FIX(0.353553391)); /* (c6-c12)/2 */
2507 tmp21 = tmp12 + tmp10 + tmp11;
2508 tmp24 = tmp13 - tmp10 + tmp11;
/* As in pass 1, tmp11 is doubled in place before tmp22 and tmp27 use it. */
2509 tmp11 += tmp11;
2510 tmp22 = z1 + tmp11; /* c10 = c6-c12 */
2511 tmp27 = z1 - tmp11 - tmp11; /* c0 = (c6-c12)*2 */
2513 /* Odd part */
/* z4 first holds input 5 (consumed into z3) and is then reloaded with
 * input 7, mirroring pass 1. */
2515 z1 = (INT32) wsptr[1];
2516 z2 = (INT32) wsptr[3];
2517 z4 = (INT32) wsptr[5];
2518 z3 = MULTIPLY(z4, FIX(1.224744871)); /* c5 */
2519 z4 = (INT32) wsptr[7];
2521 tmp13 = z2 - z4;
2522 tmp15 = MULTIPLY(z1 + tmp13, FIX(0.831253876)); /* c9 */
2523 tmp11 = tmp15 + MULTIPLY(z1, FIX(0.513743148)); /* c3-c9 */
2524 tmp14 = tmp15 - MULTIPLY(tmp13, FIX(2.176250899)); /* c3+c9 */
2526 tmp13 = MULTIPLY(z2, - FIX(0.831253876)); /* -c9 */
2527 tmp15 = MULTIPLY(z2, - FIX(1.344997024)); /* -c3 */
2528 z2 = z1 - z4;
2529 tmp12 = z3 + MULTIPLY(z2, FIX(1.406466353)); /* c1 */
2531 tmp10 = tmp12 + MULTIPLY(z4, FIX(2.457431844)) - tmp15; /* c1+c7 */
2532 tmp16 = tmp12 - MULTIPLY(z1, FIX(1.112434820)) + tmp13; /* c1-c13 */
2533 tmp12 = MULTIPLY(z2, FIX(1.224744871)) - z3; /* c5 */
2534 z2 = MULTIPLY(z1 + z4, FIX(0.575212477)); /* c11 */
2535 tmp13 += z2 + MULTIPLY(z1, FIX(0.475753014)) - z3; /* c7-c11 */
2536 tmp15 += z2 - MULTIPLY(z4, FIX(0.869244010)) + z3; /* c11+c13 */
2538 /* Final output stage */
/* Each sum is shifted right by CONST_BITS+PASS1_BITS+3 and masked with
 * RANGE_MASK before it indexes range_limit[].  Columns k and 14-k mirror each
 * other as in pass 1; column 7 takes the even sum tmp27 alone. */
2540 outptr[0] = range_limit[(int) RIGHT_SHIFT(tmp20 + tmp10,
2541 CONST_BITS+PASS1_BITS+3)
2542 & RANGE_MASK];
2543 outptr[14] = range_limit[(int) RIGHT_SHIFT(tmp20 - tmp10,
2544 CONST_BITS+PASS1_BITS+3)
2545 & RANGE_MASK];
2546 outptr[1] = range_limit[(int) RIGHT_SHIFT(tmp21 + tmp11,
2547 CONST_BITS+PASS1_BITS+3)
2548 & RANGE_MASK];
2549 outptr[13] = range_limit[(int) RIGHT_SHIFT(tmp21 - tmp11,
2550 CONST_BITS+PASS1_BITS+3)
2551 & RANGE_MASK];
2552 outptr[2] = range_limit[(int) RIGHT_SHIFT(tmp22 + tmp12,
2553 CONST_BITS+PASS1_BITS+3)
2554 & RANGE_MASK];
2555 outptr[12] = range_limit[(int) RIGHT_SHIFT(tmp22 - tmp12,
2556 CONST_BITS+PASS1_BITS+3)
2557 & RANGE_MASK];
2558 outptr[3] = range_limit[(int) RIGHT_SHIFT(tmp23 + tmp13,
2559 CONST_BITS+PASS1_BITS+3)
2560 & RANGE_MASK];
2561 outptr[11] = range_limit[(int) RIGHT_SHIFT(tmp23 - tmp13,
2562 CONST_BITS+PASS1_BITS+3)
2563 & RANGE_MASK];
2564 outptr[4] = range_limit[(int) RIGHT_SHIFT(tmp24 + tmp14,
2565 CONST_BITS+PASS1_BITS+3)
2566 & RANGE_MASK];
2567 outptr[10] = range_limit[(int) RIGHT_SHIFT(tmp24 - tmp14,
2568 CONST_BITS+PASS1_BITS+3)
2569 & RANGE_MASK];
2570 outptr[5] = range_limit[(int) RIGHT_SHIFT(tmp25 + tmp15,
2571 CONST_BITS+PASS1_BITS+3)
2572 & RANGE_MASK];
2573 outptr[9] = range_limit[(int) RIGHT_SHIFT(tmp25 - tmp15,
2574 CONST_BITS+PASS1_BITS+3)
2575 & RANGE_MASK];
2576 outptr[6] = range_limit[(int) RIGHT_SHIFT(tmp26 + tmp16,
2577 CONST_BITS+PASS1_BITS+3)
2578 & RANGE_MASK];
2579 outptr[8] = range_limit[(int) RIGHT_SHIFT(tmp26 - tmp16,
2580 CONST_BITS+PASS1_BITS+3)
2581 & RANGE_MASK];
2582 outptr[7] = range_limit[(int) RIGHT_SHIFT(tmp27,
2583 CONST_BITS+PASS1_BITS+3)
2584 & RANGE_MASK];
2586 wsptr += 8; /* advance pointer to next row */
2592 * Perform dequantization and inverse DCT on one block of coefficients,
2593 * producing a 16x16 output block.
2595 * Optimized algorithm with 28 multiplications in the 1-D kernel.
2596 * cK represents sqrt(2) * cos(K*pi/32).
2599 GLOBAL(void)
2600 jpeg_idct_16x16 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
2601 JCOEFPTR coef_block,
2602 JSAMPARRAY output_buf, JDIMENSION output_col)
2604 INT32 tmp0, tmp1, tmp2, tmp3, tmp10, tmp11, tmp12, tmp13;
2605 INT32 tmp20, tmp21, tmp22, tmp23, tmp24, tmp25, tmp26, tmp27;
2606 INT32 z1, z2, z3, z4;
2607 JCOEFPTR inptr;
2608 ISLOW_MULT_TYPE * quantptr;
2609 int * wsptr;
2610 JSAMPROW outptr;
2611 JSAMPLE *range_limit = IDCT_range_limit(cinfo);
2612 int ctr;
2613 int workspace[8*16]; /* buffers data between passes */
2614 SHIFT_TEMPS
2616 /* Pass 1: process columns from input, store into work array. */
2618 inptr = coef_block;
2619 quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table;
2620 wsptr = workspace;
2621 for (ctr = 0; ctr < 8; ctr++, inptr++, quantptr++, wsptr++) {
2622 /* Even part */
2624 tmp0 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]);
2625 tmp0 <<= CONST_BITS;
2626 /* Add fudge factor here for final descale. */
2627 tmp0 += ONE << (CONST_BITS-PASS1_BITS-1);
2629 z1 = DEQUANTIZE(inptr[DCTSIZE*4], quantptr[DCTSIZE*4]);
2630 tmp1 = MULTIPLY(z1, FIX(1.306562965)); /* c4[16] = c2[8] */
2631 tmp2 = MULTIPLY(z1, FIX_0_541196100); /* c12[16] = c6[8] */
2633 tmp10 = tmp0 + tmp1;
2634 tmp11 = tmp0 - tmp1;
2635 tmp12 = tmp0 + tmp2;
2636 tmp13 = tmp0 - tmp2;
2638 z1 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]);
2639 z2 = DEQUANTIZE(inptr[DCTSIZE*6], quantptr[DCTSIZE*6]);
2640 z3 = z1 - z2;
2641 z4 = MULTIPLY(z3, FIX(0.275899379)); /* c14[16] = c7[8] */
2642 z3 = MULTIPLY(z3, FIX(1.387039845)); /* c2[16] = c1[8] */
2644 tmp0 = z3 + MULTIPLY(z2, FIX_2_562915447); /* (c6+c2)[16] = (c3+c1)[8] */
2645 tmp1 = z4 + MULTIPLY(z1, FIX_0_899976223); /* (c6-c14)[16] = (c3-c7)[8] */
2646 tmp2 = z3 - MULTIPLY(z1, FIX(0.601344887)); /* (c2-c10)[16] = (c1-c5)[8] */
2647 tmp3 = z4 - MULTIPLY(z2, FIX(0.509795579)); /* (c10-c14)[16] = (c5-c7)[8] */
2649 tmp20 = tmp10 + tmp0;
2650 tmp27 = tmp10 - tmp0;
2651 tmp21 = tmp12 + tmp1;
2652 tmp26 = tmp12 - tmp1;
2653 tmp22 = tmp13 + tmp2;
2654 tmp25 = tmp13 - tmp2;
2655 tmp23 = tmp11 + tmp3;
2656 tmp24 = tmp11 - tmp3;
2658 /* Odd part */
2660 z1 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]);
2661 z2 = DEQUANTIZE(inptr[DCTSIZE*3], quantptr[DCTSIZE*3]);
2662 z3 = DEQUANTIZE(inptr[DCTSIZE*5], quantptr[DCTSIZE*5]);
2663 z4 = DEQUANTIZE(inptr[DCTSIZE*7], quantptr[DCTSIZE*7]);
2665 tmp11 = z1 + z3;
2667 tmp1 = MULTIPLY(z1 + z2, FIX(1.353318001)); /* c3 */
2668 tmp2 = MULTIPLY(tmp11, FIX(1.247225013)); /* c5 */
2669 tmp3 = MULTIPLY(z1 + z4, FIX(1.093201867)); /* c7 */
2670 tmp10 = MULTIPLY(z1 - z4, FIX(0.897167586)); /* c9 */
2671 tmp11 = MULTIPLY(tmp11, FIX(0.666655658)); /* c11 */
2672 tmp12 = MULTIPLY(z1 - z2, FIX(0.410524528)); /* c13 */
2673 tmp0 = tmp1 + tmp2 + tmp3 -
2674 MULTIPLY(z1, FIX(2.286341144)); /* c7+c5+c3-c1 */
2675 tmp13 = tmp10 + tmp11 + tmp12 -
2676 MULTIPLY(z1, FIX(1.835730603)); /* c9+c11+c13-c15 */
2677 z1 = MULTIPLY(z2 + z3, FIX(0.138617169)); /* c15 */
2678 tmp1 += z1 + MULTIPLY(z2, FIX(0.071888074)); /* c9+c11-c3-c15 */
2679 tmp2 += z1 - MULTIPLY(z3, FIX(1.125726048)); /* c5+c7+c15-c3 */
2680 z1 = MULTIPLY(z3 - z2, FIX(1.407403738)); /* c1 */
2681 tmp11 += z1 - MULTIPLY(z3, FIX(0.766367282)); /* c1+c11-c9-c13 */
2682 tmp12 += z1 + MULTIPLY(z2, FIX(1.971951411)); /* c1+c5+c13-c7 */
2683 z2 += z4;
2684 z1 = MULTIPLY(z2, - FIX(0.666655658)); /* -c11 */
2685 tmp1 += z1;
2686 tmp3 += z1 + MULTIPLY(z4, FIX(1.065388962)); /* c3+c11+c15-c7 */
2687 z2 = MULTIPLY(z2, - FIX(1.247225013)); /* -c5 */
2688 tmp10 += z2 + MULTIPLY(z4, FIX(3.141271809)); /* c1+c5+c9-c13 */
2689 tmp12 += z2;
2690 z2 = MULTIPLY(z3 + z4, - FIX(1.353318001)); /* -c3 */
2691 tmp2 += z2;
2692 tmp3 += z2;
2693 z2 = MULTIPLY(z4 - z3, FIX(0.410524528)); /* c13 */
2694 tmp10 += z2;
2695 tmp11 += z2;
2697 /* Final output stage */
2699 wsptr[8*0] = (int) RIGHT_SHIFT(tmp20 + tmp0, CONST_BITS-PASS1_BITS);
2700 wsptr[8*15] = (int) RIGHT_SHIFT(tmp20 - tmp0, CONST_BITS-PASS1_BITS);
2701 wsptr[8*1] = (int) RIGHT_SHIFT(tmp21 + tmp1, CONST_BITS-PASS1_BITS);
2702 wsptr[8*14] = (int) RIGHT_SHIFT(tmp21 - tmp1, CONST_BITS-PASS1_BITS);
2703 wsptr[8*2] = (int) RIGHT_SHIFT(tmp22 + tmp2, CONST_BITS-PASS1_BITS);
2704 wsptr[8*13] = (int) RIGHT_SHIFT(tmp22 - tmp2, CONST_BITS-PASS1_BITS);
2705 wsptr[8*3] = (int) RIGHT_SHIFT(tmp23 + tmp3, CONST_BITS-PASS1_BITS);
2706 wsptr[8*12] = (int) RIGHT_SHIFT(tmp23 - tmp3, CONST_BITS-PASS1_BITS);
2707 wsptr[8*4] = (int) RIGHT_SHIFT(tmp24 + tmp10, CONST_BITS-PASS1_BITS);
2708 wsptr[8*11] = (int) RIGHT_SHIFT(tmp24 - tmp10, CONST_BITS-PASS1_BITS);
2709 wsptr[8*5] = (int) RIGHT_SHIFT(tmp25 + tmp11, CONST_BITS-PASS1_BITS);
2710 wsptr[8*10] = (int) RIGHT_SHIFT(tmp25 - tmp11, CONST_BITS-PASS1_BITS);
2711 wsptr[8*6] = (int) RIGHT_SHIFT(tmp26 + tmp12, CONST_BITS-PASS1_BITS);
2712 wsptr[8*9] = (int) RIGHT_SHIFT(tmp26 - tmp12, CONST_BITS-PASS1_BITS);
2713 wsptr[8*7] = (int) RIGHT_SHIFT(tmp27 + tmp13, CONST_BITS-PASS1_BITS);
2714 wsptr[8*8] = (int) RIGHT_SHIFT(tmp27 - tmp13, CONST_BITS-PASS1_BITS);
2717 /* Pass 2: process 16 rows from work array, store into output array. */
2719 wsptr = workspace;
2720 for (ctr = 0; ctr < 16; ctr++) {
2721 outptr = output_buf[ctr] + output_col;
2723 /* Even part */
2725 /* Add range center and fudge factor for final descale and range-limit. */
2726 tmp0 = (INT32) wsptr[0] +
2727 ((((INT32) RANGE_CENTER) << (PASS1_BITS+3)) +
2728 (ONE << (PASS1_BITS+2)));
2729 tmp0 <<= CONST_BITS;
2731 z1 = (INT32) wsptr[4];
2732 tmp1 = MULTIPLY(z1, FIX(1.306562965)); /* c4[16] = c2[8] */
2733 tmp2 = MULTIPLY(z1, FIX_0_541196100); /* c12[16] = c6[8] */
2735 tmp10 = tmp0 + tmp1;
2736 tmp11 = tmp0 - tmp1;
2737 tmp12 = tmp0 + tmp2;
2738 tmp13 = tmp0 - tmp2;
2740 z1 = (INT32) wsptr[2];
2741 z2 = (INT32) wsptr[6];
2742 z3 = z1 - z2;
2743 z4 = MULTIPLY(z3, FIX(0.275899379)); /* c14[16] = c7[8] */
2744 z3 = MULTIPLY(z3, FIX(1.387039845)); /* c2[16] = c1[8] */
2746 tmp0 = z3 + MULTIPLY(z2, FIX_2_562915447); /* (c6+c2)[16] = (c3+c1)[8] */
2747 tmp1 = z4 + MULTIPLY(z1, FIX_0_899976223); /* (c6-c14)[16] = (c3-c7)[8] */
2748 tmp2 = z3 - MULTIPLY(z1, FIX(0.601344887)); /* (c2-c10)[16] = (c1-c5)[8] */
2749 tmp3 = z4 - MULTIPLY(z2, FIX(0.509795579)); /* (c10-c14)[16] = (c5-c7)[8] */
2751 tmp20 = tmp10 + tmp0;
2752 tmp27 = tmp10 - tmp0;
2753 tmp21 = tmp12 + tmp1;
2754 tmp26 = tmp12 - tmp1;
2755 tmp22 = tmp13 + tmp2;
2756 tmp25 = tmp13 - tmp2;
2757 tmp23 = tmp11 + tmp3;
2758 tmp24 = tmp11 - tmp3;
2760 /* Odd part */
2762 z1 = (INT32) wsptr[1];
2763 z2 = (INT32) wsptr[3];
2764 z3 = (INT32) wsptr[5];
2765 z4 = (INT32) wsptr[7];
2767 tmp11 = z1 + z3;
2769 tmp1 = MULTIPLY(z1 + z2, FIX(1.353318001)); /* c3 */
2770 tmp2 = MULTIPLY(tmp11, FIX(1.247225013)); /* c5 */
2771 tmp3 = MULTIPLY(z1 + z4, FIX(1.093201867)); /* c7 */
2772 tmp10 = MULTIPLY(z1 - z4, FIX(0.897167586)); /* c9 */
2773 tmp11 = MULTIPLY(tmp11, FIX(0.666655658)); /* c11 */
2774 tmp12 = MULTIPLY(z1 - z2, FIX(0.410524528)); /* c13 */
2775 tmp0 = tmp1 + tmp2 + tmp3 -
2776 MULTIPLY(z1, FIX(2.286341144)); /* c7+c5+c3-c1 */
2777 tmp13 = tmp10 + tmp11 + tmp12 -
2778 MULTIPLY(z1, FIX(1.835730603)); /* c9+c11+c13-c15 */
2779 z1 = MULTIPLY(z2 + z3, FIX(0.138617169)); /* c15 */
2780 tmp1 += z1 + MULTIPLY(z2, FIX(0.071888074)); /* c9+c11-c3-c15 */
2781 tmp2 += z1 - MULTIPLY(z3, FIX(1.125726048)); /* c5+c7+c15-c3 */
2782 z1 = MULTIPLY(z3 - z2, FIX(1.407403738)); /* c1 */
2783 tmp11 += z1 - MULTIPLY(z3, FIX(0.766367282)); /* c1+c11-c9-c13 */
2784 tmp12 += z1 + MULTIPLY(z2, FIX(1.971951411)); /* c1+c5+c13-c7 */
2785 z2 += z4;
2786 z1 = MULTIPLY(z2, - FIX(0.666655658)); /* -c11 */
2787 tmp1 += z1;
2788 tmp3 += z1 + MULTIPLY(z4, FIX(1.065388962)); /* c3+c11+c15-c7 */
2789 z2 = MULTIPLY(z2, - FIX(1.247225013)); /* -c5 */
2790 tmp10 += z2 + MULTIPLY(z4, FIX(3.141271809)); /* c1+c5+c9-c13 */
2791 tmp12 += z2;
2792 z2 = MULTIPLY(z3 + z4, - FIX(1.353318001)); /* -c3 */
2793 tmp2 += z2;
2794 tmp3 += z2;
2795 z2 = MULTIPLY(z4 - z3, FIX(0.410524528)); /* c13 */
2796 tmp10 += z2;
2797 tmp11 += z2;
2799 /* Final output stage */
2801 outptr[0] = range_limit[(int) RIGHT_SHIFT(tmp20 + tmp0,
2802 CONST_BITS+PASS1_BITS+3)
2803 & RANGE_MASK];
2804 outptr[15] = range_limit[(int) RIGHT_SHIFT(tmp20 - tmp0,
2805 CONST_BITS+PASS1_BITS+3)
2806 & RANGE_MASK];
2807 outptr[1] = range_limit[(int) RIGHT_SHIFT(tmp21 + tmp1,
2808 CONST_BITS+PASS1_BITS+3)
2809 & RANGE_MASK];
2810 outptr[14] = range_limit[(int) RIGHT_SHIFT(tmp21 - tmp1,
2811 CONST_BITS+PASS1_BITS+3)
2812 & RANGE_MASK];
2813 outptr[2] = range_limit[(int) RIGHT_SHIFT(tmp22 + tmp2,
2814 CONST_BITS+PASS1_BITS+3)
2815 & RANGE_MASK];
2816 outptr[13] = range_limit[(int) RIGHT_SHIFT(tmp22 - tmp2,
2817 CONST_BITS+PASS1_BITS+3)
2818 & RANGE_MASK];
2819 outptr[3] = range_limit[(int) RIGHT_SHIFT(tmp23 + tmp3,
2820 CONST_BITS+PASS1_BITS+3)
2821 & RANGE_MASK];
2822 outptr[12] = range_limit[(int) RIGHT_SHIFT(tmp23 - tmp3,
2823 CONST_BITS+PASS1_BITS+3)
2824 & RANGE_MASK];
2825 outptr[4] = range_limit[(int) RIGHT_SHIFT(tmp24 + tmp10,
2826 CONST_BITS+PASS1_BITS+3)
2827 & RANGE_MASK];
2828 outptr[11] = range_limit[(int) RIGHT_SHIFT(tmp24 - tmp10,
2829 CONST_BITS+PASS1_BITS+3)
2830 & RANGE_MASK];
2831 outptr[5] = range_limit[(int) RIGHT_SHIFT(tmp25 + tmp11,
2832 CONST_BITS+PASS1_BITS+3)
2833 & RANGE_MASK];
2834 outptr[10] = range_limit[(int) RIGHT_SHIFT(tmp25 - tmp11,
2835 CONST_BITS+PASS1_BITS+3)
2836 & RANGE_MASK];
2837 outptr[6] = range_limit[(int) RIGHT_SHIFT(tmp26 + tmp12,
2838 CONST_BITS+PASS1_BITS+3)
2839 & RANGE_MASK];
2840 outptr[9] = range_limit[(int) RIGHT_SHIFT(tmp26 - tmp12,
2841 CONST_BITS+PASS1_BITS+3)
2842 & RANGE_MASK];
2843 outptr[7] = range_limit[(int) RIGHT_SHIFT(tmp27 + tmp13,
2844 CONST_BITS+PASS1_BITS+3)
2845 & RANGE_MASK];
2846 outptr[8] = range_limit[(int) RIGHT_SHIFT(tmp27 - tmp13,
2847 CONST_BITS+PASS1_BITS+3)
2848 & RANGE_MASK];
2850 wsptr += 8; /* advance pointer to next row */
2856 * Perform dequantization and inverse DCT on one block of coefficients,
2857 * producing a 16x8 output block.
2859 * 8-point IDCT in pass 1 (columns), 16-point in pass 2 (rows).
2862 GLOBAL(void)
2863 jpeg_idct_16x8 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
2864 JCOEFPTR coef_block,
2865 JSAMPARRAY output_buf, JDIMENSION output_col)
2867 INT32 tmp0, tmp1, tmp2, tmp3, tmp10, tmp11, tmp12, tmp13;
2868 INT32 tmp20, tmp21, tmp22, tmp23, tmp24, tmp25, tmp26, tmp27;
2869 INT32 z1, z2, z3, z4;
2870 JCOEFPTR inptr;
2871 ISLOW_MULT_TYPE * quantptr;
2872 int * wsptr;
2873 JSAMPROW outptr;
2874 JSAMPLE *range_limit = IDCT_range_limit(cinfo);
2875 int ctr;
2876 int workspace[8*8]; /* buffers data between passes */
2877 SHIFT_TEMPS
2879 /* Pass 1: process columns from input, store into work array.
2880 * Note results are scaled up by sqrt(8) compared to a true IDCT;
2881 * furthermore, we scale the results by 2**PASS1_BITS.
2882 * 8-point IDCT kernel, cK represents sqrt(2) * cos(K*pi/16).
2885 inptr = coef_block;
2886 quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table;
2887 wsptr = workspace;
2888 for (ctr = DCTSIZE; ctr > 0; ctr--) {
2889 /* Due to quantization, we will usually find that many of the input
2890 * coefficients are zero, especially the AC terms. We can exploit this
2891 * by short-circuiting the IDCT calculation for any column in which all
2892 * the AC terms are zero. In that case each output is equal to the
2893 * DC coefficient (with scale factor as needed).
2894 * With typical images and quantization tables, half or more of the
2895 * column DCT calculations can be simplified this way.
2898 if (inptr[DCTSIZE*1] == 0 && inptr[DCTSIZE*2] == 0 &&
2899 inptr[DCTSIZE*3] == 0 && inptr[DCTSIZE*4] == 0 &&
2900 inptr[DCTSIZE*5] == 0 && inptr[DCTSIZE*6] == 0 &&
2901 inptr[DCTSIZE*7] == 0) {
2902 /* AC terms all zero */
2903 int dcval = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]) << PASS1_BITS;
2905 wsptr[DCTSIZE*0] = dcval;
2906 wsptr[DCTSIZE*1] = dcval;
2907 wsptr[DCTSIZE*2] = dcval;
2908 wsptr[DCTSIZE*3] = dcval;
2909 wsptr[DCTSIZE*4] = dcval;
2910 wsptr[DCTSIZE*5] = dcval;
2911 wsptr[DCTSIZE*6] = dcval;
2912 wsptr[DCTSIZE*7] = dcval;
2914 inptr++; /* advance pointers to next column */
2915 quantptr++;
2916 wsptr++;
2917 continue;
2920 /* Even part: reverse the even part of the forward DCT.
2921 * The rotator is c(-6).
2924 z2 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]);
2925 z3 = DEQUANTIZE(inptr[DCTSIZE*4], quantptr[DCTSIZE*4]);
2926 z2 <<= CONST_BITS;
2927 z3 <<= CONST_BITS;
2928 /* Add fudge factor here for final descale. */
2929 z2 += ONE << (CONST_BITS-PASS1_BITS-1);
2931 tmp0 = z2 + z3;
2932 tmp1 = z2 - z3;
2934 z2 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]);
2935 z3 = DEQUANTIZE(inptr[DCTSIZE*6], quantptr[DCTSIZE*6]);
2937 z1 = MULTIPLY(z2 + z3, FIX_0_541196100); /* c6 */
2938 tmp2 = z1 + MULTIPLY(z2, FIX_0_765366865); /* c2-c6 */
2939 tmp3 = z1 - MULTIPLY(z3, FIX_1_847759065); /* c2+c6 */
2941 tmp10 = tmp0 + tmp2;
2942 tmp13 = tmp0 - tmp2;
2943 tmp11 = tmp1 + tmp3;
2944 tmp12 = tmp1 - tmp3;
2946 /* Odd part per figure 8; the matrix is unitary and hence its
2947 * transpose is its inverse. i0..i3 are y7,y5,y3,y1 respectively.
2950 tmp0 = DEQUANTIZE(inptr[DCTSIZE*7], quantptr[DCTSIZE*7]);
2951 tmp1 = DEQUANTIZE(inptr[DCTSIZE*5], quantptr[DCTSIZE*5]);
2952 tmp2 = DEQUANTIZE(inptr[DCTSIZE*3], quantptr[DCTSIZE*3]);
2953 tmp3 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]);
2955 z2 = tmp0 + tmp2;
2956 z3 = tmp1 + tmp3;
2958 z1 = MULTIPLY(z2 + z3, FIX_1_175875602); /* c3 */
2959 z2 = MULTIPLY(z2, - FIX_1_961570560); /* -c3-c5 */
2960 z3 = MULTIPLY(z3, - FIX_0_390180644); /* -c3+c5 */
2961 z2 += z1;
2962 z3 += z1;
2964 z1 = MULTIPLY(tmp0 + tmp3, - FIX_0_899976223); /* -c3+c7 */
2965 tmp0 = MULTIPLY(tmp0, FIX_0_298631336); /* -c1+c3+c5-c7 */
2966 tmp3 = MULTIPLY(tmp3, FIX_1_501321110); /* c1+c3-c5-c7 */
2967 tmp0 += z1 + z2;
2968 tmp3 += z1 + z3;
2970 z1 = MULTIPLY(tmp1 + tmp2, - FIX_2_562915447); /* -c1-c3 */
2971 tmp1 = MULTIPLY(tmp1, FIX_2_053119869); /* c1+c3-c5+c7 */
2972 tmp2 = MULTIPLY(tmp2, FIX_3_072711026); /* c1+c3+c5-c7 */
2973 tmp1 += z1 + z3;
2974 tmp2 += z1 + z2;
2976 /* Final output stage: inputs are tmp10..tmp13, tmp0..tmp3 */
2978 wsptr[DCTSIZE*0] = (int) RIGHT_SHIFT(tmp10 + tmp3, CONST_BITS-PASS1_BITS);
2979 wsptr[DCTSIZE*7] = (int) RIGHT_SHIFT(tmp10 - tmp3, CONST_BITS-PASS1_BITS);
2980 wsptr[DCTSIZE*1] = (int) RIGHT_SHIFT(tmp11 + tmp2, CONST_BITS-PASS1_BITS);
2981 wsptr[DCTSIZE*6] = (int) RIGHT_SHIFT(tmp11 - tmp2, CONST_BITS-PASS1_BITS);
2982 wsptr[DCTSIZE*2] = (int) RIGHT_SHIFT(tmp12 + tmp1, CONST_BITS-PASS1_BITS);
2983 wsptr[DCTSIZE*5] = (int) RIGHT_SHIFT(tmp12 - tmp1, CONST_BITS-PASS1_BITS);
2984 wsptr[DCTSIZE*3] = (int) RIGHT_SHIFT(tmp13 + tmp0, CONST_BITS-PASS1_BITS);
2985 wsptr[DCTSIZE*4] = (int) RIGHT_SHIFT(tmp13 - tmp0, CONST_BITS-PASS1_BITS);
2987 inptr++; /* advance pointers to next column */
2988 quantptr++;
2989 wsptr++;
2992 /* Pass 2: process 8 rows from work array, store into output array.
2993 * 16-point IDCT kernel, cK represents sqrt(2) * cos(K*pi/32).
2996 wsptr = workspace;
2997 for (ctr = 0; ctr < 8; ctr++) {
2998 outptr = output_buf[ctr] + output_col;
3000 /* Even part */
3002 /* Add range center and fudge factor for final descale and range-limit. */
3003 tmp0 = (INT32) wsptr[0] +
3004 ((((INT32) RANGE_CENTER) << (PASS1_BITS+3)) +
3005 (ONE << (PASS1_BITS+2)));
3006 tmp0 <<= CONST_BITS;
3008 z1 = (INT32) wsptr[4];
3009 tmp1 = MULTIPLY(z1, FIX(1.306562965)); /* c4[16] = c2[8] */
3010 tmp2 = MULTIPLY(z1, FIX_0_541196100); /* c12[16] = c6[8] */
3012 tmp10 = tmp0 + tmp1;
3013 tmp11 = tmp0 - tmp1;
3014 tmp12 = tmp0 + tmp2;
3015 tmp13 = tmp0 - tmp2;
3017 z1 = (INT32) wsptr[2];
3018 z2 = (INT32) wsptr[6];
3019 z3 = z1 - z2;
3020 z4 = MULTIPLY(z3, FIX(0.275899379)); /* c14[16] = c7[8] */
3021 z3 = MULTIPLY(z3, FIX(1.387039845)); /* c2[16] = c1[8] */
3023 tmp0 = z3 + MULTIPLY(z2, FIX_2_562915447); /* (c6+c2)[16] = (c3+c1)[8] */
3024 tmp1 = z4 + MULTIPLY(z1, FIX_0_899976223); /* (c6-c14)[16] = (c3-c7)[8] */
3025 tmp2 = z3 - MULTIPLY(z1, FIX(0.601344887)); /* (c2-c10)[16] = (c1-c5)[8] */
3026 tmp3 = z4 - MULTIPLY(z2, FIX(0.509795579)); /* (c10-c14)[16] = (c5-c7)[8] */
3028 tmp20 = tmp10 + tmp0;
3029 tmp27 = tmp10 - tmp0;
3030 tmp21 = tmp12 + tmp1;
3031 tmp26 = tmp12 - tmp1;
3032 tmp22 = tmp13 + tmp2;
3033 tmp25 = tmp13 - tmp2;
3034 tmp23 = tmp11 + tmp3;
3035 tmp24 = tmp11 - tmp3;
3037 /* Odd part */
3039 z1 = (INT32) wsptr[1];
3040 z2 = (INT32) wsptr[3];
3041 z3 = (INT32) wsptr[5];
3042 z4 = (INT32) wsptr[7];
3044 tmp11 = z1 + z3;
3046 tmp1 = MULTIPLY(z1 + z2, FIX(1.353318001)); /* c3 */
3047 tmp2 = MULTIPLY(tmp11, FIX(1.247225013)); /* c5 */
3048 tmp3 = MULTIPLY(z1 + z4, FIX(1.093201867)); /* c7 */
3049 tmp10 = MULTIPLY(z1 - z4, FIX(0.897167586)); /* c9 */
3050 tmp11 = MULTIPLY(tmp11, FIX(0.666655658)); /* c11 */
3051 tmp12 = MULTIPLY(z1 - z2, FIX(0.410524528)); /* c13 */
3052 tmp0 = tmp1 + tmp2 + tmp3 -
3053 MULTIPLY(z1, FIX(2.286341144)); /* c7+c5+c3-c1 */
3054 tmp13 = tmp10 + tmp11 + tmp12 -
3055 MULTIPLY(z1, FIX(1.835730603)); /* c9+c11+c13-c15 */
3056 z1 = MULTIPLY(z2 + z3, FIX(0.138617169)); /* c15 */
3057 tmp1 += z1 + MULTIPLY(z2, FIX(0.071888074)); /* c9+c11-c3-c15 */
3058 tmp2 += z1 - MULTIPLY(z3, FIX(1.125726048)); /* c5+c7+c15-c3 */
3059 z1 = MULTIPLY(z3 - z2, FIX(1.407403738)); /* c1 */
3060 tmp11 += z1 - MULTIPLY(z3, FIX(0.766367282)); /* c1+c11-c9-c13 */
3061 tmp12 += z1 + MULTIPLY(z2, FIX(1.971951411)); /* c1+c5+c13-c7 */
3062 z2 += z4;
3063 z1 = MULTIPLY(z2, - FIX(0.666655658)); /* -c11 */
3064 tmp1 += z1;
3065 tmp3 += z1 + MULTIPLY(z4, FIX(1.065388962)); /* c3+c11+c15-c7 */
3066 z2 = MULTIPLY(z2, - FIX(1.247225013)); /* -c5 */
3067 tmp10 += z2 + MULTIPLY(z4, FIX(3.141271809)); /* c1+c5+c9-c13 */
3068 tmp12 += z2;
3069 z2 = MULTIPLY(z3 + z4, - FIX(1.353318001)); /* -c3 */
3070 tmp2 += z2;
3071 tmp3 += z2;
3072 z2 = MULTIPLY(z4 - z3, FIX(0.410524528)); /* c13 */
3073 tmp10 += z2;
3074 tmp11 += z2;
3076 /* Final output stage */
3078 outptr[0] = range_limit[(int) RIGHT_SHIFT(tmp20 + tmp0,
3079 CONST_BITS+PASS1_BITS+3)
3080 & RANGE_MASK];
3081 outptr[15] = range_limit[(int) RIGHT_SHIFT(tmp20 - tmp0,
3082 CONST_BITS+PASS1_BITS+3)
3083 & RANGE_MASK];
3084 outptr[1] = range_limit[(int) RIGHT_SHIFT(tmp21 + tmp1,
3085 CONST_BITS+PASS1_BITS+3)
3086 & RANGE_MASK];
3087 outptr[14] = range_limit[(int) RIGHT_SHIFT(tmp21 - tmp1,
3088 CONST_BITS+PASS1_BITS+3)
3089 & RANGE_MASK];
3090 outptr[2] = range_limit[(int) RIGHT_SHIFT(tmp22 + tmp2,
3091 CONST_BITS+PASS1_BITS+3)
3092 & RANGE_MASK];
3093 outptr[13] = range_limit[(int) RIGHT_SHIFT(tmp22 - tmp2,
3094 CONST_BITS+PASS1_BITS+3)
3095 & RANGE_MASK];
3096 outptr[3] = range_limit[(int) RIGHT_SHIFT(tmp23 + tmp3,
3097 CONST_BITS+PASS1_BITS+3)
3098 & RANGE_MASK];
3099 outptr[12] = range_limit[(int) RIGHT_SHIFT(tmp23 - tmp3,
3100 CONST_BITS+PASS1_BITS+3)
3101 & RANGE_MASK];
3102 outptr[4] = range_limit[(int) RIGHT_SHIFT(tmp24 + tmp10,
3103 CONST_BITS+PASS1_BITS+3)
3104 & RANGE_MASK];
3105 outptr[11] = range_limit[(int) RIGHT_SHIFT(tmp24 - tmp10,
3106 CONST_BITS+PASS1_BITS+3)
3107 & RANGE_MASK];
3108 outptr[5] = range_limit[(int) RIGHT_SHIFT(tmp25 + tmp11,
3109 CONST_BITS+PASS1_BITS+3)
3110 & RANGE_MASK];
3111 outptr[10] = range_limit[(int) RIGHT_SHIFT(tmp25 - tmp11,
3112 CONST_BITS+PASS1_BITS+3)
3113 & RANGE_MASK];
3114 outptr[6] = range_limit[(int) RIGHT_SHIFT(tmp26 + tmp12,
3115 CONST_BITS+PASS1_BITS+3)
3116 & RANGE_MASK];
3117 outptr[9] = range_limit[(int) RIGHT_SHIFT(tmp26 - tmp12,
3118 CONST_BITS+PASS1_BITS+3)
3119 & RANGE_MASK];
3120 outptr[7] = range_limit[(int) RIGHT_SHIFT(tmp27 + tmp13,
3121 CONST_BITS+PASS1_BITS+3)
3122 & RANGE_MASK];
3123 outptr[8] = range_limit[(int) RIGHT_SHIFT(tmp27 - tmp13,
3124 CONST_BITS+PASS1_BITS+3)
3125 & RANGE_MASK];
3127 wsptr += 8; /* advance pointer to next row */
3133 * Perform dequantization and inverse DCT on one block of coefficients,
3134 * producing a 14x7 output block.
3136 * 7-point IDCT in pass 1 (columns), 14-point in pass 2 (rows).
3139 GLOBAL(void)
3140 jpeg_idct_14x7 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
3141 JCOEFPTR coef_block,
3142 JSAMPARRAY output_buf, JDIMENSION output_col)
3144 INT32 tmp10, tmp11, tmp12, tmp13, tmp14, tmp15, tmp16;
3145 INT32 tmp20, tmp21, tmp22, tmp23, tmp24, tmp25, tmp26;
3146 INT32 z1, z2, z3, z4;
3147 JCOEFPTR inptr;
3148 ISLOW_MULT_TYPE * quantptr;
3149 int * wsptr;
3150 JSAMPROW outptr;
3151 JSAMPLE *range_limit = IDCT_range_limit(cinfo);
3152 int ctr;
3153 int workspace[8*7]; /* buffers data between passes */
3154 SHIFT_TEMPS
3156 /* Pass 1: process columns from input, store into work array.
3157 * 7-point IDCT kernel, cK represents sqrt(2) * cos(K*pi/14).
3160 inptr = coef_block;
3161 quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table;
3162 wsptr = workspace;
3163 for (ctr = 0; ctr < 8; ctr++, inptr++, quantptr++, wsptr++) {
3164 /* Even part */
3166 tmp23 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]);
3167 tmp23 <<= CONST_BITS;
3168 /* Add fudge factor here for final descale. */
3169 tmp23 += ONE << (CONST_BITS-PASS1_BITS-1);
3171 z1 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]);
3172 z2 = DEQUANTIZE(inptr[DCTSIZE*4], quantptr[DCTSIZE*4]);
3173 z3 = DEQUANTIZE(inptr[DCTSIZE*6], quantptr[DCTSIZE*6]);
3175 tmp20 = MULTIPLY(z2 - z3, FIX(0.881747734)); /* c4 */
3176 tmp22 = MULTIPLY(z1 - z2, FIX(0.314692123)); /* c6 */
3177 tmp21 = tmp20 + tmp22 + tmp23 - MULTIPLY(z2, FIX(1.841218003)); /* c2+c4-c6 */
3178 tmp10 = z1 + z3;
3179 z2 -= tmp10;
3180 tmp10 = MULTIPLY(tmp10, FIX(1.274162392)) + tmp23; /* c2 */
3181 tmp20 += tmp10 - MULTIPLY(z3, FIX(0.077722536)); /* c2-c4-c6 */
3182 tmp22 += tmp10 - MULTIPLY(z1, FIX(2.470602249)); /* c2+c4+c6 */
3183 tmp23 += MULTIPLY(z2, FIX(1.414213562)); /* c0 */
3185 /* Odd part */
3187 z1 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]);
3188 z2 = DEQUANTIZE(inptr[DCTSIZE*3], quantptr[DCTSIZE*3]);
3189 z3 = DEQUANTIZE(inptr[DCTSIZE*5], quantptr[DCTSIZE*5]);
3191 tmp11 = MULTIPLY(z1 + z2, FIX(0.935414347)); /* (c3+c1-c5)/2 */
3192 tmp12 = MULTIPLY(z1 - z2, FIX(0.170262339)); /* (c3+c5-c1)/2 */
3193 tmp10 = tmp11 - tmp12;
3194 tmp11 += tmp12;
3195 tmp12 = MULTIPLY(z2 + z3, - FIX(1.378756276)); /* -c1 */
3196 tmp11 += tmp12;
3197 z2 = MULTIPLY(z1 + z3, FIX(0.613604268)); /* c5 */
3198 tmp10 += z2;
3199 tmp12 += z2 + MULTIPLY(z3, FIX(1.870828693)); /* c3+c1-c5 */
3201 /* Final output stage */
3203 wsptr[8*0] = (int) RIGHT_SHIFT(tmp20 + tmp10, CONST_BITS-PASS1_BITS);
3204 wsptr[8*6] = (int) RIGHT_SHIFT(tmp20 - tmp10, CONST_BITS-PASS1_BITS);
3205 wsptr[8*1] = (int) RIGHT_SHIFT(tmp21 + tmp11, CONST_BITS-PASS1_BITS);
3206 wsptr[8*5] = (int) RIGHT_SHIFT(tmp21 - tmp11, CONST_BITS-PASS1_BITS);
3207 wsptr[8*2] = (int) RIGHT_SHIFT(tmp22 + tmp12, CONST_BITS-PASS1_BITS);
3208 wsptr[8*4] = (int) RIGHT_SHIFT(tmp22 - tmp12, CONST_BITS-PASS1_BITS);
3209 wsptr[8*3] = (int) RIGHT_SHIFT(tmp23, CONST_BITS-PASS1_BITS);
3212 /* Pass 2: process 7 rows from work array, store into output array.
3213 * 14-point IDCT kernel, cK represents sqrt(2) * cos(K*pi/28).
3216 wsptr = workspace;
3217 for (ctr = 0; ctr < 7; ctr++) {
3218 outptr = output_buf[ctr] + output_col;
3220 /* Even part */
3222 /* Add range center and fudge factor for final descale and range-limit. */
3223 z1 = (INT32) wsptr[0] +
3224 ((((INT32) RANGE_CENTER) << (PASS1_BITS+3)) +
3225 (ONE << (PASS1_BITS+2)));
3226 z1 <<= CONST_BITS;
3227 z4 = (INT32) wsptr[4];
3228 z2 = MULTIPLY(z4, FIX(1.274162392)); /* c4 */
3229 z3 = MULTIPLY(z4, FIX(0.314692123)); /* c12 */
3230 z4 = MULTIPLY(z4, FIX(0.881747734)); /* c8 */
3232 tmp10 = z1 + z2;
3233 tmp11 = z1 + z3;
3234 tmp12 = z1 - z4;
3236 tmp23 = z1 - ((z2 + z3 - z4) << 1); /* c0 = (c4+c12-c8)*2 */
3238 z1 = (INT32) wsptr[2];
3239 z2 = (INT32) wsptr[6];
3241 z3 = MULTIPLY(z1 + z2, FIX(1.105676686)); /* c6 */
3243 tmp13 = z3 + MULTIPLY(z1, FIX(0.273079590)); /* c2-c6 */
3244 tmp14 = z3 - MULTIPLY(z2, FIX(1.719280954)); /* c6+c10 */
3245 tmp15 = MULTIPLY(z1, FIX(0.613604268)) - /* c10 */
3246 MULTIPLY(z2, FIX(1.378756276)); /* c2 */
3248 tmp20 = tmp10 + tmp13;
3249 tmp26 = tmp10 - tmp13;
3250 tmp21 = tmp11 + tmp14;
3251 tmp25 = tmp11 - tmp14;
3252 tmp22 = tmp12 + tmp15;
3253 tmp24 = tmp12 - tmp15;
3255 /* Odd part */
3257 z1 = (INT32) wsptr[1];
3258 z2 = (INT32) wsptr[3];
3259 z3 = (INT32) wsptr[5];
3260 z4 = (INT32) wsptr[7];
3261 z4 <<= CONST_BITS;
3263 tmp14 = z1 + z3;
3264 tmp11 = MULTIPLY(z1 + z2, FIX(1.334852607)); /* c3 */
3265 tmp12 = MULTIPLY(tmp14, FIX(1.197448846)); /* c5 */
3266 tmp10 = tmp11 + tmp12 + z4 - MULTIPLY(z1, FIX(1.126980169)); /* c3+c5-c1 */
3267 tmp14 = MULTIPLY(tmp14, FIX(0.752406978)); /* c9 */
3268 tmp16 = tmp14 - MULTIPLY(z1, FIX(1.061150426)); /* c9+c11-c13 */
3269 z1 -= z2;
3270 tmp15 = MULTIPLY(z1, FIX(0.467085129)) - z4; /* c11 */
3271 tmp16 += tmp15;
3272 tmp13 = MULTIPLY(z2 + z3, - FIX(0.158341681)) - z4; /* -c13 */
3273 tmp11 += tmp13 - MULTIPLY(z2, FIX(0.424103948)); /* c3-c9-c13 */
3274 tmp12 += tmp13 - MULTIPLY(z3, FIX(2.373959773)); /* c3+c5-c13 */
3275 tmp13 = MULTIPLY(z3 - z2, FIX(1.405321284)); /* c1 */
3276 tmp14 += tmp13 + z4 - MULTIPLY(z3, FIX(1.6906431334)); /* c1+c9-c11 */
3277 tmp15 += tmp13 + MULTIPLY(z2, FIX(0.674957567)); /* c1+c11-c5 */
3279 tmp13 = ((z1 - z3) << CONST_BITS) + z4;
3281 /* Final output stage */
3283 outptr[0] = range_limit[(int) RIGHT_SHIFT(tmp20 + tmp10,
3284 CONST_BITS+PASS1_BITS+3)
3285 & RANGE_MASK];
3286 outptr[13] = range_limit[(int) RIGHT_SHIFT(tmp20 - tmp10,
3287 CONST_BITS+PASS1_BITS+3)
3288 & RANGE_MASK];
3289 outptr[1] = range_limit[(int) RIGHT_SHIFT(tmp21 + tmp11,
3290 CONST_BITS+PASS1_BITS+3)
3291 & RANGE_MASK];
3292 outptr[12] = range_limit[(int) RIGHT_SHIFT(tmp21 - tmp11,
3293 CONST_BITS+PASS1_BITS+3)
3294 & RANGE_MASK];
3295 outptr[2] = range_limit[(int) RIGHT_SHIFT(tmp22 + tmp12,
3296 CONST_BITS+PASS1_BITS+3)
3297 & RANGE_MASK];
3298 outptr[11] = range_limit[(int) RIGHT_SHIFT(tmp22 - tmp12,
3299 CONST_BITS+PASS1_BITS+3)
3300 & RANGE_MASK];
3301 outptr[3] = range_limit[(int) RIGHT_SHIFT(tmp23 + tmp13,
3302 CONST_BITS+PASS1_BITS+3)
3303 & RANGE_MASK];
3304 outptr[10] = range_limit[(int) RIGHT_SHIFT(tmp23 - tmp13,
3305 CONST_BITS+PASS1_BITS+3)
3306 & RANGE_MASK];
3307 outptr[4] = range_limit[(int) RIGHT_SHIFT(tmp24 + tmp14,
3308 CONST_BITS+PASS1_BITS+3)
3309 & RANGE_MASK];
3310 outptr[9] = range_limit[(int) RIGHT_SHIFT(tmp24 - tmp14,
3311 CONST_BITS+PASS1_BITS+3)
3312 & RANGE_MASK];
3313 outptr[5] = range_limit[(int) RIGHT_SHIFT(tmp25 + tmp15,
3314 CONST_BITS+PASS1_BITS+3)
3315 & RANGE_MASK];
3316 outptr[8] = range_limit[(int) RIGHT_SHIFT(tmp25 - tmp15,
3317 CONST_BITS+PASS1_BITS+3)
3318 & RANGE_MASK];
3319 outptr[6] = range_limit[(int) RIGHT_SHIFT(tmp26 + tmp16,
3320 CONST_BITS+PASS1_BITS+3)
3321 & RANGE_MASK];
3322 outptr[7] = range_limit[(int) RIGHT_SHIFT(tmp26 - tmp16,
3323 CONST_BITS+PASS1_BITS+3)
3324 & RANGE_MASK];
3326 wsptr += 8; /* advance pointer to next row */
3332 * Perform dequantization and inverse DCT on one block of coefficients,
3333 * producing a 12x6 output block.
3335 * 6-point IDCT in pass 1 (columns), 12-point in pass 2 (rows).
3338 GLOBAL(void)
3339 jpeg_idct_12x6 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
3340 JCOEFPTR coef_block,
3341 JSAMPARRAY output_buf, JDIMENSION output_col)
3343 INT32 tmp10, tmp11, tmp12, tmp13, tmp14, tmp15;
3344 INT32 tmp20, tmp21, tmp22, tmp23, tmp24, tmp25;
3345 INT32 z1, z2, z3, z4;
3346 JCOEFPTR inptr;
3347 ISLOW_MULT_TYPE * quantptr;
3348 int * wsptr;
3349 JSAMPROW outptr;
3350 JSAMPLE *range_limit = IDCT_range_limit(cinfo);
3351 int ctr;
3352 int workspace[8*6]; /* buffers data between passes */
3353 SHIFT_TEMPS
3355 /* Pass 1: process columns from input, store into work array.
3356 * 6-point IDCT kernel, cK represents sqrt(2) * cos(K*pi/12).
3359 inptr = coef_block;
3360 quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table;
3361 wsptr = workspace;
3362 for (ctr = 0; ctr < 8; ctr++, inptr++, quantptr++, wsptr++) {
3363 /* Even part */
3365 tmp10 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]);
3366 tmp10 <<= CONST_BITS;
3367 /* Add fudge factor here for final descale. */
3368 tmp10 += ONE << (CONST_BITS-PASS1_BITS-1);
3369 tmp12 = DEQUANTIZE(inptr[DCTSIZE*4], quantptr[DCTSIZE*4]);
3370 tmp20 = MULTIPLY(tmp12, FIX(0.707106781)); /* c4 */
3371 tmp11 = tmp10 + tmp20;
3372 tmp21 = RIGHT_SHIFT(tmp10 - tmp20 - tmp20, CONST_BITS-PASS1_BITS);
3373 tmp20 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]);
3374 tmp10 = MULTIPLY(tmp20, FIX(1.224744871)); /* c2 */
3375 tmp20 = tmp11 + tmp10;
3376 tmp22 = tmp11 - tmp10;
3378 /* Odd part */
3380 z1 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]);
3381 z2 = DEQUANTIZE(inptr[DCTSIZE*3], quantptr[DCTSIZE*3]);
3382 z3 = DEQUANTIZE(inptr[DCTSIZE*5], quantptr[DCTSIZE*5]);
3383 tmp11 = MULTIPLY(z1 + z3, FIX(0.366025404)); /* c5 */
3384 tmp10 = tmp11 + ((z1 + z2) << CONST_BITS);
3385 tmp12 = tmp11 + ((z3 - z2) << CONST_BITS);
3386 tmp11 = (z1 - z2 - z3) << PASS1_BITS;
3388 /* Final output stage */
3390 wsptr[8*0] = (int) RIGHT_SHIFT(tmp20 + tmp10, CONST_BITS-PASS1_BITS);
3391 wsptr[8*5] = (int) RIGHT_SHIFT(tmp20 - tmp10, CONST_BITS-PASS1_BITS);
3392 wsptr[8*1] = (int) (tmp21 + tmp11);
3393 wsptr[8*4] = (int) (tmp21 - tmp11);
3394 wsptr[8*2] = (int) RIGHT_SHIFT(tmp22 + tmp12, CONST_BITS-PASS1_BITS);
3395 wsptr[8*3] = (int) RIGHT_SHIFT(tmp22 - tmp12, CONST_BITS-PASS1_BITS);
3398 /* Pass 2: process 6 rows from work array, store into output array.
3399 * 12-point IDCT kernel, cK represents sqrt(2) * cos(K*pi/24).
3402 wsptr = workspace;
3403 for (ctr = 0; ctr < 6; ctr++) {
3404 outptr = output_buf[ctr] + output_col;
3406 /* Even part */
3408 /* Add range center and fudge factor for final descale and range-limit. */
3409 z3 = (INT32) wsptr[0] +
3410 ((((INT32) RANGE_CENTER) << (PASS1_BITS+3)) +
3411 (ONE << (PASS1_BITS+2)));
3412 z3 <<= CONST_BITS;
3414 z4 = (INT32) wsptr[4];
3415 z4 = MULTIPLY(z4, FIX(1.224744871)); /* c4 */
3417 tmp10 = z3 + z4;
3418 tmp11 = z3 - z4;
3420 z1 = (INT32) wsptr[2];
3421 z4 = MULTIPLY(z1, FIX(1.366025404)); /* c2 */
3422 z1 <<= CONST_BITS;
3423 z2 = (INT32) wsptr[6];
3424 z2 <<= CONST_BITS;
3426 tmp12 = z1 - z2;
3428 tmp21 = z3 + tmp12;
3429 tmp24 = z3 - tmp12;
3431 tmp12 = z4 + z2;
3433 tmp20 = tmp10 + tmp12;
3434 tmp25 = tmp10 - tmp12;
3436 tmp12 = z4 - z1 - z2;
3438 tmp22 = tmp11 + tmp12;
3439 tmp23 = tmp11 - tmp12;
3441 /* Odd part */
3443 z1 = (INT32) wsptr[1];
3444 z2 = (INT32) wsptr[3];
3445 z3 = (INT32) wsptr[5];
3446 z4 = (INT32) wsptr[7];
3448 tmp11 = MULTIPLY(z2, FIX(1.306562965)); /* c3 */
3449 tmp14 = MULTIPLY(z2, - FIX_0_541196100); /* -c9 */
3451 tmp10 = z1 + z3;
3452 tmp15 = MULTIPLY(tmp10 + z4, FIX(0.860918669)); /* c7 */
3453 tmp12 = tmp15 + MULTIPLY(tmp10, FIX(0.261052384)); /* c5-c7 */
3454 tmp10 = tmp12 + tmp11 + MULTIPLY(z1, FIX(0.280143716)); /* c1-c5 */
3455 tmp13 = MULTIPLY(z3 + z4, - FIX(1.045510580)); /* -(c7+c11) */
3456 tmp12 += tmp13 + tmp14 - MULTIPLY(z3, FIX(1.478575242)); /* c1+c5-c7-c11 */
3457 tmp13 += tmp15 - tmp11 + MULTIPLY(z4, FIX(1.586706681)); /* c1+c11 */
3458 tmp15 += tmp14 - MULTIPLY(z1, FIX(0.676326758)) - /* c7-c11 */
3459 MULTIPLY(z4, FIX(1.982889723)); /* c5+c7 */
3461 z1 -= z4;
3462 z2 -= z3;
3463 z3 = MULTIPLY(z1 + z2, FIX_0_541196100); /* c9 */
3464 tmp11 = z3 + MULTIPLY(z1, FIX_0_765366865); /* c3-c9 */
3465 tmp14 = z3 - MULTIPLY(z2, FIX_1_847759065); /* c3+c9 */
3467 /* Final output stage */
3469 outptr[0] = range_limit[(int) RIGHT_SHIFT(tmp20 + tmp10,
3470 CONST_BITS+PASS1_BITS+3)
3471 & RANGE_MASK];
3472 outptr[11] = range_limit[(int) RIGHT_SHIFT(tmp20 - tmp10,
3473 CONST_BITS+PASS1_BITS+3)
3474 & RANGE_MASK];
3475 outptr[1] = range_limit[(int) RIGHT_SHIFT(tmp21 + tmp11,
3476 CONST_BITS+PASS1_BITS+3)
3477 & RANGE_MASK];
3478 outptr[10] = range_limit[(int) RIGHT_SHIFT(tmp21 - tmp11,
3479 CONST_BITS+PASS1_BITS+3)
3480 & RANGE_MASK];
3481 outptr[2] = range_limit[(int) RIGHT_SHIFT(tmp22 + tmp12,
3482 CONST_BITS+PASS1_BITS+3)
3483 & RANGE_MASK];
3484 outptr[9] = range_limit[(int) RIGHT_SHIFT(tmp22 - tmp12,
3485 CONST_BITS+PASS1_BITS+3)
3486 & RANGE_MASK];
3487 outptr[3] = range_limit[(int) RIGHT_SHIFT(tmp23 + tmp13,
3488 CONST_BITS+PASS1_BITS+3)
3489 & RANGE_MASK];
3490 outptr[8] = range_limit[(int) RIGHT_SHIFT(tmp23 - tmp13,
3491 CONST_BITS+PASS1_BITS+3)
3492 & RANGE_MASK];
3493 outptr[4] = range_limit[(int) RIGHT_SHIFT(tmp24 + tmp14,
3494 CONST_BITS+PASS1_BITS+3)
3495 & RANGE_MASK];
3496 outptr[7] = range_limit[(int) RIGHT_SHIFT(tmp24 - tmp14,
3497 CONST_BITS+PASS1_BITS+3)
3498 & RANGE_MASK];
3499 outptr[5] = range_limit[(int) RIGHT_SHIFT(tmp25 + tmp15,
3500 CONST_BITS+PASS1_BITS+3)
3501 & RANGE_MASK];
3502 outptr[6] = range_limit[(int) RIGHT_SHIFT(tmp25 - tmp15,
3503 CONST_BITS+PASS1_BITS+3)
3504 & RANGE_MASK];
3506 wsptr += 8; /* advance pointer to next row */
3512 * Perform dequantization and inverse DCT on one block of coefficients,
3513 * producing a 10x5 output block.
3515 * 5-point IDCT in pass 1 (columns), 10-point in pass 2 (rows).
3518 GLOBAL(void)
3519 jpeg_idct_10x5 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
3520 JCOEFPTR coef_block,
3521 JSAMPARRAY output_buf, JDIMENSION output_col)
3523 INT32 tmp10, tmp11, tmp12, tmp13, tmp14;
3524 INT32 tmp20, tmp21, tmp22, tmp23, tmp24;
3525 INT32 z1, z2, z3, z4;
3526 JCOEFPTR inptr;
3527 ISLOW_MULT_TYPE * quantptr;
3528 int * wsptr;
3529 JSAMPROW outptr;
3530 JSAMPLE *range_limit = IDCT_range_limit(cinfo);
3531 int ctr;
3532 int workspace[8*5]; /* buffers data between passes */
3533 SHIFT_TEMPS
3535 /* Pass 1: process columns from input, store into work array.
3536 * 5-point IDCT kernel, cK represents sqrt(2) * cos(K*pi/10).
3539 inptr = coef_block;
3540 quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table;
3541 wsptr = workspace;
3542 for (ctr = 0; ctr < 8; ctr++, inptr++, quantptr++, wsptr++) {
3543 /* Even part */
3545 tmp12 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]);
3546 tmp12 <<= CONST_BITS;
3547 /* Add fudge factor here for final descale. */
3548 tmp12 += ONE << (CONST_BITS-PASS1_BITS-1);
3549 tmp13 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]);
3550 tmp14 = DEQUANTIZE(inptr[DCTSIZE*4], quantptr[DCTSIZE*4]);
3551 z1 = MULTIPLY(tmp13 + tmp14, FIX(0.790569415)); /* (c2+c4)/2 */
3552 z2 = MULTIPLY(tmp13 - tmp14, FIX(0.353553391)); /* (c2-c4)/2 */
3553 z3 = tmp12 + z2;
3554 tmp10 = z3 + z1;
3555 tmp11 = z3 - z1;
3556 tmp12 -= z2 << 2;
3558 /* Odd part */
3560 z2 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]);
3561 z3 = DEQUANTIZE(inptr[DCTSIZE*3], quantptr[DCTSIZE*3]);
3563 z1 = MULTIPLY(z2 + z3, FIX(0.831253876)); /* c3 */
3564 tmp13 = z1 + MULTIPLY(z2, FIX(0.513743148)); /* c1-c3 */
3565 tmp14 = z1 - MULTIPLY(z3, FIX(2.176250899)); /* c1+c3 */
3567 /* Final output stage */
3569 wsptr[8*0] = (int) RIGHT_SHIFT(tmp10 + tmp13, CONST_BITS-PASS1_BITS);
3570 wsptr[8*4] = (int) RIGHT_SHIFT(tmp10 - tmp13, CONST_BITS-PASS1_BITS);
3571 wsptr[8*1] = (int) RIGHT_SHIFT(tmp11 + tmp14, CONST_BITS-PASS1_BITS);
3572 wsptr[8*3] = (int) RIGHT_SHIFT(tmp11 - tmp14, CONST_BITS-PASS1_BITS);
3573 wsptr[8*2] = (int) RIGHT_SHIFT(tmp12, CONST_BITS-PASS1_BITS);
3576 /* Pass 2: process 5 rows from work array, store into output array.
3577 * 10-point IDCT kernel, cK represents sqrt(2) * cos(K*pi/20).
3580 wsptr = workspace;
3581 for (ctr = 0; ctr < 5; ctr++) {
3582 outptr = output_buf[ctr] + output_col;
3584 /* Even part */
3586 /* Add range center and fudge factor for final descale and range-limit. */
3587 z3 = (INT32) wsptr[0] +
3588 ((((INT32) RANGE_CENTER) << (PASS1_BITS+3)) +
3589 (ONE << (PASS1_BITS+2)));
3590 z3 <<= CONST_BITS;
3591 z4 = (INT32) wsptr[4];
3592 z1 = MULTIPLY(z4, FIX(1.144122806)); /* c4 */
3593 z2 = MULTIPLY(z4, FIX(0.437016024)); /* c8 */
3594 tmp10 = z3 + z1;
3595 tmp11 = z3 - z2;
3597 tmp22 = z3 - ((z1 - z2) << 1); /* c0 = (c4-c8)*2 */
3599 z2 = (INT32) wsptr[2];
3600 z3 = (INT32) wsptr[6];
3602 z1 = MULTIPLY(z2 + z3, FIX(0.831253876)); /* c6 */
3603 tmp12 = z1 + MULTIPLY(z2, FIX(0.513743148)); /* c2-c6 */
3604 tmp13 = z1 - MULTIPLY(z3, FIX(2.176250899)); /* c2+c6 */
3606 tmp20 = tmp10 + tmp12;
3607 tmp24 = tmp10 - tmp12;
3608 tmp21 = tmp11 + tmp13;
3609 tmp23 = tmp11 - tmp13;
3611 /* Odd part */
3613 z1 = (INT32) wsptr[1];
3614 z2 = (INT32) wsptr[3];
3615 z3 = (INT32) wsptr[5];
3616 z3 <<= CONST_BITS;
3617 z4 = (INT32) wsptr[7];
3619 tmp11 = z2 + z4;
3620 tmp13 = z2 - z4;
3622 tmp12 = MULTIPLY(tmp13, FIX(0.309016994)); /* (c3-c7)/2 */
3624 z2 = MULTIPLY(tmp11, FIX(0.951056516)); /* (c3+c7)/2 */
3625 z4 = z3 + tmp12;
3627 tmp10 = MULTIPLY(z1, FIX(1.396802247)) + z2 + z4; /* c1 */
3628 tmp14 = MULTIPLY(z1, FIX(0.221231742)) - z2 + z4; /* c9 */
3630 z2 = MULTIPLY(tmp11, FIX(0.587785252)); /* (c1-c9)/2 */
3631 z4 = z3 - tmp12 - (tmp13 << (CONST_BITS - 1));
3633 tmp12 = ((z1 - tmp13) << CONST_BITS) - z3;
3635 tmp11 = MULTIPLY(z1, FIX(1.260073511)) - z2 - z4; /* c3 */
3636 tmp13 = MULTIPLY(z1, FIX(0.642039522)) - z2 + z4; /* c7 */
3638 /* Final output stage */
3640 outptr[0] = range_limit[(int) RIGHT_SHIFT(tmp20 + tmp10,
3641 CONST_BITS+PASS1_BITS+3)
3642 & RANGE_MASK];
3643 outptr[9] = range_limit[(int) RIGHT_SHIFT(tmp20 - tmp10,
3644 CONST_BITS+PASS1_BITS+3)
3645 & RANGE_MASK];
3646 outptr[1] = range_limit[(int) RIGHT_SHIFT(tmp21 + tmp11,
3647 CONST_BITS+PASS1_BITS+3)
3648 & RANGE_MASK];
3649 outptr[8] = range_limit[(int) RIGHT_SHIFT(tmp21 - tmp11,
3650 CONST_BITS+PASS1_BITS+3)
3651 & RANGE_MASK];
3652 outptr[2] = range_limit[(int) RIGHT_SHIFT(tmp22 + tmp12,
3653 CONST_BITS+PASS1_BITS+3)
3654 & RANGE_MASK];
3655 outptr[7] = range_limit[(int) RIGHT_SHIFT(tmp22 - tmp12,
3656 CONST_BITS+PASS1_BITS+3)
3657 & RANGE_MASK];
3658 outptr[3] = range_limit[(int) RIGHT_SHIFT(tmp23 + tmp13,
3659 CONST_BITS+PASS1_BITS+3)
3660 & RANGE_MASK];
3661 outptr[6] = range_limit[(int) RIGHT_SHIFT(tmp23 - tmp13,
3662 CONST_BITS+PASS1_BITS+3)
3663 & RANGE_MASK];
3664 outptr[4] = range_limit[(int) RIGHT_SHIFT(tmp24 + tmp14,
3665 CONST_BITS+PASS1_BITS+3)
3666 & RANGE_MASK];
3667 outptr[5] = range_limit[(int) RIGHT_SHIFT(tmp24 - tmp14,
3668 CONST_BITS+PASS1_BITS+3)
3669 & RANGE_MASK];
3671 wsptr += 8; /* advance pointer to next row */
3677 * Perform dequantization and inverse DCT on one block of coefficients,
3678 * producing an 8x4 output block.
3680 * 4-point IDCT in pass 1 (columns), 8-point in pass 2 (rows).
3683 GLOBAL(void)
3684 jpeg_idct_8x4 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
3685 JCOEFPTR coef_block,
3686 JSAMPARRAY output_buf, JDIMENSION output_col)
3688 INT32 tmp0, tmp1, tmp2, tmp3;
3689 INT32 tmp10, tmp11, tmp12, tmp13;
3690 INT32 z1, z2, z3;
3691 JCOEFPTR inptr;
3692 ISLOW_MULT_TYPE * quantptr;
3693 int * wsptr;
3694 JSAMPROW outptr;
3695 JSAMPLE *range_limit = IDCT_range_limit(cinfo);
3696 int ctr;
3697 int workspace[8*4]; /* buffers data between passes */
3698 SHIFT_TEMPS
3700 /* Pass 1: process columns from input, store into work array.
3701 * 4-point IDCT kernel,
3702 * cK represents sqrt(2) * cos(K*pi/16) [refers to 8-point IDCT].
3705 inptr = coef_block;
3706 quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table;
3707 wsptr = workspace;
3708 for (ctr = 0; ctr < 8; ctr++, inptr++, quantptr++, wsptr++) {
3709 /* Even part */
3711 tmp0 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]);
3712 tmp2 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]);
3714 tmp10 = (tmp0 + tmp2) << PASS1_BITS;
3715 tmp12 = (tmp0 - tmp2) << PASS1_BITS;
3717 /* Odd part */
3718 /* Same rotation as in the even part of the 8x8 LL&M IDCT */
3720 z2 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]);
3721 z3 = DEQUANTIZE(inptr[DCTSIZE*3], quantptr[DCTSIZE*3]);
3723 z1 = MULTIPLY(z2 + z3, FIX_0_541196100); /* c6 */
3724 /* Add fudge factor here for final descale. */
3725 z1 += ONE << (CONST_BITS-PASS1_BITS-1);
3726 tmp0 = RIGHT_SHIFT(z1 + MULTIPLY(z2, FIX_0_765366865), /* c2-c6 */
3727 CONST_BITS-PASS1_BITS);
3728 tmp2 = RIGHT_SHIFT(z1 - MULTIPLY(z3, FIX_1_847759065), /* c2+c6 */
3729 CONST_BITS-PASS1_BITS);
3731 /* Final output stage */
3733 wsptr[8*0] = (int) (tmp10 + tmp0);
3734 wsptr[8*3] = (int) (tmp10 - tmp0);
3735 wsptr[8*1] = (int) (tmp12 + tmp2);
3736 wsptr[8*2] = (int) (tmp12 - tmp2);
3739 /* Pass 2: process rows from work array, store into output array.
3740 * Note that we must descale the results by a factor of 8 == 2**3,
3741 * and also undo the PASS1_BITS scaling.
3742 * 8-point IDCT kernel, cK represents sqrt(2) * cos(K*pi/16).
3745 wsptr = workspace;
3746 for (ctr = 0; ctr < 4; ctr++) {
3747 outptr = output_buf[ctr] + output_col;
3749 /* Even part: reverse the even part of the forward DCT.
3750 * The rotator is c(-6).
3753 /* Add range center and fudge factor for final descale and range-limit. */
3754 z2 = (INT32) wsptr[0] +
3755 ((((INT32) RANGE_CENTER) << (PASS1_BITS+3)) +
3756 (ONE << (PASS1_BITS+2)));
3757 z3 = (INT32) wsptr[4];
3759 tmp0 = (z2 + z3) << CONST_BITS;
3760 tmp1 = (z2 - z3) << CONST_BITS;
3762 z2 = (INT32) wsptr[2];
3763 z3 = (INT32) wsptr[6];
3765 z1 = MULTIPLY(z2 + z3, FIX_0_541196100); /* c6 */
3766 tmp2 = z1 + MULTIPLY(z2, FIX_0_765366865); /* c2-c6 */
3767 tmp3 = z1 - MULTIPLY(z3, FIX_1_847759065); /* c2+c6 */
3769 tmp10 = tmp0 + tmp2;
3770 tmp13 = tmp0 - tmp2;
3771 tmp11 = tmp1 + tmp3;
3772 tmp12 = tmp1 - tmp3;
3774 /* Odd part per figure 8; the matrix is unitary and hence its
3775 * transpose is its inverse. i0..i3 are y7,y5,y3,y1 respectively.
3778 tmp0 = (INT32) wsptr[7];
3779 tmp1 = (INT32) wsptr[5];
3780 tmp2 = (INT32) wsptr[3];
3781 tmp3 = (INT32) wsptr[1];
3783 z2 = tmp0 + tmp2;
3784 z3 = tmp1 + tmp3;
3786 z1 = MULTIPLY(z2 + z3, FIX_1_175875602); /* c3 */
3787 z2 = MULTIPLY(z2, - FIX_1_961570560); /* -c3-c5 */
3788 z3 = MULTIPLY(z3, - FIX_0_390180644); /* -c3+c5 */
3789 z2 += z1;
3790 z3 += z1;
3792 z1 = MULTIPLY(tmp0 + tmp3, - FIX_0_899976223); /* -c3+c7 */
3793 tmp0 = MULTIPLY(tmp0, FIX_0_298631336); /* -c1+c3+c5-c7 */
3794 tmp3 = MULTIPLY(tmp3, FIX_1_501321110); /* c1+c3-c5-c7 */
3795 tmp0 += z1 + z2;
3796 tmp3 += z1 + z3;
3798 z1 = MULTIPLY(tmp1 + tmp2, - FIX_2_562915447); /* -c1-c3 */
3799 tmp1 = MULTIPLY(tmp1, FIX_2_053119869); /* c1+c3-c5+c7 */
3800 tmp2 = MULTIPLY(tmp2, FIX_3_072711026); /* c1+c3+c5-c7 */
3801 tmp1 += z1 + z3;
3802 tmp2 += z1 + z2;
3804 /* Final output stage: inputs are tmp10..tmp13, tmp0..tmp3 */
3806 outptr[0] = range_limit[(int) RIGHT_SHIFT(tmp10 + tmp3,
3807 CONST_BITS+PASS1_BITS+3)
3808 & RANGE_MASK];
3809 outptr[7] = range_limit[(int) RIGHT_SHIFT(tmp10 - tmp3,
3810 CONST_BITS+PASS1_BITS+3)
3811 & RANGE_MASK];
3812 outptr[1] = range_limit[(int) RIGHT_SHIFT(tmp11 + tmp2,
3813 CONST_BITS+PASS1_BITS+3)
3814 & RANGE_MASK];
3815 outptr[6] = range_limit[(int) RIGHT_SHIFT(tmp11 - tmp2,
3816 CONST_BITS+PASS1_BITS+3)
3817 & RANGE_MASK];
3818 outptr[2] = range_limit[(int) RIGHT_SHIFT(tmp12 + tmp1,
3819 CONST_BITS+PASS1_BITS+3)
3820 & RANGE_MASK];
3821 outptr[5] = range_limit[(int) RIGHT_SHIFT(tmp12 - tmp1,
3822 CONST_BITS+PASS1_BITS+3)
3823 & RANGE_MASK];
3824 outptr[3] = range_limit[(int) RIGHT_SHIFT(tmp13 + tmp0,
3825 CONST_BITS+PASS1_BITS+3)
3826 & RANGE_MASK];
3827 outptr[4] = range_limit[(int) RIGHT_SHIFT(tmp13 - tmp0,
3828 CONST_BITS+PASS1_BITS+3)
3829 & RANGE_MASK];
3831 wsptr += DCTSIZE; /* advance pointer to next row */
3837 * Perform dequantization and inverse DCT on one block of coefficients,
3838 * producing a 6x3 output block.
3840 * 3-point IDCT in pass 1 (columns), 6-point in pass 2 (rows).
3843 GLOBAL(void)
3844 jpeg_idct_6x3 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
3845 JCOEFPTR coef_block,
3846 JSAMPARRAY output_buf, JDIMENSION output_col)
3848 INT32 tmp0, tmp1, tmp2, tmp10, tmp11, tmp12;
3849 INT32 z1, z2, z3;
3850 JCOEFPTR inptr;
3851 ISLOW_MULT_TYPE * quantptr;
3852 int * wsptr;
3853 JSAMPROW outptr;
3854 JSAMPLE *range_limit = IDCT_range_limit(cinfo);
3855 int ctr;
3856 int workspace[6*3]; /* buffers data between passes */
3857 SHIFT_TEMPS
3859 /* Pass 1: process columns from input, store into work array.
3860 * 3-point IDCT kernel, cK represents sqrt(2) * cos(K*pi/6).
3863 inptr = coef_block;
3864 quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table;
3865 wsptr = workspace;
3866 for (ctr = 0; ctr < 6; ctr++, inptr++, quantptr++, wsptr++) {
3867 /* Even part */
3869 tmp0 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]);
3870 tmp0 <<= CONST_BITS;
3871 /* Add fudge factor here for final descale. */
3872 tmp0 += ONE << (CONST_BITS-PASS1_BITS-1);
3873 tmp2 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]);
3874 tmp12 = MULTIPLY(tmp2, FIX(0.707106781)); /* c2 */
3875 tmp10 = tmp0 + tmp12;
3876 tmp2 = tmp0 - tmp12 - tmp12;
3878 /* Odd part */
3880 tmp12 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]);
3881 tmp0 = MULTIPLY(tmp12, FIX(1.224744871)); /* c1 */
3883 /* Final output stage */
3885 wsptr[6*0] = (int) RIGHT_SHIFT(tmp10 + tmp0, CONST_BITS-PASS1_BITS);
3886 wsptr[6*2] = (int) RIGHT_SHIFT(tmp10 - tmp0, CONST_BITS-PASS1_BITS);
3887 wsptr[6*1] = (int) RIGHT_SHIFT(tmp2, CONST_BITS-PASS1_BITS);
3890 /* Pass 2: process 3 rows from work array, store into output array.
3891 * 6-point IDCT kernel, cK represents sqrt(2) * cos(K*pi/12).
3894 wsptr = workspace;
3895 for (ctr = 0; ctr < 3; ctr++) {
3896 outptr = output_buf[ctr] + output_col;
3898 /* Even part */
3900 /* Add range center and fudge factor for final descale and range-limit. */
3901 tmp0 = (INT32) wsptr[0] +
3902 ((((INT32) RANGE_CENTER) << (PASS1_BITS+3)) +
3903 (ONE << (PASS1_BITS+2)));
3904 tmp0 <<= CONST_BITS;
3905 tmp2 = (INT32) wsptr[4];
3906 tmp10 = MULTIPLY(tmp2, FIX(0.707106781)); /* c4 */
3907 tmp1 = tmp0 + tmp10;
3908 tmp11 = tmp0 - tmp10 - tmp10;
3909 tmp10 = (INT32) wsptr[2];
3910 tmp0 = MULTIPLY(tmp10, FIX(1.224744871)); /* c2 */
3911 tmp10 = tmp1 + tmp0;
3912 tmp12 = tmp1 - tmp0;
3914 /* Odd part */
3916 z1 = (INT32) wsptr[1];
3917 z2 = (INT32) wsptr[3];
3918 z3 = (INT32) wsptr[5];
3919 tmp1 = MULTIPLY(z1 + z3, FIX(0.366025404)); /* c5 */
3920 tmp0 = tmp1 + ((z1 + z2) << CONST_BITS);
3921 tmp2 = tmp1 + ((z3 - z2) << CONST_BITS);
3922 tmp1 = (z1 - z2 - z3) << CONST_BITS;
3924 /* Final output stage */
3926 outptr[0] = range_limit[(int) RIGHT_SHIFT(tmp10 + tmp0,
3927 CONST_BITS+PASS1_BITS+3)
3928 & RANGE_MASK];
3929 outptr[5] = range_limit[(int) RIGHT_SHIFT(tmp10 - tmp0,
3930 CONST_BITS+PASS1_BITS+3)
3931 & RANGE_MASK];
3932 outptr[1] = range_limit[(int) RIGHT_SHIFT(tmp11 + tmp1,
3933 CONST_BITS+PASS1_BITS+3)
3934 & RANGE_MASK];
3935 outptr[4] = range_limit[(int) RIGHT_SHIFT(tmp11 - tmp1,
3936 CONST_BITS+PASS1_BITS+3)
3937 & RANGE_MASK];
3938 outptr[2] = range_limit[(int) RIGHT_SHIFT(tmp12 + tmp2,
3939 CONST_BITS+PASS1_BITS+3)
3940 & RANGE_MASK];
3941 outptr[3] = range_limit[(int) RIGHT_SHIFT(tmp12 - tmp2,
3942 CONST_BITS+PASS1_BITS+3)
3943 & RANGE_MASK];
3945 wsptr += 6; /* advance pointer to next row */
3951 * Perform dequantization and inverse DCT on one block of coefficients,
3952 * producing a 4x2 output block.
3954 * 2-point IDCT in pass 1 (columns), 4-point in pass 2 (rows).
3957 GLOBAL(void)
3958 jpeg_idct_4x2 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
3959 JCOEFPTR coef_block,
3960 JSAMPARRAY output_buf, JDIMENSION output_col)
3962 INT32 tmp0, tmp2, tmp10, tmp12;
3963 INT32 z1, z2, z3;
3964 JCOEFPTR inptr;
3965 ISLOW_MULT_TYPE * quantptr;
3966 INT32 * wsptr;
3967 JSAMPROW outptr;
3968 JSAMPLE *range_limit = IDCT_range_limit(cinfo);
3969 int ctr;
3970 INT32 workspace[4*2]; /* buffers data between passes */
3971 SHIFT_TEMPS
3973 /* Pass 1: process columns from input, store into work array. */
3975 inptr = coef_block;
3976 quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table;
3977 wsptr = workspace;
3978 for (ctr = 0; ctr < 4; ctr++, inptr++, quantptr++, wsptr++) {
3979 /* Even part */
3981 tmp10 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]);
3983 /* Odd part */
3985 tmp0 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]);
3987 /* Final output stage */
3989 wsptr[4*0] = tmp10 + tmp0;
3990 wsptr[4*1] = tmp10 - tmp0;
3993 /* Pass 2: process 2 rows from work array, store into output array.
3994 * 4-point IDCT kernel,
3995 * cK represents sqrt(2) * cos(K*pi/16) [refers to 8-point IDCT].
3998 wsptr = workspace;
3999 for (ctr = 0; ctr < 2; ctr++) {
4000 outptr = output_buf[ctr] + output_col;
4002 /* Even part */
4004 /* Add range center and fudge factor for final descale and range-limit. */
4005 tmp0 = wsptr[0] + ((((INT32) RANGE_CENTER) << 3) + (ONE << 2));
4006 tmp2 = wsptr[2];
4008 tmp10 = (tmp0 + tmp2) << CONST_BITS;
4009 tmp12 = (tmp0 - tmp2) << CONST_BITS;
4011 /* Odd part */
4012 /* Same rotation as in the even part of the 8x8 LL&M IDCT */
4014 z2 = wsptr[1];
4015 z3 = wsptr[3];
4017 z1 = MULTIPLY(z2 + z3, FIX_0_541196100); /* c6 */
4018 tmp0 = z1 + MULTIPLY(z2, FIX_0_765366865); /* c2-c6 */
4019 tmp2 = z1 - MULTIPLY(z3, FIX_1_847759065); /* c2+c6 */
4021 /* Final output stage */
4023 outptr[0] = range_limit[(int) RIGHT_SHIFT(tmp10 + tmp0,
4024 CONST_BITS+3)
4025 & RANGE_MASK];
4026 outptr[3] = range_limit[(int) RIGHT_SHIFT(tmp10 - tmp0,
4027 CONST_BITS+3)
4028 & RANGE_MASK];
4029 outptr[1] = range_limit[(int) RIGHT_SHIFT(tmp12 + tmp2,
4030 CONST_BITS+3)
4031 & RANGE_MASK];
4032 outptr[2] = range_limit[(int) RIGHT_SHIFT(tmp12 - tmp2,
4033 CONST_BITS+3)
4034 & RANGE_MASK];
4036 wsptr += 4; /* advance pointer to next row */
4042 * Perform dequantization and inverse DCT on one block of coefficients,
4043 * producing a 2x1 output block.
4045 * 1-point IDCT in pass 1 (columns), 2-point in pass 2 (rows).
4048 GLOBAL(void)
4049 jpeg_idct_2x1 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
4050 JCOEFPTR coef_block,
4051 JSAMPARRAY output_buf, JDIMENSION output_col)
4053 DCTELEM tmp0, tmp1;
4054 ISLOW_MULT_TYPE * quantptr;
4055 JSAMPROW outptr;
4056 JSAMPLE *range_limit = IDCT_range_limit(cinfo);
4057 ISHIFT_TEMPS
4059 /* Pass 1: empty. */
4061 /* Pass 2: process 1 row from input, store into output array. */
4063 quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table;
4064 outptr = output_buf[0] + output_col;
4066 /* Even part */
4068 tmp0 = DEQUANTIZE(coef_block[0], quantptr[0]);
4069 /* Add range center and fudge factor for final descale and range-limit. */
4070 tmp0 += (((DCTELEM) RANGE_CENTER) << 3) + (1 << 2);
4072 /* Odd part */
4074 tmp1 = DEQUANTIZE(coef_block[1], quantptr[1]);
4076 /* Final output stage */
4078 outptr[0] = range_limit[(int) IRIGHT_SHIFT(tmp0 + tmp1, 3) & RANGE_MASK];
4079 outptr[1] = range_limit[(int) IRIGHT_SHIFT(tmp0 - tmp1, 3) & RANGE_MASK];
4084 * Perform dequantization and inverse DCT on one block of coefficients,
4085 * producing an 8x16 output block.
4087 * 16-point IDCT in pass 1 (columns), 8-point in pass 2 (rows).
4090 GLOBAL(void)
4091 jpeg_idct_8x16 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
4092 JCOEFPTR coef_block,
4093 JSAMPARRAY output_buf, JDIMENSION output_col)
4095 INT32 tmp0, tmp1, tmp2, tmp3, tmp10, tmp11, tmp12, tmp13;
4096 INT32 tmp20, tmp21, tmp22, tmp23, tmp24, tmp25, tmp26, tmp27;
4097 INT32 z1, z2, z3, z4;
4098 JCOEFPTR inptr;
4099 ISLOW_MULT_TYPE * quantptr;
4100 int * wsptr;
4101 JSAMPROW outptr;
4102 JSAMPLE *range_limit = IDCT_range_limit(cinfo);
4103 int ctr;
4104 int workspace[8*16]; /* buffers data between passes */
4105 SHIFT_TEMPS
4107 /* Pass 1: process columns from input, store into work array.
4108 * 16-point IDCT kernel, cK represents sqrt(2) * cos(K*pi/32).
4111 inptr = coef_block;
4112 quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table;
4113 wsptr = workspace;
4114 for (ctr = 0; ctr < 8; ctr++, inptr++, quantptr++, wsptr++) {
4115 /* Even part */
4117 tmp0 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]);
4118 tmp0 <<= CONST_BITS;
4119 /* Add fudge factor here for final descale. */
4120 tmp0 += ONE << (CONST_BITS-PASS1_BITS-1);
4122 z1 = DEQUANTIZE(inptr[DCTSIZE*4], quantptr[DCTSIZE*4]);
4123 tmp1 = MULTIPLY(z1, FIX(1.306562965)); /* c4[16] = c2[8] */
4124 tmp2 = MULTIPLY(z1, FIX_0_541196100); /* c12[16] = c6[8] */
4126 tmp10 = tmp0 + tmp1;
4127 tmp11 = tmp0 - tmp1;
4128 tmp12 = tmp0 + tmp2;
4129 tmp13 = tmp0 - tmp2;
4131 z1 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]);
4132 z2 = DEQUANTIZE(inptr[DCTSIZE*6], quantptr[DCTSIZE*6]);
4133 z3 = z1 - z2;
4134 z4 = MULTIPLY(z3, FIX(0.275899379)); /* c14[16] = c7[8] */
4135 z3 = MULTIPLY(z3, FIX(1.387039845)); /* c2[16] = c1[8] */
4137 tmp0 = z3 + MULTIPLY(z2, FIX_2_562915447); /* (c6+c2)[16] = (c3+c1)[8] */
4138 tmp1 = z4 + MULTIPLY(z1, FIX_0_899976223); /* (c6-c14)[16] = (c3-c7)[8] */
4139 tmp2 = z3 - MULTIPLY(z1, FIX(0.601344887)); /* (c2-c10)[16] = (c1-c5)[8] */
4140 tmp3 = z4 - MULTIPLY(z2, FIX(0.509795579)); /* (c10-c14)[16] = (c5-c7)[8] */
4142 tmp20 = tmp10 + tmp0;
4143 tmp27 = tmp10 - tmp0;
4144 tmp21 = tmp12 + tmp1;
4145 tmp26 = tmp12 - tmp1;
4146 tmp22 = tmp13 + tmp2;
4147 tmp25 = tmp13 - tmp2;
4148 tmp23 = tmp11 + tmp3;
4149 tmp24 = tmp11 - tmp3;
4151 /* Odd part */
4153 z1 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]);
4154 z2 = DEQUANTIZE(inptr[DCTSIZE*3], quantptr[DCTSIZE*3]);
4155 z3 = DEQUANTIZE(inptr[DCTSIZE*5], quantptr[DCTSIZE*5]);
4156 z4 = DEQUANTIZE(inptr[DCTSIZE*7], quantptr[DCTSIZE*7]);
4158 tmp11 = z1 + z3;
4160 tmp1 = MULTIPLY(z1 + z2, FIX(1.353318001)); /* c3 */
4161 tmp2 = MULTIPLY(tmp11, FIX(1.247225013)); /* c5 */
4162 tmp3 = MULTIPLY(z1 + z4, FIX(1.093201867)); /* c7 */
4163 tmp10 = MULTIPLY(z1 - z4, FIX(0.897167586)); /* c9 */
4164 tmp11 = MULTIPLY(tmp11, FIX(0.666655658)); /* c11 */
4165 tmp12 = MULTIPLY(z1 - z2, FIX(0.410524528)); /* c13 */
4166 tmp0 = tmp1 + tmp2 + tmp3 -
4167 MULTIPLY(z1, FIX(2.286341144)); /* c7+c5+c3-c1 */
4168 tmp13 = tmp10 + tmp11 + tmp12 -
4169 MULTIPLY(z1, FIX(1.835730603)); /* c9+c11+c13-c15 */
4170 z1 = MULTIPLY(z2 + z3, FIX(0.138617169)); /* c15 */
4171 tmp1 += z1 + MULTIPLY(z2, FIX(0.071888074)); /* c9+c11-c3-c15 */
4172 tmp2 += z1 - MULTIPLY(z3, FIX(1.125726048)); /* c5+c7+c15-c3 */
4173 z1 = MULTIPLY(z3 - z2, FIX(1.407403738)); /* c1 */
4174 tmp11 += z1 - MULTIPLY(z3, FIX(0.766367282)); /* c1+c11-c9-c13 */
4175 tmp12 += z1 + MULTIPLY(z2, FIX(1.971951411)); /* c1+c5+c13-c7 */
4176 z2 += z4;
4177 z1 = MULTIPLY(z2, - FIX(0.666655658)); /* -c11 */
4178 tmp1 += z1;
4179 tmp3 += z1 + MULTIPLY(z4, FIX(1.065388962)); /* c3+c11+c15-c7 */
4180 z2 = MULTIPLY(z2, - FIX(1.247225013)); /* -c5 */
4181 tmp10 += z2 + MULTIPLY(z4, FIX(3.141271809)); /* c1+c5+c9-c13 */
4182 tmp12 += z2;
4183 z2 = MULTIPLY(z3 + z4, - FIX(1.353318001)); /* -c3 */
4184 tmp2 += z2;
4185 tmp3 += z2;
4186 z2 = MULTIPLY(z4 - z3, FIX(0.410524528)); /* c13 */
4187 tmp10 += z2;
4188 tmp11 += z2;
4190 /* Final output stage */
4192 wsptr[8*0] = (int) RIGHT_SHIFT(tmp20 + tmp0, CONST_BITS-PASS1_BITS);
4193 wsptr[8*15] = (int) RIGHT_SHIFT(tmp20 - tmp0, CONST_BITS-PASS1_BITS);
4194 wsptr[8*1] = (int) RIGHT_SHIFT(tmp21 + tmp1, CONST_BITS-PASS1_BITS);
4195 wsptr[8*14] = (int) RIGHT_SHIFT(tmp21 - tmp1, CONST_BITS-PASS1_BITS);
4196 wsptr[8*2] = (int) RIGHT_SHIFT(tmp22 + tmp2, CONST_BITS-PASS1_BITS);
4197 wsptr[8*13] = (int) RIGHT_SHIFT(tmp22 - tmp2, CONST_BITS-PASS1_BITS);
4198 wsptr[8*3] = (int) RIGHT_SHIFT(tmp23 + tmp3, CONST_BITS-PASS1_BITS);
4199 wsptr[8*12] = (int) RIGHT_SHIFT(tmp23 - tmp3, CONST_BITS-PASS1_BITS);
4200 wsptr[8*4] = (int) RIGHT_SHIFT(tmp24 + tmp10, CONST_BITS-PASS1_BITS);
4201 wsptr[8*11] = (int) RIGHT_SHIFT(tmp24 - tmp10, CONST_BITS-PASS1_BITS);
4202 wsptr[8*5] = (int) RIGHT_SHIFT(tmp25 + tmp11, CONST_BITS-PASS1_BITS);
4203 wsptr[8*10] = (int) RIGHT_SHIFT(tmp25 - tmp11, CONST_BITS-PASS1_BITS);
4204 wsptr[8*6] = (int) RIGHT_SHIFT(tmp26 + tmp12, CONST_BITS-PASS1_BITS);
4205 wsptr[8*9] = (int) RIGHT_SHIFT(tmp26 - tmp12, CONST_BITS-PASS1_BITS);
4206 wsptr[8*7] = (int) RIGHT_SHIFT(tmp27 + tmp13, CONST_BITS-PASS1_BITS);
4207 wsptr[8*8] = (int) RIGHT_SHIFT(tmp27 - tmp13, CONST_BITS-PASS1_BITS);
4210 /* Pass 2: process rows from work array, store into output array.
4211 * Note that we must descale the results by a factor of 8 == 2**3,
4212 * and also undo the PASS1_BITS scaling.
4213 * 8-point IDCT kernel, cK represents sqrt(2) * cos(K*pi/16).
4216 wsptr = workspace;
4217 for (ctr = 0; ctr < 16; ctr++) {
4218 outptr = output_buf[ctr] + output_col;
4220 /* Even part: reverse the even part of the forward DCT.
4221 * The rotator is c(-6).
4224 /* Add range center and fudge factor for final descale and range-limit. */
4225 z2 = (INT32) wsptr[0] +
4226 ((((INT32) RANGE_CENTER) << (PASS1_BITS+3)) +
4227 (ONE << (PASS1_BITS+2)));
4228 z3 = (INT32) wsptr[4];
4230 tmp0 = (z2 + z3) << CONST_BITS;
4231 tmp1 = (z2 - z3) << CONST_BITS;
4233 z2 = (INT32) wsptr[2];
4234 z3 = (INT32) wsptr[6];
4236 z1 = MULTIPLY(z2 + z3, FIX_0_541196100); /* c6 */
4237 tmp2 = z1 + MULTIPLY(z2, FIX_0_765366865); /* c2-c6 */
4238 tmp3 = z1 - MULTIPLY(z3, FIX_1_847759065); /* c2+c6 */
4240 tmp10 = tmp0 + tmp2;
4241 tmp13 = tmp0 - tmp2;
4242 tmp11 = tmp1 + tmp3;
4243 tmp12 = tmp1 - tmp3;
4245 /* Odd part per figure 8; the matrix is unitary and hence its
4246 * transpose is its inverse. i0..i3 are y7,y5,y3,y1 respectively.
4249 tmp0 = (INT32) wsptr[7];
4250 tmp1 = (INT32) wsptr[5];
4251 tmp2 = (INT32) wsptr[3];
4252 tmp3 = (INT32) wsptr[1];
4254 z2 = tmp0 + tmp2;
4255 z3 = tmp1 + tmp3;
4257 z1 = MULTIPLY(z2 + z3, FIX_1_175875602); /* c3 */
4258 z2 = MULTIPLY(z2, - FIX_1_961570560); /* -c3-c5 */
4259 z3 = MULTIPLY(z3, - FIX_0_390180644); /* -c3+c5 */
4260 z2 += z1;
4261 z3 += z1;
4263 z1 = MULTIPLY(tmp0 + tmp3, - FIX_0_899976223); /* -c3+c7 */
4264 tmp0 = MULTIPLY(tmp0, FIX_0_298631336); /* -c1+c3+c5-c7 */
4265 tmp3 = MULTIPLY(tmp3, FIX_1_501321110); /* c1+c3-c5-c7 */
4266 tmp0 += z1 + z2;
4267 tmp3 += z1 + z3;
4269 z1 = MULTIPLY(tmp1 + tmp2, - FIX_2_562915447); /* -c1-c3 */
4270 tmp1 = MULTIPLY(tmp1, FIX_2_053119869); /* c1+c3-c5+c7 */
4271 tmp2 = MULTIPLY(tmp2, FIX_3_072711026); /* c1+c3+c5-c7 */
4272 tmp1 += z1 + z3;
4273 tmp2 += z1 + z2;
4275 /* Final output stage: inputs are tmp10..tmp13, tmp0..tmp3 */
4277 outptr[0] = range_limit[(int) RIGHT_SHIFT(tmp10 + tmp3,
4278 CONST_BITS+PASS1_BITS+3)
4279 & RANGE_MASK];
4280 outptr[7] = range_limit[(int) RIGHT_SHIFT(tmp10 - tmp3,
4281 CONST_BITS+PASS1_BITS+3)
4282 & RANGE_MASK];
4283 outptr[1] = range_limit[(int) RIGHT_SHIFT(tmp11 + tmp2,
4284 CONST_BITS+PASS1_BITS+3)
4285 & RANGE_MASK];
4286 outptr[6] = range_limit[(int) RIGHT_SHIFT(tmp11 - tmp2,
4287 CONST_BITS+PASS1_BITS+3)
4288 & RANGE_MASK];
4289 outptr[2] = range_limit[(int) RIGHT_SHIFT(tmp12 + tmp1,
4290 CONST_BITS+PASS1_BITS+3)
4291 & RANGE_MASK];
4292 outptr[5] = range_limit[(int) RIGHT_SHIFT(tmp12 - tmp1,
4293 CONST_BITS+PASS1_BITS+3)
4294 & RANGE_MASK];
4295 outptr[3] = range_limit[(int) RIGHT_SHIFT(tmp13 + tmp0,
4296 CONST_BITS+PASS1_BITS+3)
4297 & RANGE_MASK];
4298 outptr[4] = range_limit[(int) RIGHT_SHIFT(tmp13 - tmp0,
4299 CONST_BITS+PASS1_BITS+3)
4300 & RANGE_MASK];
4302 wsptr += DCTSIZE; /* advance pointer to next row */
4308 * Perform dequantization and inverse DCT on one block of coefficients,
4309 * producing a 7x14 output block.
4311 * 14-point IDCT in pass 1 (columns), 7-point in pass 2 (rows).
4314 GLOBAL(void)
4315 jpeg_idct_7x14 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
4316 JCOEFPTR coef_block,
4317 JSAMPARRAY output_buf, JDIMENSION output_col)
4319 INT32 tmp10, tmp11, tmp12, tmp13, tmp14, tmp15, tmp16;
4320 INT32 tmp20, tmp21, tmp22, tmp23, tmp24, tmp25, tmp26;
4321 INT32 z1, z2, z3, z4;
4322 JCOEFPTR inptr;
4323 ISLOW_MULT_TYPE * quantptr;
4324 int * wsptr;
4325 JSAMPROW outptr;
4326 JSAMPLE *range_limit = IDCT_range_limit(cinfo);
4327 int ctr;
4328 int workspace[7*14]; /* buffers data between passes */
4329 SHIFT_TEMPS
4331 /* Pass 1: process columns from input, store into work array.
4332 * 14-point IDCT kernel, cK represents sqrt(2) * cos(K*pi/28).
4335 inptr = coef_block;
4336 quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table;
4337 wsptr = workspace;
4338 for (ctr = 0; ctr < 7; ctr++, inptr++, quantptr++, wsptr++) {
4339 /* Even part */
4341 z1 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]);
4342 z1 <<= CONST_BITS;
4343 /* Add fudge factor here for final descale. */
4344 z1 += ONE << (CONST_BITS-PASS1_BITS-1);
4345 z4 = DEQUANTIZE(inptr[DCTSIZE*4], quantptr[DCTSIZE*4]);
4346 z2 = MULTIPLY(z4, FIX(1.274162392)); /* c4 */
4347 z3 = MULTIPLY(z4, FIX(0.314692123)); /* c12 */
4348 z4 = MULTIPLY(z4, FIX(0.881747734)); /* c8 */
4350 tmp10 = z1 + z2;
4351 tmp11 = z1 + z3;
4352 tmp12 = z1 - z4;
4354 tmp23 = RIGHT_SHIFT(z1 - ((z2 + z3 - z4) << 1), /* c0 = (c4+c12-c8)*2 */
4355 CONST_BITS-PASS1_BITS);
4357 z1 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]);
4358 z2 = DEQUANTIZE(inptr[DCTSIZE*6], quantptr[DCTSIZE*6]);
4360 z3 = MULTIPLY(z1 + z2, FIX(1.105676686)); /* c6 */
4362 tmp13 = z3 + MULTIPLY(z1, FIX(0.273079590)); /* c2-c6 */
4363 tmp14 = z3 - MULTIPLY(z2, FIX(1.719280954)); /* c6+c10 */
4364 tmp15 = MULTIPLY(z1, FIX(0.613604268)) - /* c10 */
4365 MULTIPLY(z2, FIX(1.378756276)); /* c2 */
4367 tmp20 = tmp10 + tmp13;
4368 tmp26 = tmp10 - tmp13;
4369 tmp21 = tmp11 + tmp14;
4370 tmp25 = tmp11 - tmp14;
4371 tmp22 = tmp12 + tmp15;
4372 tmp24 = tmp12 - tmp15;
4374 /* Odd part */
4376 z1 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]);
4377 z2 = DEQUANTIZE(inptr[DCTSIZE*3], quantptr[DCTSIZE*3]);
4378 z3 = DEQUANTIZE(inptr[DCTSIZE*5], quantptr[DCTSIZE*5]);
4379 z4 = DEQUANTIZE(inptr[DCTSIZE*7], quantptr[DCTSIZE*7]);
4380 tmp13 = z4 << CONST_BITS;
4382 tmp14 = z1 + z3;
4383 tmp11 = MULTIPLY(z1 + z2, FIX(1.334852607)); /* c3 */
4384 tmp12 = MULTIPLY(tmp14, FIX(1.197448846)); /* c5 */
4385 tmp10 = tmp11 + tmp12 + tmp13 - MULTIPLY(z1, FIX(1.126980169)); /* c3+c5-c1 */
4386 tmp14 = MULTIPLY(tmp14, FIX(0.752406978)); /* c9 */
4387 tmp16 = tmp14 - MULTIPLY(z1, FIX(1.061150426)); /* c9+c11-c13 */
4388 z1 -= z2;
4389 tmp15 = MULTIPLY(z1, FIX(0.467085129)) - tmp13; /* c11 */
4390 tmp16 += tmp15;
4391 z1 += z4;
4392 z4 = MULTIPLY(z2 + z3, - FIX(0.158341681)) - tmp13; /* -c13 */
4393 tmp11 += z4 - MULTIPLY(z2, FIX(0.424103948)); /* c3-c9-c13 */
4394 tmp12 += z4 - MULTIPLY(z3, FIX(2.373959773)); /* c3+c5-c13 */
4395 z4 = MULTIPLY(z3 - z2, FIX(1.405321284)); /* c1 */
4396 tmp14 += z4 + tmp13 - MULTIPLY(z3, FIX(1.6906431334)); /* c1+c9-c11 */
4397 tmp15 += z4 + MULTIPLY(z2, FIX(0.674957567)); /* c1+c11-c5 */
4399 tmp13 = (z1 - z3) << PASS1_BITS;
4401 /* Final output stage */
4403 wsptr[7*0] = (int) RIGHT_SHIFT(tmp20 + tmp10, CONST_BITS-PASS1_BITS);
4404 wsptr[7*13] = (int) RIGHT_SHIFT(tmp20 - tmp10, CONST_BITS-PASS1_BITS);
4405 wsptr[7*1] = (int) RIGHT_SHIFT(tmp21 + tmp11, CONST_BITS-PASS1_BITS);
4406 wsptr[7*12] = (int) RIGHT_SHIFT(tmp21 - tmp11, CONST_BITS-PASS1_BITS);
4407 wsptr[7*2] = (int) RIGHT_SHIFT(tmp22 + tmp12, CONST_BITS-PASS1_BITS);
4408 wsptr[7*11] = (int) RIGHT_SHIFT(tmp22 - tmp12, CONST_BITS-PASS1_BITS);
4409 wsptr[7*3] = (int) (tmp23 + tmp13);
4410 wsptr[7*10] = (int) (tmp23 - tmp13);
4411 wsptr[7*4] = (int) RIGHT_SHIFT(tmp24 + tmp14, CONST_BITS-PASS1_BITS);
4412 wsptr[7*9] = (int) RIGHT_SHIFT(tmp24 - tmp14, CONST_BITS-PASS1_BITS);
4413 wsptr[7*5] = (int) RIGHT_SHIFT(tmp25 + tmp15, CONST_BITS-PASS1_BITS);
4414 wsptr[7*8] = (int) RIGHT_SHIFT(tmp25 - tmp15, CONST_BITS-PASS1_BITS);
4415 wsptr[7*6] = (int) RIGHT_SHIFT(tmp26 + tmp16, CONST_BITS-PASS1_BITS);
4416 wsptr[7*7] = (int) RIGHT_SHIFT(tmp26 - tmp16, CONST_BITS-PASS1_BITS);
4419 /* Pass 2: process 14 rows from work array, store into output array.
4420 * 7-point IDCT kernel, cK represents sqrt(2) * cos(K*pi/14).
4423 wsptr = workspace;
4424 for (ctr = 0; ctr < 14; ctr++) {
4425 outptr = output_buf[ctr] + output_col;
4427 /* Even part */
4429 /* Add range center and fudge factor for final descale and range-limit. */
4430 tmp23 = (INT32) wsptr[0] +
4431 ((((INT32) RANGE_CENTER) << (PASS1_BITS+3)) +
4432 (ONE << (PASS1_BITS+2)));
4433 tmp23 <<= CONST_BITS;
4435 z1 = (INT32) wsptr[2];
4436 z2 = (INT32) wsptr[4];
4437 z3 = (INT32) wsptr[6];
4439 tmp20 = MULTIPLY(z2 - z3, FIX(0.881747734)); /* c4 */
4440 tmp22 = MULTIPLY(z1 - z2, FIX(0.314692123)); /* c6 */
4441 tmp21 = tmp20 + tmp22 + tmp23 - MULTIPLY(z2, FIX(1.841218003)); /* c2+c4-c6 */
4442 tmp10 = z1 + z3;
4443 z2 -= tmp10;
4444 tmp10 = MULTIPLY(tmp10, FIX(1.274162392)) + tmp23; /* c2 */
4445 tmp20 += tmp10 - MULTIPLY(z3, FIX(0.077722536)); /* c2-c4-c6 */
4446 tmp22 += tmp10 - MULTIPLY(z1, FIX(2.470602249)); /* c2+c4+c6 */
4447 tmp23 += MULTIPLY(z2, FIX(1.414213562)); /* c0 */
4449 /* Odd part */
4451 z1 = (INT32) wsptr[1];
4452 z2 = (INT32) wsptr[3];
4453 z3 = (INT32) wsptr[5];
4455 tmp11 = MULTIPLY(z1 + z2, FIX(0.935414347)); /* (c3+c1-c5)/2 */
4456 tmp12 = MULTIPLY(z1 - z2, FIX(0.170262339)); /* (c3+c5-c1)/2 */
4457 tmp10 = tmp11 - tmp12;
4458 tmp11 += tmp12;
4459 tmp12 = MULTIPLY(z2 + z3, - FIX(1.378756276)); /* -c1 */
4460 tmp11 += tmp12;
4461 z2 = MULTIPLY(z1 + z3, FIX(0.613604268)); /* c5 */
4462 tmp10 += z2;
4463 tmp12 += z2 + MULTIPLY(z3, FIX(1.870828693)); /* c3+c1-c5 */
4465 /* Final output stage */
4467 outptr[0] = range_limit[(int) RIGHT_SHIFT(tmp20 + tmp10,
4468 CONST_BITS+PASS1_BITS+3)
4469 & RANGE_MASK];
4470 outptr[6] = range_limit[(int) RIGHT_SHIFT(tmp20 - tmp10,
4471 CONST_BITS+PASS1_BITS+3)
4472 & RANGE_MASK];
4473 outptr[1] = range_limit[(int) RIGHT_SHIFT(tmp21 + tmp11,
4474 CONST_BITS+PASS1_BITS+3)
4475 & RANGE_MASK];
4476 outptr[5] = range_limit[(int) RIGHT_SHIFT(tmp21 - tmp11,
4477 CONST_BITS+PASS1_BITS+3)
4478 & RANGE_MASK];
4479 outptr[2] = range_limit[(int) RIGHT_SHIFT(tmp22 + tmp12,
4480 CONST_BITS+PASS1_BITS+3)
4481 & RANGE_MASK];
4482 outptr[4] = range_limit[(int) RIGHT_SHIFT(tmp22 - tmp12,
4483 CONST_BITS+PASS1_BITS+3)
4484 & RANGE_MASK];
4485 outptr[3] = range_limit[(int) RIGHT_SHIFT(tmp23,
4486 CONST_BITS+PASS1_BITS+3)
4487 & RANGE_MASK];
4489 wsptr += 7; /* advance pointer to next row */
4495 * Perform dequantization and inverse DCT on one block of coefficients,
4496 * producing a 6x12 output block.
4498 * 12-point IDCT in pass 1 (columns), 6-point in pass 2 (rows).
4501 GLOBAL(void)
4502 jpeg_idct_6x12 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
4503 JCOEFPTR coef_block,
4504 JSAMPARRAY output_buf, JDIMENSION output_col)
4506 INT32 tmp10, tmp11, tmp12, tmp13, tmp14, tmp15;
4507 INT32 tmp20, tmp21, tmp22, tmp23, tmp24, tmp25;
4508 INT32 z1, z2, z3, z4;
4509 JCOEFPTR inptr;
4510 ISLOW_MULT_TYPE * quantptr;
4511 int * wsptr;
4512 JSAMPROW outptr;
4513 JSAMPLE *range_limit = IDCT_range_limit(cinfo);
4514 int ctr;
4515 int workspace[6*12]; /* buffers data between passes */
4516 SHIFT_TEMPS
4518 /* Pass 1: process columns from input, store into work array.
4519 * 12-point IDCT kernel, cK represents sqrt(2) * cos(K*pi/24).
4522 inptr = coef_block;
4523 quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table;
4524 wsptr = workspace;
4525 for (ctr = 0; ctr < 6; ctr++, inptr++, quantptr++, wsptr++) {
4526 /* Even part */
4528 z3 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]);
4529 z3 <<= CONST_BITS;
4530 /* Add fudge factor here for final descale. */
4531 z3 += ONE << (CONST_BITS-PASS1_BITS-1);
4533 z4 = DEQUANTIZE(inptr[DCTSIZE*4], quantptr[DCTSIZE*4]);
4534 z4 = MULTIPLY(z4, FIX(1.224744871)); /* c4 */
4536 tmp10 = z3 + z4;
4537 tmp11 = z3 - z4;
4539 z1 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]);
4540 z4 = MULTIPLY(z1, FIX(1.366025404)); /* c2 */
4541 z1 <<= CONST_BITS;
4542 z2 = DEQUANTIZE(inptr[DCTSIZE*6], quantptr[DCTSIZE*6]);
4543 z2 <<= CONST_BITS;
4545 tmp12 = z1 - z2;
4547 tmp21 = z3 + tmp12;
4548 tmp24 = z3 - tmp12;
4550 tmp12 = z4 + z2;
4552 tmp20 = tmp10 + tmp12;
4553 tmp25 = tmp10 - tmp12;
4555 tmp12 = z4 - z1 - z2;
4557 tmp22 = tmp11 + tmp12;
4558 tmp23 = tmp11 - tmp12;
4560 /* Odd part */
4562 z1 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]);
4563 z2 = DEQUANTIZE(inptr[DCTSIZE*3], quantptr[DCTSIZE*3]);
4564 z3 = DEQUANTIZE(inptr[DCTSIZE*5], quantptr[DCTSIZE*5]);
4565 z4 = DEQUANTIZE(inptr[DCTSIZE*7], quantptr[DCTSIZE*7]);
4567 tmp11 = MULTIPLY(z2, FIX(1.306562965)); /* c3 */
4568 tmp14 = MULTIPLY(z2, - FIX_0_541196100); /* -c9 */
4570 tmp10 = z1 + z3;
4571 tmp15 = MULTIPLY(tmp10 + z4, FIX(0.860918669)); /* c7 */
4572 tmp12 = tmp15 + MULTIPLY(tmp10, FIX(0.261052384)); /* c5-c7 */
4573 tmp10 = tmp12 + tmp11 + MULTIPLY(z1, FIX(0.280143716)); /* c1-c5 */
4574 tmp13 = MULTIPLY(z3 + z4, - FIX(1.045510580)); /* -(c7+c11) */
4575 tmp12 += tmp13 + tmp14 - MULTIPLY(z3, FIX(1.478575242)); /* c1+c5-c7-c11 */
4576 tmp13 += tmp15 - tmp11 + MULTIPLY(z4, FIX(1.586706681)); /* c1+c11 */
4577 tmp15 += tmp14 - MULTIPLY(z1, FIX(0.676326758)) - /* c7-c11 */
4578 MULTIPLY(z4, FIX(1.982889723)); /* c5+c7 */
4580 z1 -= z4;
4581 z2 -= z3;
4582 z3 = MULTIPLY(z1 + z2, FIX_0_541196100); /* c9 */
4583 tmp11 = z3 + MULTIPLY(z1, FIX_0_765366865); /* c3-c9 */
4584 tmp14 = z3 - MULTIPLY(z2, FIX_1_847759065); /* c3+c9 */
4586 /* Final output stage */
4588 wsptr[6*0] = (int) RIGHT_SHIFT(tmp20 + tmp10, CONST_BITS-PASS1_BITS);
4589 wsptr[6*11] = (int) RIGHT_SHIFT(tmp20 - tmp10, CONST_BITS-PASS1_BITS);
4590 wsptr[6*1] = (int) RIGHT_SHIFT(tmp21 + tmp11, CONST_BITS-PASS1_BITS);
4591 wsptr[6*10] = (int) RIGHT_SHIFT(tmp21 - tmp11, CONST_BITS-PASS1_BITS);
4592 wsptr[6*2] = (int) RIGHT_SHIFT(tmp22 + tmp12, CONST_BITS-PASS1_BITS);
4593 wsptr[6*9] = (int) RIGHT_SHIFT(tmp22 - tmp12, CONST_BITS-PASS1_BITS);
4594 wsptr[6*3] = (int) RIGHT_SHIFT(tmp23 + tmp13, CONST_BITS-PASS1_BITS);
4595 wsptr[6*8] = (int) RIGHT_SHIFT(tmp23 - tmp13, CONST_BITS-PASS1_BITS);
4596 wsptr[6*4] = (int) RIGHT_SHIFT(tmp24 + tmp14, CONST_BITS-PASS1_BITS);
4597 wsptr[6*7] = (int) RIGHT_SHIFT(tmp24 - tmp14, CONST_BITS-PASS1_BITS);
4598 wsptr[6*5] = (int) RIGHT_SHIFT(tmp25 + tmp15, CONST_BITS-PASS1_BITS);
4599 wsptr[6*6] = (int) RIGHT_SHIFT(tmp25 - tmp15, CONST_BITS-PASS1_BITS);
4602 /* Pass 2: process 12 rows from work array, store into output array.
4603 * 6-point IDCT kernel, cK represents sqrt(2) * cos(K*pi/12).
4606 wsptr = workspace;
4607 for (ctr = 0; ctr < 12; ctr++) {
4608 outptr = output_buf[ctr] + output_col;
4610 /* Even part */
4612 /* Add range center and fudge factor for final descale and range-limit. */
4613 tmp10 = (INT32) wsptr[0] +
4614 ((((INT32) RANGE_CENTER) << (PASS1_BITS+3)) +
4615 (ONE << (PASS1_BITS+2)));
4616 tmp10 <<= CONST_BITS;
4617 tmp12 = (INT32) wsptr[4];
4618 tmp20 = MULTIPLY(tmp12, FIX(0.707106781)); /* c4 */
4619 tmp11 = tmp10 + tmp20;
4620 tmp21 = tmp10 - tmp20 - tmp20;
4621 tmp20 = (INT32) wsptr[2];
4622 tmp10 = MULTIPLY(tmp20, FIX(1.224744871)); /* c2 */
4623 tmp20 = tmp11 + tmp10;
4624 tmp22 = tmp11 - tmp10;
4626 /* Odd part */
4628 z1 = (INT32) wsptr[1];
4629 z2 = (INT32) wsptr[3];
4630 z3 = (INT32) wsptr[5];
4631 tmp11 = MULTIPLY(z1 + z3, FIX(0.366025404)); /* c5 */
4632 tmp10 = tmp11 + ((z1 + z2) << CONST_BITS);
4633 tmp12 = tmp11 + ((z3 - z2) << CONST_BITS);
4634 tmp11 = (z1 - z2 - z3) << CONST_BITS;
4636 /* Final output stage */
4638 outptr[0] = range_limit[(int) RIGHT_SHIFT(tmp20 + tmp10,
4639 CONST_BITS+PASS1_BITS+3)
4640 & RANGE_MASK];
4641 outptr[5] = range_limit[(int) RIGHT_SHIFT(tmp20 - tmp10,
4642 CONST_BITS+PASS1_BITS+3)
4643 & RANGE_MASK];
4644 outptr[1] = range_limit[(int) RIGHT_SHIFT(tmp21 + tmp11,
4645 CONST_BITS+PASS1_BITS+3)
4646 & RANGE_MASK];
4647 outptr[4] = range_limit[(int) RIGHT_SHIFT(tmp21 - tmp11,
4648 CONST_BITS+PASS1_BITS+3)
4649 & RANGE_MASK];
4650 outptr[2] = range_limit[(int) RIGHT_SHIFT(tmp22 + tmp12,
4651 CONST_BITS+PASS1_BITS+3)
4652 & RANGE_MASK];
4653 outptr[3] = range_limit[(int) RIGHT_SHIFT(tmp22 - tmp12,
4654 CONST_BITS+PASS1_BITS+3)
4655 & RANGE_MASK];
4657 wsptr += 6; /* advance pointer to next row */
4663 * Perform dequantization and inverse DCT on one block of coefficients,
4664 * producing a 5x10 output block.
4666 * 10-point IDCT in pass 1 (columns), 5-point in pass 2 (rows).
4669 GLOBAL(void)
4670 jpeg_idct_5x10 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
4671 JCOEFPTR coef_block,
4672 JSAMPARRAY output_buf, JDIMENSION output_col)
4674 INT32 tmp10, tmp11, tmp12, tmp13, tmp14;
4675 INT32 tmp20, tmp21, tmp22, tmp23, tmp24;
4676 INT32 z1, z2, z3, z4, z5;
4677 JCOEFPTR inptr;
4678 ISLOW_MULT_TYPE * quantptr;
4679 int * wsptr;
4680 JSAMPROW outptr;
4681 JSAMPLE *range_limit = IDCT_range_limit(cinfo);
4682 int ctr;
4683 int workspace[5*10]; /* buffers data between passes */
4684 SHIFT_TEMPS
4686 /* Pass 1: process columns from input, store into work array.
4687 * 10-point IDCT kernel, cK represents sqrt(2) * cos(K*pi/20).
4690 inptr = coef_block;
4691 quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table;
4692 wsptr = workspace;
4693 for (ctr = 0; ctr < 5; ctr++, inptr++, quantptr++, wsptr++) {
4694 /* Even part */
4696 z3 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]);
4697 z3 <<= CONST_BITS;
4698 /* Add fudge factor here for final descale. */
4699 z3 += ONE << (CONST_BITS-PASS1_BITS-1);
4700 z4 = DEQUANTIZE(inptr[DCTSIZE*4], quantptr[DCTSIZE*4]);
4701 z1 = MULTIPLY(z4, FIX(1.144122806)); /* c4 */
4702 z2 = MULTIPLY(z4, FIX(0.437016024)); /* c8 */
4703 tmp10 = z3 + z1;
4704 tmp11 = z3 - z2;
4706 tmp22 = RIGHT_SHIFT(z3 - ((z1 - z2) << 1), /* c0 = (c4-c8)*2 */
4707 CONST_BITS-PASS1_BITS);
4709 z2 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]);
4710 z3 = DEQUANTIZE(inptr[DCTSIZE*6], quantptr[DCTSIZE*6]);
4712 z1 = MULTIPLY(z2 + z3, FIX(0.831253876)); /* c6 */
4713 tmp12 = z1 + MULTIPLY(z2, FIX(0.513743148)); /* c2-c6 */
4714 tmp13 = z1 - MULTIPLY(z3, FIX(2.176250899)); /* c2+c6 */
4716 tmp20 = tmp10 + tmp12;
4717 tmp24 = tmp10 - tmp12;
4718 tmp21 = tmp11 + tmp13;
4719 tmp23 = tmp11 - tmp13;
4721 /* Odd part */
4723 z1 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]);
4724 z2 = DEQUANTIZE(inptr[DCTSIZE*3], quantptr[DCTSIZE*3]);
4725 z3 = DEQUANTIZE(inptr[DCTSIZE*5], quantptr[DCTSIZE*5]);
4726 z4 = DEQUANTIZE(inptr[DCTSIZE*7], quantptr[DCTSIZE*7]);
4728 tmp11 = z2 + z4;
4729 tmp13 = z2 - z4;
4731 tmp12 = MULTIPLY(tmp13, FIX(0.309016994)); /* (c3-c7)/2 */
4732 z5 = z3 << CONST_BITS;
4734 z2 = MULTIPLY(tmp11, FIX(0.951056516)); /* (c3+c7)/2 */
4735 z4 = z5 + tmp12;
4737 tmp10 = MULTIPLY(z1, FIX(1.396802247)) + z2 + z4; /* c1 */
4738 tmp14 = MULTIPLY(z1, FIX(0.221231742)) - z2 + z4; /* c9 */
4740 z2 = MULTIPLY(tmp11, FIX(0.587785252)); /* (c1-c9)/2 */
4741 z4 = z5 - tmp12 - (tmp13 << (CONST_BITS - 1));
4743 tmp12 = (z1 - tmp13 - z3) << PASS1_BITS;
4745 tmp11 = MULTIPLY(z1, FIX(1.260073511)) - z2 - z4; /* c3 */
4746 tmp13 = MULTIPLY(z1, FIX(0.642039522)) - z2 + z4; /* c7 */
4748 /* Final output stage */
4750 wsptr[5*0] = (int) RIGHT_SHIFT(tmp20 + tmp10, CONST_BITS-PASS1_BITS);
4751 wsptr[5*9] = (int) RIGHT_SHIFT(tmp20 - tmp10, CONST_BITS-PASS1_BITS);
4752 wsptr[5*1] = (int) RIGHT_SHIFT(tmp21 + tmp11, CONST_BITS-PASS1_BITS);
4753 wsptr[5*8] = (int) RIGHT_SHIFT(tmp21 - tmp11, CONST_BITS-PASS1_BITS);
4754 wsptr[5*2] = (int) (tmp22 + tmp12);
4755 wsptr[5*7] = (int) (tmp22 - tmp12);
4756 wsptr[5*3] = (int) RIGHT_SHIFT(tmp23 + tmp13, CONST_BITS-PASS1_BITS);
4757 wsptr[5*6] = (int) RIGHT_SHIFT(tmp23 - tmp13, CONST_BITS-PASS1_BITS);
4758 wsptr[5*4] = (int) RIGHT_SHIFT(tmp24 + tmp14, CONST_BITS-PASS1_BITS);
4759 wsptr[5*5] = (int) RIGHT_SHIFT(tmp24 - tmp14, CONST_BITS-PASS1_BITS);
4762 /* Pass 2: process 10 rows from work array, store into output array.
4763 * 5-point IDCT kernel, cK represents sqrt(2) * cos(K*pi/10).
4766 wsptr = workspace;
4767 for (ctr = 0; ctr < 10; ctr++) {
4768 outptr = output_buf[ctr] + output_col;
4770 /* Even part */
4772 /* Add range center and fudge factor for final descale and range-limit. */
4773 tmp12 = (INT32) wsptr[0] +
4774 ((((INT32) RANGE_CENTER) << (PASS1_BITS+3)) +
4775 (ONE << (PASS1_BITS+2)));
4776 tmp12 <<= CONST_BITS;
4777 tmp13 = (INT32) wsptr[2];
4778 tmp14 = (INT32) wsptr[4];
4779 z1 = MULTIPLY(tmp13 + tmp14, FIX(0.790569415)); /* (c2+c4)/2 */
4780 z2 = MULTIPLY(tmp13 - tmp14, FIX(0.353553391)); /* (c2-c4)/2 */
4781 z3 = tmp12 + z2;
4782 tmp10 = z3 + z1;
4783 tmp11 = z3 - z1;
4784 tmp12 -= z2 << 2;
4786 /* Odd part */
4788 z2 = (INT32) wsptr[1];
4789 z3 = (INT32) wsptr[3];
4791 z1 = MULTIPLY(z2 + z3, FIX(0.831253876)); /* c3 */
4792 tmp13 = z1 + MULTIPLY(z2, FIX(0.513743148)); /* c1-c3 */
4793 tmp14 = z1 - MULTIPLY(z3, FIX(2.176250899)); /* c1+c3 */
4795 /* Final output stage */
4797 outptr[0] = range_limit[(int) RIGHT_SHIFT(tmp10 + tmp13,
4798 CONST_BITS+PASS1_BITS+3)
4799 & RANGE_MASK];
4800 outptr[4] = range_limit[(int) RIGHT_SHIFT(tmp10 - tmp13,
4801 CONST_BITS+PASS1_BITS+3)
4802 & RANGE_MASK];
4803 outptr[1] = range_limit[(int) RIGHT_SHIFT(tmp11 + tmp14,
4804 CONST_BITS+PASS1_BITS+3)
4805 & RANGE_MASK];
4806 outptr[3] = range_limit[(int) RIGHT_SHIFT(tmp11 - tmp14,
4807 CONST_BITS+PASS1_BITS+3)
4808 & RANGE_MASK];
4809 outptr[2] = range_limit[(int) RIGHT_SHIFT(tmp12,
4810 CONST_BITS+PASS1_BITS+3)
4811 & RANGE_MASK];
4813 wsptr += 5; /* advance pointer to next row */
4819 * Perform dequantization and inverse DCT on one block of coefficients,
4820 * producing a 4x8 output block.
4822 * 8-point IDCT in pass 1 (columns), 4-point in pass 2 (rows).
4825 GLOBAL(void)
4826 jpeg_idct_4x8 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
4827 JCOEFPTR coef_block,
4828 JSAMPARRAY output_buf, JDIMENSION output_col)
4830 INT32 tmp0, tmp1, tmp2, tmp3;
4831 INT32 tmp10, tmp11, tmp12, tmp13;
4832 INT32 z1, z2, z3;
4833 JCOEFPTR inptr;
4834 ISLOW_MULT_TYPE * quantptr;
4835 int * wsptr;
4836 JSAMPROW outptr;
4837 JSAMPLE *range_limit = IDCT_range_limit(cinfo);
4838 int ctr;
4839 int workspace[4*8]; /* buffers data between passes */
4840 SHIFT_TEMPS
4842 /* Pass 1: process columns from input, store into work array.
4843 * Note results are scaled up by sqrt(8) compared to a true IDCT;
4844 * furthermore, we scale the results by 2**PASS1_BITS.
4845 * 8-point IDCT kernel, cK represents sqrt(2) * cos(K*pi/16).
4848 inptr = coef_block;
4849 quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table;
4850 wsptr = workspace;
4851 for (ctr = 4; ctr > 0; ctr--) {
4852 /* Due to quantization, we will usually find that many of the input
4853 * coefficients are zero, especially the AC terms. We can exploit this
4854 * by short-circuiting the IDCT calculation for any column in which all
4855 * the AC terms are zero. In that case each output is equal to the
4856 * DC coefficient (with scale factor as needed).
4857 * With typical images and quantization tables, half or more of the
4858 * column DCT calculations can be simplified this way.
4861 if (inptr[DCTSIZE*1] == 0 && inptr[DCTSIZE*2] == 0 &&
4862 inptr[DCTSIZE*3] == 0 && inptr[DCTSIZE*4] == 0 &&
4863 inptr[DCTSIZE*5] == 0 && inptr[DCTSIZE*6] == 0 &&
4864 inptr[DCTSIZE*7] == 0) {
4865 /* AC terms all zero */
4866 int dcval = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]) << PASS1_BITS;
4868 wsptr[4*0] = dcval;
4869 wsptr[4*1] = dcval;
4870 wsptr[4*2] = dcval;
4871 wsptr[4*3] = dcval;
4872 wsptr[4*4] = dcval;
4873 wsptr[4*5] = dcval;
4874 wsptr[4*6] = dcval;
4875 wsptr[4*7] = dcval;
4877 inptr++; /* advance pointers to next column */
4878 quantptr++;
4879 wsptr++;
4880 continue;
4883 /* Even part: reverse the even part of the forward DCT.
4884 * The rotator is c(-6).
4887 z2 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]);
4888 z3 = DEQUANTIZE(inptr[DCTSIZE*4], quantptr[DCTSIZE*4]);
4889 z2 <<= CONST_BITS;
4890 z3 <<= CONST_BITS;
4891 /* Add fudge factor here for final descale. */
4892 z2 += ONE << (CONST_BITS-PASS1_BITS-1);
4894 tmp0 = z2 + z3;
4895 tmp1 = z2 - z3;
4897 z2 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]);
4898 z3 = DEQUANTIZE(inptr[DCTSIZE*6], quantptr[DCTSIZE*6]);
4900 z1 = MULTIPLY(z2 + z3, FIX_0_541196100); /* c6 */
4901 tmp2 = z1 + MULTIPLY(z2, FIX_0_765366865); /* c2-c6 */
4902 tmp3 = z1 - MULTIPLY(z3, FIX_1_847759065); /* c2+c6 */
4904 tmp10 = tmp0 + tmp2;
4905 tmp13 = tmp0 - tmp2;
4906 tmp11 = tmp1 + tmp3;
4907 tmp12 = tmp1 - tmp3;
4909 /* Odd part per figure 8; the matrix is unitary and hence its
4910 * transpose is its inverse. i0..i3 are y7,y5,y3,y1 respectively.
4913 tmp0 = DEQUANTIZE(inptr[DCTSIZE*7], quantptr[DCTSIZE*7]);
4914 tmp1 = DEQUANTIZE(inptr[DCTSIZE*5], quantptr[DCTSIZE*5]);
4915 tmp2 = DEQUANTIZE(inptr[DCTSIZE*3], quantptr[DCTSIZE*3]);
4916 tmp3 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]);
4918 z2 = tmp0 + tmp2;
4919 z3 = tmp1 + tmp3;
4921 z1 = MULTIPLY(z2 + z3, FIX_1_175875602); /* c3 */
4922 z2 = MULTIPLY(z2, - FIX_1_961570560); /* -c3-c5 */
4923 z3 = MULTIPLY(z3, - FIX_0_390180644); /* -c3+c5 */
4924 z2 += z1;
4925 z3 += z1;
4927 z1 = MULTIPLY(tmp0 + tmp3, - FIX_0_899976223); /* -c3+c7 */
4928 tmp0 = MULTIPLY(tmp0, FIX_0_298631336); /* -c1+c3+c5-c7 */
4929 tmp3 = MULTIPLY(tmp3, FIX_1_501321110); /* c1+c3-c5-c7 */
4930 tmp0 += z1 + z2;
4931 tmp3 += z1 + z3;
4933 z1 = MULTIPLY(tmp1 + tmp2, - FIX_2_562915447); /* -c1-c3 */
4934 tmp1 = MULTIPLY(tmp1, FIX_2_053119869); /* c1+c3-c5+c7 */
4935 tmp2 = MULTIPLY(tmp2, FIX_3_072711026); /* c1+c3+c5-c7 */
4936 tmp1 += z1 + z3;
4937 tmp2 += z1 + z2;
4939 /* Final output stage: inputs are tmp10..tmp13, tmp0..tmp3 */
4941 wsptr[4*0] = (int) RIGHT_SHIFT(tmp10 + tmp3, CONST_BITS-PASS1_BITS);
4942 wsptr[4*7] = (int) RIGHT_SHIFT(tmp10 - tmp3, CONST_BITS-PASS1_BITS);
4943 wsptr[4*1] = (int) RIGHT_SHIFT(tmp11 + tmp2, CONST_BITS-PASS1_BITS);
4944 wsptr[4*6] = (int) RIGHT_SHIFT(tmp11 - tmp2, CONST_BITS-PASS1_BITS);
4945 wsptr[4*2] = (int) RIGHT_SHIFT(tmp12 + tmp1, CONST_BITS-PASS1_BITS);
4946 wsptr[4*5] = (int) RIGHT_SHIFT(tmp12 - tmp1, CONST_BITS-PASS1_BITS);
4947 wsptr[4*3] = (int) RIGHT_SHIFT(tmp13 + tmp0, CONST_BITS-PASS1_BITS);
4948 wsptr[4*4] = (int) RIGHT_SHIFT(tmp13 - tmp0, CONST_BITS-PASS1_BITS);
4950 inptr++; /* advance pointers to next column */
4951 quantptr++;
4952 wsptr++;
4955 /* Pass 2: process 8 rows from work array, store into output array.
4956 * 4-point IDCT kernel,
4957 * cK represents sqrt(2) * cos(K*pi/16) [refers to 8-point IDCT].
4960 wsptr = workspace;
4961 for (ctr = 0; ctr < 8; ctr++) {
4962 outptr = output_buf[ctr] + output_col;
4964 /* Even part */
4966 /* Add range center and fudge factor for final descale and range-limit. */
4967 tmp0 = (INT32) wsptr[0] +
4968 ((((INT32) RANGE_CENTER) << (PASS1_BITS+3)) +
4969 (ONE << (PASS1_BITS+2)));
4970 tmp2 = (INT32) wsptr[2];
4972 tmp10 = (tmp0 + tmp2) << CONST_BITS;
4973 tmp12 = (tmp0 - tmp2) << CONST_BITS;
4975 /* Odd part */
4976 /* Same rotation as in the even part of the 8x8 LL&M IDCT */
4978 z2 = (INT32) wsptr[1];
4979 z3 = (INT32) wsptr[3];
4981 z1 = MULTIPLY(z2 + z3, FIX_0_541196100); /* c6 */
4982 tmp0 = z1 + MULTIPLY(z2, FIX_0_765366865); /* c2-c6 */
4983 tmp2 = z1 - MULTIPLY(z3, FIX_1_847759065); /* c2+c6 */
4985 /* Final output stage */
4987 outptr[0] = range_limit[(int) RIGHT_SHIFT(tmp10 + tmp0,
4988 CONST_BITS+PASS1_BITS+3)
4989 & RANGE_MASK];
4990 outptr[3] = range_limit[(int) RIGHT_SHIFT(tmp10 - tmp0,
4991 CONST_BITS+PASS1_BITS+3)
4992 & RANGE_MASK];
4993 outptr[1] = range_limit[(int) RIGHT_SHIFT(tmp12 + tmp2,
4994 CONST_BITS+PASS1_BITS+3)
4995 & RANGE_MASK];
4996 outptr[2] = range_limit[(int) RIGHT_SHIFT(tmp12 - tmp2,
4997 CONST_BITS+PASS1_BITS+3)
4998 & RANGE_MASK];
5000 wsptr += 4; /* advance pointer to next row */
5006 * Perform dequantization and inverse DCT on one block of coefficients,
5007 * producing a 3x6 output block.
5009 * 6-point IDCT in pass 1 (columns), 3-point in pass 2 (rows).
5012 GLOBAL(void)
5013 jpeg_idct_3x6 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
5014 JCOEFPTR coef_block,
5015 JSAMPARRAY output_buf, JDIMENSION output_col)
5017 INT32 tmp0, tmp1, tmp2, tmp10, tmp11, tmp12;
5018 INT32 z1, z2, z3;
5019 JCOEFPTR inptr;
5020 ISLOW_MULT_TYPE * quantptr;
5021 int * wsptr;
5022 JSAMPROW outptr;
5023 JSAMPLE *range_limit = IDCT_range_limit(cinfo);
5024 int ctr;
5025 int workspace[3*6]; /* buffers data between passes */
5026 SHIFT_TEMPS
5028 /* Pass 1: process columns from input, store into work array.
5029 * 6-point IDCT kernel, cK represents sqrt(2) * cos(K*pi/12).
5032 inptr = coef_block;
5033 quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table;
5034 wsptr = workspace;
5035 for (ctr = 0; ctr < 3; ctr++, inptr++, quantptr++, wsptr++) {
5036 /* Even part */
5038 tmp0 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]);
5039 tmp0 <<= CONST_BITS;
5040 /* Add fudge factor here for final descale. */
5041 tmp0 += ONE << (CONST_BITS-PASS1_BITS-1);
5042 tmp2 = DEQUANTIZE(inptr[DCTSIZE*4], quantptr[DCTSIZE*4]);
5043 tmp10 = MULTIPLY(tmp2, FIX(0.707106781)); /* c4 */
5044 tmp1 = tmp0 + tmp10;
5045 tmp11 = RIGHT_SHIFT(tmp0 - tmp10 - tmp10, CONST_BITS-PASS1_BITS);
5046 tmp10 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]);
5047 tmp0 = MULTIPLY(tmp10, FIX(1.224744871)); /* c2 */
5048 tmp10 = tmp1 + tmp0;
5049 tmp12 = tmp1 - tmp0;
5051 /* Odd part */
5053 z1 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]);
5054 z2 = DEQUANTIZE(inptr[DCTSIZE*3], quantptr[DCTSIZE*3]);
5055 z3 = DEQUANTIZE(inptr[DCTSIZE*5], quantptr[DCTSIZE*5]);
5056 tmp1 = MULTIPLY(z1 + z3, FIX(0.366025404)); /* c5 */
5057 tmp0 = tmp1 + ((z1 + z2) << CONST_BITS);
5058 tmp2 = tmp1 + ((z3 - z2) << CONST_BITS);
5059 tmp1 = (z1 - z2 - z3) << PASS1_BITS;
5061 /* Final output stage */
5063 wsptr[3*0] = (int) RIGHT_SHIFT(tmp10 + tmp0, CONST_BITS-PASS1_BITS);
5064 wsptr[3*5] = (int) RIGHT_SHIFT(tmp10 - tmp0, CONST_BITS-PASS1_BITS);
5065 wsptr[3*1] = (int) (tmp11 + tmp1);
5066 wsptr[3*4] = (int) (tmp11 - tmp1);
5067 wsptr[3*2] = (int) RIGHT_SHIFT(tmp12 + tmp2, CONST_BITS-PASS1_BITS);
5068 wsptr[3*3] = (int) RIGHT_SHIFT(tmp12 - tmp2, CONST_BITS-PASS1_BITS);
5071 /* Pass 2: process 6 rows from work array, store into output array.
5072 * 3-point IDCT kernel, cK represents sqrt(2) * cos(K*pi/6).
5075 wsptr = workspace;
5076 for (ctr = 0; ctr < 6; ctr++) {
5077 outptr = output_buf[ctr] + output_col;
5079 /* Even part */
5081 /* Add range center and fudge factor for final descale and range-limit. */
5082 tmp0 = (INT32) wsptr[0] +
5083 ((((INT32) RANGE_CENTER) << (PASS1_BITS+3)) +
5084 (ONE << (PASS1_BITS+2)));
5085 tmp0 <<= CONST_BITS;
5086 tmp2 = (INT32) wsptr[2];
5087 tmp12 = MULTIPLY(tmp2, FIX(0.707106781)); /* c2 */
5088 tmp10 = tmp0 + tmp12;
5089 tmp2 = tmp0 - tmp12 - tmp12;
5091 /* Odd part */
5093 tmp12 = (INT32) wsptr[1];
5094 tmp0 = MULTIPLY(tmp12, FIX(1.224744871)); /* c1 */
5096 /* Final output stage */
5098 outptr[0] = range_limit[(int) RIGHT_SHIFT(tmp10 + tmp0,
5099 CONST_BITS+PASS1_BITS+3)
5100 & RANGE_MASK];
5101 outptr[2] = range_limit[(int) RIGHT_SHIFT(tmp10 - tmp0,
5102 CONST_BITS+PASS1_BITS+3)
5103 & RANGE_MASK];
5104 outptr[1] = range_limit[(int) RIGHT_SHIFT(tmp2,
5105 CONST_BITS+PASS1_BITS+3)
5106 & RANGE_MASK];
5108 wsptr += 3; /* advance pointer to next row */
5114 * Perform dequantization and inverse DCT on one block of coefficients,
5115 * producing a 2x4 output block.
5117 * 4-point IDCT in pass 1 (columns), 2-point in pass 2 (rows).
5120 GLOBAL(void)
5121 jpeg_idct_2x4 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
5122 JCOEFPTR coef_block,
5123 JSAMPARRAY output_buf, JDIMENSION output_col)
5125 INT32 tmp0, tmp2, tmp10, tmp12;
5126 INT32 z1, z2, z3;
5127 JCOEFPTR inptr;
5128 ISLOW_MULT_TYPE * quantptr;
5129 INT32 * wsptr;
5130 JSAMPROW outptr;
5131 JSAMPLE *range_limit = IDCT_range_limit(cinfo);
5132 int ctr;
5133 INT32 workspace[2*4]; /* buffers data between passes */
5134 SHIFT_TEMPS
5136 /* Pass 1: process columns from input, store into work array.
5137 * 4-point IDCT kernel,
5138 * cK represents sqrt(2) * cos(K*pi/16) [refers to 8-point IDCT].
5141 inptr = coef_block;
5142 quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table;
5143 wsptr = workspace;
5144 for (ctr = 0; ctr < 2; ctr++, inptr++, quantptr++, wsptr++) {
5145 /* Even part */
5147 tmp0 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]);
5148 tmp2 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]);
5150 tmp10 = (tmp0 + tmp2) << CONST_BITS;
5151 tmp12 = (tmp0 - tmp2) << CONST_BITS;
5153 /* Odd part */
5154 /* Same rotation as in the even part of the 8x8 LL&M IDCT */
5156 z2 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]);
5157 z3 = DEQUANTIZE(inptr[DCTSIZE*3], quantptr[DCTSIZE*3]);
5159 z1 = MULTIPLY(z2 + z3, FIX_0_541196100); /* c6 */
5160 tmp0 = z1 + MULTIPLY(z2, FIX_0_765366865); /* c2-c6 */
5161 tmp2 = z1 - MULTIPLY(z3, FIX_1_847759065); /* c2+c6 */
5163 /* Final output stage */
5165 wsptr[2*0] = tmp10 + tmp0;
5166 wsptr[2*3] = tmp10 - tmp0;
5167 wsptr[2*1] = tmp12 + tmp2;
5168 wsptr[2*2] = tmp12 - tmp2;
5171 /* Pass 2: process 4 rows from work array, store into output array. */
5173 wsptr = workspace;
5174 for (ctr = 0; ctr < 4; ctr++) {
5175 outptr = output_buf[ctr] + output_col;
5177 /* Even part */
5179 /* Add range center and fudge factor for final descale and range-limit. */
5180 tmp10 = wsptr[0] +
5181 ((((INT32) RANGE_CENTER) << (CONST_BITS+3)) +
5182 (ONE << (CONST_BITS+2)));
5184 /* Odd part */
5186 tmp0 = wsptr[1];
5188 /* Final output stage */
5190 outptr[0] = range_limit[(int) RIGHT_SHIFT(tmp10 + tmp0, CONST_BITS+3)
5191 & RANGE_MASK];
5192 outptr[1] = range_limit[(int) RIGHT_SHIFT(tmp10 - tmp0, CONST_BITS+3)
5193 & RANGE_MASK];
5195 wsptr += 2; /* advance pointer to next row */
5201 * Perform dequantization and inverse DCT on one block of coefficients,
5202 * producing a 1x2 output block.
5204 * 2-point IDCT in pass 1 (columns), 1-point in pass 2 (rows).
5207 GLOBAL(void)
5208 jpeg_idct_1x2 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
5209 JCOEFPTR coef_block,
5210 JSAMPARRAY output_buf, JDIMENSION output_col)
5212 DCTELEM tmp0, tmp1;
5213 ISLOW_MULT_TYPE * quantptr;
5214 JSAMPLE *range_limit = IDCT_range_limit(cinfo);
5215 ISHIFT_TEMPS
5217 /* Process 1 column from input, store into output array. */
5219 quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table;
5221 /* Even part */
5223 tmp0 = DEQUANTIZE(coef_block[DCTSIZE*0], quantptr[DCTSIZE*0]);
5224 /* Add range center and fudge factor for final descale and range-limit. */
5225 tmp0 += (((DCTELEM) RANGE_CENTER) << 3) + (1 << 2);
5227 /* Odd part */
5229 tmp1 = DEQUANTIZE(coef_block[DCTSIZE*1], quantptr[DCTSIZE*1]);
5231 /* Final output stage */
5233 output_buf[0][output_col] =
5234 range_limit[(int) IRIGHT_SHIFT(tmp0 + tmp1, 3) & RANGE_MASK];
5235 output_buf[1][output_col] =
5236 range_limit[(int) IRIGHT_SHIFT(tmp0 - tmp1, 3) & RANGE_MASK];
5239 #endif /* IDCT_SCALING_SUPPORTED */
5240 #endif /* DCT_ISLOW_SUPPORTED */