4 * Copyright (C) 1994-1996, Thomas G. Lane.
5 * Modified 2003-2020 by Guido Vollbeding.
6 * This file is part of the Independent JPEG Group's software.
7 * For conditions of distribution and use, see the accompanying README file.
9 * This file contains the forward-DCT management logic.
10 * This code selects a particular DCT implementation to be used,
11 * and it performs related housekeeping chores including coefficient quantization.
15 #define JPEG_INTERNALS
18 #include "jdct.h" /* Private declarations for DCT subsystem */
21 /* Private subobject for this module */
24 struct jpeg_forward_dct pub
; /* public fields */
26 /* Pointer to the DCT routine actually in use */
27 forward_DCT_method_ptr do_dct
[MAX_COMPONENTS
];
29 #ifdef DCT_FLOAT_SUPPORTED
30 /* Same as above for the floating-point case. */
31 float_DCT_method_ptr do_float_dct
[MAX_COMPONENTS
];
35 typedef my_fdct_controller
* my_fdct_ptr
;
38 /* The allocated post-DCT divisor tables -- big enough for any
39 * supported variant and not identical to the quant table entries,
40 * because of scaling (especially for an unnormalized DCT) --
41 * are pointed to by dct_table in the per-component comp_info
42 * structures. Each table is given in normal array order.
46 DCTELEM int_array
[DCTSIZE2
];
47 #ifdef DCT_FLOAT_SUPPORTED
48 FAST_FLOAT float_array
[DCTSIZE2
];
/* The current scaled-DCT routines require ISLOW-style divisor tables,
 * so be sure to compile that code if either ISLOW or SCALING is requested.
 */
#if defined(DCT_ISLOW_SUPPORTED) || defined(DCT_SCALING_SUPPORTED)
#define PROVIDE_ISLOW_TABLES
#endif
66 * Perform forward DCT on one or more blocks of a component.
68 * The input samples are taken from the sample_data[] array starting at
69 * position start_col, and moving to the right for any additional blocks.
70 * The quantized coefficients are returned in coef_blocks[].
74 forward_DCT (j_compress_ptr cinfo
, jpeg_component_info
* compptr
,
75 JSAMPARRAY sample_data
, JBLOCKROW coef_blocks
,
76 JDIMENSION start_col
, JDIMENSION num_blocks
)
77 /* This version is used for integer DCT implementations. */
79 /* This routine is heavily used, so it's worth coding it tightly. */
80 my_fdct_ptr fdct
= (my_fdct_ptr
) cinfo
->fdct
;
81 forward_DCT_method_ptr do_dct
= fdct
->do_dct
[compptr
->component_index
];
82 DCTELEM
* divisors
= (DCTELEM
*) compptr
->dct_table
;
83 DCTELEM workspace
[DCTSIZE2
]; /* work area for FDCT subroutine */
86 for (bi
= 0; bi
< num_blocks
; bi
++, start_col
+= compptr
->DCT_h_scaled_size
) {
88 (*do_dct
) (workspace
, sample_data
, start_col
);
90 /* Quantize/descale the coefficients, and store into coef_blocks[] */
91 { register DCTELEM temp
, qval
;
93 register JCOEFPTR output_ptr
= coef_blocks
[bi
];
95 for (i
= 0; i
< DCTSIZE2
; i
++) {
98 /* Divide the coefficient value by qval, ensuring proper rounding.
99 * Since C does not specify the direction of rounding for negative
100 * quotients, we have to force the dividend positive for portability.
102 * In most files, at least half of the output values will be zero
103 * (at default quantization settings, more like three-quarters...)
104 * so we should ensure that this case is fast. On many machines,
105 * a comparison is enough cheaper than a divide to make a special test
106 * a win. Since both inputs will be nonnegative, we need only test
107 * for a < b to discover whether a/b is 0.
108 * If your machine's division is fast enough, define FAST_DIVIDE.
111 #define DIVIDE_BY(a,b) a /= b
113 #define DIVIDE_BY(a,b) if (a >= b) a /= b; else a = 0
117 temp
+= qval
>>1; /* for rounding */
118 DIVIDE_BY(temp
, qval
);
121 temp
+= qval
>>1; /* for rounding */
122 DIVIDE_BY(temp
, qval
);
124 output_ptr
[i
] = (JCOEF
) temp
;
#ifdef DCT_FLOAT_SUPPORTED

/*
 * Perform forward DCT on one or more blocks of a component.
 *
 * This version is used for floating-point DCT implementations.
 * The parameters have the same meaning as for forward_DCT; the only
 * differences are that the routine is taken from do_float_dct[] and that
 * compptr->dct_table is read as FAST_FLOAT multipliers, which are applied
 * by multiplication rather than by division.
 */

METHODDEF(void)
forward_DCT_float (j_compress_ptr cinfo, jpeg_component_info * compptr,
                   JSAMPARRAY sample_data, JBLOCKROW coef_blocks,
                   JDIMENSION start_col, JDIMENSION num_blocks)
{
  /* This routine is heavily used, so it's worth coding it tightly. */
  my_fdct_ptr fdct = (my_fdct_ptr) cinfo->fdct;
  float_DCT_method_ptr do_dct = fdct->do_float_dct[compptr->component_index];
  FAST_FLOAT * divisors = (FAST_FLOAT *) compptr->dct_table;
  FAST_FLOAT workspace[DCTSIZE2];  /* work area for FDCT subroutine */
  JDIMENSION bi;

  for (bi = 0; bi < num_blocks; bi++, start_col += compptr->DCT_h_scaled_size) {
    /* Perform the DCT */
    (*do_dct) (workspace, sample_data, start_col);

    /* Quantize/descale the coefficients, and store into coef_blocks[] */
    {
      register FAST_FLOAT temp;
      register int i;
      register JCOEFPTR output_ptr = coef_blocks[bi];

      for (i = 0; i < DCTSIZE2; i++) {
        /* Apply the quantization and scaling factor */
        temp = workspace[i] * divisors[i];
        /* Round to nearest integer.
         * The float-to-int conversion truncates, so the value is first
         * shifted into the positive range by adding 16384.5 and the 16384
         * offset is removed again after the conversion.
         * The maximum coefficient size is +-16K (for 12-bit data), so this
         * code should work for either 16-bit or 32-bit ints.
         */
        output_ptr[i] = (JCOEF) ((int) (temp + (FAST_FLOAT) 16384.5) - 16384);
      }
    }
  }
}

#endif /* DCT_FLOAT_SUPPORTED */
174 * Initialize for a processing pass.
175 * Verify that all referenced Q-tables are present, and set up
176 * the divisor table for each one.
177 * In the current implementation, DCT of all components is done during
178 * the first pass, even if only some components will be output in the
179 * first scan. Hence all components should be examined here.
183 start_pass_fdctmgr (j_compress_ptr cinfo
)
185 my_fdct_ptr fdct
= (my_fdct_ptr
) cinfo
->fdct
;
187 jpeg_component_info
*compptr
;
192 for (ci
= 0, compptr
= cinfo
->comp_info
; ci
< cinfo
->num_components
;
194 /* Select the proper DCT routine for this component's scaling */
195 switch ((compptr
->DCT_h_scaled_size
<< 8) + compptr
->DCT_v_scaled_size
) {
196 #ifdef DCT_SCALING_SUPPORTED
198 fdct
->do_dct
[ci
] = jpeg_fdct_1x1
;
199 method
= JDCT_ISLOW
; /* jfdctint uses islow-style table */
202 fdct
->do_dct
[ci
] = jpeg_fdct_2x2
;
203 method
= JDCT_ISLOW
; /* jfdctint uses islow-style table */
206 fdct
->do_dct
[ci
] = jpeg_fdct_3x3
;
207 method
= JDCT_ISLOW
; /* jfdctint uses islow-style table */
210 fdct
->do_dct
[ci
] = jpeg_fdct_4x4
;
211 method
= JDCT_ISLOW
; /* jfdctint uses islow-style table */
214 fdct
->do_dct
[ci
] = jpeg_fdct_5x5
;
215 method
= JDCT_ISLOW
; /* jfdctint uses islow-style table */
218 fdct
->do_dct
[ci
] = jpeg_fdct_6x6
;
219 method
= JDCT_ISLOW
; /* jfdctint uses islow-style table */
222 fdct
->do_dct
[ci
] = jpeg_fdct_7x7
;
223 method
= JDCT_ISLOW
; /* jfdctint uses islow-style table */
226 fdct
->do_dct
[ci
] = jpeg_fdct_9x9
;
227 method
= JDCT_ISLOW
; /* jfdctint uses islow-style table */
229 case ((10 << 8) + 10):
230 fdct
->do_dct
[ci
] = jpeg_fdct_10x10
;
231 method
= JDCT_ISLOW
; /* jfdctint uses islow-style table */
233 case ((11 << 8) + 11):
234 fdct
->do_dct
[ci
] = jpeg_fdct_11x11
;
235 method
= JDCT_ISLOW
; /* jfdctint uses islow-style table */
237 case ((12 << 8) + 12):
238 fdct
->do_dct
[ci
] = jpeg_fdct_12x12
;
239 method
= JDCT_ISLOW
; /* jfdctint uses islow-style table */
241 case ((13 << 8) + 13):
242 fdct
->do_dct
[ci
] = jpeg_fdct_13x13
;
243 method
= JDCT_ISLOW
; /* jfdctint uses islow-style table */
245 case ((14 << 8) + 14):
246 fdct
->do_dct
[ci
] = jpeg_fdct_14x14
;
247 method
= JDCT_ISLOW
; /* jfdctint uses islow-style table */
249 case ((15 << 8) + 15):
250 fdct
->do_dct
[ci
] = jpeg_fdct_15x15
;
251 method
= JDCT_ISLOW
; /* jfdctint uses islow-style table */
253 case ((16 << 8) + 16):
254 fdct
->do_dct
[ci
] = jpeg_fdct_16x16
;
255 method
= JDCT_ISLOW
; /* jfdctint uses islow-style table */
257 case ((16 << 8) + 8):
258 fdct
->do_dct
[ci
] = jpeg_fdct_16x8
;
259 method
= JDCT_ISLOW
; /* jfdctint uses islow-style table */
261 case ((14 << 8) + 7):
262 fdct
->do_dct
[ci
] = jpeg_fdct_14x7
;
263 method
= JDCT_ISLOW
; /* jfdctint uses islow-style table */
265 case ((12 << 8) + 6):
266 fdct
->do_dct
[ci
] = jpeg_fdct_12x6
;
267 method
= JDCT_ISLOW
; /* jfdctint uses islow-style table */
269 case ((10 << 8) + 5):
270 fdct
->do_dct
[ci
] = jpeg_fdct_10x5
;
271 method
= JDCT_ISLOW
; /* jfdctint uses islow-style table */
274 fdct
->do_dct
[ci
] = jpeg_fdct_8x4
;
275 method
= JDCT_ISLOW
; /* jfdctint uses islow-style table */
278 fdct
->do_dct
[ci
] = jpeg_fdct_6x3
;
279 method
= JDCT_ISLOW
; /* jfdctint uses islow-style table */
282 fdct
->do_dct
[ci
] = jpeg_fdct_4x2
;
283 method
= JDCT_ISLOW
; /* jfdctint uses islow-style table */
286 fdct
->do_dct
[ci
] = jpeg_fdct_2x1
;
287 method
= JDCT_ISLOW
; /* jfdctint uses islow-style table */
289 case ((8 << 8) + 16):
290 fdct
->do_dct
[ci
] = jpeg_fdct_8x16
;
291 method
= JDCT_ISLOW
; /* jfdctint uses islow-style table */
293 case ((7 << 8) + 14):
294 fdct
->do_dct
[ci
] = jpeg_fdct_7x14
;
295 method
= JDCT_ISLOW
; /* jfdctint uses islow-style table */
297 case ((6 << 8) + 12):
298 fdct
->do_dct
[ci
] = jpeg_fdct_6x12
;
299 method
= JDCT_ISLOW
; /* jfdctint uses islow-style table */
301 case ((5 << 8) + 10):
302 fdct
->do_dct
[ci
] = jpeg_fdct_5x10
;
303 method
= JDCT_ISLOW
; /* jfdctint uses islow-style table */
306 fdct
->do_dct
[ci
] = jpeg_fdct_4x8
;
307 method
= JDCT_ISLOW
; /* jfdctint uses islow-style table */
310 fdct
->do_dct
[ci
] = jpeg_fdct_3x6
;
311 method
= JDCT_ISLOW
; /* jfdctint uses islow-style table */
314 fdct
->do_dct
[ci
] = jpeg_fdct_2x4
;
315 method
= JDCT_ISLOW
; /* jfdctint uses islow-style table */
318 fdct
->do_dct
[ci
] = jpeg_fdct_1x2
;
319 method
= JDCT_ISLOW
; /* jfdctint uses islow-style table */
322 case ((DCTSIZE
<< 8) + DCTSIZE
):
323 switch (cinfo
->dct_method
) {
324 #ifdef DCT_ISLOW_SUPPORTED
326 fdct
->do_dct
[ci
] = jpeg_fdct_islow
;
330 #ifdef DCT_IFAST_SUPPORTED
332 fdct
->do_dct
[ci
] = jpeg_fdct_ifast
;
336 #ifdef DCT_FLOAT_SUPPORTED
338 fdct
->do_float_dct
[ci
] = jpeg_fdct_float
;
343 ERREXIT(cinfo
, JERR_NOT_COMPILED
);
347 ERREXIT2(cinfo
, JERR_BAD_DCTSIZE
,
348 compptr
->DCT_h_scaled_size
, compptr
->DCT_v_scaled_size
);
350 qtblno
= compptr
->quant_tbl_no
;
351 /* Make sure specified quantization table is present */
352 if (qtblno
< 0 || qtblno
>= NUM_QUANT_TBLS
||
353 cinfo
->quant_tbl_ptrs
[qtblno
] == NULL
)
354 ERREXIT1(cinfo
, JERR_NO_QUANT_TABLE
, qtblno
);
355 qtbl
= cinfo
->quant_tbl_ptrs
[qtblno
];
356 /* Create divisor table from quant table */
358 #ifdef PROVIDE_ISLOW_TABLES
360 /* For LL&M IDCT method, divisors are equal to raw quantization
361 * coefficients multiplied by 8 (to counteract scaling).
363 dtbl
= (DCTELEM
*) compptr
->dct_table
;
364 for (i
= 0; i
< DCTSIZE2
; i
++) {
366 ((DCTELEM
) qtbl
->quantval
[i
]) << (compptr
->component_needed
? 4 : 3);
368 fdct
->pub
.forward_DCT
[ci
] = forward_DCT
;
371 #ifdef DCT_IFAST_SUPPORTED
374 /* For AA&N IDCT method, divisors are equal to quantization
375 * coefficients scaled by scalefactor[row]*scalefactor[col], where
377 * scalefactor[k] = cos(k*PI/16) * sqrt(2) for k=1..7
378 * We apply a further scale factor of 8.
380 #define CONST_BITS 14
381 static const INT16 aanscales
[DCTSIZE2
] = {
382 /* precomputed values scaled up by 14 bits */
383 16384, 22725, 21407, 19266, 16384, 12873, 8867, 4520,
384 22725, 31521, 29692, 26722, 22725, 17855, 12299, 6270,
385 21407, 29692, 27969, 25172, 21407, 16819, 11585, 5906,
386 19266, 26722, 25172, 22654, 19266, 15137, 10426, 5315,
387 16384, 22725, 21407, 19266, 16384, 12873, 8867, 4520,
388 12873, 17855, 16819, 15137, 12873, 10114, 6967, 3552,
389 8867, 12299, 11585, 10426, 8867, 6967, 4799, 2446,
390 4520, 6270, 5906, 5315, 4520, 3552, 2446, 1247
394 dtbl
= (DCTELEM
*) compptr
->dct_table
;
395 for (i
= 0; i
< DCTSIZE2
; i
++) {
397 DESCALE(MULTIPLY16V16((INT32
) qtbl
->quantval
[i
],
398 (INT32
) aanscales
[i
]),
399 compptr
->component_needed
? CONST_BITS
-4 : CONST_BITS
-3);
402 fdct
->pub
.forward_DCT
[ci
] = forward_DCT
;
405 #ifdef DCT_FLOAT_SUPPORTED
408 /* For float AA&N IDCT method, divisors are equal to quantization
409 * coefficients scaled by scalefactor[row]*scalefactor[col], where
411 * scalefactor[k] = cos(k*PI/16) * sqrt(2) for k=1..7
412 * We apply a further scale factor of 8.
413 * What's actually stored is 1/divisor so that the inner loop can
414 * use a multiplication rather than a division.
416 FAST_FLOAT
* fdtbl
= (FAST_FLOAT
*) compptr
->dct_table
;
418 static const double aanscalefactor
[DCTSIZE
] = {
419 1.0, 1.387039845, 1.306562965, 1.175875602,
420 1.0, 0.785694958, 0.541196100, 0.275899379
424 for (row
= 0; row
< DCTSIZE
; row
++) {
425 for (col
= 0; col
< DCTSIZE
; col
++) {
426 fdtbl
[i
] = (FAST_FLOAT
)
427 (1.0 / ((double) qtbl
->quantval
[i
] *
428 aanscalefactor
[row
] * aanscalefactor
[col
] *
429 (compptr
->component_needed
? 16.0 : 8.0)));
434 fdct
->pub
.forward_DCT
[ci
] = forward_DCT_float
;
438 ERREXIT(cinfo
, JERR_NOT_COMPILED
);
445 * Initialize FDCT manager.
449 jinit_forward_dct (j_compress_ptr cinfo
)
453 jpeg_component_info
*compptr
;
455 fdct
= (my_fdct_ptr
) (*cinfo
->mem
->alloc_small
)
456 ((j_common_ptr
) cinfo
, JPOOL_IMAGE
, SIZEOF(my_fdct_controller
));
457 cinfo
->fdct
= &fdct
->pub
;
458 fdct
->pub
.start_pass
= start_pass_fdctmgr
;
460 for (ci
= 0, compptr
= cinfo
->comp_info
; ci
< cinfo
->num_components
;
462 /* Allocate a divisor table for each component */
463 compptr
->dct_table
= (*cinfo
->mem
->alloc_small
)
464 ((j_common_ptr
) cinfo
, JPOOL_IMAGE
, SIZEOF(divisor_table
));