1 // Copyright (c) the JPEG XL Project Authors. All rights reserved.
3 // Use of this source code is governed by a BSD-style
4 // license that can be found in the LICENSE file.
6 #include "lib/jpegli/entropy_coding.h"
10 #include "lib/jpegli/encode_internal.h"
11 #include "lib/jpegli/error.h"
12 #include "lib/jpegli/huffman.h"
13 #include "lib/jxl/base/bits.h"
15 #undef HWY_TARGET_INCLUDE
16 #define HWY_TARGET_INCLUDE "lib/jpegli/entropy_coding.cc"
17 #include <hwy/foreach_target.h>
18 #include <hwy/highway.h>
20 #include "lib/jpegli/entropy_coding-inl.h"
22 HWY_BEFORE_NAMESPACE();
24 namespace HWY_NAMESPACE
{
// Per-SIMD-target entry point (lives inside HWY_NAMESPACE): tokenizes one
// 8x8 coefficient block of a sequential scan by delegating to the shared
// ComputeTokensForBlock template from entropy_coding-inl.h.
// NOTE(review): this extraction is truncated — the tail of the call
// (tokens_ptr argument, closing parenthesis and function brace, original
// lines ~29-31) is missing; restore from upstream before building.
26 void ComputeTokensSequential(const coeff_t
* block
, int last_dc
, int dc_ctx
,
27 int ac_ctx
, Token
** tokens_ptr
) {
28 ComputeTokensForBlock
<coeff_t
, true>(block
, last_dc
, dc_ctx
, ac_ctx
,
32 // NOLINTNEXTLINE(google-readability-namespace-comments)
33 } // namespace HWY_NAMESPACE
35 HWY_AFTER_NAMESPACE();
// Upper bound on the number of Huffman tokens that one MCU row can produce
// in a sequential scan: every coefficient of every block in the row may
// become a token (kDCTBlockSize per block, summed over all components'
// sampling factors, times the number of MCUs per row).
40 size_t MaxNumTokensPerMCURow(j_compress_ptr cinfo
) {
41 int MCUs_per_row
= DivCeil(cinfo
->image_width
, 8 * cinfo
->max_h_samp_factor
);
42 size_t blocks_per_mcu
= 0;
43 for (int c
= 0; c
< cinfo
->num_components
; ++c
) {
44 jpeg_component_info
* comp
= &cinfo
->comp_info
[c
];
45 blocks_per_mcu
+= comp
->h_samp_factor
* comp
->v_samp_factor
;
// NOTE(review): closing braces (original lines 46 and 48) are missing
// from this extraction.
47 return kDCTBlockSize
* blocks_per_mcu
* MCUs_per_row
;
// Heuristic used when a TokenArray fills up mid-scan: estimates how many
// more tokens to allocate. Early rows use a fixed multiple of the per-row
// maximum; later rows extrapolate from tokens emitted so far (with a 4/3
// safety margin), clamped between one row's worth and the absolute maximum
// for the remaining rows.
// NOTE(review): original lines 52-53, 55 and 57 (declaration of `estimate`
// and the branch selecting between the two formulas) are missing from this
// extraction — restore from upstream.
50 size_t EstimateNumTokens(j_compress_ptr cinfo
, size_t mcu_y
, size_t ysize_mcus
,
51 size_t num_tokens
, size_t max_per_row
) {
54 estimate
= 16 * max_per_row
;
56 estimate
= (4 * ysize_mcus
* num_tokens
) / (3 * mcu_y
);
58 size_t mcus_left
= ysize_mcus
- mcu_y
;
59 return std::min(mcus_left
* max_per_row
,
60 std::max(max_per_row
, estimate
- num_tokens
));
64 HWY_EXPORT(ComputeTokensSequential
);
// Emits the DC token for one block of a progressive DC scan (successive
// approximation: the coefficient is first down-shifted by Al). The token
// encodes the size category (nbits) of the difference from the previous
// block's DC value, plus the raw difference bits.
// NOTE(review): original lines 73-77 (handling of negative differences,
// which adjusts temp/temp2 before the size computation) are missing from
// this extraction, as are the declarations of temp/temp2 — restore from
// upstream before building.
66 void TokenizeProgressiveDC(const coeff_t
* coeffs
, int context
, int Al
,
67 coeff_t
* last_dc_coeff
, Token
** next_token
) {
70 temp2
= coeffs
[0] >> Al
;
71 temp
= temp2
- *last_dc_coeff
;
72 *last_dc_coeff
= temp2
;
78 int nbits
= (temp
== 0) ? 0 : (jxl::FloorLog2Nonzero
<uint32_t>(temp
) + 1);
79 int bits
= temp2
& ((1 << nbits
) - 1);
80 *(*next_token
)++ = Token(context
, nbits
, bits
);
// Tokenizes one first-pass (Ah == 0) progressive AC scan of a single
// component. Walks blocks row by row, emitting run-length/size tokens for
// coefficients Ss..Se (down-shifted by Al), accumulating end-of-band (EOB)
// runs across blocks and flushing them as EOBn tokens at restart markers,
// at the 0x7FFF run limit, and at scan end. Token storage is grown in
// chunks (TokenArray) using EstimateNumTokens. Restart offsets and
// nonzero statistics are recorded in *sti for later scans.
// NOTE(review): this extraction has many interior lines missing (e.g.
// originals 95, 98, 105, 111, 115-116, 121, 124, 127-130, 134, 136-139,
// 143-159, 162, 164-166, 168-169, 173-177, 181, 183-185, 188-189,
// 191-192, 196-198), including the declarations of temp/temp2/r/eob_run/
// num_nzeros and several closing braces — restore from upstream.
83 void TokenizeACProgressiveScan(j_compress_ptr cinfo
, int scan_index
,
84 int context
, ScanTokenInfo
* sti
) {
85 jpeg_comp_master
* m
= cinfo
->master
;
86 const jpeg_scan_info
* scan_info
= &cinfo
->scan_info
[scan_index
];
87 const int comp_idx
= scan_info
->component_index
[0];
88 const jpeg_component_info
* comp
= &cinfo
->comp_info
[comp_idx
];
89 const int Al
= scan_info
->Al
;
90 const int Ss
= scan_info
->Ss
;
91 const int Se
= scan_info
->Se
;
92 const size_t restart_interval
= sti
->restart_interval
;
93 int restarts_to_go
= restart_interval
;
94 size_t num_blocks
= comp
->height_in_blocks
* comp
->width_in_blocks
;
96 restart_interval
> 0 ? DivCeil(num_blocks
, restart_interval
) : 1;
97 size_t restart_idx
= 0;
99 TokenArray
* ta
= &m
->token_arrays
[m
->cur_token_array
];
100 sti
->token_offset
= m
->total_num_tokens
+ ta
->num_tokens
;
101 sti
->restarts
= Allocate
<size_t>(cinfo
, num_restarts
, JPOOL_IMAGE
);
102 for (JDIMENSION by
= 0; by
< comp
->height_in_blocks
; ++by
) {
103 JBLOCKARRAY ba
= (*cinfo
->mem
->access_virt_barray
)(
104 reinterpret_cast<j_common_ptr
>(cinfo
), m
->coeff_buffers
[comp_idx
], by
,
106 // Each coefficient can appear in at most one token, but we have to reserve
107 // one extra EOBrun token that was rolled over from the previous block-row
108 // and has to be flushed at the end.
109 int max_tokens_per_row
= 1 + comp
->width_in_blocks
* (Se
- Ss
+ 1);
// Grow token storage if the current TokenArray cannot hold a full row.
110 if (ta
->num_tokens
+ max_tokens_per_row
> m
->num_tokens
) {
112 m
->total_num_tokens
+= ta
->num_tokens
;
113 ++m
->cur_token_array
;
114 ta
= &m
->token_arrays
[m
->cur_token_array
];
117 EstimateNumTokens(cinfo
, by
, comp
->height_in_blocks
,
118 m
->total_num_tokens
, max_tokens_per_row
);
119 ta
->tokens
= Allocate
<Token
>(cinfo
, m
->num_tokens
, JPOOL_IMAGE
);
120 m
->next_token
= ta
->tokens
;
122 for (JDIMENSION bx
= 0; bx
< comp
->width_in_blocks
; ++bx
) {
// Restart boundary: flush any pending EOB run and record the offset.
123 if (restart_interval
> 0 && restarts_to_go
== 0) {
125 int nbits
= jxl::FloorLog2Nonzero
<uint32_t>(eob_run
);
126 int symbol
= nbits
<< 4u;
128 Token(context
, symbol
, eob_run
& ((1 << nbits
) - 1));
131 ta
->num_tokens
= m
->next_token
- ta
->tokens
;
132 sti
->restarts
[restart_idx
++] = m
->total_num_tokens
+ ta
->num_tokens
;
133 restarts_to_go
= restart_interval
;
135 const coeff_t
* block
= &ba
[0][bx
][0];
140 int num_future_nzeros
= 0;
141 for (int k
= Ss
; k
<= Se
; ++k
) {
142 if ((temp
= block
[k
]) == 0) {
// Flush a pending EOB run before emitting this block's tokens.
160 int nbits
= jxl::FloorLog2Nonzero
<uint32_t>(eob_run
);
161 int symbol
= nbits
<< 4u;
163 Token(context
, symbol
, eob_run
& ((1 << nbits
) - 1));
// ZRL token: run of 16 zero coefficients.
167 *m
->next_token
++ = Token(context
, 0xf0, 0);
170 int nbits
= jxl::FloorLog2Nonzero
<uint32_t>(temp
) + 1;
171 int symbol
= (r
<< 4u) + nbits
;
172 *m
->next_token
++ = Token(context
, symbol
, temp2
& ((1 << nbits
) - 1));
// EOB run is capped at 0x7FFF by the JPEG spec; flush when reached.
178 if (eob_run
== 0x7FFF) {
179 int nbits
= jxl::FloorLog2Nonzero
<uint32_t>(eob_run
);
180 int symbol
= nbits
<< 4u;
182 Token(context
, symbol
, eob_run
& ((1 << nbits
) - 1));
186 sti
->num_nonzeros
+= num_nzeros
;
187 sti
->num_future_nonzeros
+= num_future_nzeros
;
190 ta
->num_tokens
= m
->next_token
- ta
->tokens
;
// Flush the final EOB run at the end of the scan.
193 int nbits
= jxl::FloorLog2Nonzero
<uint32_t>(eob_run
);
194 int symbol
= nbits
<< 4u;
195 *m
->next_token
++ = Token(context
, symbol
, eob_run
& ((1 << nbits
) - 1));
199 sti
->num_tokens
= m
->total_num_tokens
+ ta
->num_tokens
- sti
->token_offset
;
200 sti
->restarts
[restart_idx
++] = m
->total_num_tokens
+ ta
->num_tokens
;
// Tokenizes one refinement (Ah > 0) progressive AC scan of a single
// component. Emits RefToken records plus a side stream of refinement bits
// (one correction bit per already-nonzero coefficient) and EOB-run counts.
// EOB runs are merged into the token at next_eob_token; eob_refbits guards
// against attaching more than 255 refinement bits to a single token.
// NOTE(review): many interior lines are missing from this extraction
// (e.g. originals 214-216, 232, 239, 243-244, 247-250, 252-260, 263,
// 266-267, 270-271, 276, 279, 282, 284, 287, 289-290, 293, 297,
// 300-304, 308-309, 311-313), including the declarations of token/r/
// eob_run/eob_refbits/num_nzeros and several closing braces — restore
// from upstream.
203 void TokenizeACRefinementScan(j_compress_ptr cinfo
, int scan_index
,
204 ScanTokenInfo
* sti
) {
205 jpeg_comp_master
* m
= cinfo
->master
;
206 const jpeg_scan_info
* scan_info
= &cinfo
->scan_info
[scan_index
];
207 const int comp_idx
= scan_info
->component_index
[0];
208 const jpeg_component_info
* comp
= &cinfo
->comp_info
[comp_idx
];
209 const int Al
= scan_info
->Al
;
210 const int Ss
= scan_info
->Ss
;
211 const int Se
= scan_info
->Se
;
212 const size_t restart_interval
= sti
->restart_interval
;
213 int restarts_to_go
= restart_interval
;
217 size_t num_blocks
= comp
->height_in_blocks
* comp
->width_in_blocks
;
218 size_t num_restarts
=
219 restart_interval
> 0 ? DivCeil(num_blocks
, restart_interval
) : 1;
// Token/refbit storage was pre-allocated by TokenizeJpeg; continue from
// where the previous refinement scan left off.
220 sti
->tokens
= m
->next_refinement_token
;
221 sti
->refbits
= m
->next_refinement_bit
;
222 sti
->eobruns
= Allocate
<uint16_t>(cinfo
, num_blocks
/ 2, JPOOL_IMAGE
);
223 sti
->restarts
= Allocate
<size_t>(cinfo
, num_restarts
, JPOOL_IMAGE
);
224 RefToken
* next_token
= sti
->tokens
;
225 RefToken
* next_eob_token
= next_token
;
226 uint8_t* next_ref_bit
= sti
->refbits
;
227 uint16_t* next_eobrun
= sti
->eobruns
;
228 size_t restart_idx
= 0;
229 for (JDIMENSION by
= 0; by
< comp
->height_in_blocks
; ++by
) {
230 JBLOCKARRAY ba
= (*cinfo
->mem
->access_virt_barray
)(
231 reinterpret_cast<j_common_ptr
>(cinfo
), m
->coeff_buffers
[comp_idx
], by
,
233 for (JDIMENSION bx
= 0; bx
< comp
->width_in_blocks
; ++bx
) {
// Restart boundary: record token offset and reset EOB-run state.
234 if (restart_interval
> 0 && restarts_to_go
== 0) {
235 sti
->restarts
[restart_idx
++] = next_token
- sti
->tokens
;
236 restarts_to_go
= restart_interval
;
237 next_eob_token
= next_token
;
238 eob_run
= eob_refbits
= 0;
240 const coeff_t
* block
= &ba
[0][bx
][0];
241 int num_eob_refinement_bits
= 0;
242 int num_refinement_bits
= 0;
245 for (int k
= Ss
; k
<= Se
; ++k
) {
246 int absval
= block
[k
];
// All-ones mask iff absval is negative (arithmetic right shift of the
// sign bit); used below to fold the sign into the token symbol.
251 const int mask
= absval
>> (8 * sizeof(int) - 1);
261 token
.refbits
= num_refinement_bits
;
262 *next_token
++ = token
;
264 num_eob_refinement_bits
+= num_refinement_bits
;
265 num_refinement_bits
= 0;
// Already-nonzero coefficient: emit its correction bit only.
268 *next_ref_bit
++ = absval
& 1u;
269 ++num_refinement_bits
;
272 int symbol
= (r
<< 4u) + 1 + ((mask
+ 1) << 1);
273 token
.symbol
= symbol
;
274 token
.refbits
= num_refinement_bits
;
275 *next_token
++ = token
;
277 num_refinement_bits
= 0;
278 num_eob_refinement_bits
= 0;
280 next_eob_token
= next_token
;
281 eob_run
= eob_refbits
= 0;
283 if (r
> 0 || num_eob_refinement_bits
+ num_refinement_bits
> 0) {
285 eob_refbits
+= num_eob_refinement_bits
+ num_refinement_bits
;
// A RefToken can carry at most 255 refinement bits; start a new EOB
// token when the budget is exceeded.
286 if (eob_refbits
> 255) {
288 eob_refbits
= num_eob_refinement_bits
+ num_refinement_bits
;
291 next_token
= next_eob_token
;
292 next_token
->refbits
= eob_refbits
;
294 next_token
->symbol
= 0;
295 } else if (eob_run
== 2) {
296 next_token
->symbol
= 16;
298 } else if ((eob_run
& (eob_run
- 1)) == 0) {
299 next_token
->symbol
+= 16;
// Spec limit on EOB run length; close the run when reached.
305 if (eob_run
== 0x7fff) {
306 next_eob_token
= next_token
;
307 eob_run
= eob_refbits
= 0;
310 sti
->num_nonzeros
+= num_nzeros
;
314 sti
->num_tokens
= next_token
- sti
->tokens
;
315 sti
->restarts
[restart_idx
++] = sti
->num_tokens
;
// Publish end positions for the next refinement scan to continue from.
316 m
->next_refinement_token
= next_token
;
317 m
->next_refinement_bit
= next_ref_bit
;
// Tokenizes one scan. AC-only progressive scans (Ss > 0) are delegated to
// the first-pass / refinement helpers above; this body then handles
// sequential scans and progressive DC scans (Ss == 0), iterating MCUs and
// dispatching per block either to the SIMD ComputeTokensSequential or to
// TokenizeProgressiveDC / DC-refinement-bit recording. Out-of-range blocks
// of partial MCUs use the all-zero kSinkBlock.
// NOTE(review): numerous interior lines are missing from this extraction
// (e.g. originals 326, 328-331, 336, 344, 348-349, 353, 357, 361-363,
// 377, 381, 385-386, 391-392, 401-402, 415-416, 418, 422, 424-425,
// 427-428, 430-437, 439, 441), including several closing braces and the
// kSinkBlock fallback assignment — restore from upstream.
320 void TokenizeScan(j_compress_ptr cinfo
, size_t scan_index
, int ac_ctx_offset
,
321 ScanTokenInfo
* sti
) {
322 const jpeg_scan_info
* scan_info
= &cinfo
->scan_info
[scan_index
];
// AC scans of progressive JPEGs have dedicated tokenizers.
323 if (scan_info
->Ss
> 0) {
324 if (scan_info
->Ah
== 0) {
325 TokenizeACProgressiveScan(cinfo
, scan_index
, ac_ctx_offset
, sti
);
327 TokenizeACRefinementScan(cinfo
, scan_index
, sti
);
332 jpeg_comp_master
* m
= cinfo
->master
;
333 size_t restart_interval
= sti
->restart_interval
;
334 int restarts_to_go
= restart_interval
;
335 coeff_t last_dc_coeff
[MAX_COMPS_IN_SCAN
] = {0};
337 // "Non-interleaved" means color data comes in separate scans, in other words
338 // each scan can contain only one color component.
339 const bool is_interleaved
= (scan_info
->comps_in_scan
> 1);
340 const bool is_progressive
= cinfo
->progressive_mode
;
341 const int Ah
= scan_info
->Ah
;
342 const int Al
= scan_info
->Al
;
// Zero-filled stand-in block for coordinates outside the component.
343 HWY_ALIGN
constexpr coeff_t kSinkBlock
[DCTSIZE2
] = {0};
345 size_t restart_idx
= 0;
346 TokenArray
* ta
= &m
->token_arrays
[m
->cur_token_array
];
347 sti
->token_offset
= Ah
> 0 ? 0 : m
->total_num_tokens
+ ta
->num_tokens
;
// DC refinement scans store one bit per block instead of tokens.
350 sti
->refbits
= Allocate
<uint8_t>(cinfo
, sti
->num_blocks
, JPOOL_IMAGE
);
351 } else if (cinfo
->progressive_mode
) {
352 if (ta
->num_tokens
+ sti
->num_blocks
> m
->num_tokens
) {
354 m
->total_num_tokens
+= ta
->num_tokens
;
355 ++m
->cur_token_array
;
356 ta
= &m
->token_arrays
[m
->cur_token_array
];
358 m
->num_tokens
= sti
->num_blocks
;
359 ta
->tokens
= Allocate
<Token
>(cinfo
, m
->num_tokens
, JPOOL_IMAGE
);
360 m
->next_token
= ta
->tokens
;
364 JBLOCKARRAY ba
[MAX_COMPS_IN_SCAN
];
365 size_t block_idx
= 0;
366 for (size_t mcu_y
= 0; mcu_y
< sti
->MCU_rows_in_scan
; ++mcu_y
) {
// Fetch the coefficient rows of every component in this MCU row.
367 for (int i
= 0; i
< scan_info
->comps_in_scan
; ++i
) {
368 int comp_idx
= scan_info
->component_index
[i
];
369 jpeg_component_info
* comp
= &cinfo
->comp_info
[comp_idx
];
370 int n_blocks_y
= is_interleaved
? comp
->v_samp_factor
: 1;
371 int by0
= mcu_y
* n_blocks_y
;
372 int block_rows_left
= comp
->height_in_blocks
- by0
;
373 int max_block_rows
= std::min(n_blocks_y
, block_rows_left
);
374 ba
[i
] = (*cinfo
->mem
->access_virt_barray
)(
375 reinterpret_cast<j_common_ptr
>(cinfo
), m
->coeff_buffers
[comp_idx
],
376 by0
, max_block_rows
, false);
// Sequential mode: grow token storage ahead of each MCU row if needed.
378 if (!cinfo
->progressive_mode
) {
379 int max_tokens_per_mcu_row
= MaxNumTokensPerMCURow(cinfo
);
380 if (ta
->num_tokens
+ max_tokens_per_mcu_row
> m
->num_tokens
) {
382 m
->total_num_tokens
+= ta
->num_tokens
;
383 ++m
->cur_token_array
;
384 ta
= &m
->token_arrays
[m
->cur_token_array
];
387 EstimateNumTokens(cinfo
, mcu_y
, sti
->MCU_rows_in_scan
,
388 m
->total_num_tokens
, max_tokens_per_mcu_row
);
389 ta
->tokens
= Allocate
<Token
>(cinfo
, m
->num_tokens
, JPOOL_IMAGE
);
390 m
->next_token
= ta
->tokens
;
393 for (size_t mcu_x
= 0; mcu_x
< sti
->MCUs_per_row
; ++mcu_x
) {
394 // Possibly emit a restart marker.
395 if (restart_interval
> 0 && restarts_to_go
== 0) {
396 restarts_to_go
= restart_interval
;
// DC prediction resets at every restart marker.
397 memset(last_dc_coeff
, 0, sizeof(last_dc_coeff
));
398 ta
->num_tokens
= m
->next_token
- ta
->tokens
;
399 sti
->restarts
[restart_idx
++] =
400 Ah
> 0 ? block_idx
: m
->total_num_tokens
+ ta
->num_tokens
;
403 for (int i
= 0; i
< scan_info
->comps_in_scan
; ++i
) {
404 int comp_idx
= scan_info
->component_index
[i
];
405 jpeg_component_info
* comp
= &cinfo
->comp_info
[comp_idx
];
406 int n_blocks_y
= is_interleaved
? comp
->v_samp_factor
: 1;
407 int n_blocks_x
= is_interleaved
? comp
->h_samp_factor
: 1;
408 for (int iy
= 0; iy
< n_blocks_y
; ++iy
) {
409 for (int ix
= 0; ix
< n_blocks_x
; ++ix
) {
410 size_t block_y
= mcu_y
* n_blocks_y
+ iy
;
411 size_t block_x
= mcu_x
* n_blocks_x
+ ix
;
412 const coeff_t
* block
;
// Blocks of a partial MCU outside the component use kSinkBlock.
413 if (block_x
>= comp
->width_in_blocks
||
414 block_y
>= comp
->height_in_blocks
) {
417 block
= &ba
[i
][iy
][block_x
][0];
419 if (!is_progressive
) {
420 HWY_DYNAMIC_DISPATCH(ComputeTokensSequential
)
421 (block
, last_dc_coeff
[i
], comp_idx
, ac_ctx_offset
+ i
,
423 last_dc_coeff
[i
] = block
[0];
426 TokenizeProgressiveDC(block
, comp_idx
, Al
, last_dc_coeff
+ i
,
// DC refinement: record the next lower bit of the DC coefficient.
429 sti
->refbits
[block_idx
] = (block
[0] >> Al
) & 1;
438 ta
->num_tokens
= m
->next_token
- ta
->tokens
;
440 JXL_DASSERT(block_idx
== sti
->num_blocks
);
442 Ah
> 0 ? sti
->num_blocks
443 : m
->total_num_tokens
+ ta
->num_tokens
- sti
->token_offset
;
444 sti
->restarts
[restart_idx
++] =
445 Ah
> 0 ? sti
->num_blocks
: m
->total_num_tokens
+ ta
->num_tokens
;
446 if (Ah
== 0 && cinfo
->progressive_mode
) {
447 JXL_DASSERT(sti
->num_blocks
== sti
->num_tokens
);
// Tokenizes every scan of the image, ordering the work so that refinement
// (Ah > 0) AC scans run after the first-pass scan of the same band: pass 0
// handles first-pass AC scans (gathering nonzero counts used to size the
// refinement-bit buffers), then max_num_refinement_scans passes handle
// successive refinement levels, then a final loop tokenizes whatever
// remains (DC/sequential scans).
// NOTE(review): interior lines are missing from this extraction (e.g.
// originals 466, 470, 473, 479-480, 484, 497, 499-500, 504, 506-508,
// 511-512), including the `processed[i]` bookkeeping and closing braces —
// restore from upstream.
453 void TokenizeJpeg(j_compress_ptr cinfo
) {
454 jpeg_comp_master
* m
= cinfo
->master
;
455 std::vector
<int> processed(cinfo
->num_scans
);
456 size_t max_refinement_tokens
= 0;
457 size_t num_refinement_bits
= 0;
458 int num_refinement_scans
[DCTSIZE2
] = {};
459 int max_num_refinement_scans
= 0;
460 for (int i
= 0; i
< cinfo
->num_scans
; ++i
) {
461 const jpeg_scan_info
* si
= &cinfo
->scan_info
[i
];
462 ScanTokenInfo
* sti
= &m
->scan_token_info
[i
];
// First-pass AC scan that will later be refined (Al > 0).
463 if (si
->Ss
> 0 && si
->Ah
== 0 && si
->Al
> 0) {
464 int offset
= m
->ac_ctx_offset
[i
];
465 TokenizeScan(cinfo
, i
, offset
, sti
);
467 max_refinement_tokens
+= sti
->num_future_nonzeros
;
468 for (int k
= si
->Ss
; k
<= si
->Se
; ++k
) {
469 num_refinement_scans
[k
] = si
->Al
;
471 max_num_refinement_scans
= std::max(max_num_refinement_scans
, si
->Al
);
472 num_refinement_bits
+= sti
->num_nonzeros
;
474 if (si
->Ss
> 0 && si
->Ah
> 0) {
475 int comp_idx
= si
->component_index
[0];
476 const jpeg_component_info
* comp
= &cinfo
->comp_info
[comp_idx
];
477 size_t num_blocks
= comp
->width_in_blocks
* comp
->height_in_blocks
;
// Worst case: one ZRL-ish token per 16 coefficients plus one per block.
478 max_refinement_tokens
+= (1 + (si
->Se
- si
->Ss
) / 16) * num_blocks
;
481 if (max_refinement_tokens
> 0) {
482 m
->next_refinement_token
=
483 Allocate
<RefToken
>(cinfo
, max_refinement_tokens
, JPOOL_IMAGE
);
// One pass per refinement level, highest Ah first.
485 for (int j
= 0; j
< max_num_refinement_scans
; ++j
) {
486 uint8_t* refinement_bits
=
487 Allocate
<uint8_t>(cinfo
, num_refinement_bits
, JPOOL_IMAGE
);
488 m
->next_refinement_bit
= refinement_bits
;
489 size_t new_refinement_bits
= 0;
490 for (int i
= 0; i
< cinfo
->num_scans
; ++i
) {
491 const jpeg_scan_info
* si
= &cinfo
->scan_info
[i
];
492 ScanTokenInfo
* sti
= &m
->scan_token_info
[i
];
493 if (si
->Ss
> 0 && si
->Ah
> 0 &&
494 si
->Ah
== num_refinement_scans
[si
->Ss
] - j
) {
495 int offset
= m
->ac_ctx_offset
[i
];
496 TokenizeScan(cinfo
, i
, offset
, sti
);
498 new_refinement_bits
+= sti
->num_nonzeros
;
501 JXL_DASSERT(m
->next_refinement_bit
==
502 refinement_bits
+ num_refinement_bits
);
503 num_refinement_bits
+= new_refinement_bits
;
// Remaining (DC / sequential) scans.
505 for (int i
= 0; i
< cinfo
->num_scans
; ++i
) {
509 int offset
= m
->ac_ctx_offset
[i
];
510 TokenizeScan(cinfo
, i
, offset
, &m
->scan_token_info
[i
]);
// Symbol-frequency histogram over the JPEG Huffman alphabet, zero-
// initialized on construction.
// NOTE(review): the enclosing `struct Histogram {` line (original 517) is
// missing from this extraction.
518 int count
[kJpegHuffmanAlphabetSize
];
519 Histogram() { memset(count
, 0, sizeof(count
)); }
// Fills `histograms` (one per context) with symbol counts: first from the
// regular Token arrays, then from the RefToken streams of AC refinement
// scans (whose symbols carry extra flag bits, masked off with `& 253`
// before counting).
// NOTE(review): interior lines are missing from this extraction (e.g.
// originals 529, 531-532, 541-543), including the declaration of `t` and
// closing braces — restore from upstream.
522 void BuildHistograms(j_compress_ptr cinfo
, Histogram
* histograms
) {
523 jpeg_comp_master
* m
= cinfo
->master
;
524 size_t num_token_arrays
= m
->cur_token_array
+ 1;
525 for (size_t i
= 0; i
< num_token_arrays
; ++i
) {
526 Token
* tokens
= m
->token_arrays
[i
].tokens
;
527 size_t num_tokens
= m
->token_arrays
[i
].num_tokens
;
528 for (size_t j
= 0; j
< num_tokens
; ++j
) {
530 ++histograms
[t
.context
].count
[t
.symbol
];
533 for (int i
= 0; i
< cinfo
->num_scans
; ++i
) {
534 const jpeg_scan_info
& si
= cinfo
->scan_info
[i
];
535 const ScanTokenInfo
& sti
= m
->scan_token_info
[i
];
536 if (si
.Ss
> 0 && si
.Ah
> 0) {
537 int context
= m
->ac_ctx_offset
[i
];
538 int* ac_histo
= &histograms
[context
].count
[0];
539 for (size_t j
= 0; j
< sti
.num_tokens
; ++j
) {
540 ++ac_histo
[sti
.tokens
[j
].symbol
& 253];
// Result of histogram clustering: the merged histograms, a mapping from
// each input histogram to its cluster, and the Huffman-table slot id
// assigned to each cluster.
// NOTE(review): the closing `};` (original ~550) is missing from this
// extraction.
546 struct JpegClusteredHistograms
{
547 std::vector
<Histogram
> histograms
;
548 std::vector
<uint32_t> histogram_indexes
;
549 std::vector
<uint32_t> slot_ids
;
// Estimated size, in bits, of coding this histogram's symbols with a
// Huffman table built for it: fixed table-header cost plus sum of
// count * depth. A sentinel symbol with count 1 reserves one code so no
// real symbol gets the all-ones codeword.
// NOTE(review): interior lines are missing from this extraction (e.g.
// originals 557, 560, 564-565, 567-568 — the CreateHuffmanTree output
// argument and guard/closing braces) — restore from upstream.
552 float HistogramCost(const Histogram
& histo
) {
553 std::vector
<uint32_t> counts(kJpegHuffmanAlphabetSize
+ 1);
554 std::vector
<uint8_t> depths(kJpegHuffmanAlphabetSize
+ 1);
555 for (size_t i
= 0; i
< kJpegHuffmanAlphabetSize
; ++i
) {
556 counts
[i
] = histo
.count
[i
];
558 counts
[kJpegHuffmanAlphabetSize
] = 1;
559 CreateHuffmanTree(counts
.data(), counts
.size(), kJpegHuffmanMaxBitLength
,
561 size_t header_bits
= (1 + kJpegHuffmanMaxBitLength
) * 8;
562 size_t data_bits
= 0;
563 for (size_t i
= 0; i
< kJpegHuffmanAlphabetSize
; ++i
) {
566 data_bits
+= counts
[i
] * depths
[i
];
569 return header_bits
+ data_bits
;
// Element-wise sum of two histograms: c->count[i] = a.count[i] + b.count[i].
// NOTE(review): closing braces (originals 575-576) are missing from this
// extraction.
572 void AddHistograms(const Histogram
& a
, const Histogram
& b
, Histogram
* c
) {
573 for (size_t i
= 0; i
< kJpegHuffmanAlphabetSize
; ++i
) {
574 c
->count
[i
] = a
.count
[i
] + b
.count
[i
];
// True iff every symbol count is zero.
// NOTE(review): the trailing `return true;` and closing brace (originals
// ~582-583) are missing from this extraction.
578 bool IsEmptyHistogram(const Histogram
& histo
) {
579 for (size_t i
= 0; i
< kJpegHuffmanAlphabetSize
; ++i
) {
580 if (histo
.count
[i
]) return false;
// Greedy histogram clustering: each input histogram is either merged into
// the best-matching existing cluster (when the combined coding cost beats
// coding it alone) or starts a new cluster. Slot ids model the four
// baseline-JPEG Huffman-table slots; when slots run out, an occupied slot
// is recycled round-robin.
// NOTE(review): interior lines are missing from this extraction (e.g.
// originals 593-594, 600, 605-608, 614, 618, 621, 625, 633-635 — the
// empty-histogram shortcut body, `combined` declaration, best_slot/cost
// updates and closing braces) — restore from upstream.
585 void ClusterJpegHistograms(const Histogram
* histograms
, size_t num
,
586 JpegClusteredHistograms
* clusters
) {
587 clusters
->histogram_indexes
.resize(num
);
588 std::vector
<uint32_t> slot_histograms
;
589 std::vector
<float> slot_costs
;
590 for (size_t i
= 0; i
< num
; ++i
) {
591 const Histogram
& cur
= histograms
[i
];
592 if (IsEmptyHistogram(cur
)) {
595 float best_cost
= HistogramCost(cur
);
596 size_t best_slot
= slot_histograms
.size();
// Try merging with each slot's current histogram; keep the cheapest.
597 for (size_t j
= 0; j
< slot_histograms
.size(); ++j
) {
598 size_t prev_idx
= slot_histograms
[j
];
599 const Histogram
& prev
= clusters
->histograms
[prev_idx
];
601 AddHistograms(prev
, cur
, &combined
);
602 float combined_cost
= HistogramCost(combined
);
// Marginal cost of the merge relative to what the slot already pays.
603 float cost
= combined_cost
- slot_costs
[j
];
604 if (cost
< best_cost
) {
609 if (best_slot
== slot_histograms
.size()) {
610 // Create new histogram.
611 size_t histogram_index
= clusters
->histograms
.size();
612 clusters
->histograms
.push_back(cur
);
613 clusters
->histogram_indexes
[i
] = histogram_index
;
615 // We have a free slot, so we put the new histogram there.
616 slot_histograms
.push_back(histogram_index
);
617 slot_costs
.push_back(best_cost
);
619 // TODO(szabadka) Find the best histogram to replce.
620 best_slot
= (clusters
->slot_ids
.back() + 1) % 4;
622 slot_histograms
[best_slot
] = histogram_index
;
623 slot_costs
[best_slot
] = best_cost
;
624 clusters
->slot_ids
.push_back(best_slot
);
626 // Merge this histogram with a previous one.
627 size_t histogram_index
= slot_histograms
[best_slot
];
628 const Histogram
& prev
= clusters
->histograms
[histogram_index
];
629 AddHistograms(prev
, cur
, &clusters
->histograms
[histogram_index
]);
630 clusters
->histogram_indexes
[i
] = histogram_index
;
631 JXL_ASSERT(clusters
->slot_ids
[histogram_index
] == best_slot
);
632 slot_costs
[best_slot
] += best_cost
;
// Copies one application-supplied DC or AC Huffman table (cinfo->
// dc_huff_tbl_ptrs / ac_huff_tbl_ptrs[index]) into the output table list,
// validating it first, deduplicating via inv_slot_map, and recording the
// JPEG slot id (DC: index, AC: index + 0x10) in slot_id_map.
// NOTE(review): interior lines are missing from this extraction (e.g.
// originals 643, 647-648, 651, 655, 661 — the `table` declaration that
// lines 652 initializes, early-return body and closing braces) — restore
// from upstream.
637 void CopyHuffmanTable(j_compress_ptr cinfo
, int index
, bool is_dc
,
638 int* inv_slot_map
, uint8_t* slot_id_map
,
639 JHUFF_TBL
* huffman_tables
, size_t* num_huffman_tables
) {
640 const char* type
= is_dc
? "DC" : "AC";
641 if (index
< 0 || index
>= NUM_HUFF_TBLS
) {
642 JPEGLI_ERROR("Invalid %s Huffman table index %d", type
, index
);
644 // Check if we have already copied this Huffman table.
645 int slot_idx
= index
+ (is_dc
? 0 : NUM_HUFF_TBLS
);
646 if (inv_slot_map
[slot_idx
] != -1) {
649 inv_slot_map
[slot_idx
] = *num_huffman_tables
;
650 // Look up and validate Huffman table.
652 is_dc
? cinfo
->dc_huff_tbl_ptrs
[index
] : cinfo
->ac_huff_tbl_ptrs
[index
];
653 if (table
== nullptr) {
654 JPEGLI_ERROR("Missing %s Huffman table %d", type
, index
);
656 ValidateHuffmanTable(reinterpret_cast<j_common_ptr
>(cinfo
), table
, is_dc
);
657 // Copy Huffman table to the end of the list and save slot id.
658 slot_id_map
[*num_huffman_tables
] = index
+ (is_dc
? 0 : 0x10);
659 memcpy(&huffman_tables
[*num_huffman_tables
], table
, sizeof(JHUFF_TBL
));
660 ++(*num_huffman_tables
);
// Builds a JHUFF_TBL from a symbol histogram: runs CreateHuffmanTree (with
// a count-1 sentinel symbol reserving the all-ones code), converts the
// per-symbol depths into the JPEG bits[]/huffval[] representation, with
// huffval[] ordered by code length via the offset[] prefix sums.
// NOTE(review): interior lines are missing from this extraction (e.g.
// originals 668, 671, 674, 676-677, 681, 683, 685-687 — the
// CreateHuffmanTree output argument, depth!=0 guards and closing braces)
// — restore from upstream.
663 void BuildJpegHuffmanTable(const Histogram
& histo
, JHUFF_TBL
* table
) {
664 std::vector
<uint32_t> counts(kJpegHuffmanAlphabetSize
+ 1);
665 std::vector
<uint8_t> depths(kJpegHuffmanAlphabetSize
+ 1);
666 for (size_t j
= 0; j
< kJpegHuffmanAlphabetSize
; ++j
) {
667 counts
[j
] = histo
.count
[j
];
669 counts
[kJpegHuffmanAlphabetSize
] = 1;
670 CreateHuffmanTree(counts
.data(), counts
.size(), kJpegHuffmanMaxBitLength
,
672 memset(table
, 0, sizeof(JHUFF_TBL
));
// bits[l] = number of symbols coded with l bits.
673 for (size_t i
= 0; i
< kJpegHuffmanAlphabetSize
; ++i
) {
675 ++table
->bits
[depths
[i
]];
// offset[l] = first huffval[] index for symbols of code length l.
678 int offset
[kJpegHuffmanMaxBitLength
+ 1] = {0};
679 for (size_t i
= 1; i
<= kJpegHuffmanMaxBitLength
; ++i
) {
680 offset
[i
] = offset
[i
- 1] + table
->bits
[i
- 1];
682 for (size_t i
= 0; i
< kJpegHuffmanAlphabetSize
; ++i
) {
684 table
->huffval
[offset
[depths
[i
]]++] = i
;
// Non-optimizing path: copies the application-supplied standard Huffman
// tables referenced by each component (DC and AC) into m->huffman_tables
// and builds the context map (first num_components entries: DC contexts;
// then one AC context per component per scan) from inv_slot_map.
// NOTE(review): interior lines are missing from this extraction (e.g.
// originals 705, 711-712, 715, 720-722 — the `ac_ctx` accumulator, the
// Ss==0 scan filter and closing braces) — restore from upstream.
691 void CopyHuffmanTables(j_compress_ptr cinfo
) {
692 jpeg_comp_master
* m
= cinfo
->master
;
693 size_t max_huff_tables
= 2 * cinfo
->num_components
;
694 // Copy Huffman tables and save slot ids.
695 m
->huffman_tables
= Allocate
<JHUFF_TBL
>(cinfo
, max_huff_tables
, JPOOL_IMAGE
);
696 m
->slot_id_map
= Allocate
<uint8_t>(cinfo
, max_huff_tables
, JPOOL_IMAGE
);
697 m
->num_huffman_tables
= 0;
// -1 means "slot not copied yet"; indices 0-3: DC slots, 4-7: AC slots.
698 int inv_slot_map
[8] = {-1, -1, -1, -1, -1, -1, -1, -1};
699 for (int c
= 0; c
< cinfo
->num_components
; ++c
) {
700 jpeg_component_info
* comp
= &cinfo
->comp_info
[c
];
701 CopyHuffmanTable(cinfo
, comp
->dc_tbl_no
, /*is_dc=*/true, &inv_slot_map
[0],
702 m
->slot_id_map
, m
->huffman_tables
, &m
->num_huffman_tables
);
703 CopyHuffmanTable(cinfo
, comp
->ac_tbl_no
, /*is_dc=*/false, &inv_slot_map
[0],
704 m
->slot_id_map
, m
->huffman_tables
, &m
->num_huffman_tables
);
706 // Compute context map.
707 m
->context_map
= Allocate
<uint8_t>(cinfo
, 8, JPOOL_IMAGE
);
708 memset(m
->context_map
, 0, 8);
709 for (int c
= 0; c
< cinfo
->num_components
; ++c
) {
710 m
->context_map
[c
] = inv_slot_map
[cinfo
->comp_info
[c
].dc_tbl_no
];
713 for (int i
= 0; i
< cinfo
->num_scans
; ++i
) {
714 const jpeg_scan_info
* si
= &cinfo
->scan_info
[i
];
716 for (int j
= 0; j
< si
->comps_in_scan
; ++j
) {
717 int c
= si
->component_index
[j
];
718 jpeg_component_info
* comp
= &cinfo
->comp_info
[c
];
// AC slots live at inv_slot_map[4..7], hence the +4 offset.
719 m
->context_map
[ac_ctx
++] = inv_slot_map
[comp
->ac_tbl_no
+ 4];
// Optimizing path: builds per-context symbol histograms from the token
// streams, clusters DC contexts (first num_components) and AC contexts
// (from index 4 on) separately, builds one Huffman table per cluster
// (DC tables get slot ids 0..., AC tables 0x10 + cluster slot), and
// derives the context map from the cluster assignments.
// NOTE(review): interior lines are missing from this extraction (e.g.
// originals 730, 734, 738-739, 743, 751, 754-755, 757-758, 765, 767-768
// — the ac_clusters argument tail, the huffman_tables allocation LHS,
// else keywords and closing braces) — restore from upstream.
725 void OptimizeHuffmanCodes(j_compress_ptr cinfo
) {
726 jpeg_comp_master
* m
= cinfo
->master
;
727 // Build DC and AC histograms.
728 std::vector
<Histogram
> histograms(m
->num_contexts
);
729 BuildHistograms(cinfo
, &histograms
[0]);
731 // Cluster DC histograms.
732 JpegClusteredHistograms dc_clusters
;
733 ClusterJpegHistograms(histograms
.data(), cinfo
->num_components
, &dc_clusters
);
735 // Cluster AC histograms.
736 JpegClusteredHistograms ac_clusters
;
737 ClusterJpegHistograms(histograms
.data() + 4, m
->num_contexts
- 4,
740 // Create Huffman tables and slot ids clusters.
741 size_t num_dc_huff
= dc_clusters
.histograms
.size();
742 m
->num_huffman_tables
= num_dc_huff
+ ac_clusters
.histograms
.size();
744 Allocate
<JHUFF_TBL
>(cinfo
, m
->num_huffman_tables
, JPOOL_IMAGE
);
745 m
->slot_id_map
= Allocate
<uint8_t>(cinfo
, m
->num_huffman_tables
, JPOOL_IMAGE
);
746 for (size_t i
= 0; i
< m
->num_huffman_tables
; ++i
) {
747 JHUFF_TBL huff_table
= {};
748 if (i
< dc_clusters
.histograms
.size()) {
749 m
->slot_id_map
[i
] = i
;
750 BuildJpegHuffmanTable(dc_clusters
.histograms
[i
], &huff_table
);
752 m
->slot_id_map
[i
] = 16 + ac_clusters
.slot_ids
[i
- num_dc_huff
];
753 BuildJpegHuffmanTable(ac_clusters
.histograms
[i
- num_dc_huff
],
756 memcpy(&m
->huffman_tables
[i
], &huff_table
, sizeof(huff_table
));
759 // Create context map from clustered histogram indexes.
760 m
->context_map
= Allocate
<uint8_t>(cinfo
, m
->num_contexts
, JPOOL_IMAGE
);
761 memset(m
->context_map
, 0, m
->num_contexts
);
762 for (size_t i
= 0; i
< m
->num_contexts
; ++i
) {
763 if (i
< (size_t)cinfo
->num_components
) {
764 m
->context_map
[i
] = dc_clusters
.histogram_indexes
[i
];
766 m
->context_map
[i
] = num_dc_huff
+ ac_clusters
.histogram_indexes
[i
- 4];
// Number of extra (raw) bits that follow each Huffman symbol, indexed by
// the symbol byte (high nibble = zero-run length, low nibble = magnitude
// size). For low nibble 0 the row index itself is used, matching EOBn
// symbols which carry the EOB-run length bits.
// NOTE(review): the table's closing `};` (original ~790) is missing from
// this extraction.
773 constexpr uint8_t kNumExtraBits
[256] = {
774 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, //
775 1, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, //
776 2, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, //
777 3, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, //
778 4, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, //
779 5, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, //
780 6, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, //
781 7, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, //
782 8, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, //
783 9, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, //
784 10, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, //
785 11, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, //
786 12, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, //
787 13, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, //
788 14, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, //
789 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, //
// Converts a JHUFF_TBL (bits[]/huffval[]) into the encoder's fast lookup
// form: per-symbol total bit depth (code length + extra bits from
// kNumExtraBits) and the left-shifted code word, so symbol and extra bits
// can be written with a single table lookup.
// NOTE(review): interior lines are missing from this extraction (e.g.
// originals 796, 800-801, 803, 805-806, 808, 812-816, 822 — the p/last_p/
// next_code declarations, the canonical-code inner-loop advance and
// closing braces) — restore from upstream.
792 void BuildHuffmanCodeTable(const JHUFF_TBL
& table
, HuffmanCodeTable
* code
) {
793 int huff_code
[kJpegHuffmanAlphabetSize
];
794 // +1 for a sentinel element.
795 uint32_t huff_size
[kJpegHuffmanAlphabetSize
+ 1];
// Expand bits[] into a per-symbol code-length list (canonical order).
797 for (size_t l
= 1; l
<= kJpegHuffmanMaxBitLength
; ++l
) {
798 int i
= table
.bits
[l
];
799 while (i
--) huff_size
[p
++] = l
;
802 // Reuse sentinel element.
804 huff_size
[last_p
] = 0;
// Generate canonical code values in increasing length order.
807 uint32_t si
= huff_size
[0];
809 while (huff_size
[p
]) {
810 while ((huff_size
[p
]) == si
) {
811 huff_code
[p
++] = next_code
;
// Re-index by symbol value, folding in the extra-bit count.
817 for (p
= 0; p
< last_p
; p
++) {
818 int i
= table
.huffval
[p
];
819 int nbits
= kNumExtraBits
[i
];
820 code
->depth
[i
] = huff_size
[p
] + nbits
;
821 code
->code
[i
] = huff_code
[p
] << nbits
;
// Builds the encoder-side fast Huffman code tables (m->coding_tables) from
// the JHUFF_TBLs prepared earlier by CopyHuffmanTables or
// OptimizeHuffmanCodes.
// NOTE(review): the allocation's left-hand side (original line 829,
// presumably `m->coding_tables =`) and the closing braces (originals
// ~833-834) are missing from this extraction — restore from upstream.
827 void InitEntropyCoder(j_compress_ptr cinfo
) {
828 jpeg_comp_master
* m
= cinfo
->master
;
830 Allocate
<HuffmanCodeTable
>(cinfo
, m
->num_huffman_tables
, JPOOL_IMAGE
);
831 for (size_t i
= 0; i
< m
->num_huffman_tables
; ++i
) {
832 BuildHuffmanCodeTable(m
->huffman_tables
[i
], &m
->coding_tables
[i
]);
836 } // namespace jpegli