Backed out 3 changesets (bug 1790375) for causing wd failures on fetch_error.py....
[gecko.git] / third_party / jpeg-xl / lib / jpegli / entropy_coding.cc
blob7e50bbc3a78a47d8feec907dc6a7ed2ef9ecff50
1 // Copyright (c) the JPEG XL Project Authors. All rights reserved.
2 //
3 // Use of this source code is governed by a BSD-style
4 // license that can be found in the LICENSE file.
6 #include "lib/jpegli/entropy_coding.h"
8 #include <vector>
10 #include "lib/jpegli/encode_internal.h"
11 #include "lib/jpegli/error.h"
12 #include "lib/jpegli/huffman.h"
13 #include "lib/jxl/base/bits.h"
15 #undef HWY_TARGET_INCLUDE
16 #define HWY_TARGET_INCLUDE "lib/jpegli/entropy_coding.cc"
17 #include <hwy/foreach_target.h>
18 #include <hwy/highway.h>
20 #include "lib/jpegli/entropy_coding-inl.h"
22 HWY_BEFORE_NAMESPACE();
23 namespace jpegli {
24 namespace HWY_NAMESPACE {
// HWY-compiled entry point for sequential (baseline) tokenization of one
// 8x8 coefficient block. Thin wrapper so HWY_DYNAMIC_DISPATCH can select
// the best SIMD target for ComputeTokensForBlock.
void ComputeTokensSequential(const coeff_t* block, int last_dc, int dc_ctx,
                             int ac_ctx, Token** tokens_ptr) {
  ComputeTokensForBlock<coeff_t, true>(block, last_dc, dc_ctx, ac_ctx,
                                       tokens_ptr);
}
32 // NOLINTNEXTLINE(google-readability-namespace-comments)
33 } // namespace HWY_NAMESPACE
34 } // namespace jpegli
35 HWY_AFTER_NAMESPACE();
37 #if HWY_ONCE
38 namespace jpegli {
40 size_t MaxNumTokensPerMCURow(j_compress_ptr cinfo) {
41 int MCUs_per_row = DivCeil(cinfo->image_width, 8 * cinfo->max_h_samp_factor);
42 size_t blocks_per_mcu = 0;
43 for (int c = 0; c < cinfo->num_components; ++c) {
44 jpeg_component_info* comp = &cinfo->comp_info[c];
45 blocks_per_mcu += comp->h_samp_factor * comp->v_samp_factor;
47 return kDCTBlockSize * blocks_per_mcu * MCUs_per_row;
50 size_t EstimateNumTokens(j_compress_ptr cinfo, size_t mcu_y, size_t ysize_mcus,
51 size_t num_tokens, size_t max_per_row) {
52 size_t estimate;
53 if (mcu_y == 0) {
54 estimate = 16 * max_per_row;
55 } else {
56 estimate = (4 * ysize_mcus * num_tokens) / (3 * mcu_y);
58 size_t mcus_left = ysize_mcus - mcu_y;
59 return std::min(mcus_left * max_per_row,
60 std::max(max_per_row, estimate - num_tokens));
63 namespace {
64 HWY_EXPORT(ComputeTokensSequential);
66 void TokenizeProgressiveDC(const coeff_t* coeffs, int context, int Al,
67 coeff_t* last_dc_coeff, Token** next_token) {
68 coeff_t temp2;
69 coeff_t temp;
70 temp2 = coeffs[0] >> Al;
71 temp = temp2 - *last_dc_coeff;
72 *last_dc_coeff = temp2;
73 temp2 = temp;
74 if (temp < 0) {
75 temp = -temp;
76 temp2--;
78 int nbits = (temp == 0) ? 0 : (jxl::FloorLog2Nonzero<uint32_t>(temp) + 1);
79 int bits = temp2 & ((1 << nbits) - 1);
80 *(*next_token)++ = Token(context, nbits, bits);
// Tokenizes one first-pass AC scan (Ah == 0) of a progressive JPEG for a
// single component. Coefficients in the spectral band [Ss, Se], shifted by
// the successive-approximation bit Al, are converted into (run, size)
// tokens; runs of all-zero blocks are accumulated into EOB-run tokens.
// Outputs go into m->token_arrays; per-scan bookkeeping (token offsets,
// restart positions, nonzero counts) is recorded in *sti.
void TokenizeACProgressiveScan(j_compress_ptr cinfo, int scan_index,
                               int context, ScanTokenInfo* sti) {
  jpeg_comp_master* m = cinfo->master;
  const jpeg_scan_info* scan_info = &cinfo->scan_info[scan_index];
  const int comp_idx = scan_info->component_index[0];
  const jpeg_component_info* comp = &cinfo->comp_info[comp_idx];
  const int Al = scan_info->Al;  // successive approximation low bit
  const int Ss = scan_info->Ss;  // spectral selection start
  const int Se = scan_info->Se;  // spectral selection end
  const size_t restart_interval = sti->restart_interval;
  int restarts_to_go = restart_interval;
  size_t num_blocks = comp->height_in_blocks * comp->width_in_blocks;
  size_t num_restarts =
      restart_interval > 0 ? DivCeil(num_blocks, restart_interval) : 1;
  size_t restart_idx = 0;
  int eob_run = 0;  // pending run of all-zero (in this band) blocks
  TokenArray* ta = &m->token_arrays[m->cur_token_array];
  sti->token_offset = m->total_num_tokens + ta->num_tokens;
  sti->restarts = Allocate<size_t>(cinfo, num_restarts, JPOOL_IMAGE);
  for (JDIMENSION by = 0; by < comp->height_in_blocks; ++by) {
    JBLOCKARRAY ba = (*cinfo->mem->access_virt_barray)(
        reinterpret_cast<j_common_ptr>(cinfo), m->coeff_buffers[comp_idx], by,
        1, false);
    // Each coefficient can appear in at most one token, but we have to reserve
    // one extra EOBrun token that was rolled over from the previous block-row
    // and has to be flushed at the end.
    int max_tokens_per_row = 1 + comp->width_in_blocks * (Se - Ss + 1);
    if (ta->num_tokens + max_tokens_per_row > m->num_tokens) {
      // Current token array could overflow: seal it and open a new one sized
      // from the observed token rate so far.
      if (ta->tokens) {
        m->total_num_tokens += ta->num_tokens;
        ++m->cur_token_array;
        ta = &m->token_arrays[m->cur_token_array];
      }
      m->num_tokens =
          EstimateNumTokens(cinfo, by, comp->height_in_blocks,
                            m->total_num_tokens, max_tokens_per_row);
      ta->tokens = Allocate<Token>(cinfo, m->num_tokens, JPOOL_IMAGE);
      m->next_token = ta->tokens;
    }
    for (JDIMENSION bx = 0; bx < comp->width_in_blocks; ++bx) {
      if (restart_interval > 0 && restarts_to_go == 0) {
        // Restart boundary: the EOB run may not cross it, so flush it and
        // remember the global token position of this restart.
        if (eob_run > 0) {
          int nbits = jxl::FloorLog2Nonzero<uint32_t>(eob_run);
          int symbol = nbits << 4u;
          *m->next_token++ =
              Token(context, symbol, eob_run & ((1 << nbits) - 1));
          eob_run = 0;
        }
        ta->num_tokens = m->next_token - ta->tokens;
        sti->restarts[restart_idx++] = m->total_num_tokens + ta->num_tokens;
        restarts_to_go = restart_interval;
      }
      const coeff_t* block = &ba[0][bx][0];
      coeff_t temp2;
      coeff_t temp;
      int r = 0;            // current run of zero coefficients
      int num_nzeros = 0;   // nonzeros emitted in this block
      int num_future_nzeros = 0;  // visible only in later (lower Al) scans
      for (int k = Ss; k <= Se; ++k) {
        if ((temp = block[k]) == 0) {
          r++;
          continue;
        }
        // Point transform; temp2 keeps the value used for the extra bits
        // (one's complement, i.e. ~magnitude, for negative coefficients).
        if (temp < 0) {
          temp = -temp;
          temp >>= Al;
          temp2 = ~temp;
        } else {
          temp >>= Al;
          temp2 = temp;
        }
        if (temp == 0) {
          // Nonzero only below bit Al: becomes visible in a refinement scan.
          r++;
          num_future_nzeros++;
          continue;
        }
        // A real coefficient follows: flush any pending EOB run first.
        if (eob_run > 0) {
          int nbits = jxl::FloorLog2Nonzero<uint32_t>(eob_run);
          int symbol = nbits << 4u;
          *m->next_token++ =
              Token(context, symbol, eob_run & ((1 << nbits) - 1));
          eob_run = 0;
        }
        // Zero runs longer than 15 are emitted as ZRL (0xf0) tokens.
        while (r > 15) {
          *m->next_token++ = Token(context, 0xf0, 0);
          r -= 16;
        }
        int nbits = jxl::FloorLog2Nonzero<uint32_t>(temp) + 1;
        int symbol = (r << 4u) + nbits;
        *m->next_token++ = Token(context, symbol, temp2 & ((1 << nbits) - 1));
        ++num_nzeros;
        r = 0;
      }
      if (r > 0) {
        // Remainder of the band is zero: extend the end-of-block run.
        ++eob_run;
        if (eob_run == 0x7FFF) {
          // Largest representable EOB run; must flush now.
          int nbits = jxl::FloorLog2Nonzero<uint32_t>(eob_run);
          int symbol = nbits << 4u;
          *m->next_token++ =
              Token(context, symbol, eob_run & ((1 << nbits) - 1));
          eob_run = 0;
        }
      }
      sti->num_nonzeros += num_nzeros;
      sti->num_future_nonzeros += num_future_nzeros;
      --restarts_to_go;
    }
    ta->num_tokens = m->next_token - ta->tokens;
  }
  if (eob_run > 0) {
    // Flush the EOB run rolled over from the last block row.
    int nbits = jxl::FloorLog2Nonzero<uint32_t>(eob_run);
    int symbol = nbits << 4u;
    *m->next_token++ = Token(context, symbol, eob_run & ((1 << nbits) - 1));
    ++ta->num_tokens;
    eob_run = 0;
  }
  sti->num_tokens = m->total_num_tokens + ta->num_tokens - sti->token_offset;
  sti->restarts[restart_idx++] = m->total_num_tokens + ta->num_tokens;
}
// Tokenizes one AC refinement scan (Ah > 0) for a single component. Emits
// RefTokens (symbol + count of correction bits that precede it) into the
// shared refinement token buffer, raw correction bits into sti->refbits,
// and EOB-run length increments into sti->eobruns. EOB-run tokens are built
// in place at next_eob_token and rewound/extended as the run grows, because
// a RefToken's refbits field can hold at most 255 correction bits per token.
void TokenizeACRefinementScan(j_compress_ptr cinfo, int scan_index,
                              ScanTokenInfo* sti) {
  jpeg_comp_master* m = cinfo->master;
  const jpeg_scan_info* scan_info = &cinfo->scan_info[scan_index];
  const int comp_idx = scan_info->component_index[0];
  const jpeg_component_info* comp = &cinfo->comp_info[comp_idx];
  const int Al = scan_info->Al;  // bit being refined in this scan
  const int Ss = scan_info->Ss;
  const int Se = scan_info->Se;
  const size_t restart_interval = sti->restart_interval;
  int restarts_to_go = restart_interval;
  RefToken token;
  int eob_run = 0;      // blocks in the current EOB run
  int eob_refbits = 0;  // correction bits attached to the current EOB run
  size_t num_blocks = comp->height_in_blocks * comp->width_in_blocks;
  size_t num_restarts =
      restart_interval > 0 ? DivCeil(num_blocks, restart_interval) : 1;
  sti->tokens = m->next_refinement_token;
  sti->refbits = m->next_refinement_bit;
  sti->eobruns = Allocate<uint16_t>(cinfo, num_blocks / 2, JPOOL_IMAGE);
  sti->restarts = Allocate<size_t>(cinfo, num_restarts, JPOOL_IMAGE);
  RefToken* next_token = sti->tokens;
  RefToken* next_eob_token = next_token;  // where the open EOB token lives
  uint8_t* next_ref_bit = sti->refbits;
  uint16_t* next_eobrun = sti->eobruns;
  size_t restart_idx = 0;
  for (JDIMENSION by = 0; by < comp->height_in_blocks; ++by) {
    JBLOCKARRAY ba = (*cinfo->mem->access_virt_barray)(
        reinterpret_cast<j_common_ptr>(cinfo), m->coeff_buffers[comp_idx], by,
        1, false);
    for (JDIMENSION bx = 0; bx < comp->width_in_blocks; ++bx) {
      if (restart_interval > 0 && restarts_to_go == 0) {
        // Restart boundary: EOB runs may not cross it.
        sti->restarts[restart_idx++] = next_token - sti->tokens;
        restarts_to_go = restart_interval;
        next_eob_token = next_token;
        eob_run = eob_refbits = 0;
      }
      const coeff_t* block = &ba[0][bx][0];
      int num_eob_refinement_bits = 0;  // bits deferred past the last token
      int num_refinement_bits = 0;      // bits since the last emitted token
      int num_nzeros = 0;
      int r = 0;  // run of coefficients that are zero at this bit depth
      for (int k = Ss; k <= Se; ++k) {
        int absval = block[k];
        if (absval == 0) {
          r++;
          continue;
        }
        // Branchless absolute value.
        const int mask = absval >> (8 * sizeof(int) - 1);
        absval += mask;
        absval ^= mask;
        absval >>= Al;
        if (absval == 0) {
          // Still zero at this bit depth.
          r++;
          continue;
        }
        // ZRL tokens for zero runs > 15; their pending correction bits
        // travel with the token that follows them.
        while (r > 15) {
          token.symbol = 0xf0;
          token.refbits = num_refinement_bits;
          *next_token++ = token;
          r -= 16;
          num_eob_refinement_bits += num_refinement_bits;
          num_refinement_bits = 0;
        }
        if (absval > 1) {
          // Coefficient already nonzero in earlier scans: emit one raw
          // correction bit, no token.
          *next_ref_bit++ = absval & 1u;
          ++num_refinement_bits;
          continue;
        }
        // Newly-nonzero coefficient (magnitude 1 at this depth). Bit 1 of
        // the symbol carries the sign: +2 for positive, +0 for negative.
        int symbol = (r << 4u) + 1 + ((mask + 1) << 1);
        token.symbol = symbol;
        token.refbits = num_refinement_bits;
        *next_token++ = token;
        ++num_nzeros;
        num_refinement_bits = 0;
        num_eob_refinement_bits = 0;
        r = 0;
        // A real token closes any open EOB run.
        next_eob_token = next_token;
        eob_run = eob_refbits = 0;
      }
      if (r > 0 || num_eob_refinement_bits + num_refinement_bits > 0) {
        // Block ends inside an EOB run: extend (or start) the EOB token at
        // next_eob_token instead of emitting a new token per block.
        ++eob_run;
        eob_refbits += num_eob_refinement_bits + num_refinement_bits;
        if (eob_refbits > 255) {
          // refbits is a byte; start a fresh EOB token for the overflow.
          ++next_eob_token;
          eob_refbits = num_eob_refinement_bits + num_refinement_bits;
          eob_run = 1;
        }
        // Rewind to the open EOB token and rewrite it for the longer run.
        next_token = next_eob_token;
        next_token->refbits = eob_refbits;
        if (eob_run == 1) {
          next_token->symbol = 0;  // plain EOB
        } else if (eob_run == 2) {
          next_token->symbol = 16;  // EOB1: one run-length bit follows
          *next_eobrun++ = 0;
        } else if ((eob_run & (eob_run - 1)) == 0) {
          // Run length hit a power of two: bump EOBn category, reset the
          // stored run-length bits.
          next_token->symbol += 16;
          next_eobrun[-1] = 0;
        } else {
          ++next_eobrun[-1];
        }
        ++next_token;
        if (eob_run == 0x7fff) {
          // Largest representable EOB run: close it.
          next_eob_token = next_token;
          eob_run = eob_refbits = 0;
        }
      }
      sti->num_nonzeros += num_nzeros;
      --restarts_to_go;
    }
  }
  sti->num_tokens = next_token - sti->tokens;
  sti->restarts[restart_idx++] = sti->num_tokens;
  m->next_refinement_token = next_token;
  m->next_refinement_bit = next_ref_bit;
}
// Tokenizes one scan. Progressive AC scans (Ss > 0) are dispatched to the
// specialized routines above; the body below handles sequential scans and
// progressive DC scans (first pass and refinement).
void TokenizeScan(j_compress_ptr cinfo, size_t scan_index, int ac_ctx_offset,
                  ScanTokenInfo* sti) {
  const jpeg_scan_info* scan_info = &cinfo->scan_info[scan_index];
  if (scan_info->Ss > 0) {
    if (scan_info->Ah == 0) {
      TokenizeACProgressiveScan(cinfo, scan_index, ac_ctx_offset, sti);
    } else {
      TokenizeACRefinementScan(cinfo, scan_index, sti);
    }
    return;
  }
  jpeg_comp_master* m = cinfo->master;
  size_t restart_interval = sti->restart_interval;
  int restarts_to_go = restart_interval;
  coeff_t last_dc_coeff[MAX_COMPS_IN_SCAN] = {0};  // DC predictors
  // "Non-interleaved" means color data comes in separate scans, in other words
  // each scan can contain only one color component.
  const bool is_interleaved = (scan_info->comps_in_scan > 1);
  const bool is_progressive = cinfo->progressive_mode;
  const int Ah = scan_info->Ah;
  const int Al = scan_info->Al;
  // All-zero stand-in for blocks outside the component's bounds (partial
  // MCUs at the right/bottom edges).
  HWY_ALIGN constexpr coeff_t kSinkBlock[DCTSIZE2] = {0};
  size_t restart_idx = 0;
  TokenArray* ta = &m->token_arrays[m->cur_token_array];
  sti->token_offset = Ah > 0 ? 0 : m->total_num_tokens + ta->num_tokens;
  if (Ah > 0) {
    // DC refinement scan: emits one raw bit per block and no tokens.
    sti->refbits = Allocate<uint8_t>(cinfo, sti->num_blocks, JPOOL_IMAGE);
  } else if (cinfo->progressive_mode) {
    // Progressive DC first pass: exactly one token per block, so the exact
    // required capacity is known up front.
    if (ta->num_tokens + sti->num_blocks > m->num_tokens) {
      if (ta->tokens) {
        m->total_num_tokens += ta->num_tokens;
        ++m->cur_token_array;
        ta = &m->token_arrays[m->cur_token_array];
      }
      m->num_tokens = sti->num_blocks;
      ta->tokens = Allocate<Token>(cinfo, m->num_tokens, JPOOL_IMAGE);
      m->next_token = ta->tokens;
    }
  }
  JBLOCKARRAY ba[MAX_COMPS_IN_SCAN];
  size_t block_idx = 0;
  for (size_t mcu_y = 0; mcu_y < sti->MCU_rows_in_scan; ++mcu_y) {
    // Fetch one MCU row of coefficient blocks for each component.
    for (int i = 0; i < scan_info->comps_in_scan; ++i) {
      int comp_idx = scan_info->component_index[i];
      jpeg_component_info* comp = &cinfo->comp_info[comp_idx];
      int n_blocks_y = is_interleaved ? comp->v_samp_factor : 1;
      int by0 = mcu_y * n_blocks_y;
      int block_rows_left = comp->height_in_blocks - by0;
      int max_block_rows = std::min(n_blocks_y, block_rows_left);
      ba[i] = (*cinfo->mem->access_virt_barray)(
          reinterpret_cast<j_common_ptr>(cinfo), m->coeff_buffers[comp_idx],
          by0, max_block_rows, false);
    }
    if (!cinfo->progressive_mode) {
      // Sequential mode: grow token storage for this row's worst case,
      // sized from the token rate observed so far.
      int max_tokens_per_mcu_row = MaxNumTokensPerMCURow(cinfo);
      if (ta->num_tokens + max_tokens_per_mcu_row > m->num_tokens) {
        if (ta->tokens) {
          m->total_num_tokens += ta->num_tokens;
          ++m->cur_token_array;
          ta = &m->token_arrays[m->cur_token_array];
        }
        m->num_tokens =
            EstimateNumTokens(cinfo, mcu_y, sti->MCU_rows_in_scan,
                              m->total_num_tokens, max_tokens_per_mcu_row);
        ta->tokens = Allocate<Token>(cinfo, m->num_tokens, JPOOL_IMAGE);
        m->next_token = ta->tokens;
      }
    }
    for (size_t mcu_x = 0; mcu_x < sti->MCUs_per_row; ++mcu_x) {
      // Possibly emit a restart marker.
      if (restart_interval > 0 && restarts_to_go == 0) {
        restarts_to_go = restart_interval;
        // DC prediction resets at restart markers.
        memset(last_dc_coeff, 0, sizeof(last_dc_coeff));
        ta->num_tokens = m->next_token - ta->tokens;
        sti->restarts[restart_idx++] =
            Ah > 0 ? block_idx : m->total_num_tokens + ta->num_tokens;
      }
      // Encode one MCU
      for (int i = 0; i < scan_info->comps_in_scan; ++i) {
        int comp_idx = scan_info->component_index[i];
        jpeg_component_info* comp = &cinfo->comp_info[comp_idx];
        int n_blocks_y = is_interleaved ? comp->v_samp_factor : 1;
        int n_blocks_x = is_interleaved ? comp->h_samp_factor : 1;
        for (int iy = 0; iy < n_blocks_y; ++iy) {
          for (int ix = 0; ix < n_blocks_x; ++ix) {
            size_t block_y = mcu_y * n_blocks_y + iy;
            size_t block_x = mcu_x * n_blocks_x + ix;
            const coeff_t* block;
            if (block_x >= comp->width_in_blocks ||
                block_y >= comp->height_in_blocks) {
              block = kSinkBlock;
            } else {
              block = &ba[i][iy][block_x][0];
            }
            if (!is_progressive) {
              HWY_DYNAMIC_DISPATCH(ComputeTokensSequential)
              (block, last_dc_coeff[i], comp_idx, ac_ctx_offset + i,
               &m->next_token);
              last_dc_coeff[i] = block[0];
            } else {
              if (Ah == 0) {
                TokenizeProgressiveDC(block, comp_idx, Al, last_dc_coeff + i,
                                      &m->next_token);
              } else {
                // DC refinement: the single correction bit Al of the DC.
                sti->refbits[block_idx] = (block[0] >> Al) & 1;
              }
            }
            ++block_idx;
          }
        }
      }
      --restarts_to_go;
    }
    ta->num_tokens = m->next_token - ta->tokens;
  }
  JXL_DASSERT(block_idx == sti->num_blocks);
  sti->num_tokens =
      Ah > 0 ? sti->num_blocks
             : m->total_num_tokens + ta->num_tokens - sti->token_offset;
  sti->restarts[restart_idx++] =
      Ah > 0 ? sti->num_blocks : m->total_num_tokens + ta->num_tokens;
  if (Ah == 0 && cinfo->progressive_mode) {
    JXL_DASSERT(sti->num_blocks == sti->num_tokens);
  }
}
451 } // namespace
// Tokenizes every scan of the image. AC successive-approximation chains are
// handled in a specific order: the first pass (Ah == 0, Al > 0) runs first
// to gather refinement statistics, then the refinement scans run in
// decreasing Ah order, then all remaining scans.
void TokenizeJpeg(j_compress_ptr cinfo) {
  jpeg_comp_master* m = cinfo->master;
  std::vector<int> processed(cinfo->num_scans);
  size_t max_refinement_tokens = 0;
  size_t num_refinement_bits = 0;
  // Per-coefficient Al of the first AC pass covering it, i.e. the number of
  // refinement scans still to come for that coefficient.
  int num_refinement_scans[DCTSIZE2] = {};
  int max_num_refinement_scans = 0;
  for (int i = 0; i < cinfo->num_scans; ++i) {
    const jpeg_scan_info* si = &cinfo->scan_info[i];
    ScanTokenInfo* sti = &m->scan_token_info[i];
    if (si->Ss > 0 && si->Ah == 0 && si->Al > 0) {
      // First pass of an AC successive-approximation chain: tokenize now to
      // learn how many refinement tokens/bits later scans will need.
      int offset = m->ac_ctx_offset[i];
      TokenizeScan(cinfo, i, offset, sti);
      processed[i] = 1;
      max_refinement_tokens += sti->num_future_nonzeros;
      for (int k = si->Ss; k <= si->Se; ++k) {
        num_refinement_scans[k] = si->Al;
      }
      max_num_refinement_scans = std::max(max_num_refinement_scans, si->Al);
      num_refinement_bits += sti->num_nonzeros;
    }
    if (si->Ss > 0 && si->Ah > 0) {
      // AC refinement scan: worst case one ZRL token per 16 coefficients
      // plus one token per block.
      int comp_idx = si->component_index[0];
      const jpeg_component_info* comp = &cinfo->comp_info[comp_idx];
      size_t num_blocks = comp->width_in_blocks * comp->height_in_blocks;
      max_refinement_tokens += (1 + (si->Se - si->Ss) / 16) * num_blocks;
    }
  }
  if (max_refinement_tokens > 0) {
    m->next_refinement_token =
        Allocate<RefToken>(cinfo, max_refinement_tokens, JPOOL_IMAGE);
  }
  // Refinement passes, outermost first: pass j handles scans whose Ah is
  // exactly j steps below the chain's starting Al.
  for (int j = 0; j < max_num_refinement_scans; ++j) {
    uint8_t* refinement_bits =
        Allocate<uint8_t>(cinfo, num_refinement_bits, JPOOL_IMAGE);
    m->next_refinement_bit = refinement_bits;
    size_t new_refinement_bits = 0;
    for (int i = 0; i < cinfo->num_scans; ++i) {
      const jpeg_scan_info* si = &cinfo->scan_info[i];
      ScanTokenInfo* sti = &m->scan_token_info[i];
      if (si->Ss > 0 && si->Ah > 0 &&
          si->Ah == num_refinement_scans[si->Ss] - j) {
        int offset = m->ac_ctx_offset[i];
        TokenizeScan(cinfo, i, offset, sti);
        processed[i] = 1;
        // Each newly-nonzero coefficient needs one more correction bit in
        // the next refinement pass.
        new_refinement_bits += sti->num_nonzeros;
      }
    }
    JXL_DASSERT(m->next_refinement_bit ==
                refinement_bits + num_refinement_bits);
    num_refinement_bits += new_refinement_bits;
  }
  // Everything else: DC scans, sequential scans, AC passes with Al == 0.
  for (int i = 0; i < cinfo->num_scans; ++i) {
    if (processed[i]) {
      continue;
    }
    int offset = m->ac_ctx_offset[i];
    TokenizeScan(cinfo, i, offset, &m->scan_token_info[i]);
    processed[i] = 1;
  }
}
515 namespace {
// Frequency count of each Huffman symbol within one entropy context.
struct Histogram {
  int count[kJpegHuffmanAlphabetSize];
  Histogram() { memset(count, 0, sizeof(count)); }
};
// Accumulates per-context symbol histograms over all tokens produced by
// tokenization: first the regular Token arrays, then the separately-stored
// RefTokens of AC refinement scans.
void BuildHistograms(j_compress_ptr cinfo, Histogram* histograms) {
  jpeg_comp_master* m = cinfo->master;
  size_t num_token_arrays = m->cur_token_array + 1;
  for (size_t i = 0; i < num_token_arrays; ++i) {
    Token* tokens = m->token_arrays[i].tokens;
    size_t num_tokens = m->token_arrays[i].num_tokens;
    for (size_t j = 0; j < num_tokens; ++j) {
      Token t = tokens[j];
      ++histograms[t.context].count[t.symbol];
    }
  }
  for (int i = 0; i < cinfo->num_scans; ++i) {
    const jpeg_scan_info& si = cinfo->scan_info[i];
    const ScanTokenInfo& sti = m->scan_token_info[i];
    if (si.Ss > 0 && si.Ah > 0) {
      int context = m->ac_ctx_offset[i];
      int* ac_histo = &histograms[context].count[0];
      for (size_t j = 0; j < sti.num_tokens; ++j) {
        // & 253 clears bit 1, which TokenizeACRefinementScan uses to carry
        // the coefficient's sign. The sign is written as a raw bit, not
        // through the Huffman code, so both signs share a histogram entry.
        ++ac_histo[sti.tokens[j].symbol & 253];
      }
    }
  }
}
// Output of greedy histogram clustering.
struct JpegClusteredHistograms {
  // The merged (clustered) histograms.
  std::vector<Histogram> histograms;
  // Maps each input histogram index to its clustered histogram index.
  std::vector<uint32_t> histogram_indexes;
  // JPEG table slot (0-3) assigned to each clustered histogram.
  std::vector<uint32_t> slot_ids;
};
552 float HistogramCost(const Histogram& histo) {
553 std::vector<uint32_t> counts(kJpegHuffmanAlphabetSize + 1);
554 std::vector<uint8_t> depths(kJpegHuffmanAlphabetSize + 1);
555 for (size_t i = 0; i < kJpegHuffmanAlphabetSize; ++i) {
556 counts[i] = histo.count[i];
558 counts[kJpegHuffmanAlphabetSize] = 1;
559 CreateHuffmanTree(counts.data(), counts.size(), kJpegHuffmanMaxBitLength,
560 &depths[0]);
561 size_t header_bits = (1 + kJpegHuffmanMaxBitLength) * 8;
562 size_t data_bits = 0;
563 for (size_t i = 0; i < kJpegHuffmanAlphabetSize; ++i) {
564 if (depths[i] > 0) {
565 header_bits += 8;
566 data_bits += counts[i] * depths[i];
569 return header_bits + data_bits;
572 void AddHistograms(const Histogram& a, const Histogram& b, Histogram* c) {
573 for (size_t i = 0; i < kJpegHuffmanAlphabetSize; ++i) {
574 c->count[i] = a.count[i] + b.count[i];
578 bool IsEmptyHistogram(const Histogram& histo) {
579 for (size_t i = 0; i < kJpegHuffmanAlphabetSize; ++i) {
580 if (histo.count[i]) return false;
582 return true;
// Greedily clusters histograms to minimize total (table header + data) bit
// cost under the JPEG limit of 4 simultaneously-defined tables per class:
// each clustered histogram occupies one of 4 "slots". A new histogram is
// either merged into the slot where the marginal cost is lowest, or opened
// as a new table (evicting a slot round-robin when all 4 are taken).
void ClusterJpegHistograms(const Histogram* histograms, size_t num,
                           JpegClusteredHistograms* clusters) {
  clusters->histogram_indexes.resize(num);
  std::vector<uint32_t> slot_histograms;  // clustered index held by each slot
  std::vector<float> slot_costs;          // accumulated cost of each slot
  for (size_t i = 0; i < num; ++i) {
    const Histogram& cur = histograms[i];
    if (IsEmptyHistogram(cur)) {
      // Unused context: histogram_indexes[i] stays at its default 0.
      continue;
    }
    // Cost of a standalone table for cur vs. the marginal cost of merging
    // cur into each currently-occupied slot.
    float best_cost = HistogramCost(cur);
    size_t best_slot = slot_histograms.size();
    for (size_t j = 0; j < slot_histograms.size(); ++j) {
      size_t prev_idx = slot_histograms[j];
      const Histogram& prev = clusters->histograms[prev_idx];
      Histogram combined;
      AddHistograms(prev, cur, &combined);
      float combined_cost = HistogramCost(combined);
      float cost = combined_cost - slot_costs[j];
      if (cost < best_cost) {
        best_cost = cost;
        best_slot = j;
      }
    }
    if (best_slot == slot_histograms.size()) {
      // Create new histogram.
      size_t histogram_index = clusters->histograms.size();
      clusters->histograms.push_back(cur);
      clusters->histogram_indexes[i] = histogram_index;
      if (best_slot < 4) {
        // We have a free slot, so we put the new histogram there.
        slot_histograms.push_back(histogram_index);
        slot_costs.push_back(best_cost);
      } else {
        // TODO(szabadka) Find the best histogram to replace.
        best_slot = (clusters->slot_ids.back() + 1) % 4;
      }
      slot_histograms[best_slot] = histogram_index;
      slot_costs[best_slot] = best_cost;
      clusters->slot_ids.push_back(best_slot);
    } else {
      // Merge this histogram with a previous one.
      size_t histogram_index = slot_histograms[best_slot];
      const Histogram& prev = clusters->histograms[histogram_index];
      AddHistograms(prev, cur, &clusters->histograms[histogram_index]);
      clusters->histogram_indexes[i] = histogram_index;
      JXL_ASSERT(clusters->slot_ids[histogram_index] == best_slot);
      slot_costs[best_slot] += best_cost;
    }
  }
}
637 void CopyHuffmanTable(j_compress_ptr cinfo, int index, bool is_dc,
638 int* inv_slot_map, uint8_t* slot_id_map,
639 JHUFF_TBL* huffman_tables, size_t* num_huffman_tables) {
640 const char* type = is_dc ? "DC" : "AC";
641 if (index < 0 || index >= NUM_HUFF_TBLS) {
642 JPEGLI_ERROR("Invalid %s Huffman table index %d", type, index);
644 // Check if we have already copied this Huffman table.
645 int slot_idx = index + (is_dc ? 0 : NUM_HUFF_TBLS);
646 if (inv_slot_map[slot_idx] != -1) {
647 return;
649 inv_slot_map[slot_idx] = *num_huffman_tables;
650 // Look up and validate Huffman table.
651 JHUFF_TBL* table =
652 is_dc ? cinfo->dc_huff_tbl_ptrs[index] : cinfo->ac_huff_tbl_ptrs[index];
653 if (table == nullptr) {
654 JPEGLI_ERROR("Missing %s Huffman table %d", type, index);
656 ValidateHuffmanTable(reinterpret_cast<j_common_ptr>(cinfo), table, is_dc);
657 // Copy Huffman table to the end of the list and save slot id.
658 slot_id_map[*num_huffman_tables] = index + (is_dc ? 0 : 0x10);
659 memcpy(&huffman_tables[*num_huffman_tables], table, sizeof(JHUFF_TBL));
660 ++(*num_huffman_tables);
663 void BuildJpegHuffmanTable(const Histogram& histo, JHUFF_TBL* table) {
664 std::vector<uint32_t> counts(kJpegHuffmanAlphabetSize + 1);
665 std::vector<uint8_t> depths(kJpegHuffmanAlphabetSize + 1);
666 for (size_t j = 0; j < kJpegHuffmanAlphabetSize; ++j) {
667 counts[j] = histo.count[j];
669 counts[kJpegHuffmanAlphabetSize] = 1;
670 CreateHuffmanTree(counts.data(), counts.size(), kJpegHuffmanMaxBitLength,
671 &depths[0]);
672 memset(table, 0, sizeof(JHUFF_TBL));
673 for (size_t i = 0; i < kJpegHuffmanAlphabetSize; ++i) {
674 if (depths[i] > 0) {
675 ++table->bits[depths[i]];
678 int offset[kJpegHuffmanMaxBitLength + 1] = {0};
679 for (size_t i = 1; i <= kJpegHuffmanMaxBitLength; ++i) {
680 offset[i] = offset[i - 1] + table->bits[i - 1];
682 for (size_t i = 0; i < kJpegHuffmanAlphabetSize; ++i) {
683 if (depths[i] > 0) {
684 table->huffval[offset[depths[i]]++] = i;
689 } // namespace
691 void CopyHuffmanTables(j_compress_ptr cinfo) {
692 jpeg_comp_master* m = cinfo->master;
693 size_t max_huff_tables = 2 * cinfo->num_components;
694 // Copy Huffman tables and save slot ids.
695 m->huffman_tables = Allocate<JHUFF_TBL>(cinfo, max_huff_tables, JPOOL_IMAGE);
696 m->slot_id_map = Allocate<uint8_t>(cinfo, max_huff_tables, JPOOL_IMAGE);
697 m->num_huffman_tables = 0;
698 int inv_slot_map[8] = {-1, -1, -1, -1, -1, -1, -1, -1};
699 for (int c = 0; c < cinfo->num_components; ++c) {
700 jpeg_component_info* comp = &cinfo->comp_info[c];
701 CopyHuffmanTable(cinfo, comp->dc_tbl_no, /*is_dc=*/true, &inv_slot_map[0],
702 m->slot_id_map, m->huffman_tables, &m->num_huffman_tables);
703 CopyHuffmanTable(cinfo, comp->ac_tbl_no, /*is_dc=*/false, &inv_slot_map[0],
704 m->slot_id_map, m->huffman_tables, &m->num_huffman_tables);
706 // Compute context map.
707 m->context_map = Allocate<uint8_t>(cinfo, 8, JPOOL_IMAGE);
708 memset(m->context_map, 0, 8);
709 for (int c = 0; c < cinfo->num_components; ++c) {
710 m->context_map[c] = inv_slot_map[cinfo->comp_info[c].dc_tbl_no];
712 int ac_ctx = 4;
713 for (int i = 0; i < cinfo->num_scans; ++i) {
714 const jpeg_scan_info* si = &cinfo->scan_info[i];
715 if (si->Se > 0) {
716 for (int j = 0; j < si->comps_in_scan; ++j) {
717 int c = si->component_index[j];
718 jpeg_component_info* comp = &cinfo->comp_info[c];
719 m->context_map[ac_ctx++] = inv_slot_map[comp->ac_tbl_no + 4];
// Builds optimized Huffman tables from the gathered token statistics:
// histograms per context are clustered (DC and AC separately), one table is
// built per cluster, and the context map is filled so each context points
// at its clustered table. Contexts 0..num_components-1 are DC; AC contexts
// start at index 4.
void OptimizeHuffmanCodes(j_compress_ptr cinfo) {
  jpeg_comp_master* m = cinfo->master;
  // Build DC and AC histograms.
  std::vector<Histogram> histograms(m->num_contexts);
  BuildHistograms(cinfo, &histograms[0]);
  // Cluster DC histograms.
  JpegClusteredHistograms dc_clusters;
  ClusterJpegHistograms(histograms.data(), cinfo->num_components, &dc_clusters);
  // Cluster AC histograms.
  JpegClusteredHistograms ac_clusters;
  ClusterJpegHistograms(histograms.data() + 4, m->num_contexts - 4,
                        &ac_clusters);
  // Create Huffman tables and slot ids clusters.
  size_t num_dc_huff = dc_clusters.histograms.size();
  m->num_huffman_tables = num_dc_huff + ac_clusters.histograms.size();
  m->huffman_tables =
      Allocate<JHUFF_TBL>(cinfo, m->num_huffman_tables, JPOOL_IMAGE);
  m->slot_id_map = Allocate<uint8_t>(cinfo, m->num_huffman_tables, JPOOL_IMAGE);
  for (size_t i = 0; i < m->num_huffman_tables; ++i) {
    JHUFF_TBL huff_table = {};
    if (i < dc_clusters.histograms.size()) {
      // DC tables use DHT slot ids 0x00..
      m->slot_id_map[i] = i;
      BuildJpegHuffmanTable(dc_clusters.histograms[i], &huff_table);
    } else {
      // AC tables use DHT slot ids 0x10 + assigned slot.
      m->slot_id_map[i] = 16 + ac_clusters.slot_ids[i - num_dc_huff];
      BuildJpegHuffmanTable(ac_clusters.histograms[i - num_dc_huff],
                            &huff_table);
    }
    memcpy(&m->huffman_tables[i], &huff_table, sizeof(huff_table));
  }
  // Create context map from clustered histogram indexes. Contexts in
  // [num_components, 4) are unused and keep the memset value 0.
  m->context_map = Allocate<uint8_t>(cinfo, m->num_contexts, JPOOL_IMAGE);
  memset(m->context_map, 0, m->num_contexts);
  for (size_t i = 0; i < m->num_contexts; ++i) {
    if (i < (size_t)cinfo->num_components) {
      m->context_map[i] = dc_clusters.histogram_indexes[i];
    } else if (i >= 4) {
      // AC tables follow the DC tables in m->huffman_tables.
      m->context_map[i] = num_dc_huff + ac_clusters.histogram_indexes[i - 4];
    }
  }
}
771 namespace {
// Number of raw extra bits that follow the Huffman code of each symbol,
// indexed by the 8-bit symbol (run << 4 | size). For size s > 0 the count
// is s (the coefficient magnitude bits). For s == 0 the symbol is an EOBn
// run marker whose high nibble n is the number of EOB-run-length bits --
// except 0x00 (plain EOB) and 0xF0 (ZRL), which carry no extra bits. This
// matches the (nbits << 4) EOB-run tokens emitted by the tokenizers above.
constexpr uint8_t kNumExtraBits[256] = {
    0,  1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,  //
    1,  1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,  //
    2,  1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,  //
    3,  1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,  //
    4,  1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,  //
    5,  1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,  //
    6,  1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,  //
    7,  1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,  //
    8,  1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,  //
    9,  1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,  //
    10, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,  //
    11, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,  //
    12, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,  //
    13, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,  //
    14, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,  //
    0,  1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,  //
};
792 void BuildHuffmanCodeTable(const JHUFF_TBL& table, HuffmanCodeTable* code) {
793 int huff_code[kJpegHuffmanAlphabetSize];
794 // +1 for a sentinel element.
795 uint32_t huff_size[kJpegHuffmanAlphabetSize + 1];
796 int p = 0;
797 for (size_t l = 1; l <= kJpegHuffmanMaxBitLength; ++l) {
798 int i = table.bits[l];
799 while (i--) huff_size[p++] = l;
802 // Reuse sentinel element.
803 int last_p = p;
804 huff_size[last_p] = 0;
806 int next_code = 0;
807 uint32_t si = huff_size[0];
808 p = 0;
809 while (huff_size[p]) {
810 while ((huff_size[p]) == si) {
811 huff_code[p++] = next_code;
812 next_code++;
814 next_code <<= 1;
815 si++;
817 for (p = 0; p < last_p; p++) {
818 int i = table.huffval[p];
819 int nbits = kNumExtraBits[i];
820 code->depth[i] = huff_size[p] + nbits;
821 code->code[i] = huff_code[p] << nbits;
825 } // namespace
827 void InitEntropyCoder(j_compress_ptr cinfo) {
828 jpeg_comp_master* m = cinfo->master;
829 m->coding_tables =
830 Allocate<HuffmanCodeTable>(cinfo, m->num_huffman_tables, JPOOL_IMAGE);
831 for (size_t i = 0; i < m->num_huffman_tables; ++i) {
832 BuildHuffmanCodeTable(m->huffman_tables[i], &m->coding_tables[i]);
836 } // namespace jpegli
837 #endif // HWY_ONCE