Backed out 3 changesets (bug 1790375) for causing wd failures on fetch_error.py....
[gecko.git] / third_party / jpeg-xl / lib / jpegli / encode_streaming.cc
blob89dbd813f8534944283e906b6e029164401a30a2
1 // Copyright (c) the JPEG XL Project Authors. All rights reserved.
2 //
3 // Use of this source code is governed by a BSD-style
4 // license that can be found in the LICENSE file.
6 #include "lib/jpegli/encode_streaming.h"
8 #include <cmath>
10 #include "lib/jpegli/bit_writer.h"
11 #include "lib/jpegli/bitstream.h"
12 #include "lib/jpegli/entropy_coding.h"
13 #include "lib/jpegli/error.h"
14 #include "lib/jpegli/memory_manager.h"
15 #include "lib/jxl/base/bits.h"
17 #undef HWY_TARGET_INCLUDE
18 #define HWY_TARGET_INCLUDE "lib/jpegli/encode_streaming.cc"
19 #include <hwy/foreach_target.h>
20 #include <hwy/highway.h>
22 #include "lib/jpegli/dct-inl.h"
23 #include "lib/jpegli/entropy_coding-inl.h"
25 HWY_BEFORE_NAMESPACE();
26 namespace jpegli {
27 namespace HWY_NAMESPACE {
// Values accepted as the kMode template argument of ProcessiMCURow.
// Coefficient mode: each computed block is copied into the coefficient
// buffers.
static constexpr int kStreamingModeCoefficients = 0;
// Token mode: each computed block is turned into tokens that are appended to
// the current token array.
static constexpr int kStreamingModeTokens = 1;
// Bits mode: each computed block is written directly through the bit writer.
static constexpr int kStreamingModeBits = 2;
33 namespace {
34 void ZigZagShuffle(int32_t* JXL_RESTRICT block) {
35 // TODO(szabadka) SIMDify this.
36 int32_t tmp[DCTSIZE2];
37 tmp[0] = block[0];
38 tmp[1] = block[1];
39 tmp[2] = block[8];
40 tmp[3] = block[16];
41 tmp[4] = block[9];
42 tmp[5] = block[2];
43 tmp[6] = block[3];
44 tmp[7] = block[10];
45 tmp[8] = block[17];
46 tmp[9] = block[24];
47 tmp[10] = block[32];
48 tmp[11] = block[25];
49 tmp[12] = block[18];
50 tmp[13] = block[11];
51 tmp[14] = block[4];
52 tmp[15] = block[5];
53 tmp[16] = block[12];
54 tmp[17] = block[19];
55 tmp[18] = block[26];
56 tmp[19] = block[33];
57 tmp[20] = block[40];
58 tmp[21] = block[48];
59 tmp[22] = block[41];
60 tmp[23] = block[34];
61 tmp[24] = block[27];
62 tmp[25] = block[20];
63 tmp[26] = block[13];
64 tmp[27] = block[6];
65 tmp[28] = block[7];
66 tmp[29] = block[14];
67 tmp[30] = block[21];
68 tmp[31] = block[28];
69 tmp[32] = block[35];
70 tmp[33] = block[42];
71 tmp[34] = block[49];
72 tmp[35] = block[56];
73 tmp[36] = block[57];
74 tmp[37] = block[50];
75 tmp[38] = block[43];
76 tmp[39] = block[36];
77 tmp[40] = block[29];
78 tmp[41] = block[22];
79 tmp[42] = block[15];
80 tmp[43] = block[23];
81 tmp[44] = block[30];
82 tmp[45] = block[37];
83 tmp[46] = block[44];
84 tmp[47] = block[51];
85 tmp[48] = block[58];
86 tmp[49] = block[59];
87 tmp[50] = block[52];
88 tmp[51] = block[45];
89 tmp[52] = block[38];
90 tmp[53] = block[31];
91 tmp[54] = block[39];
92 tmp[55] = block[46];
93 tmp[56] = block[53];
94 tmp[57] = block[60];
95 tmp[58] = block[61];
96 tmp[59] = block[54];
97 tmp[60] = block[47];
98 tmp[61] = block[55];
99 tmp[62] = block[62];
100 tmp[63] = block[63];
101 memcpy(block, tmp, DCTSIZE2 * sizeof(tmp[0]));
103 } // namespace
// Processes the iMCU row whose index is m->next_iMCU_row.
//
// For every MCU of the row, every component and every block of that component
// inside the MCU, the block's coefficients are computed from the raw input
// with ComputeCoefficientBlock. What happens to them depends on kMode:
//   - kStreamingModeCoefficients: the block is copied into the virtual
//     coefficient array m->coeff_buffers[c];
//   - kStreamingModeTokens: tokens for the block are appended at
//     m->next_token;
//   - kStreamingModeBits: the block is written through the bit writer m->bw
//     using the DC/AC code tables selected via m->context_map.
// Blocks positioned past the component's width_in_blocks / height_in_blocks
// are not computed; token and bits modes emit a fixed DC/AC pair for them and
// coefficient mode skips them.
105 template <int kMode>
106 void ProcessiMCURow(j_compress_ptr cinfo) {
107 jpeg_comp_master* m = cinfo->master;
108 JpegBitWriter* bw = &m->bw;
// Image size in MCUs; one MCU spans 8 * max_{h,v}_samp_factor pixels.
109 int xsize_mcus = DivCeil(cinfo->image_width, 8 * cinfo->max_h_samp_factor);
110 int ysize_mcus = DivCeil(cinfo->image_height, 8 * cinfo->max_v_samp_factor);
111 int mcu_y = m->next_iMCU_row;
// Three scratch areas carved out of m->block_tmp, at offsets 0, DCTSIZE2 and
// 3 * DCTSIZE2.
112 int32_t* block = m->block_tmp;
113 int32_t* symbols = m->block_tmp + DCTSIZE2;
114 int32_t* nonzero_idx = m->block_tmp + 3 * DCTSIZE2;
115 coeff_t* JXL_RESTRICT last_dc_coeff = m->last_dc_coeff;
// The quant field is consulted only when adaptive quantization is enabled and
// no PSNR target is set.
116 bool adaptive_quant = m->use_adaptive_quantization && m->psnr_target == 0;
// Only filled in (and only read) in coefficient mode.
117 JBLOCKARRAY ba[kMaxComponents];
118 if (kMode == kStreamingModeCoefficients) {
119 for (int c = 0; c < cinfo->num_components; ++c) {
120 jpeg_component_info* comp = &cinfo->comp_info[c];
// Request the block rows of this iMCU row, clamped to the rows that remain in
// the component. The final `true` is the last parameter of
// access_virt_barray (the `writable` flag in the libjpeg memory-manager
// interface — TODO confirm against the jpegli declaration).
121 int by0 = mcu_y * comp->v_samp_factor;
122 int block_rows_left = comp->height_in_blocks - by0;
123 int max_block_rows = std::min(comp->v_samp_factor, block_rows_left);
124 ba[c] = (*cinfo->mem->access_virt_barray)(
125 reinterpret_cast<j_common_ptr>(cinfo), m->coeff_buffers[c], by0,
126 max_block_rows, true);
// Token mode: make sure the current token array can take a worst-case row
// (MaxNumTokensPerMCURow). If it cannot, an array that already owns a buffer
// is retired (its count is added to m->total_num_tokens and the next array
// becomes current), and a new buffer sized by EstimateNumTokens is allocated
// from JPOOL_IMAGE.
129 if (kMode == kStreamingModeTokens) {
130 TokenArray* ta = &m->token_arrays[m->cur_token_array];
131 int max_tokens_per_mcu_row = MaxNumTokensPerMCURow(cinfo);
132 if (ta->num_tokens + max_tokens_per_mcu_row > m->num_tokens) {
133 if (ta->tokens) {
134 m->total_num_tokens += ta->num_tokens;
135 ++m->cur_token_array;
136 ta = &m->token_arrays[m->cur_token_array];
138 m->num_tokens =
139 EstimateNumTokens(cinfo, mcu_y, ysize_mcus, m->total_num_tokens,
140 max_tokens_per_mcu_row);
141 ta->tokens = Allocate<Token>(cinfo, m->num_tokens, JPOOL_IMAGE);
142 m->next_token = ta->tokens;
// Per component: pointer to the first raw input row of this iMCU row (row
// index mcu_y * v_samp_factor * DCTSIZE).
145 const float* imcu_start[kMaxComponents];
146 for (int c = 0; c < cinfo->num_components; ++c) {
147 jpeg_component_info* comp = &cinfo->comp_info[c];
148 imcu_start[c] = m->raw_data[c]->Row(mcu_y * comp->v_samp_factor * DCTSIZE);
// qf stays null unless adaptive quantization is active; it is dereferenced
// only under the same condition further down.
150 const float* qf = nullptr;
151 if (adaptive_quant) {
152 qf = m->quant_field.Row(0);
154 HuffmanCodeTable* dc_code = nullptr;
155 HuffmanCodeTable* ac_code = nullptr;
156 const size_t qf_stride = m->quant_field.stride();
157 for (int mcu_x = 0; mcu_x < xsize_mcus; ++mcu_x) {
158 for (int c = 0; c < cinfo->num_components; ++c) {
159 jpeg_component_info* comp = &cinfo->comp_info[c];
// Bits mode: the DC table is looked up with context c, the AC table with
// context c + 4 (presumably 4 is the maximum number of components — TODO
// confirm).
160 if (kMode == kStreamingModeBits) {
161 dc_code = &m->coding_tables[m->context_map[c]];
162 ac_code = &m->coding_tables[m->context_map[c + 4]];
164 float* JXL_RESTRICT qmc = m->quant_mul[c];
165 const size_t stride = m->raw_data[c]->stride();
166 const int h_factor = m->h_factor[c];
167 const float* zero_bias_offset = m->zero_bias_offset[c];
168 const float* zero_bias_mul = m->zero_bias_mul[c];
169 float aq_strength = 0.0f;
170 for (int iy = 0; iy < comp->v_samp_factor; ++iy) {
171 for (int ix = 0; ix < comp->h_samp_factor; ++ix) {
// Block coordinates within the component.
172 size_t by = mcu_y * comp->v_samp_factor + iy;
173 size_t bx = mcu_x * comp->h_samp_factor + ix;
// Block lies outside the component: emit a fixed DC/AC pair (token value 0,
// or entry 0 of each code table) in token/bits mode, nothing in coefficient
// mode, and move on without touching last_dc_coeff.
174 if (bx >= comp->width_in_blocks || by >= comp->height_in_blocks) {
175 if (kMode == kStreamingModeTokens) {
176 *m->next_token++ = Token(c, 0, 0);
177 *m->next_token++ = Token(c + 4, 0, 0);
178 } else if (kMode == kStreamingModeBits) {
179 WriteBits(bw, dc_code->depth[0], dc_code->code[0]);
180 WriteBits(bw, ac_code->depth[0], ac_code->code[0]);
182 continue;
// Quant field value for this block; the column is scaled by the component's
// h_factor.
184 if (adaptive_quant) {
185 aq_strength = qf[iy * qf_stride + bx * h_factor];
// Top-left input sample of the block inside the raw data of component c.
187 const float* pixels = imcu_start[c] + (iy * stride + bx) * DCTSIZE;
188 ComputeCoefficientBlock(pixels, stride, qmc, last_dc_coeff[c],
189 aq_strength, zero_bias_offset, zero_bias_mul,
190 m->dct_buffer, block);
// Coefficient mode: store the block, reading `block` through
// kJPEGNaturalOrder.
191 if (kMode == kStreamingModeCoefficients) {
192 JCOEF* cblock = &ba[c][iy][bx][0];
193 for (int k = 0; k < DCTSIZE2; ++k) {
194 cblock[k] = block[kJPEGNaturalOrder[k]];
// Replace the DC value by its difference from the component's previous DC,
// then advance the predictor; after these two statements last_dc_coeff[c]
// equals the DC value the block had before the subtraction.
197 block[0] -= last_dc_coeff[c];
198 last_dc_coeff[c] += block[0];
199 if (kMode == kStreamingModeTokens) {
200 ComputeTokensForBlock<int32_t, false>(block, 0, c, c + 4,
201 &m->next_token);
202 } else if (kMode == kStreamingModeBits) {
203 ZigZagShuffle(block);
204 const int num_nonzeros = CompactBlock(block, nonzero_idx);
// NOTE(review): 1008 == 63 * 16, so this looks like a test for "the last
// nonzero entry is not the final coefficient" — confirm against the index
// encoding produced by CompactBlock.
205 const bool emit_eob = nonzero_idx[num_nonzeros - 1] < 1008;
206 ComputeSymbols(num_nonzeros, nonzero_idx, block, symbols);
207 WriteBlock(symbols, block, num_nonzeros, emit_eob, dc_code, ac_code,
208 bw);
// Token mode: record how many tokens the current array now holds and publish
// the running total in scan_token_info[0], including its restarts[0] entry.
214 if (kMode == kStreamingModeTokens) {
215 TokenArray* ta = &m->token_arrays[m->cur_token_array];
216 ta->num_tokens = m->next_token - ta->tokens;
217 ScanTokenInfo* sti = &m->scan_token_info[0];
218 sti->num_tokens = m->total_num_tokens + ta->num_tokens;
219 sti->restarts[0] = sti->num_tokens;
223 void ComputeCoefficientsForiMCURow(j_compress_ptr cinfo) {
224 ProcessiMCURow<kStreamingModeCoefficients>(cinfo);
227 void ComputeTokensForiMCURow(j_compress_ptr cinfo) {
228 ProcessiMCURow<kStreamingModeTokens>(cinfo);
231 void WriteiMCURow(j_compress_ptr cinfo) {
232 ProcessiMCURow<kStreamingModeBits>(cinfo);
235 // NOLINTNEXTLINE(google-readability-namespace-comments)
236 } // namespace HWY_NAMESPACE
237 } // namespace jpegli
238 HWY_AFTER_NAMESPACE();
240 #if HWY_ONCE
241 namespace jpegli {
// Highway export declarations for the three per-target functions defined in
// HWY_NAMESPACE; the HWY_DYNAMIC_DISPATCH calls in this namespace refer to
// these names.
242 HWY_EXPORT(ComputeCoefficientsForiMCURow);
243 HWY_EXPORT(ComputeTokensForiMCURow);
244 HWY_EXPORT(WriteiMCURow);
246 void ComputeCoefficientsForiMCURow(j_compress_ptr cinfo) {
247 HWY_DYNAMIC_DISPATCH(ComputeCoefficientsForiMCURow)(cinfo);
250 void ComputeTokensForiMCURow(j_compress_ptr cinfo) {
251 HWY_DYNAMIC_DISPATCH(ComputeTokensForiMCURow)(cinfo);
254 void WriteiMCURow(j_compress_ptr cinfo) {
255 HWY_DYNAMIC_DISPATCH(WriteiMCURow)(cinfo);
258 } // namespace jpegli
259 #endif // HWY_ONCE