Merge autoland to mozilla-central. a=merge
[gecko.git] / third_party / jpeg-xl / lib / jpegli / encode_finish.cc
blob8cc3b8d27503be568f6c5802da802534187e43e6
1 // Copyright (c) the JPEG XL Project Authors. All rights reserved.
2 //
3 // Use of this source code is governed by a BSD-style
4 // license that can be found in the LICENSE file.
6 #include "lib/jpegli/encode_finish.h"
8 #include <cmath>
9 #include <limits>
11 #include "lib/jpegli/error.h"
12 #include "lib/jpegli/memory_manager.h"
13 #include "lib/jpegli/quant.h"
15 #undef HWY_TARGET_INCLUDE
16 #define HWY_TARGET_INCLUDE "lib/jpegli/encode_finish.cc"
17 #include <hwy/foreach_target.h>
18 #include <hwy/highway.h>
20 #include "lib/jpegli/dct-inl.h"
22 HWY_BEFORE_NAMESPACE();
23 namespace jpegli {
24 namespace HWY_NAMESPACE {
26 // These templates are not found via ADL.
27 using hwy::HWY_NAMESPACE::GetLane;
29 using D = HWY_FULL(float);
30 using DI = HWY_FULL(int32_t);
31 using DI16 = Rebind<int16_t, HWY_FULL(int32_t)>;
33 void ReQuantizeBlock(int16_t* block, const float* qmc, float aq_strength,
34 const float* zero_bias_offset,
35 const float* zero_bias_mul) {
36 D d;
37 DI di;
38 DI16 di16;
39 const auto aq_mul = Set(d, aq_strength);
40 for (size_t k = 0; k < DCTSIZE2; k += Lanes(d)) {
41 const auto in = Load(di16, block + k);
42 const auto val = ConvertTo(d, PromoteTo(di, in));
43 const auto q = Load(d, qmc + k);
44 const auto qval = Mul(val, q);
45 const auto zb_offset = Load(d, zero_bias_offset + k);
46 const auto zb_mul = Load(d, zero_bias_mul + k);
47 const auto threshold = Add(zb_offset, Mul(zb_mul, aq_mul));
48 const auto nzero_mask = Ge(Abs(qval), threshold);
49 const auto iqval = IfThenElseZero(nzero_mask, Round(qval));
50 Store(DemoteTo(di16, ConvertTo(di, iqval)), di16, block + k);
54 float BlockError(const int16_t* block, const float* qmc, const float* iqmc,
55 const float aq_strength, const float* zero_bias_offset,
56 const float* zero_bias_mul) {
57 D d;
58 DI di;
59 DI16 di16;
60 auto err = Zero(d);
61 const auto scale = Set(d, 1.0 / 16);
62 const auto aq_mul = Set(d, aq_strength);
63 for (size_t k = 0; k < DCTSIZE2; k += Lanes(d)) {
64 const auto in = Load(di16, block + k);
65 const auto val = ConvertTo(d, PromoteTo(di, in));
66 const auto q = Load(d, qmc + k);
67 const auto qval = Mul(val, q);
68 const auto zb_offset = Load(d, zero_bias_offset + k);
69 const auto zb_mul = Load(d, zero_bias_mul + k);
70 const auto threshold = Add(zb_offset, Mul(zb_mul, aq_mul));
71 const auto nzero_mask = Ge(Abs(qval), threshold);
72 const auto iqval = IfThenElseZero(nzero_mask, Round(qval));
73 const auto invq = Load(d, iqmc + k);
74 const auto rval = Mul(iqval, invq);
75 const auto diff = Mul(Sub(val, rval), scale);
76 err = Add(err, Mul(diff, diff));
78 return GetLane(SumOfLanes(d, err));
81 void ComputeInverseWeights(const float* qmc, float* iqmc) {
82 for (int k = 0; k < 64; ++k) {
83 iqmc[k] = 1.0f / qmc[k];
87 float ComputePSNR(j_compress_ptr cinfo, int sampling) {
88 jpeg_comp_master* m = cinfo->master;
89 InitQuantizer(cinfo, QuantPass::SEARCH_SECOND_PASS);
90 double error = 0.0;
91 size_t num = 0;
92 for (int c = 0; c < cinfo->num_components; ++c) {
93 jpeg_component_info* comp = &cinfo->comp_info[c];
94 const float* qmc = m->quant_mul[c];
95 const int h_factor = m->h_factor[c];
96 const int v_factor = m->v_factor[c];
97 const float* zero_bias_offset = m->zero_bias_offset[c];
98 const float* zero_bias_mul = m->zero_bias_mul[c];
99 HWY_ALIGN float iqmc[64];
100 ComputeInverseWeights(qmc, iqmc);
101 for (JDIMENSION by = 0; by < comp->height_in_blocks; by += sampling) {
102 JBLOCKARRAY blocks = GetBlockRow(cinfo, c, by);
103 const float* qf = m->quant_field.Row(by * v_factor);
104 for (JDIMENSION bx = 0; bx < comp->width_in_blocks; bx += sampling) {
105 error += BlockError(&blocks[0][bx][0], qmc, iqmc, qf[bx * h_factor],
106 zero_bias_offset, zero_bias_mul);
107 num += DCTSIZE2;
111 return 4.3429448f * log(num / (error / 255. / 255.));
114 void ReQuantizeCoeffs(j_compress_ptr cinfo) {
115 jpeg_comp_master* m = cinfo->master;
116 InitQuantizer(cinfo, QuantPass::SEARCH_SECOND_PASS);
117 for (int c = 0; c < cinfo->num_components; ++c) {
118 jpeg_component_info* comp = &cinfo->comp_info[c];
119 const float* qmc = m->quant_mul[c];
120 const int h_factor = m->h_factor[c];
121 const int v_factor = m->v_factor[c];
122 const float* zero_bias_offset = m->zero_bias_offset[c];
123 const float* zero_bias_mul = m->zero_bias_mul[c];
124 for (JDIMENSION by = 0; by < comp->height_in_blocks; ++by) {
125 JBLOCKARRAY block = GetBlockRow(cinfo, c, by);
126 const float* qf = m->quant_field.Row(by * v_factor);
127 for (JDIMENSION bx = 0; bx < comp->width_in_blocks; ++bx) {
128 ReQuantizeBlock(&block[0][bx][0], qmc, qf[bx * h_factor],
129 zero_bias_offset, zero_bias_mul);
135 // NOLINTNEXTLINE(google-readability-namespace-comments)
136 } // namespace HWY_NAMESPACE
137 } // namespace jpegli
138 HWY_AFTER_NAMESPACE();
140 #if HWY_ONCE
141 namespace jpegli {
142 namespace {
143 HWY_EXPORT(ComputePSNR);
144 HWY_EXPORT(ReQuantizeCoeffs);
146 void ReQuantizeCoeffs(j_compress_ptr cinfo) {
147 HWY_DYNAMIC_DISPATCH(ReQuantizeCoeffs)(cinfo);
150 float ComputePSNR(j_compress_ptr cinfo, int sampling) {
151 return HWY_DYNAMIC_DISPATCH(ComputePSNR)(cinfo, sampling);
154 void UpdateDistance(j_compress_ptr cinfo, float distance) {
155 float distances[NUM_QUANT_TBLS] = {distance, distance, distance};
156 SetQuantMatrices(cinfo, distances, /*add_two_chroma_tables=*/true);
159 float Clamp(float val, float minval, float maxval) {
160 return std::max(minval, std::min(maxval, val));
163 #define PSNR_SEARCH_DBG 0
165 float FindDistanceForPSNR(j_compress_ptr cinfo) {
166 constexpr int kMaxIters = 20;
167 const float psnr_target = cinfo->master->psnr_target;
168 const float tolerance = cinfo->master->psnr_tolerance;
169 const float min_dist = cinfo->master->min_distance;
170 const float max_dist = cinfo->master->max_distance;
171 float d = Clamp(1.0f, min_dist, max_dist);
172 for (int sampling : {4, 1}) {
173 float best_diff = std::numeric_limits<float>::max();
174 float best_distance = 0.0f;
175 float best_psnr = 0.0;
176 float dmin = min_dist;
177 float dmax = max_dist;
178 bool found_lower_bound = false;
179 bool found_upper_bound = false;
180 for (int i = 0; i < kMaxIters; ++i) {
181 UpdateDistance(cinfo, d);
182 float psnr = ComputePSNR(cinfo, sampling);
183 if (psnr > psnr_target) {
184 dmin = d;
185 found_lower_bound = true;
186 } else {
187 dmax = d;
188 found_upper_bound = true;
190 #if (PSNR_SEARCH_DBG > 1)
191 printf("sampling %d iter %2d d %7.4f psnr %.2f", sampling, i, d, psnr);
192 if (found_upper_bound && found_lower_bound) {
193 printf(" d-interval: [ %7.4f .. %7.4f ]", dmin, dmax);
195 printf("\n");
196 #endif
197 float diff = std::abs(psnr - psnr_target);
198 if (diff < best_diff) {
199 best_diff = diff;
200 best_distance = d;
201 best_psnr = psnr;
203 if (diff < tolerance * psnr_target || dmin == dmax) {
204 break;
206 if (!found_lower_bound || !found_upper_bound) {
207 d *= std::exp(0.15f * (psnr - psnr_target));
208 } else {
209 d = 0.5f * (dmin + dmax);
211 d = Clamp(d, min_dist, max_dist);
213 d = best_distance;
214 if (sampling == 1 && PSNR_SEARCH_DBG) {
215 printf("Final PSNR %.2f at distance %.4f\n", best_psnr, d);
216 } else {
217 (void)best_psnr;
220 return d;
223 } // namespace
225 void QuantizetoPSNR(j_compress_ptr cinfo) {
226 float distance = FindDistanceForPSNR(cinfo);
227 UpdateDistance(cinfo, distance);
228 ReQuantizeCoeffs(cinfo);
231 } // namespace jpegli
232 #endif // HWY_ONCE