1 // Copyright (c) the JPEG XL Project Authors. All rights reserved.
3 // Use of this source code is governed by a BSD-style
4 // license that can be found in the LICENSE file.
6 #include "lib/jpegli/encode_finish.h"
11 #include "lib/jpegli/error.h"
12 #include "lib/jpegli/memory_manager.h"
13 #include "lib/jpegli/quant.h"
15 #undef HWY_TARGET_INCLUDE
16 #define HWY_TARGET_INCLUDE "lib/jpegli/encode_finish.cc"
17 #include <hwy/foreach_target.h>
18 #include <hwy/highway.h>
20 #include "lib/jpegli/dct-inl.h"
22 HWY_BEFORE_NAMESPACE();
24 namespace HWY_NAMESPACE
{
26 // These templates are not found via ADL.
27 using hwy::HWY_NAMESPACE::GetLane
;
29 using D
= HWY_FULL(float);
30 using DI
= HWY_FULL(int32_t);
31 using DI16
= Rebind
<int16_t, HWY_FULL(int32_t)>;
33 void ReQuantizeBlock(int16_t* block
, const float* qmc
, float aq_strength
,
34 const float* zero_bias_offset
,
35 const float* zero_bias_mul
) {
39 const auto aq_mul
= Set(d
, aq_strength
);
40 for (size_t k
= 0; k
< DCTSIZE2
; k
+= Lanes(d
)) {
41 const auto in
= Load(di16
, block
+ k
);
42 const auto val
= ConvertTo(d
, PromoteTo(di
, in
));
43 const auto q
= Load(d
, qmc
+ k
);
44 const auto qval
= Mul(val
, q
);
45 const auto zb_offset
= Load(d
, zero_bias_offset
+ k
);
46 const auto zb_mul
= Load(d
, zero_bias_mul
+ k
);
47 const auto threshold
= Add(zb_offset
, Mul(zb_mul
, aq_mul
));
48 const auto nzero_mask
= Ge(Abs(qval
), threshold
);
49 const auto iqval
= IfThenElseZero(nzero_mask
, Round(qval
));
50 Store(DemoteTo(di16
, ConvertTo(di
, iqval
)), di16
, block
+ k
);
54 float BlockError(const int16_t* block
, const float* qmc
, const float* iqmc
,
55 const float aq_strength
, const float* zero_bias_offset
,
56 const float* zero_bias_mul
) {
61 const auto scale
= Set(d
, 1.0 / 16);
62 const auto aq_mul
= Set(d
, aq_strength
);
63 for (size_t k
= 0; k
< DCTSIZE2
; k
+= Lanes(d
)) {
64 const auto in
= Load(di16
, block
+ k
);
65 const auto val
= ConvertTo(d
, PromoteTo(di
, in
));
66 const auto q
= Load(d
, qmc
+ k
);
67 const auto qval
= Mul(val
, q
);
68 const auto zb_offset
= Load(d
, zero_bias_offset
+ k
);
69 const auto zb_mul
= Load(d
, zero_bias_mul
+ k
);
70 const auto threshold
= Add(zb_offset
, Mul(zb_mul
, aq_mul
));
71 const auto nzero_mask
= Ge(Abs(qval
), threshold
);
72 const auto iqval
= IfThenElseZero(nzero_mask
, Round(qval
));
73 const auto invq
= Load(d
, iqmc
+ k
);
74 const auto rval
= Mul(iqval
, invq
);
75 const auto diff
= Mul(Sub(val
, rval
), scale
);
76 err
= Add(err
, Mul(diff
, diff
));
78 return GetLane(SumOfLanes(d
, err
));
81 void ComputeInverseWeights(const float* qmc
, float* iqmc
) {
82 for (int k
= 0; k
< 64; ++k
) {
83 iqmc
[k
] = 1.0f
/ qmc
[k
];
87 float ComputePSNR(j_compress_ptr cinfo
, int sampling
) {
88 jpeg_comp_master
* m
= cinfo
->master
;
89 InitQuantizer(cinfo
, QuantPass::SEARCH_SECOND_PASS
);
92 for (int c
= 0; c
< cinfo
->num_components
; ++c
) {
93 jpeg_component_info
* comp
= &cinfo
->comp_info
[c
];
94 const float* qmc
= m
->quant_mul
[c
];
95 const int h_factor
= m
->h_factor
[c
];
96 const int v_factor
= m
->v_factor
[c
];
97 const float* zero_bias_offset
= m
->zero_bias_offset
[c
];
98 const float* zero_bias_mul
= m
->zero_bias_mul
[c
];
99 HWY_ALIGN
float iqmc
[64];
100 ComputeInverseWeights(qmc
, iqmc
);
101 for (JDIMENSION by
= 0; by
< comp
->height_in_blocks
; by
+= sampling
) {
102 JBLOCKARRAY blocks
= GetBlockRow(cinfo
, c
, by
);
103 const float* qf
= m
->quant_field
.Row(by
* v_factor
);
104 for (JDIMENSION bx
= 0; bx
< comp
->width_in_blocks
; bx
+= sampling
) {
105 error
+= BlockError(&blocks
[0][bx
][0], qmc
, iqmc
, qf
[bx
* h_factor
],
106 zero_bias_offset
, zero_bias_mul
);
111 return 4.3429448f
* log(num
/ (error
/ 255. / 255.));
114 void ReQuantizeCoeffs(j_compress_ptr cinfo
) {
115 jpeg_comp_master
* m
= cinfo
->master
;
116 InitQuantizer(cinfo
, QuantPass::SEARCH_SECOND_PASS
);
117 for (int c
= 0; c
< cinfo
->num_components
; ++c
) {
118 jpeg_component_info
* comp
= &cinfo
->comp_info
[c
];
119 const float* qmc
= m
->quant_mul
[c
];
120 const int h_factor
= m
->h_factor
[c
];
121 const int v_factor
= m
->v_factor
[c
];
122 const float* zero_bias_offset
= m
->zero_bias_offset
[c
];
123 const float* zero_bias_mul
= m
->zero_bias_mul
[c
];
124 for (JDIMENSION by
= 0; by
< comp
->height_in_blocks
; ++by
) {
125 JBLOCKARRAY block
= GetBlockRow(cinfo
, c
, by
);
126 const float* qf
= m
->quant_field
.Row(by
* v_factor
);
127 for (JDIMENSION bx
= 0; bx
< comp
->width_in_blocks
; ++bx
) {
128 ReQuantizeBlock(&block
[0][bx
][0], qmc
, qf
[bx
* h_factor
],
129 zero_bias_offset
, zero_bias_mul
);
135 // NOLINTNEXTLINE(google-readability-namespace-comments)
136 } // namespace HWY_NAMESPACE
137 } // namespace jpegli
138 HWY_AFTER_NAMESPACE();
143 HWY_EXPORT(ComputePSNR
);
144 HWY_EXPORT(ReQuantizeCoeffs
);
146 void ReQuantizeCoeffs(j_compress_ptr cinfo
) {
147 HWY_DYNAMIC_DISPATCH(ReQuantizeCoeffs
)(cinfo
);
150 float ComputePSNR(j_compress_ptr cinfo
, int sampling
) {
151 return HWY_DYNAMIC_DISPATCH(ComputePSNR
)(cinfo
, sampling
);
154 void UpdateDistance(j_compress_ptr cinfo
, float distance
) {
155 float distances
[NUM_QUANT_TBLS
] = {distance
, distance
, distance
};
156 SetQuantMatrices(cinfo
, distances
, /*add_two_chroma_tables=*/true);
159 float Clamp(float val
, float minval
, float maxval
) {
160 return std::max(minval
, std::min(maxval
, val
));
163 #define PSNR_SEARCH_DBG 0
165 float FindDistanceForPSNR(j_compress_ptr cinfo
) {
166 constexpr int kMaxIters
= 20;
167 const float psnr_target
= cinfo
->master
->psnr_target
;
168 const float tolerance
= cinfo
->master
->psnr_tolerance
;
169 const float min_dist
= cinfo
->master
->min_distance
;
170 const float max_dist
= cinfo
->master
->max_distance
;
171 float d
= Clamp(1.0f
, min_dist
, max_dist
);
172 for (int sampling
: {4, 1}) {
173 float best_diff
= std::numeric_limits
<float>::max();
174 float best_distance
= 0.0f
;
175 float best_psnr
= 0.0;
176 float dmin
= min_dist
;
177 float dmax
= max_dist
;
178 bool found_lower_bound
= false;
179 bool found_upper_bound
= false;
180 for (int i
= 0; i
< kMaxIters
; ++i
) {
181 UpdateDistance(cinfo
, d
);
182 float psnr
= ComputePSNR(cinfo
, sampling
);
183 if (psnr
> psnr_target
) {
185 found_lower_bound
= true;
188 found_upper_bound
= true;
190 #if (PSNR_SEARCH_DBG > 1)
191 printf("sampling %d iter %2d d %7.4f psnr %.2f", sampling
, i
, d
, psnr
);
192 if (found_upper_bound
&& found_lower_bound
) {
193 printf(" d-interval: [ %7.4f .. %7.4f ]", dmin
, dmax
);
197 float diff
= std::abs(psnr
- psnr_target
);
198 if (diff
< best_diff
) {
203 if (diff
< tolerance
* psnr_target
|| dmin
== dmax
) {
206 if (!found_lower_bound
|| !found_upper_bound
) {
207 d
*= std::exp(0.15f
* (psnr
- psnr_target
));
209 d
= 0.5f
* (dmin
+ dmax
);
211 d
= Clamp(d
, min_dist
, max_dist
);
214 if (sampling
== 1 && PSNR_SEARCH_DBG
) {
215 printf("Final PSNR %.2f at distance %.4f\n", best_psnr
, d
);
225 void QuantizetoPSNR(j_compress_ptr cinfo
) {
226 float distance
= FindDistanceForPSNR(cinfo
);
227 UpdateDistance(cinfo
, distance
);
228 ReQuantizeCoeffs(cinfo
);
231 } // namespace jpegli