third_party/jpeg-xl/lib/jpegli/encode_finish.cc

   1 // Copyright (c) the JPEG XL Project Authors. All rights reserved.
   2 //
   3 // Use of this source code is governed by a BSD-style
   4 // license that can be found in the LICENSE file.
   5
   6 #include "lib/jpegli/encode_finish.h"
   7
   8 #include <cmath>
   9 #include <limits>
  10
  11 #include "lib/jpegli/error.h"
  12 #include "lib/jpegli/memory_manager.h"
  13 #include "lib/jpegli/quant.h"
  14
  15 #undef HWY_TARGET_INCLUDE
  16 #define HWY_TARGET_INCLUDE "lib/jpegli/encode_finish.cc"
  17 #include <hwy/foreach_target.h>
  18 #include <hwy/highway.h>
  19
  20 #include "lib/jpegli/dct-inl.h"
  21
  22 HWY_BEFORE_NAMESPACE();
  23 namespace jpegli {
  24 namespace HWY_NAMESPACE {
  25
  26 // These templates are not found via ADL.
  27 using hwy::HWY_NAMESPACE::GetLane;
  28
  29 using D = HWY_FULL(float);
  30 using DI = HWY_FULL(int32_t);
  31 using DI16 = Rebind<int16_t, HWY_FULL(int32_t)>;
  32
  33 void ReQuantizeBlock(int16_t* block, const float* qmc, float aq_strength,
  34                      const float* zero_bias_offset,
  35                      const float* zero_bias_mul) {
  36   D d;
  37   DI di;
  38   DI16 di16;
  39   const auto aq_mul = Set(d, aq_strength);
  40   for (size_t k = 0; k < DCTSIZE2; k += Lanes(d)) {
  41     const auto in = Load(di16, block + k);
  42     const auto val = ConvertTo(d, PromoteTo(di, in));
  43     const auto q = Load(d, qmc + k);
  44     const auto qval = Mul(val, q);
  45     const auto zb_offset = Load(d, zero_bias_offset + k);
  46     const auto zb_mul = Load(d, zero_bias_mul + k);
  47     const auto threshold = Add(zb_offset, Mul(zb_mul, aq_mul));
  48     const auto nzero_mask = Ge(Abs(qval), threshold);
  49     const auto iqval = IfThenElseZero(nzero_mask, Round(qval));
  50     Store(DemoteTo(di16, ConvertTo(di, iqval)), di16, block + k);
  51   }
  52 }
  53
  54 float BlockError(const int16_t* block, const float* qmc, const float* iqmc,
  55                  const float aq_strength, const float* zero_bias_offset,
  56                  const float* zero_bias_mul) {
  57   D d;
  58   DI di;
  59   DI16 di16;
  60   auto err = Zero(d);
  61   const auto scale = Set(d, 1.0 / 16);
  62   const auto aq_mul = Set(d, aq_strength);
  63   for (size_t k = 0; k < DCTSIZE2; k += Lanes(d)) {
  64     const auto in = Load(di16, block + k);
  65     const auto val = ConvertTo(d, PromoteTo(di, in));
  66     const auto q = Load(d, qmc + k);
  67     const auto qval = Mul(val, q);
  68     const auto zb_offset = Load(d, zero_bias_offset + k);
  69     const auto zb_mul = Load(d, zero_bias_mul + k);
  70     const auto threshold = Add(zb_offset, Mul(zb_mul, aq_mul));
  71     const auto nzero_mask = Ge(Abs(qval), threshold);
  72     const auto iqval = IfThenElseZero(nzero_mask, Round(qval));
  73     const auto invq = Load(d, iqmc + k);
  74     const auto rval = Mul(iqval, invq);
  75     const auto diff = Mul(Sub(val, rval), scale);
  76     err = Add(err, Mul(diff, diff));
  77   }
  78   return GetLane(SumOfLanes(d, err));
  79 }
  80
  81 void ComputeInverseWeights(const float* qmc, float* iqmc) {
  82   for (int k = 0; k < 64; ++k) {
  83     iqmc[k] = 1.0f / qmc[k];
  84   }
  85 }
  86
  87 float ComputePSNR(j_compress_ptr cinfo, int sampling) {
  88   jpeg_comp_master* m = cinfo->master;
  89   InitQuantizer(cinfo, QuantPass::SEARCH_SECOND_PASS);
  90   double error = 0.0;
  91   size_t num = 0;
  92   for (int c = 0; c < cinfo->num_components; ++c) {
  93     jpeg_component_info* comp = &cinfo->comp_info[c];
  94     const float* qmc = m->quant_mul[c];
  95     const int h_factor = m->h_factor[c];
  96     const int v_factor = m->v_factor[c];
  97     const float* zero_bias_offset = m->zero_bias_offset[c];
  98     const float* zero_bias_mul = m->zero_bias_mul[c];
  99     HWY_ALIGN float iqmc[64];
 100     ComputeInverseWeights(qmc, iqmc);
 101     for (JDIMENSION by = 0; by < comp->height_in_blocks; by += sampling) {
 102       JBLOCKARRAY blocks = GetBlockRow(cinfo, c, by);
 103       const float* qf = m->quant_field.Row(by * v_factor);
 104       for (JDIMENSION bx = 0; bx < comp->width_in_blocks; bx += sampling) {
 105         error += BlockError(&blocks[0][bx][0], qmc, iqmc, qf[bx * h_factor],
 106                             zero_bias_offset, zero_bias_mul);
 107         num += DCTSIZE2;
 108       }
 109     }
 110   }
 111   return 4.3429448f * log(num / (error / 255. / 255.));
 112 }
 113
 114 void ReQuantizeCoeffs(j_compress_ptr cinfo) {
 115   jpeg_comp_master* m = cinfo->master;
 116   InitQuantizer(cinfo, QuantPass::SEARCH_SECOND_PASS);
 117   for (int c = 0; c < cinfo->num_components; ++c) {
 118     jpeg_component_info* comp = &cinfo->comp_info[c];
 119     const float* qmc = m->quant_mul[c];
 120     const int h_factor = m->h_factor[c];
 121     const int v_factor = m->v_factor[c];
 122     const float* zero_bias_offset = m->zero_bias_offset[c];
 123     const float* zero_bias_mul = m->zero_bias_mul[c];
 124     for (JDIMENSION by = 0; by < comp->height_in_blocks; ++by) {
 125       JBLOCKARRAY block = GetBlockRow(cinfo, c, by);
 126       const float* qf = m->quant_field.Row(by * v_factor);
 127       for (JDIMENSION bx = 0; bx < comp->width_in_blocks; ++bx) {
 128         ReQuantizeBlock(&block[0][bx][0], qmc, qf[bx * h_factor],
 129                         zero_bias_offset, zero_bias_mul);
 130       }
 131     }
 132   }
 133 }
 134
 135 // NOLINTNEXTLINE(google-readability-namespace-comments)
 136 }  // namespace HWY_NAMESPACE
 137 }  // namespace jpegli
 138 HWY_AFTER_NAMESPACE();
 139
 140 #if HWY_ONCE
 141 namespace jpegli {
 142 namespace {
 143 HWY_EXPORT(ComputePSNR);
 144 HWY_EXPORT(ReQuantizeCoeffs);
 145
 146 void ReQuantizeCoeffs(j_compress_ptr cinfo) {
 147   HWY_DYNAMIC_DISPATCH(ReQuantizeCoeffs)(cinfo);
 148 }
 149
 150 float ComputePSNR(j_compress_ptr cinfo, int sampling) {
 151   return HWY_DYNAMIC_DISPATCH(ComputePSNR)(cinfo, sampling);
 152 }
 153
 154 void UpdateDistance(j_compress_ptr cinfo, float distance) {
 155   float distances[NUM_QUANT_TBLS] = {distance, distance, distance};
 156   SetQuantMatrices(cinfo, distances, /*add_two_chroma_tables=*/true);
 157 }
 158
 159 float Clamp(float val, float minval, float maxval) {
 160   return std::max(minval, std::min(maxval, val));
 161 }
 162
 163 #define PSNR_SEARCH_DBG 0
 164
 165 float FindDistanceForPSNR(j_compress_ptr cinfo) {
 166   constexpr int kMaxIters = 20;
 167   const float psnr_target = cinfo->master->psnr_target;
 168   const float tolerance = cinfo->master->psnr_tolerance;
 169   const float min_dist = cinfo->master->min_distance;
 170   const float max_dist = cinfo->master->max_distance;
 171   float d = Clamp(1.0f, min_dist, max_dist);
 172   for (int sampling : {4, 1}) {
 173     float best_diff = std::numeric_limits<float>::max();
 174     float best_distance = 0.0f;
 175     float best_psnr = 0.0;
 176     float dmin = min_dist;
 177     float dmax = max_dist;
 178     bool found_lower_bound = false;
 179     bool found_upper_bound = false;
 180     for (int i = 0; i < kMaxIters; ++i) {
 181       UpdateDistance(cinfo, d);
 182       float psnr = ComputePSNR(cinfo, sampling);
 183       if (psnr > psnr_target) {
 184         dmin = d;
 185         found_lower_bound = true;
 186       } else {
 187         dmax = d;
 188         found_upper_bound = true;
 189       }
 190 #if (PSNR_SEARCH_DBG > 1)
 191       printf("sampling %d iter %2d d %7.4f psnr %.2f", sampling, i, d, psnr);
 192       if (found_upper_bound && found_lower_bound) {
 193         printf("    d-interval: [ %7.4f .. %7.4f ]", dmin, dmax);
 194       }
 195       printf("\n");
 196 #endif
 197       float diff = std::abs(psnr - psnr_target);
 198       if (diff < best_diff) {
 199         best_diff = diff;
 200         best_distance = d;
 201         best_psnr = psnr;
 202       }
 203       if (diff < tolerance * psnr_target || dmin == dmax) {
 204         break;
 205       }
 206       if (!found_lower_bound || !found_upper_bound) {
 207         d *= std::exp(0.15f * (psnr - psnr_target));
 208       } else {
 209         d = 0.5f * (dmin + dmax);
 210       }
 211       d = Clamp(d, min_dist, max_dist);
 212     }
 213     d = best_distance;
 214     if (sampling == 1 && PSNR_SEARCH_DBG) {
 215       printf("Final PSNR %.2f at distance %.4f\n", best_psnr, d);
 216     } else {
 217       (void)best_psnr;
 218     }
 219   }
 220   return d;
 221 }
 222
 223 }  // namespace
 224
 225 void QuantizetoPSNR(j_compress_ptr cinfo) {
 226   float distance = FindDistanceForPSNR(cinfo);
 227   UpdateDistance(cinfo, distance);
 228   ReQuantizeCoeffs(cinfo);
 229 }
 230
 231 }  // namespace jpegli
 232 #endif  // HWY_ONCE