Backed out 3 changesets (bug 1790375) for causing wd failures on fetch_error.py....
[gecko.git] / third_party / jpeg-xl / lib / jpegli / upsample.cc
blob5559aa78a65e6b342a3a640c5097bfdfc255602f
1 // Copyright (c) the JPEG XL Project Authors. All rights reserved.
2 //
3 // Use of this source code is governed by a BSD-style
4 // license that can be found in the LICENSE file.
6 #include "lib/jpegli/upsample.h"
8 #include <string.h>
10 #undef HWY_TARGET_INCLUDE
11 #define HWY_TARGET_INCLUDE "lib/jpegli/upsample.cc"
12 #include <hwy/foreach_target.h>
13 #include <hwy/highway.h>
15 HWY_BEFORE_NAMESPACE();
16 namespace jpegli {
17 namespace HWY_NAMESPACE {
19 // These templates are not found via ADL.
20 using hwy::HWY_NAMESPACE::Mul;
21 using hwy::HWY_NAMESPACE::MulAdd;
22 using hwy::HWY_NAMESPACE::Vec;
24 #if HWY_CAP_GE512
25 using hwy::HWY_NAMESPACE::Half;
26 using hwy::HWY_NAMESPACE::Vec;
27 template <size_t i, class DF, class V>
28 HWY_INLINE Vec<Half<Half<DF>>> Quarter(const DF df, V v) {
29 using HF = Half<DF>;
30 using HHF = Half<HF>;
31 auto half = i >= 2 ? UpperHalf(HF(), v) : LowerHalf(HF(), v);
32 return i & 1 ? UpperHalf(HHF(), half) : LowerHalf(HHF(), half);
35 template <class DF, class V>
36 HWY_INLINE Vec<DF> Concat4(const DF df, V v0, V v1, V v2, V v3) {
37 using HF = Half<DF>;
38 return Combine(DF(), Combine(HF(), v3, v2), Combine(HF(), v1, v0));
41 #endif
// Writes the lanes of v0 and v1 to mem in alternating order, i.e.
// v0[0], v1[0], v0[1], v1[1], ... for a total of 2 * Lanes(df) elements.
// mem must be aligned: every write goes through the aligned Store.
template <class DF, class V, typename T>
void StoreInterleaved(const DF df, V v0, V v1, T* mem) {
  static_assert(sizeof(T) == 4, "only use StoreInterleaved for 4-byte types");
#if HWY_TARGET == HWY_SCALAR
  // One lane per vector: the two values simply end up next to each other.
  Store(v0, df, mem);
  Store(v1, df, mem + 1);
#elif !HWY_CAP_GE256
  // Vectors narrower than 256 bits: the lower/upper interleaves are already
  // the first and second output vectors.
  T* const second_vector = mem + Lanes(df);
  Store(InterleaveLower(df, v0, v1), df, mem);
  Store(InterleaveUpper(df, v0, v1), df, second_vector);
#else
  // Highway's Interleave ops work independently inside each 128-bit block, so
  // for wider vectors the interleaved blocks must be put back into memory
  // order before they are stored.
  const auto zipped_lo = InterleaveLower(df, v0, v1);
  const auto zipped_hi = InterleaveUpper(df, v0, v1);
  T* const second_vector = mem + Lanes(df);
  if (HWY_CAP_GE512 && Lanes(df) != 8) {
#if HWY_CAP_GE512
    // Four blocks per vector: blocks 0 and 1 of both interleaves form the
    // first output vector, blocks 2 and 3 the second one.
    Store(Concat4(df, Quarter<0>(df, zipped_lo), Quarter<0>(df, zipped_hi),
                  Quarter<1>(df, zipped_lo), Quarter<1>(df, zipped_hi)),
          df, mem);
    Store(Concat4(df, Quarter<2>(df, zipped_lo), Quarter<2>(df, zipped_hi),
                  Quarter<3>(df, zipped_lo), Quarter<3>(df, zipped_hi)),
          df, second_vector);
#endif
  } else {
    // Two blocks per vector (8 lanes of a 4-byte type).
    Store(ConcatLowerLower(df, zipped_hi, zipped_lo), df, mem);
    Store(ConcatUpperUpper(df, zipped_hi, zipped_lo), df, second_vector);
  }
#endif
}
75 void Upsample2Horizontal(float* JXL_RESTRICT row,
76 float* JXL_RESTRICT scratch_space, size_t len_out) {
77 HWY_FULL(float) df;
78 auto threefour = Set(df, 0.75f);
79 auto onefour = Set(df, 0.25f);
80 const size_t len_in = (len_out + 1) >> 1;
81 memcpy(scratch_space, row, len_in * sizeof(row[0]));
82 scratch_space[-1] = scratch_space[0];
83 scratch_space[len_in] = scratch_space[len_in - 1];
84 for (size_t x = 0; x < len_in; x += Lanes(df)) {
85 auto current = Mul(Load(df, scratch_space + x), threefour);
86 auto prev = LoadU(df, scratch_space + x - 1);
87 auto next = LoadU(df, scratch_space + x + 1);
88 auto left = MulAdd(onefour, prev, current);
89 auto right = MulAdd(onefour, next, current);
90 StoreInterleaved(df, left, right, row + x * 2);
94 void Upsample2Vertical(const float* JXL_RESTRICT row_top,
95 const float* JXL_RESTRICT row_mid,
96 const float* JXL_RESTRICT row_bot,
97 float* JXL_RESTRICT row_out0,
98 float* JXL_RESTRICT row_out1, size_t len) {
99 HWY_FULL(float) df;
100 auto threefour = Set(df, 0.75f);
101 auto onefour = Set(df, 0.25f);
102 for (size_t x = 0; x < len; x += Lanes(df)) {
103 auto it = Load(df, row_top + x);
104 auto im = Load(df, row_mid + x);
105 auto ib = Load(df, row_bot + x);
106 auto im_scaled = Mul(im, threefour);
107 Store(MulAdd(it, onefour, im_scaled), df, row_out0 + x);
108 Store(MulAdd(ib, onefour, im_scaled), df, row_out1 + x);
112 // NOLINTNEXTLINE(google-readability-namespace-comments)
113 } // namespace HWY_NAMESPACE
114 } // namespace jpegli
115 HWY_AFTER_NAMESPACE();
117 #if HWY_ONCE
118 namespace jpegli {
// Export the per-target Upsample2Horizontal / Upsample2Vertical
// implementations compiled above so that the HWY_DYNAMIC_DISPATCH calls in
// the wrappers below can select one of them.
120 HWY_EXPORT(Upsample2Horizontal);
121 HWY_EXPORT(Upsample2Vertical);
123 void Upsample2Horizontal(float* JXL_RESTRICT row,
124 float* JXL_RESTRICT scratch_space, size_t len_out) {
125 return HWY_DYNAMIC_DISPATCH(Upsample2Horizontal)(row, scratch_space, len_out);
128 void Upsample2Vertical(const float* JXL_RESTRICT row_top,
129 const float* JXL_RESTRICT row_mid,
130 const float* JXL_RESTRICT row_bot,
131 float* JXL_RESTRICT row_out0,
132 float* JXL_RESTRICT row_out1, size_t len) {
133 return HWY_DYNAMIC_DISPATCH(Upsample2Vertical)(row_top, row_mid, row_bot,
134 row_out0, row_out1, len);
136 } // namespace jpegli
137 #endif // HWY_ONCE