Bug 1893155 - Part 6: Correct constant for minimum epoch day. r=spidermonkey-reviewer...
[gecko.git] / third_party / dav1d / src / loopfilter_tmpl.c
blob7cc89643e41eadfcf013eb110ce11db069e82eb7
1 /*
2 * Copyright © 2018, VideoLAN and dav1d authors
3 * Copyright © 2018, Two Orioles, LLC
4 * All rights reserved.
6 * Redistribution and use in source and binary forms, with or without
7 * modification, are permitted provided that the following conditions are met:
9 * 1. Redistributions of source code must retain the above copyright notice, this
10 * list of conditions and the following disclaimer.
12 * 2. Redistributions in binary form must reproduce the above copyright notice,
13 * this list of conditions and the following disclaimer in the documentation
14 * and/or other materials provided with the distribution.
16 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
17 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
18 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
19 * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
20 * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
21 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
22 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
23 * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
24 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
25 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28 #include "config.h"
30 #include <stdlib.h>
32 #include "common/attributes.h"
33 #include "common/intops.h"
35 #include "src/loopfilter.h"
37 static NOINLINE void
38 loop_filter(pixel *dst, int E, int I, int H,
39 const ptrdiff_t stridea, const ptrdiff_t strideb, const int wd
40 HIGHBD_DECL_SUFFIX)
42 const int bitdepth_min_8 = bitdepth_from_max(bitdepth_max) - 8;
43 const int F = 1 << bitdepth_min_8;
44 E <<= bitdepth_min_8;
45 I <<= bitdepth_min_8;
46 H <<= bitdepth_min_8;
48 for (int i = 0; i < 4; i++, dst += stridea) {
49 int p6, p5, p4, p3, p2;
50 int p1 = dst[strideb * -2], p0 = dst[strideb * -1];
51 int q0 = dst[strideb * +0], q1 = dst[strideb * +1];
52 int q2, q3, q4, q5, q6;
53 int fm, flat8out, flat8in;
55 fm = abs(p1 - p0) <= I && abs(q1 - q0) <= I &&
56 abs(p0 - q0) * 2 + (abs(p1 - q1) >> 1) <= E;
58 if (wd > 4) {
59 p2 = dst[strideb * -3];
60 q2 = dst[strideb * +2];
62 fm &= abs(p2 - p1) <= I && abs(q2 - q1) <= I;
64 if (wd > 6) {
65 p3 = dst[strideb * -4];
66 q3 = dst[strideb * +3];
68 fm &= abs(p3 - p2) <= I && abs(q3 - q2) <= I;
71 if (!fm) continue;
73 if (wd >= 16) {
74 p6 = dst[strideb * -7];
75 p5 = dst[strideb * -6];
76 p4 = dst[strideb * -5];
77 q4 = dst[strideb * +4];
78 q5 = dst[strideb * +5];
79 q6 = dst[strideb * +6];
81 flat8out = abs(p6 - p0) <= F && abs(p5 - p0) <= F &&
82 abs(p4 - p0) <= F && abs(q4 - q0) <= F &&
83 abs(q5 - q0) <= F && abs(q6 - q0) <= F;
86 if (wd >= 6)
87 flat8in = abs(p2 - p0) <= F && abs(p1 - p0) <= F &&
88 abs(q1 - q0) <= F && abs(q2 - q0) <= F;
90 if (wd >= 8)
91 flat8in &= abs(p3 - p0) <= F && abs(q3 - q0) <= F;
93 if (wd >= 16 && (flat8out & flat8in)) {
94 dst[strideb * -6] = (p6 + p6 + p6 + p6 + p6 + p6 * 2 + p5 * 2 +
95 p4 * 2 + p3 + p2 + p1 + p0 + q0 + 8) >> 4;
96 dst[strideb * -5] = (p6 + p6 + p6 + p6 + p6 + p5 * 2 + p4 * 2 +
97 p3 * 2 + p2 + p1 + p0 + q0 + q1 + 8) >> 4;
98 dst[strideb * -4] = (p6 + p6 + p6 + p6 + p5 + p4 * 2 + p3 * 2 +
99 p2 * 2 + p1 + p0 + q0 + q1 + q2 + 8) >> 4;
100 dst[strideb * -3] = (p6 + p6 + p6 + p5 + p4 + p3 * 2 + p2 * 2 +
101 p1 * 2 + p0 + q0 + q1 + q2 + q3 + 8) >> 4;
102 dst[strideb * -2] = (p6 + p6 + p5 + p4 + p3 + p2 * 2 + p1 * 2 +
103 p0 * 2 + q0 + q1 + q2 + q3 + q4 + 8) >> 4;
104 dst[strideb * -1] = (p6 + p5 + p4 + p3 + p2 + p1 * 2 + p0 * 2 +
105 q0 * 2 + q1 + q2 + q3 + q4 + q5 + 8) >> 4;
106 dst[strideb * +0] = (p5 + p4 + p3 + p2 + p1 + p0 * 2 + q0 * 2 +
107 q1 * 2 + q2 + q3 + q4 + q5 + q6 + 8) >> 4;
108 dst[strideb * +1] = (p4 + p3 + p2 + p1 + p0 + q0 * 2 + q1 * 2 +
109 q2 * 2 + q3 + q4 + q5 + q6 + q6 + 8) >> 4;
110 dst[strideb * +2] = (p3 + p2 + p1 + p0 + q0 + q1 * 2 + q2 * 2 +
111 q3 * 2 + q4 + q5 + q6 + q6 + q6 + 8) >> 4;
112 dst[strideb * +3] = (p2 + p1 + p0 + q0 + q1 + q2 * 2 + q3 * 2 +
113 q4 * 2 + q5 + q6 + q6 + q6 + q6 + 8) >> 4;
114 dst[strideb * +4] = (p1 + p0 + q0 + q1 + q2 + q3 * 2 + q4 * 2 +
115 q5 * 2 + q6 + q6 + q6 + q6 + q6 + 8) >> 4;
116 dst[strideb * +5] = (p0 + q0 + q1 + q2 + q3 + q4 * 2 + q5 * 2 +
117 q6 * 2 + q6 + q6 + q6 + q6 + q6 + 8) >> 4;
118 } else if (wd >= 8 && flat8in) {
119 dst[strideb * -3] = (p3 + p3 + p3 + 2 * p2 + p1 + p0 + q0 + 4) >> 3;
120 dst[strideb * -2] = (p3 + p3 + p2 + 2 * p1 + p0 + q0 + q1 + 4) >> 3;
121 dst[strideb * -1] = (p3 + p2 + p1 + 2 * p0 + q0 + q1 + q2 + 4) >> 3;
122 dst[strideb * +0] = (p2 + p1 + p0 + 2 * q0 + q1 + q2 + q3 + 4) >> 3;
123 dst[strideb * +1] = (p1 + p0 + q0 + 2 * q1 + q2 + q3 + q3 + 4) >> 3;
124 dst[strideb * +2] = (p0 + q0 + q1 + 2 * q2 + q3 + q3 + q3 + 4) >> 3;
125 } else if (wd == 6 && flat8in) {
126 dst[strideb * -2] = (p2 + 2 * p2 + 2 * p1 + 2 * p0 + q0 + 4) >> 3;
127 dst[strideb * -1] = (p2 + 2 * p1 + 2 * p0 + 2 * q0 + q1 + 4) >> 3;
128 dst[strideb * +0] = (p1 + 2 * p0 + 2 * q0 + 2 * q1 + q2 + 4) >> 3;
129 dst[strideb * +1] = (p0 + 2 * q0 + 2 * q1 + 2 * q2 + q2 + 4) >> 3;
130 } else {
131 const int hev = abs(p1 - p0) > H || abs(q1 - q0) > H;
133 #define iclip_diff(v) iclip(v, -128 * (1 << bitdepth_min_8), \
134 128 * (1 << bitdepth_min_8) - 1)
136 if (hev) {
137 int f = iclip_diff(p1 - q1), f1, f2;
138 f = iclip_diff(3 * (q0 - p0) + f);
140 f1 = imin(f + 4, (128 << bitdepth_min_8) - 1) >> 3;
141 f2 = imin(f + 3, (128 << bitdepth_min_8) - 1) >> 3;
143 dst[strideb * -1] = iclip_pixel(p0 + f2);
144 dst[strideb * +0] = iclip_pixel(q0 - f1);
145 } else {
146 int f = iclip_diff(3 * (q0 - p0)), f1, f2;
148 f1 = imin(f + 4, (128 << bitdepth_min_8) - 1) >> 3;
149 f2 = imin(f + 3, (128 << bitdepth_min_8) - 1) >> 3;
151 dst[strideb * -1] = iclip_pixel(p0 + f2);
152 dst[strideb * +0] = iclip_pixel(q0 - f1);
154 f = (f1 + 1) >> 1;
155 dst[strideb * -2] = iclip_pixel(p1 + f);
156 dst[strideb * +1] = iclip_pixel(q1 - f);
158 #undef iclip_diff
163 static void loop_filter_h_sb128y_c(pixel *dst, const ptrdiff_t stride,
164 const uint32_t *const vmask,
165 const uint8_t (*l)[4], ptrdiff_t b4_stride,
166 const Av1FilterLUT *lut, const int h
167 HIGHBD_DECL_SUFFIX)
169 const unsigned vm = vmask[0] | vmask[1] | vmask[2];
170 for (unsigned y = 1; vm & ~(y - 1);
171 y <<= 1, dst += 4 * PXSTRIDE(stride), l += b4_stride)
173 if (vm & y) {
174 const int L = l[0][0] ? l[0][0] : l[-1][0];
175 if (!L) continue;
176 const int H = L >> 4;
177 const int E = lut->e[L], I = lut->i[L];
178 const int idx = (vmask[2] & y) ? 2 : !!(vmask[1] & y);
179 loop_filter(dst, E, I, H, PXSTRIDE(stride), 1, 4 << idx
180 HIGHBD_TAIL_SUFFIX);
185 static void loop_filter_v_sb128y_c(pixel *dst, const ptrdiff_t stride,
186 const uint32_t *const vmask,
187 const uint8_t (*l)[4], ptrdiff_t b4_stride,
188 const Av1FilterLUT *lut, const int w
189 HIGHBD_DECL_SUFFIX)
191 const unsigned vm = vmask[0] | vmask[1] | vmask[2];
192 for (unsigned x = 1; vm & ~(x - 1); x <<= 1, dst += 4, l++) {
193 if (vm & x) {
194 const int L = l[0][0] ? l[0][0] : l[-b4_stride][0];
195 if (!L) continue;
196 const int H = L >> 4;
197 const int E = lut->e[L], I = lut->i[L];
198 const int idx = (vmask[2] & x) ? 2 : !!(vmask[1] & x);
199 loop_filter(dst, E, I, H, 1, PXSTRIDE(stride), 4 << idx
200 HIGHBD_TAIL_SUFFIX);
205 static void loop_filter_h_sb128uv_c(pixel *dst, const ptrdiff_t stride,
206 const uint32_t *const vmask,
207 const uint8_t (*l)[4], ptrdiff_t b4_stride,
208 const Av1FilterLUT *lut, const int h
209 HIGHBD_DECL_SUFFIX)
211 const unsigned vm = vmask[0] | vmask[1];
212 for (unsigned y = 1; vm & ~(y - 1);
213 y <<= 1, dst += 4 * PXSTRIDE(stride), l += b4_stride)
215 if (vm & y) {
216 const int L = l[0][0] ? l[0][0] : l[-1][0];
217 if (!L) continue;
218 const int H = L >> 4;
219 const int E = lut->e[L], I = lut->i[L];
220 const int idx = !!(vmask[1] & y);
221 loop_filter(dst, E, I, H, PXSTRIDE(stride), 1, 4 + 2 * idx
222 HIGHBD_TAIL_SUFFIX);
227 static void loop_filter_v_sb128uv_c(pixel *dst, const ptrdiff_t stride,
228 const uint32_t *const vmask,
229 const uint8_t (*l)[4], ptrdiff_t b4_stride,
230 const Av1FilterLUT *lut, const int w
231 HIGHBD_DECL_SUFFIX)
233 const unsigned vm = vmask[0] | vmask[1];
234 for (unsigned x = 1; vm & ~(x - 1); x <<= 1, dst += 4, l++) {
235 if (vm & x) {
236 const int L = l[0][0] ? l[0][0] : l[-b4_stride][0];
237 if (!L) continue;
238 const int H = L >> 4;
239 const int E = lut->e[L], I = lut->i[L];
240 const int idx = !!(vmask[1] & x);
241 loop_filter(dst, E, I, H, 1, PXSTRIDE(stride), 4 + 2 * idx
242 HIGHBD_TAIL_SUFFIX);
247 #if HAVE_ASM
248 #if ARCH_AARCH64 || ARCH_ARM
249 #include "src/arm/loopfilter.h"
250 #elif ARCH_LOONGARCH64
251 #include "src/loongarch/loopfilter.h"
252 #elif ARCH_X86
253 #include "src/x86/loopfilter.h"
254 #endif
255 #endif
257 COLD void bitfn(dav1d_loop_filter_dsp_init)(Dav1dLoopFilterDSPContext *const c) {
258 c->loop_filter_sb[0][0] = loop_filter_h_sb128y_c;
259 c->loop_filter_sb[0][1] = loop_filter_v_sb128y_c;
260 c->loop_filter_sb[1][0] = loop_filter_h_sb128uv_c;
261 c->loop_filter_sb[1][1] = loop_filter_v_sb128uv_c;
263 #if HAVE_ASM
264 #if ARCH_AARCH64 || ARCH_ARM
265 loop_filter_dsp_init_arm(c);
266 #elif ARCH_LOONGARCH64
267 loop_filter_dsp_init_loongarch(c);
268 #elif ARCH_X86
269 loop_filter_dsp_init_x86(c);
270 #endif
271 #endif