/*
 * Copyright (c) 2016, Alliance for Open Media. All rights reserved
 *
 * This source code is subject to the terms of the BSD 2 Clause License and
 * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
 * was not distributed with this source code in the LICENSE file, you can
 * obtain it at www.aomedia.org/license/software. If the Alliance for Open
 * Media Patent License 1.0 was not distributed with this source code in the
 * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
 */

#include <stdlib.h>

#include "config/aom_config.h"
#include "config/aom_dsp_rtcd.h"

#include "aom_dsp/aom_dsp_common.h"
#include "aom_ports/mem.h"

20 static INLINE
int8_t signed_char_clamp(int t
) {
21 return (int8_t)clamp(t
, -128, 127);
#if CONFIG_AV1_HIGHBITDEPTH
// High bit-depth counterpart of signed_char_clamp(): clamps t to the signed
// range scaled for bit depth bd (x4 for bd == 10, x16 for bd == 12). Any
// other bd, including 8, uses [-128, 127].
static INLINE int16_t signed_char_clamp_high(int t, int bd) {
  switch (bd) {
    case 10: return (int16_t)clamp(t, -128 * 4, 128 * 4 - 1);
    case 12: return (int16_t)clamp(t, -128 * 16, 128 * 16 - 1);
    case 8:
    default: return (int16_t)clamp(t, -128, 128 - 1);
  }
}
#endif

// should we apply any filter at all: 11111111 yes, 00000000 no
//
// Examines two samples on each side of the edge (p1 p0 | q0 q1). Returns -1
// (all bits set) when both inner steps are within `limit` and the weighted
// step across the edge, |p0 - q0| * 2 + |p1 - q1| / 2, is within `blimit`;
// returns 0 as soon as any of those tests is exceeded.
static INLINE int8_t filter_mask2(uint8_t limit, uint8_t blimit, uint8_t p1,
                                  uint8_t p0, uint8_t q0, uint8_t q1) {
  // Each failing test ORs in -1, so mask ends up 0 (all pass) or -1.
  int8_t mask = 0;
  mask |= (abs(p1 - p0) > limit) * -1;
  mask |= (abs(q1 - q0) > limit) * -1;
  mask |= (abs(p0 - q0) * 2 + abs(p1 - q1) / 2 > blimit) * -1;
  // Invert so that "all tests passed" becomes the all-ones value.
  return ~mask;
}

// Filter-enable mask over four samples on each side of the edge
// (p3 p2 p1 p0 | q0 q1 q2 q3). Returns -1 (all bits set) when every
// neighbouring step on both sides is within `limit` and the weighted step
// across the edge, |p0 - q0| * 2 + |p1 - q1| / 2, is within `blimit`;
// otherwise returns 0.
static INLINE int8_t filter_mask(uint8_t limit, uint8_t blimit, uint8_t p3,
                                 uint8_t p2, uint8_t p1, uint8_t p0, uint8_t q0,
                                 uint8_t q1, uint8_t q2, uint8_t q3) {
  // Each failing test ORs in -1, so mask ends up 0 (all pass) or -1.
  int8_t mask = 0;
  mask |= (abs(p3 - p2) > limit) * -1;
  mask |= (abs(p2 - p1) > limit) * -1;
  mask |= (abs(p1 - p0) > limit) * -1;
  mask |= (abs(q1 - q0) > limit) * -1;
  mask |= (abs(q2 - q1) > limit) * -1;
  mask |= (abs(q3 - q2) > limit) * -1;
  mask |= (abs(p0 - q0) * 2 + abs(p1 - q1) / 2 > blimit) * -1;
  return ~mask;
}

// Filter-enable mask over three samples on each side of the edge
// (p2 p1 p0 | q0 q1 q2). Returns -1 (all bits set) when every neighbouring
// step on both sides is within `limit` and the weighted step across the
// edge, |p0 - q0| * 2 + |p1 - q1| / 2, is within `blimit`; otherwise 0.
static INLINE int8_t filter_mask3_chroma(uint8_t limit, uint8_t blimit,
                                         uint8_t p2, uint8_t p1, uint8_t p0,
                                         uint8_t q0, uint8_t q1, uint8_t q2) {
  // Each failing test ORs in -1, so mask ends up 0 (all pass) or -1.
  int8_t mask = 0;
  mask |= (abs(p2 - p1) > limit) * -1;
  mask |= (abs(p1 - p0) > limit) * -1;
  mask |= (abs(q1 - q0) > limit) * -1;
  mask |= (abs(q2 - q1) > limit) * -1;
  mask |= (abs(p0 - q0) * 2 + abs(p1 - q1) / 2 > blimit) * -1;
  return ~mask;
}

// Flatness test over three samples on each side of the edge. Each side is
// compared only against its own innermost sample (p1, p2 against p0; q1, q2
// against q0). Returns -1 (all bits set) when every difference is within
// `thresh`, 0 otherwise.
static INLINE int8_t flat_mask3_chroma(uint8_t thresh, uint8_t p2, uint8_t p1,
                                       uint8_t p0, uint8_t q0, uint8_t q1,
                                       uint8_t q2) {
  // Each failing test ORs in -1, so mask ends up 0 (all pass) or -1.
  int8_t mask = 0;
  mask |= (abs(p1 - p0) > thresh) * -1;
  mask |= (abs(q1 - q0) > thresh) * -1;
  mask |= (abs(p2 - p0) > thresh) * -1;
  mask |= (abs(q2 - q0) > thresh) * -1;
  return ~mask;
}

// Flatness test over four samples on each side of the edge. Each side is
// compared only against its own innermost sample (p1..p3 against p0;
// q1..q3 against q0). Returns -1 (all bits set) when every difference is
// within `thresh`, 0 otherwise.
static INLINE int8_t flat_mask4(uint8_t thresh, uint8_t p3, uint8_t p2,
                                uint8_t p1, uint8_t p0, uint8_t q0, uint8_t q1,
                                uint8_t q2, uint8_t q3) {
  // Each failing test ORs in -1, so mask ends up 0 (all pass) or -1.
  int8_t mask = 0;
  mask |= (abs(p1 - p0) > thresh) * -1;
  mask |= (abs(q1 - q0) > thresh) * -1;
  mask |= (abs(p2 - p0) > thresh) * -1;
  mask |= (abs(q2 - q0) > thresh) * -1;
  mask |= (abs(p3 - p0) > thresh) * -1;
  mask |= (abs(q3 - q0) > thresh) * -1;
  return ~mask;
}

// is there high edge variance internal edge: 11111111 yes, 00000000 no
//
// Returns -1 (all bits set) when either inner step, |p1 - p0| or |q1 - q0|,
// exceeds `thresh`, and 0 otherwise. Unlike the filter/flat masks the result
// is not inverted.
static INLINE int8_t hev_mask(uint8_t thresh, uint8_t p1, uint8_t p0,
                              uint8_t q0, uint8_t q1) {
  int8_t hev = 0;
  hev |= (abs(p1 - p0) > thresh) * -1;
  hev |= (abs(q1 - q0) > thresh) * -1;
  return hev;
}

104 static INLINE
void filter4(int8_t mask
, uint8_t thresh
, uint8_t *op1
,
105 uint8_t *op0
, uint8_t *oq0
, uint8_t *oq1
) {
106 int8_t filter1
, filter2
;
108 const int8_t ps1
= (int8_t)(*op1
^ 0x80);
109 const int8_t ps0
= (int8_t)(*op0
^ 0x80);
110 const int8_t qs0
= (int8_t)(*oq0
^ 0x80);
111 const int8_t qs1
= (int8_t)(*oq1
^ 0x80);
112 const int8_t hev
= hev_mask(thresh
, *op1
, *op0
, *oq0
, *oq1
);
114 // add outer taps if we have high edge variance
115 int8_t filter
= signed_char_clamp(ps1
- qs1
) & hev
;
118 filter
= signed_char_clamp(filter
+ 3 * (qs0
- ps0
)) & mask
;
120 // save bottom 3 bits so that we round one side +4 and the other +3
121 // if it equals 4 we'll set to adjust by -1 to account for the fact
122 // we'd round 3 the other way
123 filter1
= signed_char_clamp(filter
+ 4) >> 3;
124 filter2
= signed_char_clamp(filter
+ 3) >> 3;
126 *oq0
= (uint8_t)(signed_char_clamp(qs0
- filter1
) ^ 0x80);
127 *op0
= (uint8_t)(signed_char_clamp(ps0
+ filter2
) ^ 0x80);
129 // outer tap adjustments
130 filter
= ROUND_POWER_OF_TWO(filter1
, 1) & ~hev
;
132 *oq1
= (uint8_t)(signed_char_clamp(qs1
- filter
) ^ 0x80);
133 *op1
= (uint8_t)(signed_char_clamp(ps1
+ filter
) ^ 0x80);
// Runs filter4() across a horizontal edge. `s` addresses the q0 sample of the
// first column and rows are `p` samples apart, so p0 is at s[-p]. Four
// consecutive columns are filtered in place; blimit, limit and thresh each
// point to a single threshold value.
void aom_lpf_horizontal_4_c(uint8_t *s, int p /* pitch */,
                            const uint8_t *blimit, const uint8_t *limit,
                            const uint8_t *thresh) {
  int i;
  int count = 4;

  // loop filter designed to work using chars so that we can make maximum use
  // of 8 bit simd instructions.
  for (i = 0; i < count; ++i) {
    const uint8_t p1 = s[-2 * p], p0 = s[-p];
    const uint8_t q0 = s[0 * p], q1 = s[1 * p];
    const int8_t mask = filter_mask2(*limit, *blimit, p1, p0, q0, q1);
    filter4(mask, *thresh, s - 2 * p, s - 1 * p, s, s + 1 * p);
    ++s;  // next column along the edge
  }
}

// Filters two adjacent 4-column groups of a horizontal edge: columns at `s`
// use the blimit0/limit0/thresh0 set, columns at `s + 4` use the
// blimit1/limit1/thresh1 set.
void aom_lpf_horizontal_4_dual_c(uint8_t *s, int p, const uint8_t *blimit0,
                                 const uint8_t *limit0, const uint8_t *thresh0,
                                 const uint8_t *blimit1, const uint8_t *limit1,
                                 const uint8_t *thresh1) {
  aom_lpf_horizontal_4_c(s, p, blimit0, limit0, thresh0);
  aom_lpf_horizontal_4_c(s + 4, p, blimit1, limit1, thresh1);
}

// Runs filter4() across a vertical edge. `s` addresses the q0 sample of the
// first row, so p0 is at s[-1]. Four consecutive rows, `pitch` samples
// apart, are filtered in place; blimit, limit and thresh each point to a
// single threshold value.
void aom_lpf_vertical_4_c(uint8_t *s, int pitch, const uint8_t *blimit,
                          const uint8_t *limit, const uint8_t *thresh) {
  int i;
  int count = 4;

  // loop filter designed to work using chars so that we can make maximum use
  // of 8 bit simd instructions.
  for (i = 0; i < count; ++i) {
    const uint8_t p1 = s[-2], p0 = s[-1];
    const uint8_t q0 = s[0], q1 = s[1];
    const int8_t mask = filter_mask2(*limit, *blimit, p1, p0, q0, q1);
    filter4(mask, *thresh, s - 2, s - 1, s, s + 1);
    s += pitch;  // next row along the edge
  }
}

// Filters two adjacent 4-row groups of a vertical edge: rows at `s` use the
// blimit0/limit0/thresh0 set, rows starting at `s + 4 * pitch` use the
// blimit1/limit1/thresh1 set.
void aom_lpf_vertical_4_dual_c(uint8_t *s, int pitch, const uint8_t *blimit0,
                               const uint8_t *limit0, const uint8_t *thresh0,
                               const uint8_t *blimit1, const uint8_t *limit1,
                               const uint8_t *thresh1) {
  aom_lpf_vertical_4_c(s, pitch, blimit0, limit0, thresh0);
  aom_lpf_vertical_4_c(s + 4 * pitch, pitch, blimit1, limit1, thresh1);
}

185 static INLINE
void filter6(int8_t mask
, uint8_t thresh
, int8_t flat
,
186 uint8_t *op2
, uint8_t *op1
, uint8_t *op0
,
187 uint8_t *oq0
, uint8_t *oq1
, uint8_t *oq2
) {
189 const uint8_t p2
= *op2
, p1
= *op1
, p0
= *op0
;
190 const uint8_t q0
= *oq0
, q1
= *oq1
, q2
= *oq2
;
192 // 5-tap filter [1, 2, 2, 2, 1]
193 *op1
= ROUND_POWER_OF_TWO(p2
* 3 + p1
* 2 + p0
* 2 + q0
, 3);
194 *op0
= ROUND_POWER_OF_TWO(p2
+ p1
* 2 + p0
* 2 + q0
* 2 + q1
, 3);
195 *oq0
= ROUND_POWER_OF_TWO(p1
+ p0
* 2 + q0
* 2 + q1
* 2 + q2
, 3);
196 *oq1
= ROUND_POWER_OF_TWO(p0
+ q0
* 2 + q1
* 2 + q2
* 3, 3);
198 filter4(mask
, thresh
, op1
, op0
, oq0
, oq1
);
202 static INLINE
void filter8(int8_t mask
, uint8_t thresh
, int8_t flat
,
203 uint8_t *op3
, uint8_t *op2
, uint8_t *op1
,
204 uint8_t *op0
, uint8_t *oq0
, uint8_t *oq1
,
205 uint8_t *oq2
, uint8_t *oq3
) {
207 const uint8_t p3
= *op3
, p2
= *op2
, p1
= *op1
, p0
= *op0
;
208 const uint8_t q0
= *oq0
, q1
= *oq1
, q2
= *oq2
, q3
= *oq3
;
210 // 7-tap filter [1, 1, 1, 2, 1, 1, 1]
211 *op2
= ROUND_POWER_OF_TWO(p3
+ p3
+ p3
+ 2 * p2
+ p1
+ p0
+ q0
, 3);
212 *op1
= ROUND_POWER_OF_TWO(p3
+ p3
+ p2
+ 2 * p1
+ p0
+ q0
+ q1
, 3);
213 *op0
= ROUND_POWER_OF_TWO(p3
+ p2
+ p1
+ 2 * p0
+ q0
+ q1
+ q2
, 3);
214 *oq0
= ROUND_POWER_OF_TWO(p2
+ p1
+ p0
+ 2 * q0
+ q1
+ q2
+ q3
, 3);
215 *oq1
= ROUND_POWER_OF_TWO(p1
+ p0
+ q0
+ 2 * q1
+ q2
+ q3
+ q3
, 3);
216 *oq2
= ROUND_POWER_OF_TWO(p0
+ q0
+ q1
+ 2 * q2
+ q3
+ q3
+ q3
, 3);
218 filter4(mask
, thresh
, op1
, op0
, oq0
, oq1
);
// Runs filter6() across a horizontal edge. `s` addresses the q0 sample of the
// first column and rows are `p` samples apart. Four consecutive columns are
// filtered in place. The flatness test uses a fixed threshold of 1.
void aom_lpf_horizontal_6_c(uint8_t *s, int p, const uint8_t *blimit,
                            const uint8_t *limit, const uint8_t *thresh) {
  int i;
  int count = 4;

  // loop filter designed to work using chars so that we can make maximum use
  // of 8 bit simd instructions.
  for (i = 0; i < count; ++i) {
    const uint8_t p2 = s[-3 * p], p1 = s[-2 * p], p0 = s[-p];
    const uint8_t q0 = s[0 * p], q1 = s[1 * p], q2 = s[2 * p];

    const int8_t mask =
        filter_mask3_chroma(*limit, *blimit, p2, p1, p0, q0, q1, q2);
    const int8_t flat = flat_mask3_chroma(1, p2, p1, p0, q0, q1, q2);
    filter6(mask, *thresh, flat, s - 3 * p, s - 2 * p, s - 1 * p, s, s + 1 * p,
            s + 2 * p);
    ++s;  // next column along the edge
  }
}

// Filters two adjacent 4-column groups of a horizontal edge with
// aom_lpf_horizontal_6_c(): the group at `s` uses the first threshold set,
// the group at `s + 4` uses the second.
void aom_lpf_horizontal_6_dual_c(uint8_t *s, int p, const uint8_t *blimit0,
                                 const uint8_t *limit0, const uint8_t *thresh0,
                                 const uint8_t *blimit1, const uint8_t *limit1,
                                 const uint8_t *thresh1) {
  aom_lpf_horizontal_6_c(s, p, blimit0, limit0, thresh0);
  aom_lpf_horizontal_6_c(s + 4, p, blimit1, limit1, thresh1);
}

// Runs filter8() across a horizontal edge. `s` addresses the q0 sample of the
// first column and rows are `p` samples apart. Four consecutive columns are
// filtered in place. The flatness test uses a fixed threshold of 1.
void aom_lpf_horizontal_8_c(uint8_t *s, int p, const uint8_t *blimit,
                            const uint8_t *limit, const uint8_t *thresh) {
  int i;
  int count = 4;

  // loop filter designed to work using chars so that we can make maximum use
  // of 8 bit simd instructions.
  for (i = 0; i < count; ++i) {
    const uint8_t p3 = s[-4 * p], p2 = s[-3 * p], p1 = s[-2 * p], p0 = s[-p];
    const uint8_t q0 = s[0 * p], q1 = s[1 * p], q2 = s[2 * p], q3 = s[3 * p];

    const int8_t mask =
        filter_mask(*limit, *blimit, p3, p2, p1, p0, q0, q1, q2, q3);
    const int8_t flat = flat_mask4(1, p3, p2, p1, p0, q0, q1, q2, q3);
    filter8(mask, *thresh, flat, s - 4 * p, s - 3 * p, s - 2 * p, s - 1 * p, s,
            s + 1 * p, s + 2 * p, s + 3 * p);
    ++s;  // next column along the edge
  }
}

// Filters two adjacent 4-column groups of a horizontal edge with
// aom_lpf_horizontal_8_c(): the group at `s` uses the first threshold set,
// the group at `s + 4` uses the second.
void aom_lpf_horizontal_8_dual_c(uint8_t *s, int p, const uint8_t *blimit0,
                                 const uint8_t *limit0, const uint8_t *thresh0,
                                 const uint8_t *blimit1, const uint8_t *limit1,
                                 const uint8_t *thresh1) {
  aom_lpf_horizontal_8_c(s, p, blimit0, limit0, thresh0);
  aom_lpf_horizontal_8_c(s + 4, p, blimit1, limit1, thresh1);
}

// Runs filter6() across a vertical edge. `s` addresses the q0 sample of the
// first row, so p0 is at s[-1]. Four consecutive rows, `pitch` samples
// apart, are filtered in place. The flatness test uses a fixed threshold
// of 1.
void aom_lpf_vertical_6_c(uint8_t *s, int pitch, const uint8_t *blimit,
                          const uint8_t *limit, const uint8_t *thresh) {
  int i;
  int count = 4;

  for (i = 0; i < count; ++i) {
    const uint8_t p2 = s[-3], p1 = s[-2], p0 = s[-1];
    const uint8_t q0 = s[0], q1 = s[1], q2 = s[2];
    const int8_t mask =
        filter_mask3_chroma(*limit, *blimit, p2, p1, p0, q0, q1, q2);
    const int8_t flat = flat_mask3_chroma(1, p2, p1, p0, q0, q1, q2);
    filter6(mask, *thresh, flat, s - 3, s - 2, s - 1, s, s + 1, s + 2);
    s += pitch;  // next row along the edge
  }
}

// Filters two adjacent 4-row groups of a vertical edge with
// aom_lpf_vertical_6_c(): rows at `s` use the first threshold set, rows
// starting at `s + 4 * pitch` use the second.
void aom_lpf_vertical_6_dual_c(uint8_t *s, int pitch, const uint8_t *blimit0,
                               const uint8_t *limit0, const uint8_t *thresh0,
                               const uint8_t *blimit1, const uint8_t *limit1,
                               const uint8_t *thresh1) {
  aom_lpf_vertical_6_c(s, pitch, blimit0, limit0, thresh0);
  aom_lpf_vertical_6_c(s + 4 * pitch, pitch, blimit1, limit1, thresh1);
}

// Runs filter8() across a vertical edge. `s` addresses the q0 sample of the
// first row, so p0 is at s[-1]. Four consecutive rows, `pitch` samples
// apart, are filtered in place. The flatness test uses a fixed threshold
// of 1.
void aom_lpf_vertical_8_c(uint8_t *s, int pitch, const uint8_t *blimit,
                          const uint8_t *limit, const uint8_t *thresh) {
  int i;
  int count = 4;

  for (i = 0; i < count; ++i) {
    const uint8_t p3 = s[-4], p2 = s[-3], p1 = s[-2], p0 = s[-1];
    const uint8_t q0 = s[0], q1 = s[1], q2 = s[2], q3 = s[3];
    const int8_t mask =
        filter_mask(*limit, *blimit, p3, p2, p1, p0, q0, q1, q2, q3);
    const int8_t flat = flat_mask4(1, p3, p2, p1, p0, q0, q1, q2, q3);
    filter8(mask, *thresh, flat, s - 4, s - 3, s - 2, s - 1, s, s + 1, s + 2,
            s + 3);
    s += pitch;  // next row along the edge
  }
}

// Filters two adjacent 4-row groups of a vertical edge with
// aom_lpf_vertical_8_c(): rows at `s` use the first threshold set, rows
// starting at `s + 4 * pitch` use the second.
void aom_lpf_vertical_8_dual_c(uint8_t *s, int pitch, const uint8_t *blimit0,
                               const uint8_t *limit0, const uint8_t *thresh0,
                               const uint8_t *blimit1, const uint8_t *limit1,
                               const uint8_t *thresh1) {
  aom_lpf_vertical_8_c(s, pitch, blimit0, limit0, thresh0);
  aom_lpf_vertical_8_c(s + 4 * pitch, pitch, blimit1, limit1, thresh1);
}

327 static INLINE
void filter14(int8_t mask
, uint8_t thresh
, int8_t flat
,
328 int8_t flat2
, uint8_t *op6
, uint8_t *op5
,
329 uint8_t *op4
, uint8_t *op3
, uint8_t *op2
,
330 uint8_t *op1
, uint8_t *op0
, uint8_t *oq0
,
331 uint8_t *oq1
, uint8_t *oq2
, uint8_t *oq3
,
332 uint8_t *oq4
, uint8_t *oq5
, uint8_t *oq6
) {
333 if (flat2
&& flat
&& mask
) {
334 const uint8_t p6
= *op6
, p5
= *op5
, p4
= *op4
, p3
= *op3
, p2
= *op2
,
335 p1
= *op1
, p0
= *op0
;
336 const uint8_t q0
= *oq0
, q1
= *oq1
, q2
= *oq2
, q3
= *oq3
, q4
= *oq4
,
337 q5
= *oq5
, q6
= *oq6
;
339 // 13-tap filter [1, 1, 1, 1, 1, 2, 2, 2, 1, 1, 1, 1, 1]
340 *op5
= ROUND_POWER_OF_TWO(p6
* 7 + p5
* 2 + p4
* 2 + p3
+ p2
+ p1
+ p0
+ q0
,
342 *op4
= ROUND_POWER_OF_TWO(
343 p6
* 5 + p5
* 2 + p4
* 2 + p3
* 2 + p2
+ p1
+ p0
+ q0
+ q1
, 4);
344 *op3
= ROUND_POWER_OF_TWO(
345 p6
* 4 + p5
+ p4
* 2 + p3
* 2 + p2
* 2 + p1
+ p0
+ q0
+ q1
+ q2
, 4);
346 *op2
= ROUND_POWER_OF_TWO(
347 p6
* 3 + p5
+ p4
+ p3
* 2 + p2
* 2 + p1
* 2 + p0
+ q0
+ q1
+ q2
+ q3
,
349 *op1
= ROUND_POWER_OF_TWO(p6
* 2 + p5
+ p4
+ p3
+ p2
* 2 + p1
* 2 + p0
* 2 +
350 q0
+ q1
+ q2
+ q3
+ q4
,
352 *op0
= ROUND_POWER_OF_TWO(p6
+ p5
+ p4
+ p3
+ p2
+ p1
* 2 + p0
* 2 +
353 q0
* 2 + q1
+ q2
+ q3
+ q4
+ q5
,
355 *oq0
= ROUND_POWER_OF_TWO(p5
+ p4
+ p3
+ p2
+ p1
+ p0
* 2 + q0
* 2 +
356 q1
* 2 + q2
+ q3
+ q4
+ q5
+ q6
,
358 *oq1
= ROUND_POWER_OF_TWO(p4
+ p3
+ p2
+ p1
+ p0
+ q0
* 2 + q1
* 2 +
359 q2
* 2 + q3
+ q4
+ q5
+ q6
* 2,
361 *oq2
= ROUND_POWER_OF_TWO(
362 p3
+ p2
+ p1
+ p0
+ q0
+ q1
* 2 + q2
* 2 + q3
* 2 + q4
+ q5
+ q6
* 3,
364 *oq3
= ROUND_POWER_OF_TWO(
365 p2
+ p1
+ p0
+ q0
+ q1
+ q2
* 2 + q3
* 2 + q4
* 2 + q5
+ q6
* 4, 4);
366 *oq4
= ROUND_POWER_OF_TWO(
367 p1
+ p0
+ q0
+ q1
+ q2
+ q3
* 2 + q4
* 2 + q5
* 2 + q6
* 5, 4);
368 *oq5
= ROUND_POWER_OF_TWO(p0
+ q0
+ q1
+ q2
+ q3
+ q4
* 2 + q5
* 2 + q6
* 7,
371 filter8(mask
, thresh
, flat
, op3
, op2
, op1
, op0
, oq0
, oq1
, oq2
, oq3
);
// Runs filter14() across a horizontal edge. `s` addresses the q0 sample of
// the first column and rows are `p` samples apart. `count` is the number of
// 4-column groups to process, so 4 * count columns are filtered in place.
// Both flatness tests use a fixed threshold of 1; flat2 looks at the outer
// samples (p6..p4, q4..q6) relative to p0/q0.
static void mb_lpf_horizontal_edge_w(uint8_t *s, int p, const uint8_t *blimit,
                                     const uint8_t *limit,
                                     const uint8_t *thresh, int count) {
  int i;
  int step = 4;

  // loop filter designed to work using chars so that we can make maximum use
  // of 8 bit simd instructions.
  for (i = 0; i < step * count; ++i) {
    const uint8_t p6 = s[-7 * p], p5 = s[-6 * p], p4 = s[-5 * p],
                  p3 = s[-4 * p], p2 = s[-3 * p], p1 = s[-2 * p], p0 = s[-p];
    const uint8_t q0 = s[0 * p], q1 = s[1 * p], q2 = s[2 * p], q3 = s[3 * p],
                  q4 = s[4 * p], q5 = s[5 * p], q6 = s[6 * p];
    const int8_t mask =
        filter_mask(*limit, *blimit, p3, p2, p1, p0, q0, q1, q2, q3);
    const int8_t flat = flat_mask4(1, p3, p2, p1, p0, q0, q1, q2, q3);
    const int8_t flat2 = flat_mask4(1, p6, p5, p4, p0, q0, q4, q5, q6);

    filter14(mask, *thresh, flat, flat2, s - 7 * p, s - 6 * p, s - 5 * p,
             s - 4 * p, s - 3 * p, s - 2 * p, s - 1 * p, s, s + 1 * p,
             s + 2 * p, s + 3 * p, s + 4 * p, s + 5 * p, s + 6 * p);
    ++s;  // next column along the edge
  }
}

// 14-sample filter across a horizontal edge for one 4-column group; thin
// wrapper over mb_lpf_horizontal_edge_w() with count = 1.
void aom_lpf_horizontal_14_c(uint8_t *s, int p, const uint8_t *blimit,
                             const uint8_t *limit, const uint8_t *thresh) {
  mb_lpf_horizontal_edge_w(s, p, blimit, limit, thresh, 1);
}

// 14-sample filter across a horizontal edge for two adjacent 4-column
// groups: the group at `s` uses the first threshold set, the group at
// `s + 4` uses the second.
void aom_lpf_horizontal_14_dual_c(uint8_t *s, int p, const uint8_t *blimit0,
                                  const uint8_t *limit0, const uint8_t *thresh0,
                                  const uint8_t *blimit1, const uint8_t *limit1,
                                  const uint8_t *thresh1) {
  mb_lpf_horizontal_edge_w(s, p, blimit0, limit0, thresh0, 1);
  mb_lpf_horizontal_edge_w(s + 4, p, blimit1, limit1, thresh1, 1);
}

// Runs filter14() across a vertical edge. `s` addresses the q0 sample of the
// first row, so p0 is at s[-1]. `count` rows, `p` samples apart, are
// filtered in place. Both flatness tests use a fixed threshold of 1; flat2
// looks at the outer samples (p6..p4, q4..q6) relative to p0/q0.
static void mb_lpf_vertical_edge_w(uint8_t *s, int p, const uint8_t *blimit,
                                   const uint8_t *limit, const uint8_t *thresh,
                                   int count) {
  int i;

  for (i = 0; i < count; ++i) {
    const uint8_t p6 = s[-7], p5 = s[-6], p4 = s[-5], p3 = s[-4], p2 = s[-3],
                  p1 = s[-2], p0 = s[-1];
    const uint8_t q0 = s[0], q1 = s[1], q2 = s[2], q3 = s[3], q4 = s[4],
                  q5 = s[5], q6 = s[6];
    const int8_t mask =
        filter_mask(*limit, *blimit, p3, p2, p1, p0, q0, q1, q2, q3);
    const int8_t flat = flat_mask4(1, p3, p2, p1, p0, q0, q1, q2, q3);
    const int8_t flat2 = flat_mask4(1, p6, p5, p4, p0, q0, q4, q5, q6);

    filter14(mask, *thresh, flat, flat2, s - 7, s - 6, s - 5, s - 4, s - 3,
             s - 2, s - 1, s, s + 1, s + 2, s + 3, s + 4, s + 5, s + 6);
    s += p;  // next row along the edge
  }
}

// 14-sample filter across a vertical edge for four rows; thin wrapper over
// mb_lpf_vertical_edge_w() with count = 4.
void aom_lpf_vertical_14_c(uint8_t *s, int p, const uint8_t *blimit,
                           const uint8_t *limit, const uint8_t *thresh) {
  mb_lpf_vertical_edge_w(s, p, blimit, limit, thresh, 4);
}

// 14-sample filter across a vertical edge for two adjacent 4-row groups:
// rows at `s` use the first threshold set, rows starting at `s + 4 * pitch`
// use the second.
void aom_lpf_vertical_14_dual_c(uint8_t *s, int pitch, const uint8_t *blimit0,
                                const uint8_t *limit0, const uint8_t *thresh0,
                                const uint8_t *blimit1, const uint8_t *limit1,
                                const uint8_t *thresh1) {
  mb_lpf_vertical_edge_w(s, pitch, blimit0, limit0, thresh0, 4);
  mb_lpf_vertical_edge_w(s + 4 * pitch, pitch, blimit1, limit1, thresh1, 4);
}

#if CONFIG_AV1_HIGHBITDEPTH
// Should we apply any filter at all: 11111111 yes, 00000000 no ?
//
// High bit-depth version of the two-sample-per-side filter mask. The 8-bit
// `limit` and `blimit` are scaled up by (bd - 8) bits before being compared
// with the 16-bit samples. Returns -1 (all bits set) when all tests pass and
// 0 otherwise.
static INLINE int8_t highbd_filter_mask2(uint8_t limit, uint8_t blimit,
                                         uint16_t p1, uint16_t p0, uint16_t q0,
                                         uint16_t q1, int bd) {
  // Each failing test ORs in -1, so mask ends up 0 (all pass) or -1.
  int8_t mask = 0;
  int16_t limit16 = (uint16_t)limit << (bd - 8);
  int16_t blimit16 = (uint16_t)blimit << (bd - 8);
  mask |= (abs(p1 - p0) > limit16) * -1;
  mask |= (abs(q1 - q0) > limit16) * -1;
  mask |= (abs(p0 - q0) * 2 + abs(p1 - q1) / 2 > blimit16) * -1;
  return ~mask;
}

// Should we apply any filter at all: 11111111 yes, 00000000 no ?
//
// High bit-depth version of the four-sample-per-side filter mask. The 8-bit
// `limit` and `blimit` are scaled up by (bd - 8) bits before being compared
// with the 16-bit samples. Returns -1 (all bits set) when all tests pass and
// 0 otherwise.
static INLINE int8_t highbd_filter_mask(uint8_t limit, uint8_t blimit,
                                        uint16_t p3, uint16_t p2, uint16_t p1,
                                        uint16_t p0, uint16_t q0, uint16_t q1,
                                        uint16_t q2, uint16_t q3, int bd) {
  // Each failing test ORs in -1, so mask ends up 0 (all pass) or -1.
  int8_t mask = 0;
  int16_t limit16 = (uint16_t)limit << (bd - 8);
  int16_t blimit16 = (uint16_t)blimit << (bd - 8);
  mask |= (abs(p3 - p2) > limit16) * -1;
  mask |= (abs(p2 - p1) > limit16) * -1;
  mask |= (abs(p1 - p0) > limit16) * -1;
  mask |= (abs(q1 - q0) > limit16) * -1;
  mask |= (abs(q2 - q1) > limit16) * -1;
  mask |= (abs(q3 - q2) > limit16) * -1;
  mask |= (abs(p0 - q0) * 2 + abs(p1 - q1) / 2 > blimit16) * -1;
  return ~mask;
}

// High bit-depth version of the three-sample-per-side filter mask. The 8-bit
// `limit` and `blimit` are scaled up by (bd - 8) bits before being compared
// with the 16-bit samples. Returns -1 (all bits set) when all tests pass and
// 0 otherwise.
static INLINE int8_t highbd_filter_mask3_chroma(uint8_t limit, uint8_t blimit,
                                                uint16_t p2, uint16_t p1,
                                                uint16_t p0, uint16_t q0,
                                                uint16_t q1, uint16_t q2,
                                                int bd) {
  // Each failing test ORs in -1, so mask ends up 0 (all pass) or -1.
  int8_t mask = 0;
  int16_t limit16 = (uint16_t)limit << (bd - 8);
  int16_t blimit16 = (uint16_t)blimit << (bd - 8);
  mask |= (abs(p2 - p1) > limit16) * -1;
  mask |= (abs(p1 - p0) > limit16) * -1;
  mask |= (abs(q1 - q0) > limit16) * -1;
  mask |= (abs(q2 - q1) > limit16) * -1;
  mask |= (abs(p0 - q0) * 2 + abs(p1 - q1) / 2 > blimit16) * -1;
  return ~mask;
}

// High bit-depth flatness test over three samples on each side of the edge.
// `thresh` is scaled up by (bd - 8) bits; each side is compared only against
// its own innermost sample. Returns -1 (all bits set) when every difference
// is within the scaled threshold, 0 otherwise.
static INLINE int8_t highbd_flat_mask3_chroma(uint8_t thresh, uint16_t p2,
                                              uint16_t p1, uint16_t p0,
                                              uint16_t q0, uint16_t q1,
                                              uint16_t q2, int bd) {
  // Each failing test ORs in -1, so mask ends up 0 (all pass) or -1.
  int8_t mask = 0;
  int16_t thresh16 = (uint16_t)thresh << (bd - 8);
  mask |= (abs(p1 - p0) > thresh16) * -1;
  mask |= (abs(q1 - q0) > thresh16) * -1;
  mask |= (abs(p2 - p0) > thresh16) * -1;
  mask |= (abs(q2 - q0) > thresh16) * -1;
  return ~mask;
}

// High bit-depth flatness test over four samples on each side of the edge.
// `thresh` is scaled up by (bd - 8) bits; each side is compared only against
// its own innermost sample. Returns -1 (all bits set) when every difference
// is within the scaled threshold, 0 otherwise.
static INLINE int8_t highbd_flat_mask4(uint8_t thresh, uint16_t p3, uint16_t p2,
                                       uint16_t p1, uint16_t p0, uint16_t q0,
                                       uint16_t q1, uint16_t q2, uint16_t q3,
                                       int bd) {
  // Each failing test ORs in -1, so mask ends up 0 (all pass) or -1.
  int8_t mask = 0;
  int16_t thresh16 = (uint16_t)thresh << (bd - 8);
  mask |= (abs(p1 - p0) > thresh16) * -1;
  mask |= (abs(q1 - q0) > thresh16) * -1;
  mask |= (abs(p2 - p0) > thresh16) * -1;
  mask |= (abs(q2 - q0) > thresh16) * -1;
  mask |= (abs(p3 - p0) > thresh16) * -1;
  mask |= (abs(q3 - q0) > thresh16) * -1;
  return ~mask;
}

// Is there high edge variance internal edge:
// 11111111_11111111 yes, 00000000_00000000 no ?
//
// `thresh` is scaled up by (bd - 8) bits. Returns -1 (all 16 bits set) when
// either inner step, |p1 - p0| or |q1 - q0|, exceeds the scaled threshold,
// and 0 otherwise. The result is not inverted.
static INLINE int16_t highbd_hev_mask(uint8_t thresh, uint16_t p1, uint16_t p0,
                                      uint16_t q0, uint16_t q1, int bd) {
  int16_t hev = 0;
  int16_t thresh16 = (uint16_t)thresh << (bd - 8);
  hev |= (abs(p1 - p0) > thresh16) * -1;
  hev |= (abs(q1 - q0) > thresh16) * -1;
  return hev;
}

534 static INLINE
void highbd_filter4(int8_t mask
, uint8_t thresh
, uint16_t *op1
,
535 uint16_t *op0
, uint16_t *oq0
, uint16_t *oq1
,
537 int16_t filter1
, filter2
;
538 // ^0x80 equivalent to subtracting 0x80 from the values to turn them
539 // into -128 to +127 instead of 0 to 255.
541 const int16_t ps1
= (int16_t)*op1
- (0x80 << shift
);
542 const int16_t ps0
= (int16_t)*op0
- (0x80 << shift
);
543 const int16_t qs0
= (int16_t)*oq0
- (0x80 << shift
);
544 const int16_t qs1
= (int16_t)*oq1
- (0x80 << shift
);
545 const int16_t hev
= highbd_hev_mask(thresh
, *op1
, *op0
, *oq0
, *oq1
, bd
);
547 // Add outer taps if we have high edge variance.
548 int16_t filter
= signed_char_clamp_high(ps1
- qs1
, bd
) & hev
;
551 filter
= signed_char_clamp_high(filter
+ 3 * (qs0
- ps0
), bd
) & mask
;
553 // Save bottom 3 bits so that we round one side +4 and the other +3
554 // if it equals 4 we'll set to adjust by -1 to account for the fact
555 // we'd round 3 the other way.
556 filter1
= signed_char_clamp_high(filter
+ 4, bd
) >> 3;
557 filter2
= signed_char_clamp_high(filter
+ 3, bd
) >> 3;
559 *oq0
= signed_char_clamp_high(qs0
- filter1
, bd
) + (0x80 << shift
);
560 *op0
= signed_char_clamp_high(ps0
+ filter2
, bd
) + (0x80 << shift
);
562 // Outer tap adjustments.
563 filter
= ROUND_POWER_OF_TWO(filter1
, 1) & ~hev
;
565 *oq1
= signed_char_clamp_high(qs1
- filter
, bd
) + (0x80 << shift
);
566 *op1
= signed_char_clamp_high(ps1
+ filter
, bd
) + (0x80 << shift
);
// High bit-depth: runs highbd_filter4() across a horizontal edge. `s`
// addresses the q0 sample of the first column and rows are `p` samples
// apart. Four consecutive columns are filtered in place; `bd` is the sample
// bit depth.
void aom_highbd_lpf_horizontal_4_c(uint16_t *s, int p /* pitch */,
                                   const uint8_t *blimit, const uint8_t *limit,
                                   const uint8_t *thresh, int bd) {
  int i;
  int count = 4;

  // loop filter designed to work using chars so that we can make maximum use
  // of 8 bit simd instructions.
  for (i = 0; i < count; ++i) {
    const uint16_t p1 = s[-2 * p];
    const uint16_t p0 = s[-p];
    const uint16_t q0 = s[0 * p];
    const uint16_t q1 = s[1 * p];
    const int8_t mask =
        highbd_filter_mask2(*limit, *blimit, p1, p0, q0, q1, bd);
    highbd_filter4(mask, *thresh, s - 2 * p, s - 1 * p, s, s + 1 * p, bd);
    ++s;  // next column along the edge
  }
}

// High bit-depth: filters two adjacent 4-column groups of a horizontal edge
// with aom_highbd_lpf_horizontal_4_c(); the group at `s` uses the first
// threshold set, the group at `s + 4` uses the second.
void aom_highbd_lpf_horizontal_4_dual_c(
    uint16_t *s, int p, const uint8_t *blimit0, const uint8_t *limit0,
    const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1,
    const uint8_t *thresh1, int bd) {
  aom_highbd_lpf_horizontal_4_c(s, p, blimit0, limit0, thresh0, bd);
  aom_highbd_lpf_horizontal_4_c(s + 4, p, blimit1, limit1, thresh1, bd);
}

// High bit-depth: runs highbd_filter4() across a vertical edge. `s`
// addresses the q0 sample of the first row, so p0 is at s[-1]. Four
// consecutive rows, `pitch` samples apart, are filtered in place; `bd` is
// the sample bit depth.
void aom_highbd_lpf_vertical_4_c(uint16_t *s, int pitch, const uint8_t *blimit,
                                 const uint8_t *limit, const uint8_t *thresh,
                                 int bd) {
  int i;
  int count = 4;

  // loop filter designed to work using chars so that we can make maximum use
  // of 8 bit simd instructions.
  for (i = 0; i < count; ++i) {
    const uint16_t p1 = s[-2], p0 = s[-1];
    const uint16_t q0 = s[0], q1 = s[1];
    const int8_t mask =
        highbd_filter_mask2(*limit, *blimit, p1, p0, q0, q1, bd);
    highbd_filter4(mask, *thresh, s - 2, s - 1, s, s + 1, bd);
    s += pitch;  // next row along the edge
  }
}

// High bit-depth: filters two adjacent 4-row groups of a vertical edge with
// aom_highbd_lpf_vertical_4_c(); rows at `s` use the first threshold set,
// rows starting at `s + 4 * pitch` use the second.
void aom_highbd_lpf_vertical_4_dual_c(
    uint16_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0,
    const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1,
    const uint8_t *thresh1, int bd) {
  aom_highbd_lpf_vertical_4_c(s, pitch, blimit0, limit0, thresh0, bd);
  aom_highbd_lpf_vertical_4_c(s + 4 * pitch, pitch, blimit1, limit1, thresh1,
                              bd);
}

624 static INLINE
void highbd_filter6(int8_t mask
, uint8_t thresh
, int8_t flat
,
625 uint16_t *op2
, uint16_t *op1
, uint16_t *op0
,
626 uint16_t *oq0
, uint16_t *oq1
, uint16_t *oq2
,
629 const uint16_t p2
= *op2
, p1
= *op1
, p0
= *op0
;
630 const uint16_t q0
= *oq0
, q1
= *oq1
, q2
= *oq2
;
632 // 5-tap filter [1, 2, 2, 2, 1]
633 *op1
= ROUND_POWER_OF_TWO(p2
* 3 + p1
* 2 + p0
* 2 + q0
, 3);
634 *op0
= ROUND_POWER_OF_TWO(p2
+ p1
* 2 + p0
* 2 + q0
* 2 + q1
, 3);
635 *oq0
= ROUND_POWER_OF_TWO(p1
+ p0
* 2 + q0
* 2 + q1
* 2 + q2
, 3);
636 *oq1
= ROUND_POWER_OF_TWO(p0
+ q0
* 2 + q1
* 2 + q2
* 3, 3);
638 highbd_filter4(mask
, thresh
, op1
, op0
, oq0
, oq1
, bd
);
642 static INLINE
void highbd_filter8(int8_t mask
, uint8_t thresh
, int8_t flat
,
643 uint16_t *op3
, uint16_t *op2
, uint16_t *op1
,
644 uint16_t *op0
, uint16_t *oq0
, uint16_t *oq1
,
645 uint16_t *oq2
, uint16_t *oq3
, int bd
) {
647 const uint16_t p3
= *op3
, p2
= *op2
, p1
= *op1
, p0
= *op0
;
648 const uint16_t q0
= *oq0
, q1
= *oq1
, q2
= *oq2
, q3
= *oq3
;
650 // 7-tap filter [1, 1, 1, 2, 1, 1, 1]
651 *op2
= ROUND_POWER_OF_TWO(p3
+ p3
+ p3
+ 2 * p2
+ p1
+ p0
+ q0
, 3);
652 *op1
= ROUND_POWER_OF_TWO(p3
+ p3
+ p2
+ 2 * p1
+ p0
+ q0
+ q1
, 3);
653 *op0
= ROUND_POWER_OF_TWO(p3
+ p2
+ p1
+ 2 * p0
+ q0
+ q1
+ q2
, 3);
654 *oq0
= ROUND_POWER_OF_TWO(p2
+ p1
+ p0
+ 2 * q0
+ q1
+ q2
+ q3
, 3);
655 *oq1
= ROUND_POWER_OF_TWO(p1
+ p0
+ q0
+ 2 * q1
+ q2
+ q3
+ q3
, 3);
656 *oq2
= ROUND_POWER_OF_TWO(p0
+ q0
+ q1
+ 2 * q2
+ q3
+ q3
+ q3
, 3);
658 highbd_filter4(mask
, thresh
, op1
, op0
, oq0
, oq1
, bd
);
// High bit-depth: runs highbd_filter8() across a horizontal edge. `s`
// addresses the q0 sample of the first column and rows are `p` samples
// apart. Four consecutive columns are filtered in place. The flatness test
// uses a fixed threshold of 1 (scaled by bit depth inside the helper).
void aom_highbd_lpf_horizontal_8_c(uint16_t *s, int p, const uint8_t *blimit,
                                   const uint8_t *limit, const uint8_t *thresh,
                                   int bd) {
  int i;
  int count = 4;

  // loop filter designed to work using chars so that we can make maximum use
  // of 8 bit simd instructions.
  for (i = 0; i < count; ++i) {
    const uint16_t p3 = s[-4 * p], p2 = s[-3 * p], p1 = s[-2 * p], p0 = s[-p];
    const uint16_t q0 = s[0 * p], q1 = s[1 * p], q2 = s[2 * p], q3 = s[3 * p];

    const int8_t mask =
        highbd_filter_mask(*limit, *blimit, p3, p2, p1, p0, q0, q1, q2, q3, bd);
    const int8_t flat =
        highbd_flat_mask4(1, p3, p2, p1, p0, q0, q1, q2, q3, bd);
    highbd_filter8(mask, *thresh, flat, s - 4 * p, s - 3 * p, s - 2 * p,
                   s - 1 * p, s, s + 1 * p, s + 2 * p, s + 3 * p, bd);
    ++s;  // next column along the edge
  }
}

// High bit-depth: runs highbd_filter6() across a horizontal edge. `s`
// addresses the q0 sample of the first column and rows are `p` samples
// apart. Four consecutive columns are filtered in place. The flatness test
// uses a fixed threshold of 1 (scaled by bit depth inside the helper).
void aom_highbd_lpf_horizontal_6_c(uint16_t *s, int p, const uint8_t *blimit,
                                   const uint8_t *limit, const uint8_t *thresh,
                                   int bd) {
  int i;
  int count = 4;

  // loop filter designed to work using chars so that we can make maximum use
  // of 8 bit simd instructions.
  for (i = 0; i < count; ++i) {
    const uint16_t p2 = s[-3 * p], p1 = s[-2 * p], p0 = s[-p];
    const uint16_t q0 = s[0 * p], q1 = s[1 * p], q2 = s[2 * p];

    const int8_t mask =
        highbd_filter_mask3_chroma(*limit, *blimit, p2, p1, p0, q0, q1, q2, bd);
    const int8_t flat = highbd_flat_mask3_chroma(1, p2, p1, p0, q0, q1, q2, bd);
    highbd_filter6(mask, *thresh, flat, s - 3 * p, s - 2 * p, s - 1 * p, s,
                   s + 1 * p, s + 2 * p, bd);
    ++s;  // next column along the edge
  }
}

// High bit-depth: filters two adjacent 4-column groups of a horizontal edge
// with aom_highbd_lpf_horizontal_6_c(); the group at `s` uses the first
// threshold set, the group at `s + 4` uses the second.
void aom_highbd_lpf_horizontal_6_dual_c(
    uint16_t *s, int p, const uint8_t *blimit0, const uint8_t *limit0,
    const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1,
    const uint8_t *thresh1, int bd) {
  aom_highbd_lpf_horizontal_6_c(s, p, blimit0, limit0, thresh0, bd);
  aom_highbd_lpf_horizontal_6_c(s + 4, p, blimit1, limit1, thresh1, bd);
}

// High bit-depth: filters two adjacent 4-column groups of a horizontal edge
// with aom_highbd_lpf_horizontal_8_c(); the group at `s` uses the first
// threshold set, the group at `s + 4` uses the second.
void aom_highbd_lpf_horizontal_8_dual_c(
    uint16_t *s, int p, const uint8_t *blimit0, const uint8_t *limit0,
    const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1,
    const uint8_t *thresh1, int bd) {
  aom_highbd_lpf_horizontal_8_c(s, p, blimit0, limit0, thresh0, bd);
  aom_highbd_lpf_horizontal_8_c(s + 4, p, blimit1, limit1, thresh1, bd);
}

// High bit-depth: runs highbd_filter6() across a vertical edge. `s`
// addresses the q0 sample of the first row, so p0 is at s[-1]. Four
// consecutive rows, `pitch` samples apart, are filtered in place. The
// flatness test uses a fixed threshold of 1 (scaled by bit depth inside the
// helper).
void aom_highbd_lpf_vertical_6_c(uint16_t *s, int pitch, const uint8_t *blimit,
                                 const uint8_t *limit, const uint8_t *thresh,
                                 int bd) {
  int i;
  int count = 4;

  for (i = 0; i < count; ++i) {
    const uint16_t p2 = s[-3], p1 = s[-2], p0 = s[-1];
    const uint16_t q0 = s[0], q1 = s[1], q2 = s[2];
    const int8_t mask =
        highbd_filter_mask3_chroma(*limit, *blimit, p2, p1, p0, q0, q1, q2, bd);
    const int8_t flat = highbd_flat_mask3_chroma(1, p2, p1, p0, q0, q1, q2, bd);
    highbd_filter6(mask, *thresh, flat, s - 3, s - 2, s - 1, s, s + 1, s + 2,
                   bd);
    s += pitch;  // next row along the edge
  }
}

// High bit-depth: filters two adjacent 4-row groups of a vertical edge with
// aom_highbd_lpf_vertical_6_c(); rows at `s` use the first threshold set,
// rows starting at `s + 4 * pitch` use the second.
void aom_highbd_lpf_vertical_6_dual_c(
    uint16_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0,
    const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1,
    const uint8_t *thresh1, int bd) {
  aom_highbd_lpf_vertical_6_c(s, pitch, blimit0, limit0, thresh0, bd);
  aom_highbd_lpf_vertical_6_c(s + 4 * pitch, pitch, blimit1, limit1, thresh1,
                              bd);
}

// High bit-depth: runs highbd_filter8() across a vertical edge. `s`
// addresses the q0 sample of the first row, so p0 is at s[-1]. Four
// consecutive rows, `pitch` samples apart, are filtered in place. The
// flatness test uses a fixed threshold of 1 (scaled by bit depth inside the
// helper).
void aom_highbd_lpf_vertical_8_c(uint16_t *s, int pitch, const uint8_t *blimit,
                                 const uint8_t *limit, const uint8_t *thresh,
                                 int bd) {
  int i;
  int count = 4;

  for (i = 0; i < count; ++i) {
    const uint16_t p3 = s[-4], p2 = s[-3], p1 = s[-2], p0 = s[-1];
    const uint16_t q0 = s[0], q1 = s[1], q2 = s[2], q3 = s[3];
    const int8_t mask =
        highbd_filter_mask(*limit, *blimit, p3, p2, p1, p0, q0, q1, q2, q3, bd);
    const int8_t flat =
        highbd_flat_mask4(1, p3, p2, p1, p0, q0, q1, q2, q3, bd);
    highbd_filter8(mask, *thresh, flat, s - 4, s - 3, s - 2, s - 1, s, s + 1,
                   s + 2, s + 3, bd);
    s += pitch;  // next row along the edge
  }
}

// High bit-depth: filters two adjacent 4-row groups of a vertical edge with
// aom_highbd_lpf_vertical_8_c(); rows at `s` use the first threshold set,
// rows starting at `s + 4 * pitch` use the second.
void aom_highbd_lpf_vertical_8_dual_c(
    uint16_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0,
    const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1,
    const uint8_t *thresh1, int bd) {
  aom_highbd_lpf_vertical_8_c(s, pitch, blimit0, limit0, thresh0, bd);
  aom_highbd_lpf_vertical_8_c(s + 4 * pitch, pitch, blimit1, limit1, thresh1,
                              bd);
}

776 static INLINE
void highbd_filter14(int8_t mask
, uint8_t thresh
, int8_t flat
,
777 int8_t flat2
, uint16_t *op6
, uint16_t *op5
,
778 uint16_t *op4
, uint16_t *op3
, uint16_t *op2
,
779 uint16_t *op1
, uint16_t *op0
, uint16_t *oq0
,
780 uint16_t *oq1
, uint16_t *oq2
, uint16_t *oq3
,
781 uint16_t *oq4
, uint16_t *oq5
, uint16_t *oq6
,
783 if (flat2
&& flat
&& mask
) {
784 const uint16_t p6
= *op6
;
785 const uint16_t p5
= *op5
;
786 const uint16_t p4
= *op4
;
787 const uint16_t p3
= *op3
;
788 const uint16_t p2
= *op2
;
789 const uint16_t p1
= *op1
;
790 const uint16_t p0
= *op0
;
791 const uint16_t q0
= *oq0
;
792 const uint16_t q1
= *oq1
;
793 const uint16_t q2
= *oq2
;
794 const uint16_t q3
= *oq3
;
795 const uint16_t q4
= *oq4
;
796 const uint16_t q5
= *oq5
;
797 const uint16_t q6
= *oq6
;
799 // 13-tap filter [1, 1, 1, 1, 1, 2, 2, 2, 1, 1, 1, 1, 1]
800 *op5
= ROUND_POWER_OF_TWO(p6
* 7 + p5
* 2 + p4
* 2 + p3
+ p2
+ p1
+ p0
+ q0
,
802 *op4
= ROUND_POWER_OF_TWO(
803 p6
* 5 + p5
* 2 + p4
* 2 + p3
* 2 + p2
+ p1
+ p0
+ q0
+ q1
, 4);
804 *op3
= ROUND_POWER_OF_TWO(
805 p6
* 4 + p5
+ p4
* 2 + p3
* 2 + p2
* 2 + p1
+ p0
+ q0
+ q1
+ q2
, 4);
806 *op2
= ROUND_POWER_OF_TWO(
807 p6
* 3 + p5
+ p4
+ p3
* 2 + p2
* 2 + p1
* 2 + p0
+ q0
+ q1
+ q2
+ q3
,
809 *op1
= ROUND_POWER_OF_TWO(p6
* 2 + p5
+ p4
+ p3
+ p2
* 2 + p1
* 2 + p0
* 2 +
810 q0
+ q1
+ q2
+ q3
+ q4
,
812 *op0
= ROUND_POWER_OF_TWO(p6
+ p5
+ p4
+ p3
+ p2
+ p1
* 2 + p0
* 2 +
813 q0
* 2 + q1
+ q2
+ q3
+ q4
+ q5
,
815 *oq0
= ROUND_POWER_OF_TWO(p5
+ p4
+ p3
+ p2
+ p1
+ p0
* 2 + q0
* 2 +
816 q1
* 2 + q2
+ q3
+ q4
+ q5
+ q6
,
818 *oq1
= ROUND_POWER_OF_TWO(p4
+ p3
+ p2
+ p1
+ p0
+ q0
* 2 + q1
* 2 +
819 q2
* 2 + q3
+ q4
+ q5
+ q6
* 2,
821 *oq2
= ROUND_POWER_OF_TWO(
822 p3
+ p2
+ p1
+ p0
+ q0
+ q1
* 2 + q2
* 2 + q3
* 2 + q4
+ q5
+ q6
* 3,
824 *oq3
= ROUND_POWER_OF_TWO(
825 p2
+ p1
+ p0
+ q0
+ q1
+ q2
* 2 + q3
* 2 + q4
* 2 + q5
+ q6
* 4, 4);
826 *oq4
= ROUND_POWER_OF_TWO(
827 p1
+ p0
+ q0
+ q1
+ q2
+ q3
* 2 + q4
* 2 + q5
* 2 + q6
* 5, 4);
828 *oq5
= ROUND_POWER_OF_TWO(p0
+ q0
+ q1
+ q2
+ q3
+ q4
* 2 + q5
* 2 + q6
* 7,
831 highbd_filter8(mask
, thresh
, flat
, op3
, op2
, op1
, op0
, oq0
, oq1
, oq2
, oq3
,
// High bit-depth: runs highbd_filter14() across a horizontal edge. `s`
// addresses the q0 sample of the first column and rows are `p` samples
// apart. `count` is the number of 4-column groups to process, so 4 * count
// columns are filtered in place. Both flatness tests use a fixed threshold
// of 1; flat2 looks at the outer samples (p6..p4, q4..q6) relative to p0/q0.
static void highbd_mb_lpf_horizontal_edge_w(uint16_t *s, int p,
                                            const uint8_t *blimit,
                                            const uint8_t *limit,
                                            const uint8_t *thresh, int count,
                                            int bd) {
  int i;
  int step = 4;

  // loop filter designed to work using chars so that we can make maximum use
  // of 8 bit simd instructions.
  for (i = 0; i < step * count; ++i) {
    const uint16_t p3 = s[-4 * p];
    const uint16_t p2 = s[-3 * p];
    const uint16_t p1 = s[-2 * p];
    const uint16_t p0 = s[-p];
    const uint16_t q0 = s[0 * p];
    const uint16_t q1 = s[1 * p];
    const uint16_t q2 = s[2 * p];
    const uint16_t q3 = s[3 * p];
    const int8_t mask =
        highbd_filter_mask(*limit, *blimit, p3, p2, p1, p0, q0, q1, q2, q3, bd);
    const int8_t flat =
        highbd_flat_mask4(1, p3, p2, p1, p0, q0, q1, q2, q3, bd);

    const int8_t flat2 =
        highbd_flat_mask4(1, s[-7 * p], s[-6 * p], s[-5 * p], p0, q0, s[4 * p],
                          s[5 * p], s[6 * p], bd);

    highbd_filter14(mask, *thresh, flat, flat2, s - 7 * p, s - 6 * p, s - 5 * p,
                    s - 4 * p, s - 3 * p, s - 2 * p, s - 1 * p, s, s + 1 * p,
                    s + 2 * p, s + 3 * p, s + 4 * p, s + 5 * p, s + 6 * p, bd);
    ++s;  // next column along the edge
  }
}

// High bit-depth 14-sample filter across a horizontal edge for one 4-column
// group; thin wrapper over highbd_mb_lpf_horizontal_edge_w() with count = 1.
void aom_highbd_lpf_horizontal_14_c(uint16_t *s, int pitch,
                                    const uint8_t *blimit, const uint8_t *limit,
                                    const uint8_t *thresh, int bd) {
  highbd_mb_lpf_horizontal_edge_w(s, pitch, blimit, limit, thresh, 1, bd);
}

// High bit-depth 14-sample filter across a horizontal edge for two adjacent
// 4-column groups: the group at `s` uses the first threshold set, the group
// at `s + 4` uses the second.
void aom_highbd_lpf_horizontal_14_dual_c(
    uint16_t *s, int p, const uint8_t *blimit0, const uint8_t *limit0,
    const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1,
    const uint8_t *thresh1, int bd) {
  highbd_mb_lpf_horizontal_edge_w(s, p, blimit0, limit0, thresh0, 1, bd);
  highbd_mb_lpf_horizontal_edge_w(s + 4, p, blimit1, limit1, thresh1, 1, bd);
}

// High bit-depth: runs highbd_filter14() across a vertical edge. `s`
// addresses the q0 sample of the first row, so p0 is at s[-1]. `count` rows,
// `p` samples apart, are filtered in place. Both flatness tests use a fixed
// threshold of 1; flat2 looks at the outer samples (s[-7..-5], s[4..6])
// relative to p0/q0.
static void highbd_mb_lpf_vertical_edge_w(uint16_t *s, int p,
                                          const uint8_t *blimit,
                                          const uint8_t *limit,
                                          const uint8_t *thresh, int count,
                                          int bd) {
  int i;

  for (i = 0; i < count; ++i) {
    const uint16_t p3 = s[-4];
    const uint16_t p2 = s[-3];
    const uint16_t p1 = s[-2];
    const uint16_t p0 = s[-1];
    const uint16_t q0 = s[0];
    const uint16_t q1 = s[1];
    const uint16_t q2 = s[2];
    const uint16_t q3 = s[3];
    const int8_t mask =
        highbd_filter_mask(*limit, *blimit, p3, p2, p1, p0, q0, q1, q2, q3, bd);
    const int8_t flat =
        highbd_flat_mask4(1, p3, p2, p1, p0, q0, q1, q2, q3, bd);
    const int8_t flat2 =
        highbd_flat_mask4(1, s[-7], s[-6], s[-5], p0, q0, s[4], s[5], s[6], bd);

    highbd_filter14(mask, *thresh, flat, flat2, s - 7, s - 6, s - 5, s - 4,
                    s - 3, s - 2, s - 1, s, s + 1, s + 2, s + 3, s + 4, s + 5,
                    s + 6, bd);
    s += p;  // next row along the edge
  }
}

// High bit-depth 14-sample filter across a vertical edge for four rows; thin
// wrapper over highbd_mb_lpf_vertical_edge_w() with count = 4.
void aom_highbd_lpf_vertical_14_c(uint16_t *s, int p, const uint8_t *blimit,
                                  const uint8_t *limit, const uint8_t *thresh,
                                  int bd) {
  highbd_mb_lpf_vertical_edge_w(s, p, blimit, limit, thresh, 4, bd);
}

// High bit-depth 14-sample filter across a vertical edge for two adjacent
// 4-row groups: rows at `s` use the first threshold set, rows starting at
// `s + 4 * pitch` use the second.
void aom_highbd_lpf_vertical_14_dual_c(
    uint16_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0,
    const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1,
    const uint8_t *thresh1, int bd) {
  highbd_mb_lpf_vertical_edge_w(s, pitch, blimit0, limit0, thresh0, 4, bd);
  highbd_mb_lpf_vertical_edge_w(s + 4 * pitch, pitch, blimit1, limit1, thresh1,
                                4, bd);
}
#endif  // CONFIG_AV1_HIGHBITDEPTH