/*
 * Copyright © 2018-2021, VideoLAN and dav1d authors
 * Copyright © 2018, Two Orioles, LLC
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are met:
 *
 * 1. Redistributions of source code must retain the above copyright notice, this
 *    list of conditions and the following disclaimer.
 *
 * 2. Redistributions in binary form must reproduce the above copyright notice,
 *    this list of conditions and the following disclaimer in the documentation
 *    and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
 * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
 * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
 * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */
#include "config.h"

#include <errno.h>
#include <limits.h>
#include <string.h>
#include <stdio.h>
#include <inttypes.h>

#include "dav1d/data.h"

#include "common/frame.h"
#include "common/intops.h"

#include "src/ctx.h"
#include "src/decode.h"
#include "src/dequant_tables.h"
#include "src/env.h"
#include "src/filmgrain.h"
#include "src/log.h"
#include "src/qm.h"
#include "src/recon.h"
#include "src/ref.h"
#include "src/tables.h"
#include "src/thread_task.h"
#include "src/warpmv.h"

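// Fill the per-segment dequant table (DC/AC for each of Y, U and V) for a
// given base quantizer index, folding in the per-segment delta_q and the
// per-plane quantizer deltas from the frame header.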
static void init_quant_tables(const Dav1dSequenceHeader *const seq_hdr,
                              const Dav1dFrameHeader *const frame_hdr,
                              const int qidx, uint16_t (*dq)[3][2])
{
    for (int i = 0; i < (frame_hdr->segmentation.enabled ? 8 : 1); i++) {
        const int yac = frame_hdr->segmentation.enabled ?
            iclip_u8(qidx + frame_hdr->segmentation.seg_data.d[i].delta_q) : qidx;
        const int ydc = iclip_u8(yac + frame_hdr->quant.ydc_delta);
        const int uac = iclip_u8(yac + frame_hdr->quant.uac_delta);
        const int udc = iclip_u8(yac + frame_hdr->quant.udc_delta);
        const int vac = iclip_u8(yac + frame_hdr->quant.vac_delta);
        const int vdc = iclip_u8(yac + frame_hdr->quant.vdc_delta);

        dq[i][0][0] = dav1d_dq_tbl[seq_hdr->hbd][ydc][0];
        dq[i][0][1] = dav1d_dq_tbl[seq_hdr->hbd][yac][1];
        dq[i][1][0] = dav1d_dq_tbl[seq_hdr->hbd][udc][0];
        dq[i][1][1] = dav1d_dq_tbl[seq_hdr->hbd][uac][1];
        dq[i][2][0] = dav1d_dq_tbl[seq_hdr->hbd][vdc][0];
        dq[i][2][1] = dav1d_dq_tbl[seq_hdr->hbd][vac][1];
    }
}

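// Decode one motion vector component residual: sign, magnitude class,
// integer offset bits, plus fractional and high-precision bits.
// mv_prec < 0 means force_integer_mv; mv_prec > 0 means
// allow_high_precision_mv.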
static int read_mv_component_diff(MsacContext *const msac,
                                  CdfMvComponent *const mv_comp,
                                  const int mv_prec)
{
    const int sign = dav1d_msac_decode_bool_adapt(msac, mv_comp->sign);
    const int cl = dav1d_msac_decode_symbol_adapt16(msac, mv_comp->classes, 10);
    int up, fp = 3, hp = 1;

    if (!cl) {
        up = dav1d_msac_decode_bool_adapt(msac, mv_comp->class0);
        if (mv_prec >= 0) { // !force_integer_mv
            fp = dav1d_msac_decode_symbol_adapt4(msac, mv_comp->class0_fp[up], 3);
            if (mv_prec > 0) // allow_high_precision_mv
                hp = dav1d_msac_decode_bool_adapt(msac, mv_comp->class0_hp);
        }
    } else {
        up = 1 << cl;
        for (int n = 0; n < cl; n++)
            up |= dav1d_msac_decode_bool_adapt(msac, mv_comp->classN[n]) << n;
        if (mv_prec >= 0) { // !force_integer_mv
            fp = dav1d_msac_decode_symbol_adapt4(msac, mv_comp->classN_fp, 3);
            if (mv_prec > 0) // allow_high_precision_mv
                hp = dav1d_msac_decode_bool_adapt(msac, mv_comp->classN_hp);
        }
    }

    const int diff = ((up << 3) | (fp << 1) | hp) + 1;

    return sign ? -diff : diff;
}

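// Decode the MV joint type and add the coded component residuals to *ref_mv.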
static void read_mv_residual(Dav1dTileState *const ts, mv *const ref_mv,
                             const int mv_prec)
{
    MsacContext *const msac = &ts->msac;
    const enum MVJoint mv_joint =
        dav1d_msac_decode_symbol_adapt4(msac, ts->cdf.mv.joint, N_MV_JOINTS - 1);
    if (mv_joint & MV_JOINT_V)
        ref_mv->y += read_mv_component_diff(msac, &ts->cdf.mv.comp[0], mv_prec);
    if (mv_joint & MV_JOINT_H)
        ref_mv->x += read_mv_component_diff(msac, &ts->cdf.mv.comp[1], mv_prec);
}

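// Recursively parse the transform split tree of one block, recording split
// decisions in masks[] and updating the above/left transform size contexts.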
static void read_tx_tree(Dav1dTaskContext *const t,
                         const enum RectTxfmSize from,
                         const int depth, uint16_t *const masks,
                         const int x_off, const int y_off)
{
    const Dav1dFrameContext *const f = t->f;
    const int bx4 = t->bx & 31, by4 = t->by & 31;
    const TxfmInfo *const t_dim = &dav1d_txfm_dimensions[from];
    const int txw = t_dim->lw, txh = t_dim->lh;
    int is_split;

    if (depth < 2 && from > (int) TX_4X4) {
        const int cat = 2 * (TX_64X64 - t_dim->max) - depth;
        const int a = t->a->tx[bx4] < txw;
        const int l = t->l.tx[by4] < txh;

        is_split = dav1d_msac_decode_bool_adapt(&t->ts->msac,
                       t->ts->cdf.m.txpart[cat][a + l]);
        if (is_split)
            masks[depth] |= 1 << (y_off * 4 + x_off);
    } else {
        is_split = 0;
    }

    if (is_split && t_dim->max > TX_8X8) {
        const enum RectTxfmSize sub = t_dim->sub;
        const TxfmInfo *const sub_t_dim = &dav1d_txfm_dimensions[sub];
        const int txsw = sub_t_dim->w, txsh = sub_t_dim->h;

        read_tx_tree(t, sub, depth + 1, masks, x_off * 2 + 0, y_off * 2 + 0);
        t->bx += txsw;
        if (txw >= txh && t->bx < f->bw)
            read_tx_tree(t, sub, depth + 1, masks, x_off * 2 + 1, y_off * 2 + 0);
        t->bx -= txsw;
        t->by += txsh;
        if (txh >= txw && t->by < f->bh) {
            read_tx_tree(t, sub, depth + 1, masks, x_off * 2 + 0, y_off * 2 + 1);
            t->bx += txsw;
            if (txw >= txh && t->bx < f->bw)
                read_tx_tree(t, sub, depth + 1, masks,
                             x_off * 2 + 1, y_off * 2 + 1);
            t->bx -= txsw;
        }
        t->by -= txsh;
    } else {
#define set_ctx(type, dir, diridx, off, mul, rep_macro) \
        rep_macro(type, t->dir tx, off, is_split ? TX_4X4 : mul * txh)
        case_set_upto16(t_dim->h, l., 1, by4);
#undef set_ctx
#define set_ctx(type, dir, diridx, off, mul, rep_macro) \
        rep_macro(type, t->dir tx, off, is_split ? TX_4X4 : mul * txw)
        case_set_upto16(t_dim->w, a->, 0, bx4);
#undef set_ctx
    }
}

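// Inverse of the "neg-interleave" mapping used to code segment ids: rebuild
// the actual segment id from the coded diff and the predicted id.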
static int neg_deinterleave(int diff, int ref, int max) {
    if (!ref) return diff;
    if (ref >= (max - 1)) return max - diff - 1;
    if (2 * ref < max) {
        if (diff <= 2 * ref) {
            if (diff & 1)
                return ref + ((diff + 1) >> 1);
            else
                return ref - (diff >> 1);
        }
        return diff;
    } else {
        if (diff <= 2 * (max - ref - 1)) {
            if (diff & 1)
                return ref + ((diff + 1) >> 1);
            else
                return ref - (diff >> 1);
        }
        return max - (diff + 1);
    }
}

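// Scan the top and left neighbour blocks (plus top-left/top-right) for up to
// 8 blocks that use single-reference prediction from `ref`, recording their
// positions as bit masks in masks[0] (top) and masks[1] (left).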
static void find_matching_ref(const Dav1dTaskContext *const t,
                              const enum EdgeFlags intra_edge_flags,
                              const int bw4, const int bh4,
                              const int w4, const int h4,
                              const int have_left, const int have_top,
                              const int ref, uint64_t masks[2])
{
    /*const*/ refmvs_block *const *r = &t->rt.r[(t->by & 31) + 5];
    int count = 0;
    int have_topleft = have_top && have_left;
    int have_topright = imax(bw4, bh4) < 32 &&
                        have_top && t->bx + bw4 < t->ts->tiling.col_end &&
                        (intra_edge_flags & EDGE_I444_TOP_HAS_RIGHT);

#define bs(rp) dav1d_block_dimensions[(rp)->bs]
#define matches(rp) ((rp)->ref.ref[0] == ref + 1 && (rp)->ref.ref[1] == -1)

    if (have_top) {
        const refmvs_block *r2 = &r[-1][t->bx];
        if (matches(r2)) {
            masks[0] |= 1;
            count = 1;
        }
        int aw4 = bs(r2)[0];
        if (aw4 >= bw4) {
            const int off = t->bx & (aw4 - 1);
            if (off) have_topleft = 0;
            if (aw4 - off > bw4) have_topright = 0;
        } else {
            unsigned mask = 1 << aw4;
            for (int x = aw4; x < w4; x += aw4) {
                r2 += aw4;
                if (matches(r2)) {
                    masks[0] |= mask;
                    if (++count >= 8) return;
                }
                aw4 = bs(r2)[0];
                mask <<= aw4;
            }
        }
    }
    if (have_left) {
        /*const*/ refmvs_block *const *r2 = r;
        if (matches(&r2[0][t->bx - 1])) {
            masks[1] |= 1;
            if (++count >= 8) return;
        }
        int lh4 = bs(&r2[0][t->bx - 1])[1];
        if (lh4 >= bh4) {
            if (t->by & (lh4 - 1)) have_topleft = 0;
        } else {
            unsigned mask = 1 << lh4;
            for (int y = lh4; y < h4; y += lh4) {
                r2 += lh4;
                if (matches(&r2[0][t->bx - 1])) {
                    masks[1] |= mask;
                    if (++count >= 8) return;
                }
                lh4 = bs(&r2[0][t->bx - 1])[1];
                mask <<= lh4;
            }
        }
    }
    if (have_topleft && matches(&r[-1][t->bx - 1])) {
        masks[1] |= 1ULL << 32;
        if (++count >= 8) return;
    }
    if (have_topright && matches(&r[-1][t->bx + bw4]))
        masks[0] |= 1ULL << 32;
#undef matches
}

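// Fit a local warp model from the neighbour MV samples selected via masks[]
// (see find_matching_ref()); falls back to IDENTITY when no valid affine
// parameters can be derived.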
static void derive_warpmv(const Dav1dTaskContext *const t,
                          const int bw4, const int bh4,
                          const uint64_t masks[2], const union mv mv,
                          Dav1dWarpedMotionParams *const wmp)
{
    int pts[8][2 /* in, out */][2 /* x, y */], np = 0;
    /*const*/ refmvs_block *const *r = &t->rt.r[(t->by & 31) + 5];

#define add_sample(dx, dy, sx, sy, rp) do { \
    pts[np][0][0] = 16 * (2 * dx + sx * bs(rp)[0]) - 8; \
    pts[np][0][1] = 16 * (2 * dy + sy * bs(rp)[1]) - 8; \
    pts[np][1][0] = pts[np][0][0] + (rp)->mv.mv[0].x; \
    pts[np][1][1] = pts[np][0][1] + (rp)->mv.mv[0].y; \
    np++; \
} while (0)

    // use masks[] to find the projectable motion vectors in the edges
    if ((unsigned) masks[0] == 1 && !(masks[1] >> 32)) {
        const int off = t->bx & (bs(&r[-1][t->bx])[0] - 1);
        add_sample(-off, 0, 1, -1, &r[-1][t->bx]);
    } else for (unsigned off = 0, xmask = (uint32_t) masks[0]; np < 8 && xmask;) { // top
        const int tz = ctz(xmask);
        off += tz;
        xmask >>= tz;
        add_sample(off, 0, 1, -1, &r[-1][t->bx + off]);
        xmask &= ~1;
    }
    if (np < 8 && masks[1] == 1) {
        const int off = t->by & (bs(&r[0][t->bx - 1])[1] - 1);
        add_sample(0, -off, -1, 1, &r[-off][t->bx - 1]);
    } else for (unsigned off = 0, ymask = (uint32_t) masks[1]; np < 8 && ymask;) { // left
        const int tz = ctz(ymask);
        off += tz;
        ymask >>= tz;
        add_sample(0, off, -1, 1, &r[off][t->bx - 1]);
        ymask &= ~1;
    }
    if (np < 8 && masks[1] >> 32) // top/left
        add_sample(0, 0, -1, -1, &r[-1][t->bx - 1]);
    if (np < 8 && masks[0] >> 32) // top/right
        add_sample(bw4, 0, 1, -1, &r[-1][t->bx + bw4]);
    assert(np > 0 && np <= 8);
#undef bs

    // select according to motion vector difference against a threshold
    int mvd[8], ret = 0;
    const int thresh = 4 * iclip(imax(bw4, bh4), 4, 28);
    for (int i = 0; i < np; i++) {
        mvd[i] = abs(pts[i][1][0] - pts[i][0][0] - mv.x) +
                 abs(pts[i][1][1] - pts[i][0][1] - mv.y);
        if (mvd[i] > thresh)
            mvd[i] = -1;
        else
            ret++;
    }
    if (!ret) {
        ret = 1;
    } else for (int i = 0, j = np - 1, k = 0; k < np - ret; k++, i++, j--) {
        while (mvd[i] != -1) i++;
        while (mvd[j] == -1) j--;
        assert(i != j);
        if (i > j) break;
        // replace the discarded samples;
        mvd[i] = mvd[j];
        memcpy(pts[i], pts[j], sizeof(*pts));
    }

    if (!dav1d_find_affine_int(pts, ret, bw4, bh4, mv, wmp, t->bx, t->by) &&
        !dav1d_get_shear_params(wmp))
    {
        wmp->type = DAV1D_WM_TYPE_AFFINE;
    } else
        wmp->type = DAV1D_WM_TYPE_IDENTITY;
}

static inline int findoddzero(const uint8_t *buf, int len) {
    for (int n = 0; n < len; n++)
        if (!buf[n * 2]) return 1;
    return 0;
}

// meant to be SIMD'able, so that theoretical complexity of this function
// times block size goes from w4*h4 to w4+h4-1
// a and b are previous two lines containing (a) top/left entries or (b)
// top/left entries, with a[0] being either the first top or first left entry,
// depending on top_offset being 1 or 0, and b being the first top/left entry
// for whichever has one. left_offset indicates whether the (len-1)th entry
// has a left neighbour.
// output is order[] and ctx for each member of this diagonal.
static void order_palette(const uint8_t *pal_idx, const ptrdiff_t stride,
                          const int i, const int first, const int last,
                          uint8_t (*const order)[8], uint8_t *const ctx)
{
    int have_top = i > first;

    assert(pal_idx);
    pal_idx += first + (i - first) * stride;
    for (int j = first, n = 0; j >= last; have_top = 1, j--, n++, pal_idx += stride - 1) {
        const int have_left = j > 0;

        assert(have_left || have_top);

#define add(v_in) do { \
        const int v = v_in; \
        assert((unsigned)v < 8U); \
        order[n][o_idx++] = v; \
        mask |= 1 << v; \
    } while (0)

        unsigned mask = 0;
        int o_idx = 0;
        if (!have_left) {
            ctx[n] = 0;
            add(pal_idx[-stride]);
        } else if (!have_top) {
            ctx[n] = 0;
            add(pal_idx[-1]);
        } else {
            const int l = pal_idx[-1], t = pal_idx[-stride], tl = pal_idx[-(stride + 1)];
            const int same_t_l = t == l;
            const int same_t_tl = t == tl;
            const int same_l_tl = l == tl;
            const int same_all = same_t_l & same_t_tl & same_l_tl;

            if (same_all) {
                ctx[n] = 4;
                add(t);
            } else if (same_t_l) {
                ctx[n] = 3;
                add(t);
                add(tl);
            } else if (same_t_tl | same_l_tl) {
                ctx[n] = 2;
                add(tl);
                add(same_t_tl ? l : t);
            } else {
                ctx[n] = 1;
                add(imin(t, l));
                add(imax(t, l));
                add(tl);
            }
        }
        for (unsigned m = 1, bit = 0; m < 0x100; m <<= 1, bit++)
            if (!(mask & m))
                order[n][o_idx++] = bit;
        assert(o_idx == 8);
#undef add
    }
}

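// Decode the palette index map of one plane along top/left-to-bottom/right
// diagonals (see order_palette()), then repack it via pal_idx_finish().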
static void read_pal_indices(Dav1dTaskContext *const t,
                             uint8_t *const pal_idx,
                             const Av1Block *const b, const int pl,
                             const int w4, const int h4,
                             const int bw4, const int bh4)
{
    Dav1dTileState *const ts = t->ts;
    const ptrdiff_t stride = bw4 * 4;
    assert(pal_idx);
    uint8_t *const pal_tmp = t->scratch.pal_idx_uv;
    pal_tmp[0] = dav1d_msac_decode_uniform(&ts->msac, b->pal_sz[pl]);
    uint16_t (*const color_map_cdf)[8] =
        ts->cdf.m.color_map[pl][b->pal_sz[pl] - 2];
    uint8_t (*const order)[8] = t->scratch.pal_order;
    uint8_t *const ctx = t->scratch.pal_ctx;
    for (int i = 1; i < 4 * (w4 + h4) - 1; i++) {
        // top/left-to-bottom/right diagonals ("wave-front")
        const int first = imin(i, w4 * 4 - 1);
        const int last = imax(0, i - h4 * 4 + 1);
        order_palette(pal_tmp, stride, i, first, last, order, ctx);
        for (int j = first, m = 0; j >= last; j--, m++) {
            const int color_idx = dav1d_msac_decode_symbol_adapt8(&ts->msac,
                                      color_map_cdf[ctx[m]], b->pal_sz[pl] - 1);
            pal_tmp[(i - j) * stride + j] = order[m][color_idx];
        }
    }

    t->c->pal_dsp.pal_idx_finish(pal_idx, pal_tmp, bw4 * 4, bh4 * 4,
                                 w4 * 4, h4 * 4);
}

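// Parse the var-tx split tree, or derive the implicit transform sizes for
// skipped and lossless blocks, storing the result in b->max_ytx, b->uvtx
// and the packed tx_split0/tx_split1 masks.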
static void read_vartx_tree(Dav1dTaskContext *const t,
                            Av1Block *const b, const enum BlockSize bs,
                            const int bx4, const int by4)
{
    const Dav1dFrameContext *const f = t->f;
    const uint8_t *const b_dim = dav1d_block_dimensions[bs];
    const int bw4 = b_dim[0], bh4 = b_dim[1];

    // var-tx tree coding
    uint16_t tx_split[2] = { 0 };
    b->max_ytx = dav1d_max_txfm_size_for_bs[bs][0];
    if (!b->skip && (f->frame_hdr->segmentation.lossless[b->seg_id] ||
                     b->max_ytx == TX_4X4))
    {
        b->max_ytx = b->uvtx = TX_4X4;
        if (f->frame_hdr->txfm_mode == DAV1D_TX_SWITCHABLE) {
#define set_ctx(type, dir, diridx, off, mul, rep_macro) \
            rep_macro(type, t->dir tx, off, TX_4X4)
            case_set(bh4, l., 1, by4);
            case_set(bw4, a->, 0, bx4);
#undef set_ctx
        }
    } else if (f->frame_hdr->txfm_mode != DAV1D_TX_SWITCHABLE || b->skip) {
        if (f->frame_hdr->txfm_mode == DAV1D_TX_SWITCHABLE) {
#define set_ctx(type, dir, diridx, off, mul, rep_macro) \
            rep_macro(type, t->dir tx, off, mul * b_dim[2 + diridx])
            case_set(bh4, l., 1, by4);
            case_set(bw4, a->, 0, bx4);
#undef set_ctx
        }
        b->uvtx = dav1d_max_txfm_size_for_bs[bs][f->cur.p.layout];
    } else {
        assert(bw4 <= 16 || bh4 <= 16 || b->max_ytx == TX_64X64);
        int y, x, y_off, x_off;
        const TxfmInfo *const ytx = &dav1d_txfm_dimensions[b->max_ytx];
        for (y = 0, y_off = 0; y < bh4; y += ytx->h, y_off++) {
            for (x = 0, x_off = 0; x < bw4; x += ytx->w, x_off++) {
                read_tx_tree(t, b->max_ytx, 0, tx_split, x_off, y_off);
                // contexts are updated inside read_tx_tree()
                t->bx += ytx->w;
            }
            t->bx -= x;
            t->by += ytx->h;
        }
        t->by -= y;
        if (DEBUG_BLOCK_INFO)
            printf("Post-vartxtree[%x/%x]: r=%d\n",
                   tx_split[0], tx_split[1], t->ts->msac.rng);
        b->uvtx = dav1d_max_txfm_size_for_bs[bs][f->cur.p.layout];
    }
    assert(!(tx_split[0] & ~0x33));
    b->tx_split0 = (uint8_t)tx_split[0];
    b->tx_split1 = tx_split[1];
}

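// Return the minimum segment id in the colocated w4*h4 area of the previous
// frame's segmentation map.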
static inline unsigned get_prev_frame_segid(const Dav1dFrameContext *const f,
                                            const int by, const int bx,
                                            const int w4, int h4,
                                            const uint8_t *ref_seg_map,
                                            const ptrdiff_t stride)
{
    assert(f->frame_hdr->primary_ref_frame != DAV1D_PRIMARY_REF_NONE);

    unsigned seg_id = 8;
    ref_seg_map += by * stride + bx;
    do {
        for (int x = 0; x < w4; x++)
            seg_id = imin(seg_id, ref_seg_map[x]);
        ref_seg_map += stride;
    } while (--h4 > 0 && seg_id);
    assert(seg_id < 8);

    return seg_id;
}

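// The splat_*() helpers below replicate a single refmvs_block entry across
// the block's area in the refmvs grid.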
static inline void splat_oneref_mv(const Dav1dContext *const c,
                                   Dav1dTaskContext *const t,
                                   const enum BlockSize bs,
                                   const Av1Block *const b,
                                   const int bw4, const int bh4)
{
    const enum InterPredMode mode = b->inter_mode;
    const refmvs_block ALIGN(tmpl, 16) = (refmvs_block) {
        .ref.ref = { b->ref[0] + 1, b->interintra_type ? 0 : -1 },
        .mv.mv[0] = b->mv[0],
        .bs = bs,
        .mf = (mode == GLOBALMV && imin(bw4, bh4) >= 2) | ((mode == NEWMV) * 2),
    };
    c->refmvs_dsp.splat_mv(&t->rt.r[(t->by & 31) + 5], &tmpl, t->bx, bw4, bh4);
}

static inline void splat_intrabc_mv(const Dav1dContext *const c,
                                    Dav1dTaskContext *const t,
                                    const enum BlockSize bs,
                                    const Av1Block *const b,
                                    const int bw4, const int bh4)
{
    const refmvs_block ALIGN(tmpl, 16) = (refmvs_block) {
        .ref.ref = { 0, -1 },
        .mv.mv[0] = b->mv[0],
        .bs = bs,
        .mf = 0,
    };
    c->refmvs_dsp.splat_mv(&t->rt.r[(t->by & 31) + 5], &tmpl, t->bx, bw4, bh4);
}

static inline void splat_tworef_mv(const Dav1dContext *const c,
                                   Dav1dTaskContext *const t,
                                   const enum BlockSize bs,
                                   const Av1Block *const b,
                                   const int bw4, const int bh4)
{
    assert(bw4 >= 2 && bh4 >= 2);
    const enum CompInterPredMode mode = b->inter_mode;
    const refmvs_block ALIGN(tmpl, 16) = (refmvs_block) {
        .ref.ref = { b->ref[0] + 1, b->ref[1] + 1 },
        .mv.mv = { b->mv[0], b->mv[1] },
        .bs = bs,
        .mf = (mode == GLOBALMV_GLOBALMV) | !!((1 << mode) & (0xbc)) * 2,
    };
    c->refmvs_dsp.splat_mv(&t->rt.r[(t->by & 31) + 5], &tmpl, t->bx, bw4, bh4);
}

static inline void splat_intraref(const Dav1dContext *const c,
                                  Dav1dTaskContext *const t,
                                  const enum BlockSize bs,
                                  const int bw4, const int bh4)
{
    const refmvs_block ALIGN(tmpl, 16) = (refmvs_block) {
        .ref.ref = { 0, -1 },
        .mv.mv[0].n = INVALID_MV,
        .bs = bs,
        .mf = 0,
    };
    c->refmvs_dsp.splat_mv(&t->rt.r[(t->by & 31) + 5], &tmpl, t->bx, bw4, bh4);
}

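// Raise *dst to the lowest reference-frame row that plain MC for this block
// may touch, accounting for the stepping of scaled references.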
static void mc_lowest_px(int *const dst, const int by4, const int bh4,
                         const int mvy, const int ss_ver,
                         const struct ScalableMotionParams *const smp)
{
    const int v_mul = 4 >> ss_ver;
    if (!smp->scale) {
        const int my = mvy >> (3 + ss_ver), dy = mvy & (15 >> !ss_ver);
        *dst = imax(*dst, (by4 + bh4) * v_mul + my + 4 * !!dy);
    } else {
        int y = (by4 * v_mul << 4) + mvy * (1 << !ss_ver);
        const int64_t tmp = (int64_t)(y) * smp->scale + (smp->scale - 0x4000) * 8;
        y = apply_sign64((int)((llabs(tmp) + 128) >> 8), tmp) + 32;
        const int bottom = ((y + (bh4 * v_mul - 1) * smp->step) >> 10) + 1 + 4;
        *dst = imax(*dst, bottom);
    }
}

static ALWAYS_INLINE void affine_lowest_px(Dav1dTaskContext *const t, int *const dst,
                                           const uint8_t *const b_dim,
                                           const Dav1dWarpedMotionParams *const wmp,
                                           const int ss_ver, const int ss_hor)
{
    const int h_mul = 4 >> ss_hor, v_mul = 4 >> ss_ver;
    assert(!((b_dim[0] * h_mul) & 7) && !((b_dim[1] * v_mul) & 7));
    const int32_t *const mat = wmp->matrix;
    const int y = b_dim[1] * v_mul - 8; // lowest y

    const int src_y = t->by * 4 + ((y + 4) << ss_ver);
    const int64_t mat5_y = (int64_t) mat[5] * src_y + mat[1];
    // check left- and right-most blocks
    for (int x = 0; x < b_dim[0] * h_mul; x += imax(8, b_dim[0] * h_mul - 8)) {
        // calculate transformation relative to center of 8x8 block in
        // luma pixel units
        const int src_x = t->bx * 4 + ((x + 4) << ss_hor);
        const int64_t mvy = ((int64_t) mat[4] * src_x + mat5_y) >> ss_ver;
        const int dy = (int) (mvy >> 16) - 4;
        *dst = imax(*dst, dy + 4 + 8);
    }
}

static NOINLINE void affine_lowest_px_luma(Dav1dTaskContext *const t, int *const dst,
                                           const uint8_t *const b_dim,
                                           const Dav1dWarpedMotionParams *const wmp)
{
    affine_lowest_px(t, dst, b_dim, wmp, 0, 0);
}

static NOINLINE void affine_lowest_px_chroma(Dav1dTaskContext *const t, int *const dst,
                                             const uint8_t *const b_dim,
                                             const Dav1dWarpedMotionParams *const wmp)
{
    const Dav1dFrameContext *const f = t->f;
    assert(f->cur.p.layout != DAV1D_PIXEL_LAYOUT_I400);
    if (f->cur.p.layout == DAV1D_PIXEL_LAYOUT_I444)
        affine_lowest_px_luma(t, dst, b_dim, wmp);
    else
        affine_lowest_px(t, dst, b_dim, wmp, f->cur.p.layout & DAV1D_PIXEL_LAYOUT_I420, 1);
}

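// As above, but for OBMC: checks the overlapping top/left neighbour blocks
// and raises the per-reference lowest-pixel limits they require.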
static void obmc_lowest_px(Dav1dTaskContext *const t,
                           int (*const dst)[2], const int is_chroma,
                           const uint8_t *const b_dim,
                           const int bx4, const int by4, const int w4, const int h4)
{
    assert(!(t->bx & 1) && !(t->by & 1));
    const Dav1dFrameContext *const f = t->f;
    /*const*/ refmvs_block **r = &t->rt.r[(t->by & 31) + 5];
    const int ss_ver = is_chroma && f->cur.p.layout == DAV1D_PIXEL_LAYOUT_I420;
    const int ss_hor = is_chroma && f->cur.p.layout != DAV1D_PIXEL_LAYOUT_I444;
    const int h_mul = 4 >> ss_hor, v_mul = 4 >> ss_ver;

    if (t->by > t->ts->tiling.row_start &&
        (!is_chroma || b_dim[0] * h_mul + b_dim[1] * v_mul >= 16))
    {
        for (int i = 0, x = 0; x < w4 && i < imin(b_dim[2], 4); ) {
            // only odd blocks are considered for overlap handling, hence +1
            const refmvs_block *const a_r = &r[-1][t->bx + x + 1];
            const uint8_t *const a_b_dim = dav1d_block_dimensions[a_r->bs];

            if (a_r->ref.ref[0] > 0) {
                const int oh4 = imin(b_dim[1], 16) >> 1;
                mc_lowest_px(&dst[a_r->ref.ref[0] - 1][is_chroma], t->by,
                             (oh4 * 3 + 3) >> 2, a_r->mv.mv[0].y, ss_ver,
                             &f->svc[a_r->ref.ref[0] - 1][1]);
                i++;
            }
            x += imax(a_b_dim[0], 2);
        }
    }

    if (t->bx > t->ts->tiling.col_start)
        for (int i = 0, y = 0; y < h4 && i < imin(b_dim[3], 4); ) {
            // only odd blocks are considered for overlap handling, hence +1
            const refmvs_block *const l_r = &r[y + 1][t->bx - 1];
            const uint8_t *const l_b_dim = dav1d_block_dimensions[l_r->bs];

            if (l_r->ref.ref[0] > 0) {
                const int oh4 = iclip(l_b_dim[1], 2, b_dim[1]);
                mc_lowest_px(&dst[l_r->ref.ref[0] - 1][is_chroma],
                             t->by + y, oh4, l_r->mv.mv[0].y, ss_ver,
                             &f->svc[l_r->ref.ref[0] - 1][1]);
                i++;
            }
            y += imax(l_b_dim[1], 2);
        }
}

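// Parse one block: segmentation, skip, delta-q/lf, intra/inter modes,
// palette, motion vectors and the transform tree; outside of frame-threading
// pass 1 it also triggers reconstruction. Returns 0 on success, -1 on error.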
static int decode_b(Dav1dTaskContext *const t,
                    const enum BlockLevel bl,
                    const enum BlockSize bs,
                    const enum BlockPartition bp,
                    const enum EdgeFlags intra_edge_flags)
{
    Dav1dTileState *const ts = t->ts;
    const Dav1dFrameContext *const f = t->f;
    Av1Block b_mem, *const b = t->frame_thread.pass ?
        &f->frame_thread.b[t->by * f->b4_stride + t->bx] : &b_mem;
    const uint8_t *const b_dim = dav1d_block_dimensions[bs];
    const int bx4 = t->bx & 31, by4 = t->by & 31;
    const int ss_ver = f->cur.p.layout == DAV1D_PIXEL_LAYOUT_I420;
    const int ss_hor = f->cur.p.layout != DAV1D_PIXEL_LAYOUT_I444;
    const int cbx4 = bx4 >> ss_hor, cby4 = by4 >> ss_ver;
    const int bw4 = b_dim[0], bh4 = b_dim[1];
    const int w4 = imin(bw4, f->bw - t->bx), h4 = imin(bh4, f->bh - t->by);
    const int cbw4 = (bw4 + ss_hor) >> ss_hor, cbh4 = (bh4 + ss_ver) >> ss_ver;
    const int have_left = t->bx > ts->tiling.col_start;
    const int have_top = t->by > ts->tiling.row_start;
    const int has_chroma = f->cur.p.layout != DAV1D_PIXEL_LAYOUT_I400 &&
                           (bw4 > ss_hor || t->bx & 1) &&
                           (bh4 > ss_ver || t->by & 1);

    if (t->frame_thread.pass == 2) {
        if (b->intra) {
            f->bd_fn.recon_b_intra(t, bs, intra_edge_flags, b);

            const enum IntraPredMode y_mode_nofilt =
                b->y_mode == FILTER_PRED ? DC_PRED : b->y_mode;
#define set_ctx(type, dir, diridx, off, mul, rep_macro) \
            rep_macro(type, t->dir mode, off, mul * y_mode_nofilt); \
            rep_macro(type, t->dir intra, off, mul)
            case_set(bh4, l., 1, by4);
            case_set(bw4, a->, 0, bx4);
#undef set_ctx
            if (IS_INTER_OR_SWITCH(f->frame_hdr)) {
                refmvs_block *const r = &t->rt.r[(t->by & 31) + 5 + bh4 - 1][t->bx];
                for (int x = 0; x < bw4; x++) {
                    r[x].ref.ref[0] = 0;
                    r[x].bs = bs;
                }
                refmvs_block *const *rr = &t->rt.r[(t->by & 31) + 5];
                for (int y = 0; y < bh4 - 1; y++) {
                    rr[y][t->bx + bw4 - 1].ref.ref[0] = 0;
                    rr[y][t->bx + bw4 - 1].bs = bs;
                }
            }

            if (has_chroma) {
#define set_ctx(type, dir, diridx, off, mul, rep_macro) \
                rep_macro(type, t->dir uvmode, off, mul * b->uv_mode)
                case_set(cbh4, l., 1, cby4);
                case_set(cbw4, a->, 0, cbx4);
#undef set_ctx
            }
        } else {
            if (IS_INTER_OR_SWITCH(f->frame_hdr) /* not intrabc */ &&
                b->comp_type == COMP_INTER_NONE && b->motion_mode == MM_WARP)
            {
                if (b->matrix[0] == SHRT_MIN) {
                    t->warpmv.type = DAV1D_WM_TYPE_IDENTITY;
                } else {
                    t->warpmv.type = DAV1D_WM_TYPE_AFFINE;
                    t->warpmv.matrix[2] = b->matrix[0] + 0x10000;
                    t->warpmv.matrix[3] = b->matrix[1];
                    t->warpmv.matrix[4] = b->matrix[2];
                    t->warpmv.matrix[5] = b->matrix[3] + 0x10000;
                    dav1d_set_affine_mv2d(bw4, bh4, b->mv2d, &t->warpmv,
                                          t->bx, t->by);
                    dav1d_get_shear_params(&t->warpmv);
#define signabs(v) v < 0 ? '-' : ' ', abs(v)
                    if (DEBUG_BLOCK_INFO)
                        printf("[ %c%x %c%x %c%x\n %c%x %c%x %c%x ]\n"
                               "alpha=%c%x, beta=%c%x, gamma=%c%x, delta=%c%x, mv=y:%d,x:%d\n",
                               signabs(t->warpmv.matrix[0]),
                               signabs(t->warpmv.matrix[1]),
                               signabs(t->warpmv.matrix[2]),
                               signabs(t->warpmv.matrix[3]),
                               signabs(t->warpmv.matrix[4]),
                               signabs(t->warpmv.matrix[5]),
                               signabs(t->warpmv.u.p.alpha),
                               signabs(t->warpmv.u.p.beta),
                               signabs(t->warpmv.u.p.gamma),
                               signabs(t->warpmv.u.p.delta),
                               b->mv2d.y, b->mv2d.x);
#undef signabs
                }
            }
            if (f->bd_fn.recon_b_inter(t, bs, b)) return -1;

            const uint8_t *const filter = dav1d_filter_dir[b->filter2d];
#define set_ctx(type, dir, diridx, off, mul, rep_macro) \
            rep_macro(type, t->dir filter[0], off, mul * filter[0]); \
            rep_macro(type, t->dir filter[1], off, mul * filter[1]); \
            rep_macro(type, t->dir intra, off, 0)
            case_set(bh4, l., 1, by4);
            case_set(bw4, a->, 0, bx4);
#undef set_ctx

            if (IS_INTER_OR_SWITCH(f->frame_hdr)) {
                refmvs_block *const r = &t->rt.r[(t->by & 31) + 5 + bh4 - 1][t->bx];
                for (int x = 0; x < bw4; x++) {
                    r[x].ref.ref[0] = b->ref[0] + 1;
                    r[x].mv.mv[0] = b->mv[0];
                    r[x].bs = bs;
                }
                refmvs_block *const *rr = &t->rt.r[(t->by & 31) + 5];
                for (int y = 0; y < bh4 - 1; y++) {
                    rr[y][t->bx + bw4 - 1].ref.ref[0] = b->ref[0] + 1;
                    rr[y][t->bx + bw4 - 1].mv.mv[0] = b->mv[0];
                    rr[y][t->bx + bw4 - 1].bs = bs;
                }
            }

            if (has_chroma) {
#define set_ctx(type, dir, diridx, off, mul, rep_macro) \
                rep_macro(type, t->dir uvmode, off, mul * DC_PRED)
                case_set(cbh4, l., 1, cby4);
                case_set(cbw4, a->, 0, cbx4);
#undef set_ctx
            }
        }
        return 0;
    }

    const int cw4 = (w4 + ss_hor) >> ss_hor, ch4 = (h4 + ss_ver) >> ss_ver;

    b->bl = bl;
    b->bp = bp;
    b->bs = bs;

    const Dav1dSegmentationData *seg = NULL;

    // segment_id (if seg_feature for skip/ref/gmv is enabled)
    int seg_pred = 0;
    if (f->frame_hdr->segmentation.enabled) {
        if (!f->frame_hdr->segmentation.update_map) {
            if (f->prev_segmap) {
                unsigned seg_id = get_prev_frame_segid(f, t->by, t->bx, w4, h4,
                                                       f->prev_segmap,
                                                       f->b4_stride);
                if (seg_id >= 8) return -1;
                b->seg_id = seg_id;
            } else {
                b->seg_id = 0;
            }
            seg = &f->frame_hdr->segmentation.seg_data.d[b->seg_id];
        } else if (f->frame_hdr->segmentation.seg_data.preskip) {
            if (f->frame_hdr->segmentation.temporal &&
                (seg_pred = dav1d_msac_decode_bool_adapt(&ts->msac,
                                ts->cdf.m.seg_pred[t->a->seg_pred[bx4] +
                                t->l.seg_pred[by4]])))
            {
                // temporal predicted seg_id
                if (f->prev_segmap) {
                    unsigned seg_id = get_prev_frame_segid(f, t->by, t->bx,
                                                           w4, h4,
                                                           f->prev_segmap,
                                                           f->b4_stride);
                    if (seg_id >= 8) return -1;
                    b->seg_id = seg_id;
                } else {
                    b->seg_id = 0;
                }
            } else {
                int seg_ctx;
                const unsigned pred_seg_id =
                    get_cur_frame_segid(t->by, t->bx, have_top, have_left,
                                        &seg_ctx, f->cur_segmap, f->b4_stride);
                const unsigned diff = dav1d_msac_decode_symbol_adapt8(&ts->msac,
                                          ts->cdf.m.seg_id[seg_ctx],
                                          DAV1D_MAX_SEGMENTS - 1);
                const unsigned last_active_seg_id =
                    f->frame_hdr->segmentation.seg_data.last_active_segid;
                b->seg_id = neg_deinterleave(diff, pred_seg_id,
                                             last_active_seg_id + 1);
                if (b->seg_id > last_active_seg_id) b->seg_id = 0; // error?
                if (b->seg_id >= DAV1D_MAX_SEGMENTS) b->seg_id = 0; // error?
            }

            if (DEBUG_BLOCK_INFO)
                printf("Post-segid[preskip;%d]: r=%d\n",
                       b->seg_id, ts->msac.rng);

            seg = &f->frame_hdr->segmentation.seg_data.d[b->seg_id];
        }
    } else {
        b->seg_id = 0;
    }

    // skip_mode
    if ((!seg || (!seg->globalmv && seg->ref == -1 && !seg->skip)) &&
        f->frame_hdr->skip_mode_enabled && imin(bw4, bh4) > 1)
    {
        const int smctx = t->a->skip_mode[bx4] + t->l.skip_mode[by4];
        b->skip_mode = dav1d_msac_decode_bool_adapt(&ts->msac,
                           ts->cdf.m.skip_mode[smctx]);
        if (DEBUG_BLOCK_INFO)
            printf("Post-skipmode[%d]: r=%d\n", b->skip_mode, ts->msac.rng);
    } else {
        b->skip_mode = 0;
    }

    // skip
    if (b->skip_mode || (seg && seg->skip)) {
        b->skip = 1;
    } else {
        const int sctx = t->a->skip[bx4] + t->l.skip[by4];
        b->skip = dav1d_msac_decode_bool_adapt(&ts->msac, ts->cdf.m.skip[sctx]);
        if (DEBUG_BLOCK_INFO)
            printf("Post-skip[%d]: r=%d\n", b->skip, ts->msac.rng);
    }

    // segment_id
    if (f->frame_hdr->segmentation.enabled &&
        f->frame_hdr->segmentation.update_map &&
        !f->frame_hdr->segmentation.seg_data.preskip)
    {
        if (!b->skip && f->frame_hdr->segmentation.temporal &&
            (seg_pred = dav1d_msac_decode_bool_adapt(&ts->msac,
                            ts->cdf.m.seg_pred[t->a->seg_pred[bx4] +
                            t->l.seg_pred[by4]])))
        {
            // temporal predicted seg_id
            if (f->prev_segmap) {
                unsigned seg_id = get_prev_frame_segid(f, t->by, t->bx, w4, h4,
                                                       f->prev_segmap,
                                                       f->b4_stride);
                if (seg_id >= 8) return -1;
                b->seg_id = seg_id;
            } else {
                b->seg_id = 0;
            }
        } else {
            int seg_ctx;
            const unsigned pred_seg_id =
                get_cur_frame_segid(t->by, t->bx, have_top, have_left,
                                    &seg_ctx, f->cur_segmap, f->b4_stride);
            if (b->skip) {
                b->seg_id = pred_seg_id;
            } else {
                const unsigned diff = dav1d_msac_decode_symbol_adapt8(&ts->msac,
                                          ts->cdf.m.seg_id[seg_ctx],
                                          DAV1D_MAX_SEGMENTS - 1);
                const unsigned last_active_seg_id =
                    f->frame_hdr->segmentation.seg_data.last_active_segid;
                b->seg_id = neg_deinterleave(diff, pred_seg_id,
                                             last_active_seg_id + 1);
                if (b->seg_id > last_active_seg_id) b->seg_id = 0; // error?
            }
            if (b->seg_id >= DAV1D_MAX_SEGMENTS) b->seg_id = 0; // error?
        }

        seg = &f->frame_hdr->segmentation.seg_data.d[b->seg_id];

        if (DEBUG_BLOCK_INFO)
            printf("Post-segid[postskip;%d]: r=%d\n",
                   b->seg_id, ts->msac.rng);
    }

    // cdef index
    if (!b->skip) {
        const int idx = f->seq_hdr->sb128 ? ((t->bx & 16) >> 4) +
                                            ((t->by & 16) >> 3) : 0;
        if (t->cur_sb_cdef_idx_ptr[idx] == -1) {
            const int v = dav1d_msac_decode_bools(&ts->msac,
                              f->frame_hdr->cdef.n_bits);
            t->cur_sb_cdef_idx_ptr[idx] = v;
            if (bw4 > 16) t->cur_sb_cdef_idx_ptr[idx + 1] = v;
            if (bh4 > 16) t->cur_sb_cdef_idx_ptr[idx + 2] = v;
            if (bw4 == 32 && bh4 == 32) t->cur_sb_cdef_idx_ptr[idx + 3] = v;
        }
        if (DEBUG_BLOCK_INFO)
            printf("Post-cdef_idx[%d]: r=%d\n",
                   *t->cur_sb_cdef_idx_ptr, ts->msac.rng);
    }

    // delta-q/lf
    if (!(t->bx & (31 >> !f->seq_hdr->sb128)) &&
        !(t->by & (31 >> !f->seq_hdr->sb128)))
    {
        const int prev_qidx = ts->last_qidx;
        const int have_delta_q = f->frame_hdr->delta.q.present &&
            (bs != (f->seq_hdr->sb128 ? BS_128x128 : BS_64x64) || !b->skip);

        uint32_t prev_delta_lf = ts->last_delta_lf.u32;

        if (have_delta_q) {
            int delta_q = dav1d_msac_decode_symbol_adapt4(&ts->msac,
                              ts->cdf.m.delta_q, 3);
            if (delta_q == 3) {
                const int n_bits = 1 + dav1d_msac_decode_bools(&ts->msac, 3);
                delta_q = dav1d_msac_decode_bools(&ts->msac, n_bits) +
                          1 + (1 << n_bits);
            }
            if (delta_q) {
                if (dav1d_msac_decode_bool_equi(&ts->msac)) delta_q = -delta_q;
                delta_q *= 1 << f->frame_hdr->delta.q.res_log2;
            }
            ts->last_qidx = iclip(ts->last_qidx + delta_q, 1, 255);
            if (have_delta_q && DEBUG_BLOCK_INFO)
                printf("Post-delta_q[%d->%d]: r=%d\n",
                       delta_q, ts->last_qidx, ts->msac.rng);

            if (f->frame_hdr->delta.lf.present) {
                const int n_lfs = f->frame_hdr->delta.lf.multi ?
                    f->cur.p.layout != DAV1D_PIXEL_LAYOUT_I400 ? 4 : 2 : 1;

                for (int i = 0; i < n_lfs; i++) {
                    int delta_lf = dav1d_msac_decode_symbol_adapt4(&ts->msac,
                        ts->cdf.m.delta_lf[i + f->frame_hdr->delta.lf.multi], 3);
                    if (delta_lf == 3) {
                        const int n_bits = 1 + dav1d_msac_decode_bools(&ts->msac, 3);
                        delta_lf = dav1d_msac_decode_bools(&ts->msac, n_bits) +
                                   1 + (1 << n_bits);
                    }
                    if (delta_lf) {
                        if (dav1d_msac_decode_bool_equi(&ts->msac))
                            delta_lf = -delta_lf;
                        delta_lf *= 1 << f->frame_hdr->delta.lf.res_log2;
                    }
                    ts->last_delta_lf.i8[i] =
                        iclip(ts->last_delta_lf.i8[i] + delta_lf, -63, 63);
                    if (have_delta_q && DEBUG_BLOCK_INFO)
                        printf("Post-delta_lf[%d:%d]: r=%d\n", i, delta_lf,
                               ts->msac.rng);
                }
            }
        }
        if (ts->last_qidx == f->frame_hdr->quant.yac) {
            // assign frame-wide q values to this sb
            ts->dq = f->dq;
        } else if (ts->last_qidx != prev_qidx) {
            // find sb-specific quant parameters
            init_quant_tables(f->seq_hdr, f->frame_hdr, ts->last_qidx, ts->dqmem);
            ts->dq = ts->dqmem;
        }
        if (!ts->last_delta_lf.u32) {
            // assign frame-wide lf values to this sb
            ts->lflvl = f->lf.lvl;
        } else if (ts->last_delta_lf.u32 != prev_delta_lf) {
            // find sb-specific lf lvl parameters
            ts->lflvl = ts->lflvlmem;
            dav1d_calc_lf_values(ts->lflvlmem, f->frame_hdr, ts->last_delta_lf.i8);
        }
    }

    if (b->skip_mode) {
        b->intra = 0;
    } else if (IS_INTER_OR_SWITCH(f->frame_hdr)) {
        if (seg && (seg->ref >= 0 || seg->globalmv)) {
            b->intra = !seg->ref;
        } else {
            const int ictx = get_intra_ctx(t->a, &t->l, by4, bx4,
                                           have_top, have_left);
            b->intra = !dav1d_msac_decode_bool_adapt(&ts->msac,
                           ts->cdf.m.intra[ictx]);
            if (DEBUG_BLOCK_INFO)
                printf("Post-intra[%d]: r=%d\n", b->intra, ts->msac.rng);
        }
    } else if (f->frame_hdr->allow_intrabc) {
        b->intra = !dav1d_msac_decode_bool_adapt(&ts->msac, ts->cdf.m.intrabc);
        if (DEBUG_BLOCK_INFO)
            printf("Post-intrabcflag[%d]: r=%d\n", b->intra, ts->msac.rng);
    } else {
        b->intra = 1;
    }

    // intra/inter-specific stuff
    if (b->intra) {
        uint16_t *const ymode_cdf = IS_INTER_OR_SWITCH(f->frame_hdr) ?
            ts->cdf.m.y_mode[dav1d_ymode_size_context[bs]] :
            ts->cdf.kfym[dav1d_intra_mode_context[t->a->mode[bx4]]]
                        [dav1d_intra_mode_context[t->l.mode[by4]]];
        b->y_mode = dav1d_msac_decode_symbol_adapt16(&ts->msac, ymode_cdf,
                                                     N_INTRA_PRED_MODES - 1);
        if (DEBUG_BLOCK_INFO)
            printf("Post-ymode[%d]: r=%d\n", b->y_mode, ts->msac.rng);

        // angle delta
        if (b_dim[2] + b_dim[3] >= 2 && b->y_mode >= VERT_PRED &&
            b->y_mode <= VERT_LEFT_PRED)
        {
            uint16_t *const acdf = ts->cdf.m.angle_delta[b->y_mode - VERT_PRED];
            const int angle = dav1d_msac_decode_symbol_adapt8(&ts->msac, acdf, 6);
            b->y_angle = angle - 3;
        } else {
            b->y_angle = 0;
        }

        if (has_chroma) {
            const int cfl_allowed = f->frame_hdr->segmentation.lossless[b->seg_id] ?
                cbw4 == 1 && cbh4 == 1 : !!(cfl_allowed_mask & (1 << bs));
            uint16_t *const uvmode_cdf = ts->cdf.m.uv_mode[cfl_allowed][b->y_mode];
            b->uv_mode = dav1d_msac_decode_symbol_adapt16(&ts->msac, uvmode_cdf,
                             N_UV_INTRA_PRED_MODES - 1 - !cfl_allowed);
            if (DEBUG_BLOCK_INFO)
                printf("Post-uvmode[%d]: r=%d\n", b->uv_mode, ts->msac.rng);

            b->uv_angle = 0;
            if (b->uv_mode == CFL_PRED) {
#define SIGN(a) (!!(a) + ((a) > 0))
                const int sign = dav1d_msac_decode_symbol_adapt8(&ts->msac,
                                     ts->cdf.m.cfl_sign, 7) + 1;
                const int sign_u = sign * 0x56 >> 8, sign_v = sign - sign_u * 3;
                assert(sign_u == sign / 3);
                if (sign_u) {
                    const int ctx = (sign_u == 2) * 3 + sign_v;
                    b->cfl_alpha[0] = dav1d_msac_decode_symbol_adapt16(&ts->msac,
                                          ts->cdf.m.cfl_alpha[ctx], 15) + 1;
                    if (sign_u == 1) b->cfl_alpha[0] = -b->cfl_alpha[0];
                } else {
                    b->cfl_alpha[0] = 0;
                }
                if (sign_v) {
                    const int ctx = (sign_v == 2) * 3 + sign_u;
                    b->cfl_alpha[1] = dav1d_msac_decode_symbol_adapt16(&ts->msac,
                                          ts->cdf.m.cfl_alpha[ctx], 15) + 1;
                    if (sign_v == 1) b->cfl_alpha[1] = -b->cfl_alpha[1];
                } else {
                    b->cfl_alpha[1] = 0;
                }
#undef SIGN
                if (DEBUG_BLOCK_INFO)
                    printf("Post-uvalphas[%d/%d]: r=%d\n",
                           b->cfl_alpha[0], b->cfl_alpha[1], ts->msac.rng);
            } else if (b_dim[2] + b_dim[3] >= 2 && b->uv_mode >= VERT_PRED &&
                       b->uv_mode <= VERT_LEFT_PRED)
            {
                uint16_t *const acdf = ts->cdf.m.angle_delta[b->uv_mode - VERT_PRED];
                const int angle = dav1d_msac_decode_symbol_adapt8(&ts->msac, acdf, 6);
                b->uv_angle = angle - 3;
            }
        }

        b->pal_sz[0] = b->pal_sz[1] = 0;
        if (f->frame_hdr->allow_screen_content_tools &&
            imax(bw4, bh4) <= 16 && bw4 + bh4 >= 4)
        {
            const int sz_ctx = b_dim[2] + b_dim[3] - 2;
            if (b->y_mode == DC_PRED) {
                const int pal_ctx = (t->a->pal_sz[bx4] > 0) + (t->l.pal_sz[by4] > 0);
                const int use_y_pal = dav1d_msac_decode_bool_adapt(&ts->msac,
                                          ts->cdf.m.pal_y[sz_ctx][pal_ctx]);
                if (DEBUG_BLOCK_INFO)
                    printf("Post-y_pal[%d]: r=%d\n", use_y_pal, ts->msac.rng);
                if (use_y_pal)
                    f->bd_fn.read_pal_plane(t, b, 0, sz_ctx, bx4, by4);
            }

            if (has_chroma && b->uv_mode == DC_PRED) {
                const int pal_ctx = b->pal_sz[0] > 0;
                const int use_uv_pal = dav1d_msac_decode_bool_adapt(&ts->msac,
                                           ts->cdf.m.pal_uv[pal_ctx]);
                if (DEBUG_BLOCK_INFO)
                    printf("Post-uv_pal[%d]: r=%d\n", use_uv_pal, ts->msac.rng);
                if (use_uv_pal) // see aomedia bug 2183 for why we use luma coordinates
                    f->bd_fn.read_pal_uv(t, b, sz_ctx, bx4, by4);
            }
        }

        if (b->y_mode == DC_PRED && !b->pal_sz[0] &&
            imax(b_dim[2], b_dim[3]) <= 3 && f->seq_hdr->filter_intra)
        {
            const int is_filter = dav1d_msac_decode_bool_adapt(&ts->msac,
                                      ts->cdf.m.use_filter_intra[bs]);
            if (is_filter) {
                b->y_mode = FILTER_PRED;
                b->y_angle = dav1d_msac_decode_symbol_adapt4(&ts->msac,
                                 ts->cdf.m.filter_intra, 4);
            }
            if (DEBUG_BLOCK_INFO)
                printf("Post-filterintramode[%d/%d]: r=%d\n",
                       b->y_mode, b->y_angle, ts->msac.rng);
        }

        if (b->pal_sz[0]) {
            uint8_t *pal_idx;
            if (t->frame_thread.pass) {
                const int p = t->frame_thread.pass & 1;
                assert(ts->frame_thread[p].pal_idx);
                pal_idx = ts->frame_thread[p].pal_idx;
                ts->frame_thread[p].pal_idx += bw4 * bh4 * 8;
            } else
                pal_idx = t->scratch.pal_idx_y;
            read_pal_indices(t, pal_idx, b, 0, w4, h4, bw4, bh4);
            if (DEBUG_BLOCK_INFO)
                printf("Post-y-pal-indices: r=%d\n", ts->msac.rng);
        }

        if (has_chroma && b->pal_sz[1]) {
            uint8_t *pal_idx;
            if (t->frame_thread.pass) {
                const int p = t->frame_thread.pass & 1;
                assert(ts->frame_thread[p].pal_idx);
                pal_idx = ts->frame_thread[p].pal_idx;
                ts->frame_thread[p].pal_idx += cbw4 * cbh4 * 8;
            } else
                pal_idx = t->scratch.pal_idx_uv;
            read_pal_indices(t, pal_idx, b, 1, cw4, ch4, cbw4, cbh4);
            if (DEBUG_BLOCK_INFO)
                printf("Post-uv-pal-indices: r=%d\n", ts->msac.rng);
        }

        const TxfmInfo *t_dim;
        if (f->frame_hdr->segmentation.lossless[b->seg_id]) {
            b->tx = b->uvtx = (int) TX_4X4;
            t_dim = &dav1d_txfm_dimensions[TX_4X4];
        } else {
            b->tx = dav1d_max_txfm_size_for_bs[bs][0];
            b->uvtx = dav1d_max_txfm_size_for_bs[bs][f->cur.p.layout];
            t_dim = &dav1d_txfm_dimensions[b->tx];
            if (f->frame_hdr->txfm_mode == DAV1D_TX_SWITCHABLE && t_dim->max > TX_4X4) {
                const int tctx = get_tx_ctx(t->a, &t->l, t_dim, by4, bx4);
                uint16_t *const tx_cdf = ts->cdf.m.txsz[t_dim->max - 1][tctx];
                int depth = dav1d_msac_decode_symbol_adapt4(&ts->msac, tx_cdf,
                                imin(t_dim->max, 2));

                while (depth--) {
                    b->tx = t_dim->sub;
                    t_dim = &dav1d_txfm_dimensions[b->tx];
                }
            }
            if (DEBUG_BLOCK_INFO)
                printf("Post-tx[%d]: r=%d\n", b->tx, ts->msac.rng);
        }

        // reconstruction
        if (t->frame_thread.pass == 1) {
            f->bd_fn.read_coef_blocks(t, bs, b);
        } else {
            f->bd_fn.recon_b_intra(t, bs, intra_edge_flags, b);
        }

        if (f->frame_hdr->loopfilter.level_y[0] ||
            f->frame_hdr->loopfilter.level_y[1])
        {
            dav1d_create_lf_mask_intra(t->lf_mask, f->lf.level, f->b4_stride,
                                       (const uint8_t (*)[8][2])
                                       &ts->lflvl[b->seg_id][0][0][0],
                                       t->bx, t->by, f->w4, f->h4, bs,
                                       b->tx, b->uvtx, f->cur.p.layout,
                                       &t->a->tx_lpf_y[bx4], &t->l.tx_lpf_y[by4],
                                       has_chroma ? &t->a->tx_lpf_uv[cbx4] : NULL,
                                       has_chroma ? &t->l.tx_lpf_uv[cby4] : NULL);
        }

        // update contexts
#define set_ctx(type, dir, diridx, off, mul, rep_macro) \
        rep_macro(type, t->dir tx_intra, off, mul * (((uint8_t *) &t_dim->lw)[diridx])); \
        rep_macro(type, t->dir tx, off, mul * (((uint8_t *) &t_dim->lw)[diridx])); \
        rep_macro(type, t->dir mode, off, mul * y_mode_nofilt); \
        rep_macro(type, t->dir pal_sz, off, mul * b->pal_sz[0]); \
        rep_macro(type, t->dir seg_pred, off, mul * seg_pred); \
        rep_macro(type, t->dir skip_mode, off, 0); \
        rep_macro(type, t->dir intra, off, mul); \
        rep_macro(type, t->dir skip, off, mul * b->skip); \
        /* see aomedia bug 2183 for why we use luma coordinates here */ \
        rep_macro(type, t->pal_sz_uv[diridx], off, mul * (has_chroma ? b->pal_sz[1] : 0)); \
        if (IS_INTER_OR_SWITCH(f->frame_hdr)) { \
            rep_macro(type, t->dir comp_type, off, mul * COMP_INTER_NONE); \
            rep_macro(type, t->dir ref[0], off, mul * ((uint8_t) -1)); \
            rep_macro(type, t->dir ref[1], off, mul * ((uint8_t) -1)); \
            rep_macro(type, t->dir filter[0], off, mul * DAV1D_N_SWITCHABLE_FILTERS); \
            rep_macro(type, t->dir filter[1], off, mul * DAV1D_N_SWITCHABLE_FILTERS); \
        }
        const enum IntraPredMode y_mode_nofilt =
            b->y_mode == FILTER_PRED ? DC_PRED : b->y_mode;
        case_set(bh4, l., 1, by4);
        case_set(bw4, a->, 0, bx4);
#undef set_ctx
        if (b->pal_sz[0])
            f->bd_fn.copy_pal_block_y(t, bx4, by4, bw4, bh4);
        if (has_chroma) {
#define set_ctx(type, dir, diridx, off, mul, rep_macro) \
            rep_macro(type, t->dir uvmode, off, mul * b->uv_mode)
            case_set(cbh4, l., 1, cby4);
            case_set(cbw4, a->, 0, cbx4);
#undef set_ctx
            if (b->pal_sz[1])
                f->bd_fn.copy_pal_block_uv(t, bx4, by4, bw4, bh4);
        }
        if (IS_INTER_OR_SWITCH(f->frame_hdr) || f->frame_hdr->allow_intrabc)
            splat_intraref(f->c, t, bs, bw4, bh4);
    } else if (IS_KEY_OR_INTRA(f->frame_hdr)) {
        // intra block copy
        refmvs_candidate mvstack[8];
        int n_mvs, ctx;
        dav1d_refmvs_find(&t->rt, mvstack, &n_mvs, &ctx,
                          (union refmvs_refpair) { .ref = { 0, -1 }},
                          bs, intra_edge_flags, t->by, t->bx);

        if (mvstack[0].mv.mv[0].n)
            b->mv[0] = mvstack[0].mv.mv[0];
        else if (mvstack[1].mv.mv[0].n)
            b->mv[0] = mvstack[1].mv.mv[0];
        else {
            if (t->by - (16 << f->seq_hdr->sb128) < ts->tiling.row_start) {
                b->mv[0].y = 0;
                b->mv[0].x = -(512 << f->seq_hdr->sb128) - 2048;
            } else {
                b->mv[0].y = -(512 << f->seq_hdr->sb128);
                b->mv[0].x = 0;
            }
        }

        const union mv ref = b->mv[0];
        read_mv_residual(ts, &b->mv[0], -1);

        // clip intrabc motion vector to decoded parts of current tile
        int border_left = ts->tiling.col_start * 4;
        int border_top = ts->tiling.row_start * 4;
        if (has_chroma) {
            if (bw4 < 2 && ss_hor)
                border_left += 4;
            if (bh4 < 2 && ss_ver)
                border_top += 4;
        }
        int src_left = t->bx * 4 + (b->mv[0].x >> 3);
        int src_top = t->by * 4 + (b->mv[0].y >> 3);
        int src_right = src_left + bw4 * 4;
        int src_bottom = src_top + bh4 * 4;
        const int border_right = ((ts->tiling.col_end + (bw4 - 1)) & ~(bw4 - 1)) * 4;

        // check against left or right tile boundary and adjust if necessary
        if (src_left < border_left) {
            src_right += border_left - src_left;
            src_left += border_left - src_left;
        } else if (src_right > border_right) {
            src_left -= src_right - border_right;
            src_right -= src_right - border_right;
        }
        // check against top tile boundary and adjust if necessary
        if (src_top < border_top) {
            src_bottom += border_top - src_top;
            src_top += border_top - src_top;
        }

        const int sbx = (t->bx >> (4 + f->seq_hdr->sb128)) << (6 + f->seq_hdr->sb128);
        const int sby = (t->by >> (4 + f->seq_hdr->sb128)) << (6 + f->seq_hdr->sb128);
        const int sb_size = 1 << (6 + f->seq_hdr->sb128);
        // check for overlap with current superblock
        if (src_bottom > sby && src_right > sbx) {
            if (src_top - border_top >= src_bottom - sby) {
                // if possible move src up into the previous superblock row
                src_top -= src_bottom - sby;
                src_bottom -= src_bottom - sby;
            } else if (src_left - border_left >= src_right - sbx) {
                // if possible move src left into the previous superblock
                src_left -= src_right - sbx;
                src_right -= src_right - sbx;
            }
        }
        // move src up if it is below current superblock row
        if (src_bottom > sby + sb_size) {
            src_top -= src_bottom - (sby + sb_size);
            src_bottom -= src_bottom - (sby + sb_size);
        }
        // error out if mv still overlaps with the current superblock
        if (src_bottom > sby && src_right > sbx)
            return -1;

        b->mv[0].x = (src_left - t->bx * 4) * 8;
        b->mv[0].y = (src_top - t->by * 4) * 8;

        if (DEBUG_BLOCK_INFO)
            printf("Post-dmv[%d/%d,ref=%d/%d|%d/%d]: r=%d\n",
                   b->mv[0].y, b->mv[0].x, ref.y, ref.x,
                   mvstack[0].mv.mv[0].y, mvstack[0].mv.mv[0].x, ts->msac.rng);
        read_vartx_tree(t, b, bs, bx4, by4);

        // reconstruction
        if (t->frame_thread.pass == 1) {
            f->bd_fn.read_coef_blocks(t, bs, b);
            b->filter2d = FILTER_2D_BILINEAR;
        } else {
            if (f->bd_fn.recon_b_inter(t, bs, b)) return -1;
        }

        splat_intrabc_mv(f->c, t, bs, b, bw4, bh4);

#define set_ctx(type, dir, diridx, off, mul, rep_macro) \
        rep_macro(type, t->dir tx_intra, off, mul * b_dim[2 + diridx]); \
        rep_macro(type, t->dir mode, off, mul * DC_PRED); \
        rep_macro(type, t->dir pal_sz, off, 0); \
        /* see aomedia bug 2183 for why this is outside if (has_chroma) */ \
        rep_macro(type, t->pal_sz_uv[diridx], off, 0); \
        rep_macro(type, t->dir seg_pred, off, mul * seg_pred); \
        rep_macro(type, t->dir skip_mode, off, 0); \
        rep_macro(type, t->dir intra, off, 0); \
        rep_macro(type, t->dir skip, off, mul * b->skip)
        case_set(bh4, l., 1, by4);
        case_set(bw4, a->, 0, bx4);
#undef set_ctx
        if (has_chroma) {
#define set_ctx(type, dir, diridx, off, mul, rep_macro) \
            rep_macro(type, t->dir uvmode, off, mul * DC_PRED)
            case_set(cbh4, l., 1, cby4);
            case_set(cbw4, a->, 0, cbx4);
#undef set_ctx
        }
    } else {
        // inter-specific mode/mv coding
        int is_comp, has_subpel_filter;

        if (b->skip_mode) {
            is_comp = 1;
        } else if ((!seg || (seg->ref == -1 && !seg->globalmv && !seg->skip)) &&
                   f->frame_hdr->switchable_comp_refs && imin(bw4, bh4) > 1)
        {
            const int ctx = get_comp_ctx(t->a, &t->l, by4, bx4,
                                         have_top, have_left);
            is_comp = dav1d_msac_decode_bool_adapt(&ts->msac,
                          ts->cdf.m.comp[ctx]);
            if (DEBUG_BLOCK_INFO)
                printf("Post-compflag[%d]: r=%d\n", is_comp, ts->msac.rng);
        } else {
            is_comp = 0;
        }

        if (b->skip_mode) {
            b->ref[0] = f->frame_hdr->skip_mode_refs[0];
            b->ref[1] = f->frame_hdr->skip_mode_refs[1];
            b->comp_type = COMP_INTER_AVG;
            b->inter_mode = NEARESTMV_NEARESTMV;
            b->drl_idx = NEAREST_DRL;
            has_subpel_filter = 0;

            refmvs_candidate mvstack[8];
            int n_mvs, ctx;
            dav1d_refmvs_find(&t->rt, mvstack, &n_mvs, &ctx,
                              (union refmvs_refpair) { .ref = {
                                  b->ref[0] + 1, b->ref[1] + 1 }},
                              bs, intra_edge_flags, t->by, t->bx);

            b->mv[0] = mvstack[0].mv.mv[0];
            b->mv[1] = mvstack[0].mv.mv[1];
            fix_mv_precision(f->frame_hdr, &b->mv[0]);
            fix_mv_precision(f->frame_hdr, &b->mv[1]);
            if (DEBUG_BLOCK_INFO)
                printf("Post-skipmodeblock[mv=1:y=%d,x=%d,2:y=%d,x=%d,refs=%d+%d\n",
                       b->mv[0].y, b->mv[0].x, b->mv[1].y, b->mv[1].x,
                       b->ref[0], b->ref[1]);
        } else if (is_comp) {
            const int dir_ctx = get_comp_dir_ctx(t->a, &t->l, by4, bx4,
                                                 have_top, have_left);
            if (dav1d_msac_decode_bool_adapt(&ts->msac,
                    ts->cdf.m.comp_dir[dir_ctx]))
            {
                // bidir - first reference (fw)
                const int ctx1 = av1_get_fwd_ref_ctx(t->a, &t->l, by4, bx4,
                                                     have_top, have_left);
                if (dav1d_msac_decode_bool_adapt(&ts->msac,
                        ts->cdf.m.comp_fwd_ref[0][ctx1]))
                {
                    const int ctx2 = av1_get_fwd_ref_2_ctx(t->a, &t->l, by4, bx4,
                                                           have_top, have_left);
                    b->ref[0] = 2 + dav1d_msac_decode_bool_adapt(&ts->msac,
                                        ts->cdf.m.comp_fwd_ref[2][ctx2]);
                } else {
                    const int ctx2 = av1_get_fwd_ref_1_ctx(t->a, &t->l, by4, bx4,
                                                           have_top, have_left);
                    b->ref[0] = dav1d_msac_decode_bool_adapt(&ts->msac,
                                    ts->cdf.m.comp_fwd_ref[1][ctx2]);
                }

                // second reference (bw)
                const int ctx3 = av1_get_bwd_ref_ctx(t->a, &t->l, by4, bx4,
                                                     have_top, have_left);
                if (dav1d_msac_decode_bool_adapt(&ts->msac,
                        ts->cdf.m.comp_bwd_ref[0][ctx3]))
                {
                    b->ref[1] = 6;
                } else {
                    const int ctx4 = av1_get_bwd_ref_1_ctx(t->a, &t->l, by4, bx4,
                                                           have_top, have_left);
                    b->ref[1] = 4 + dav1d_msac_decode_bool_adapt(&ts->msac,
                                        ts->cdf.m.comp_bwd_ref[1][ctx4]);
                }
            } else {
                // unidir
                const int uctx_p = av1_get_uni_p_ctx(t->a, &t->l, by4, bx4,
                                                     have_top, have_left);
                if (dav1d_msac_decode_bool_adapt(&ts->msac,
                        ts->cdf.m.comp_uni_ref[0][uctx_p]))
                {
                    b->ref[0] = 4;
                    b->ref[1] = 6;
                } else {
                    const int uctx_p1 = av1_get_uni_p1_ctx(t->a, &t->l, by4, bx4,
                                                           have_top, have_left);
                    b->ref[0] = 0;
                    b->ref[1] = 1 + dav1d_msac_decode_bool_adapt(&ts->msac,
                                        ts->cdf.m.comp_uni_ref[1][uctx_p1]);
                    if (b->ref[1] == 2) {
                        const int uctx_p2 = av1_get_uni_p2_ctx(t->a, &t->l, by4, bx4,
                                                               have_top, have_left);
                        b->ref[1] += dav1d_msac_decode_bool_adapt(&ts->msac,
                                         ts->cdf.m.comp_uni_ref[2][uctx_p2]);
                    }
                }
            }
            if (DEBUG_BLOCK_INFO)
                printf("Post-refs[%d/%d]: r=%d\n",
                       b->ref[0], b->ref[1], ts->msac.rng);

            refmvs_candidate mvstack[8];
            int n_mvs, ctx;
            dav1d_refmvs_find(&t->rt, mvstack, &n_mvs, &ctx,
                              (union refmvs_refpair) { .ref = {
                                  b->ref[0] + 1, b->ref[1] + 1 }},
                              bs, intra_edge_flags, t->by, t->bx);

            b->inter_mode = dav1d_msac_decode_symbol_adapt8(&ts->msac,
                                ts->cdf.m.comp_inter_mode[ctx],
                                N_COMP_INTER_PRED_MODES - 1);
            if (DEBUG_BLOCK_INFO)
                printf("Post-compintermode[%d,ctx=%d,n_mvs=%d]: r=%d\n",
                       b->inter_mode, ctx, n_mvs, ts->msac.rng);

            const uint8_t *const im = dav1d_comp_inter_pred_modes[b->inter_mode];
            b->drl_idx = NEAREST_DRL;
            if (b->inter_mode == NEWMV_NEWMV) {
                if (n_mvs > 1) { // NEARER, NEAR or NEARISH
                    const int drl_ctx_v1 = get_drl_context(mvstack, 0);
                    b->drl_idx += dav1d_msac_decode_bool_adapt(&ts->msac,
                                      ts->cdf.m.drl_bit[drl_ctx_v1]);
                    if (b->drl_idx == NEARER_DRL && n_mvs > 2) {
                        const int drl_ctx_v2 = get_drl_context(mvstack, 1);
                        b->drl_idx += dav1d_msac_decode_bool_adapt(&ts->msac,
                                          ts->cdf.m.drl_bit[drl_ctx_v2]);
                    }
                    if (DEBUG_BLOCK_INFO)
                        printf("Post-drlidx[%d,n_mvs=%d]: r=%d\n",
                               b->drl_idx, n_mvs, ts->msac.rng);
                }
            } else if (im[0] == NEARMV || im[1] == NEARMV) {
                b->drl_idx = NEARER_DRL;
                if (n_mvs > 2) { // NEAR or NEARISH
                    const int drl_ctx_v2 = get_drl_context(mvstack, 1);
                    b->drl_idx += dav1d_msac_decode_bool_adapt(&ts->msac,
                                      ts->cdf.m.drl_bit[drl_ctx_v2]);
                    if (b->drl_idx == NEAR_DRL && n_mvs > 3) {
                        const int drl_ctx_v3 = get_drl_context(mvstack, 2);
                        b->drl_idx += dav1d_msac_decode_bool_adapt(&ts->msac,
                                          ts->cdf.m.drl_bit[drl_ctx_v3]);
                    }
                    if (DEBUG_BLOCK_INFO)
                        printf("Post-drlidx[%d,n_mvs=%d]: r=%d\n",
                               b->drl_idx, n_mvs, ts->msac.rng);
                }
            }
            assert(b->drl_idx >= NEAREST_DRL && b->drl_idx <= NEARISH_DRL);

#define assign_comp_mv(idx) \
            switch (im[idx]) { \
            case NEARMV: \
            case NEARESTMV: \
                b->mv[idx] = mvstack[b->drl_idx].mv.mv[idx]; \
                fix_mv_precision(f->frame_hdr, &b->mv[idx]); \
                break; \
            case GLOBALMV: \
                has_subpel_filter |= \
                    f->frame_hdr->gmv[b->ref[idx]].type == DAV1D_WM_TYPE_TRANSLATION; \
                b->mv[idx] = get_gmv_2d(&f->frame_hdr->gmv[b->ref[idx]], \
                                        t->bx, t->by, bw4, bh4, f->frame_hdr); \
                break; \
            case NEWMV: \
                b->mv[idx] = mvstack[b->drl_idx].mv.mv[idx]; \
                const int mv_prec = f->frame_hdr->hp - f->frame_hdr->force_integer_mv; \
                read_mv_residual(ts, &b->mv[idx], mv_prec); \
                break; \
            }
            has_subpel_filter = imin(bw4, bh4) == 1 ||
                                b->inter_mode != GLOBALMV_GLOBALMV;
            assign_comp_mv(0);
            assign_comp_mv(1);
#undef assign_comp_mv
            if (DEBUG_BLOCK_INFO)
                printf("Post-residual_mv[1:y=%d,x=%d,2:y=%d,x=%d]: r=%d\n",
                       b->mv[0].y, b->mv[0].x, b->mv[1].y, b->mv[1].x,
                       ts->msac.rng);

1580 // jnt_comp vs. seg vs. wedge
1581 int is_segwedge = 0;
1582 if (f->seq_hdr->masked_compound) {
1583 const int mask_ctx = get_mask_comp_ctx(t->a, &t->l, by4, bx4);
1585 is_segwedge = dav1d_msac_decode_bool_adapt(&ts->msac,
1586 ts->cdf.m.mask_comp[mask_ctx]);
1587 if (DEBUG_BLOCK_INFO)
1588 printf("Post-segwedge_vs_jntavg[%d,ctx=%d]: r=%d\n",
1589 is_segwedge, mask_ctx, ts->msac.rng);
1592 if (!is_segwedge) {
1593 if (f->seq_hdr->jnt_comp) {
1594 const int jnt_ctx =
1595 get_jnt_comp_ctx(f->seq_hdr->order_hint_n_bits,
1596 f->cur.frame_hdr->frame_offset,
1597 f->refp[b->ref[0]].p.frame_hdr->frame_offset,
1598 f->refp[b->ref[1]].p.frame_hdr->frame_offset,
1599 t->a, &t->l, by4, bx4);
1600 b->comp_type = COMP_INTER_WEIGHTED_AVG +
1601 dav1d_msac_decode_bool_adapt(&ts->msac,
1602 ts->cdf.m.jnt_comp[jnt_ctx]);
1603 if (DEBUG_BLOCK_INFO)
1604 printf("Post-jnt_comp[%d,ctx=%d[ac:%d,ar:%d,lc:%d,lr:%d]]: r=%d\n",
1605 b->comp_type == COMP_INTER_AVG,
1606 jnt_ctx, t->a->comp_type[bx4], t->a->ref[0][bx4],
1607 t->l.comp_type[by4], t->l.ref[0][by4],
1608 ts->msac.rng);
1609 } else {
1610 b->comp_type = COMP_INTER_AVG;
1612 } else {
1613 if (wedge_allowed_mask & (1 << bs)) {
1614 const int ctx = dav1d_wedge_ctx_lut[bs];
1615 b->comp_type = COMP_INTER_WEDGE -
1616 dav1d_msac_decode_bool_adapt(&ts->msac,
1617 ts->cdf.m.wedge_comp[ctx]);
1618 if (b->comp_type == COMP_INTER_WEDGE)
1619 b->wedge_idx = dav1d_msac_decode_symbol_adapt16(&ts->msac,
1620 ts->cdf.m.wedge_idx[ctx], 15);
1621 } else {
1622 b->comp_type = COMP_INTER_SEG;
1624 b->mask_sign = dav1d_msac_decode_bool_equi(&ts->msac);
1625 if (DEBUG_BLOCK_INFO)
1626 printf("Post-seg/wedge[%d,wedge_idx=%d,sign=%d]: r=%d\n",
1627 b->comp_type == COMP_INTER_WEDGE,
1628 b->wedge_idx, b->mask_sign, ts->msac.rng);
1630 } else {
1631 b->comp_type = COMP_INTER_NONE;
1633 // ref
1634 if (seg && seg->ref > 0) {
1635 b->ref[0] = seg->ref - 1;
1636 } else if (seg && (seg->globalmv || seg->skip)) {
1637 b->ref[0] = 0;
1638 } else {
1639 const int ctx1 = av1_get_ref_ctx(t->a, &t->l, by4, bx4,
1640 have_top, have_left);
1641 if (dav1d_msac_decode_bool_adapt(&ts->msac,
1642 ts->cdf.m.ref[0][ctx1]))
1644 const int ctx2 = av1_get_ref_2_ctx(t->a, &t->l, by4, bx4,
1645 have_top, have_left);
1646 if (dav1d_msac_decode_bool_adapt(&ts->msac,
1647 ts->cdf.m.ref[1][ctx2]))
1649 b->ref[0] = 6;
1650 } else {
1651 const int ctx3 = av1_get_ref_6_ctx(t->a, &t->l, by4, bx4,
1652 have_top, have_left);
1653 b->ref[0] = 4 + dav1d_msac_decode_bool_adapt(&ts->msac,
1654 ts->cdf.m.ref[5][ctx3]);
1656 } else {
1657 const int ctx2 = av1_get_ref_3_ctx(t->a, &t->l, by4, bx4,
1658 have_top, have_left);
1659 if (dav1d_msac_decode_bool_adapt(&ts->msac,
1660 ts->cdf.m.ref[2][ctx2]))
1662 const int ctx3 = av1_get_ref_5_ctx(t->a, &t->l, by4, bx4,
1663 have_top, have_left);
1664 b->ref[0] = 2 + dav1d_msac_decode_bool_adapt(&ts->msac,
1665 ts->cdf.m.ref[4][ctx3]);
1666 } else {
1667 const int ctx3 = av1_get_ref_4_ctx(t->a, &t->l, by4, bx4,
1668 have_top, have_left);
1669 b->ref[0] = dav1d_msac_decode_bool_adapt(&ts->msac,
1670 ts->cdf.m.ref[3][ctx3]);
1673 if (DEBUG_BLOCK_INFO)
1674 printf("Post-ref[%d]: r=%d\n", b->ref[0], ts->msac.rng);
1676 b->ref[1] = -1;
1678 refmvs_candidate mvstack[8];
1679 int n_mvs, ctx;
1680 dav1d_refmvs_find(&t->rt, mvstack, &n_mvs, &ctx,
1681 (union refmvs_refpair) { .ref = { b->ref[0] + 1, -1 }},
1682 bs, intra_edge_flags, t->by, t->bx);
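// mvstack[] is ordered by decreasing candidate priority; the DRL
// (dynamic reference list) index then selects one of up to four entries
// (NEAREST/NEARER/NEAR/NEARISH = 0..3)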
1684 // mode parsing and mv derivation from ref_mvs
1685 if ((seg && (seg->skip || seg->globalmv)) ||
1686 dav1d_msac_decode_bool_adapt(&ts->msac,
1687 ts->cdf.m.newmv_mode[ctx & 7]))
1689 if ((seg && (seg->skip || seg->globalmv)) ||
1690 !dav1d_msac_decode_bool_adapt(&ts->msac,
1691 ts->cdf.m.globalmv_mode[(ctx >> 3) & 1]))
1693 b->inter_mode = GLOBALMV;
1694 b->mv[0] = get_gmv_2d(&f->frame_hdr->gmv[b->ref[0]],
1695 t->bx, t->by, bw4, bh4, f->frame_hdr);
1696 has_subpel_filter = imin(bw4, bh4) == 1 ||
1697 f->frame_hdr->gmv[b->ref[0]].type == DAV1D_WM_TYPE_TRANSLATION;
1698 } else {
1699 has_subpel_filter = 1;
1700 if (dav1d_msac_decode_bool_adapt(&ts->msac,
1701 ts->cdf.m.refmv_mode[(ctx >> 4) & 15]))
1702 { // NEAREST, NEARER, NEAR or NEARISH
1703 b->inter_mode = NEARMV;
1704 b->drl_idx = NEARER_DRL;
1705 if (n_mvs > 2) { // NEARER, NEAR or NEARISH
1706 const int drl_ctx_v2 = get_drl_context(mvstack, 1);
1707 b->drl_idx += dav1d_msac_decode_bool_adapt(&ts->msac,
1708 ts->cdf.m.drl_bit[drl_ctx_v2]);
1709 if (b->drl_idx == NEAR_DRL && n_mvs > 3) { // NEAR or NEARISH
1710 const int drl_ctx_v3 =
1711 get_drl_context(mvstack, 2);
1712 b->drl_idx += dav1d_msac_decode_bool_adapt(&ts->msac,
1713 ts->cdf.m.drl_bit[drl_ctx_v3]);
1716 } else {
1717 b->inter_mode = NEARESTMV;
1718 b->drl_idx = NEAREST_DRL;
1720 assert(b->drl_idx >= NEAREST_DRL && b->drl_idx <= NEARISH_DRL);
1721 b->mv[0] = mvstack[b->drl_idx].mv.mv[0];
1722 if (b->drl_idx < NEAR_DRL)
1723 fix_mv_precision(f->frame_hdr, &b->mv[0]);
1726 if (DEBUG_BLOCK_INFO)
1727 printf("Post-intermode[%d,drl=%d,mv=y:%d,x:%d,n_mvs=%d]: r=%d\n",
1728 b->inter_mode, b->drl_idx, b->mv[0].y, b->mv[0].x, n_mvs,
1729 ts->msac.rng);
1730 } else {
1731 has_subpel_filter = 1;
1732 b->inter_mode = NEWMV;
1733 b->drl_idx = NEAREST_DRL;
1734 if (n_mvs > 1) { // NEARER, NEAR or NEARISH
1735 const int drl_ctx_v1 = get_drl_context(mvstack, 0);
1736 b->drl_idx += dav1d_msac_decode_bool_adapt(&ts->msac,
1737 ts->cdf.m.drl_bit[drl_ctx_v1]);
1738 if (b->drl_idx == NEARER_DRL && n_mvs > 2) { // NEAR or NEARISH
1739 const int drl_ctx_v2 = get_drl_context(mvstack, 1);
1740 b->drl_idx += dav1d_msac_decode_bool_adapt(&ts->msac,
1741 ts->cdf.m.drl_bit[drl_ctx_v2]);
1744 assert(b->drl_idx >= NEAREST_DRL && b->drl_idx <= NEARISH_DRL);
1745 if (n_mvs > 1) {
1746 b->mv[0] = mvstack[b->drl_idx].mv.mv[0];
1747 } else {
1748 assert(!b->drl_idx);
1749 b->mv[0] = mvstack[0].mv.mv[0];
1750 fix_mv_precision(f->frame_hdr, &b->mv[0]);
1752 if (DEBUG_BLOCK_INFO)
1753 printf("Post-intermode[%d,drl=%d]: r=%d\n",
1754 b->inter_mode, b->drl_idx, ts->msac.rng);
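// mv_prec: 1 = 1/8-pel (allow_high_precision_mv), 0 = 1/4-pel,
// -1 = integer-only MVs (force_integer_mv)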
1755 const int mv_prec = f->frame_hdr->hp - f->frame_hdr->force_integer_mv;
1756 read_mv_residual(ts, &b->mv[0], mv_prec);
1757 if (DEBUG_BLOCK_INFO)
1758 printf("Post-residualmv[mv=y:%d,x:%d]: r=%d\n",
1759 b->mv[0].y, b->mv[0].x, ts->msac.rng);
1762 // interintra flags
1763 const int ii_sz_grp = dav1d_ymode_size_context[bs];
1764 if (f->seq_hdr->inter_intra &&
1765 interintra_allowed_mask & (1 << bs) &&
1766 dav1d_msac_decode_bool_adapt(&ts->msac,
1767 ts->cdf.m.interintra[ii_sz_grp]))
1769 b->interintra_mode = dav1d_msac_decode_symbol_adapt4(&ts->msac,
1770 ts->cdf.m.interintra_mode[ii_sz_grp],
1771 N_INTER_INTRA_PRED_MODES - 1);
1772 const int wedge_ctx = dav1d_wedge_ctx_lut[bs];
1773 b->interintra_type = INTER_INTRA_BLEND +
1774 dav1d_msac_decode_bool_adapt(&ts->msac,
1775 ts->cdf.m.interintra_wedge[wedge_ctx]);
1776 if (b->interintra_type == INTER_INTRA_WEDGE)
1777 b->wedge_idx = dav1d_msac_decode_symbol_adapt16(&ts->msac,
1778 ts->cdf.m.wedge_idx[wedge_ctx], 15);
1779 } else {
1780 b->interintra_type = INTER_INTRA_NONE;
1782 if (DEBUG_BLOCK_INFO && f->seq_hdr->inter_intra &&
1783 interintra_allowed_mask & (1 << bs))
1785 printf("Post-interintra[t=%d,m=%d,w=%d]: r=%d\n",
1786 b->interintra_type, b->interintra_mode,
1787 b->wedge_idx, ts->msac.rng);
1790 // motion variation
1791 if (f->frame_hdr->switchable_motion_mode &&
1792 b->interintra_type == INTER_INTRA_NONE && imin(bw4, bh4) >= 2 &&
1793 // is not warped global motion
1794 !(!f->frame_hdr->force_integer_mv && b->inter_mode == GLOBALMV &&
1795 f->frame_hdr->gmv[b->ref[0]].type > DAV1D_WM_TYPE_TRANSLATION) &&
1796 // has overlappable neighbours
1797 ((have_left && findoddzero(&t->l.intra[by4 + 1], h4 >> 1)) ||
1798 (have_top && findoddzero(&t->a->intra[bx4 + 1], w4 >> 1))))
1800 // reaching here means the block allows obmc - check warp by
1801 // finding matching-ref blocks in top/left edges
1802 uint64_t mask[2] = { 0, 0 };
1803 find_matching_ref(t, intra_edge_flags, bw4, bh4, w4, h4,
1804 have_left, have_top, b->ref[0], mask);
1805 const int allow_warp = !f->svc[b->ref[0]][0].scale &&
1806 !f->frame_hdr->force_integer_mv &&
1807 f->frame_hdr->warp_motion && (mask[0] | mask[1]);
1809 b->motion_mode = allow_warp ?
1810 dav1d_msac_decode_symbol_adapt4(&ts->msac,
1811 ts->cdf.m.motion_mode[bs], 2) :
1812 dav1d_msac_decode_bool_adapt(&ts->msac, ts->cdf.m.obmc[bs]);
1813 if (b->motion_mode == MM_WARP) {
1814 has_subpel_filter = 0;
1815 derive_warpmv(t, bw4, bh4, mask, b->mv[0], &t->warpmv);
1816 #define signabs(v) v < 0 ? '-' : ' ', abs(v)
1817 if (DEBUG_BLOCK_INFO)
1818 printf("[ %c%x %c%x %c%x\n %c%x %c%x %c%x ]\n"
1819 "alpha=%c%x, beta=%c%x, gamma=%c%x, delta=%c%x, "
1820 "mv=y:%d,x:%d\n",
1821 signabs(t->warpmv.matrix[0]),
1822 signabs(t->warpmv.matrix[1]),
1823 signabs(t->warpmv.matrix[2]),
1824 signabs(t->warpmv.matrix[3]),
1825 signabs(t->warpmv.matrix[4]),
1826 signabs(t->warpmv.matrix[5]),
1827 signabs(t->warpmv.u.p.alpha),
1828 signabs(t->warpmv.u.p.beta),
1829 signabs(t->warpmv.u.p.gamma),
1830 signabs(t->warpmv.u.p.delta),
1831 b->mv[0].y, b->mv[0].x);
1832 #undef signabs
1833 if (t->frame_thread.pass) {
1834 if (t->warpmv.type == DAV1D_WM_TYPE_AFFINE) {
1835 b->matrix[0] = t->warpmv.matrix[2] - 0x10000;
1836 b->matrix[1] = t->warpmv.matrix[3];
1837 b->matrix[2] = t->warpmv.matrix[4];
1838 b->matrix[3] = t->warpmv.matrix[5] - 0x10000;
1839 } else {
1840 b->matrix[0] = SHRT_MIN;
1845 if (DEBUG_BLOCK_INFO)
1846 printf("Post-motionmode[%d]: r=%d [mask: 0x%" PRIx64 "/0x%"
1847 PRIx64 "]\n", b->motion_mode, ts->msac.rng, mask[0],
1848 mask[1]);
1849 } else {
1850 b->motion_mode = MM_TRANSLATION;
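// motion_mode is one of MM_TRANSLATION (0), MM_OBMC (1) or MM_WARP (2);
// a 3-way symbol is read when warp is allowed, otherwise only an OBMC bool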
1854 // subpel filter
1855 enum Dav1dFilterMode filter[2];
1856 if (f->frame_hdr->subpel_filter_mode == DAV1D_FILTER_SWITCHABLE) {
1857 if (has_subpel_filter) {
1858 const int comp = b->comp_type != COMP_INTER_NONE;
1859 const int ctx1 = get_filter_ctx(t->a, &t->l, comp, 0, b->ref[0],
1860 by4, bx4);
1861 filter[0] = dav1d_msac_decode_symbol_adapt4(&ts->msac,
1862 ts->cdf.m.filter[0][ctx1],
1863 DAV1D_N_SWITCHABLE_FILTERS - 1);
1864 if (f->seq_hdr->dual_filter) {
1865 const int ctx2 = get_filter_ctx(t->a, &t->l, comp, 1,
1866 b->ref[0], by4, bx4);
1867 if (DEBUG_BLOCK_INFO)
1868 printf("Post-subpel_filter1[%d,ctx=%d]: r=%d\n",
1869 filter[0], ctx1, ts->msac.rng);
1870 filter[1] = dav1d_msac_decode_symbol_adapt4(&ts->msac,
1871 ts->cdf.m.filter[1][ctx2],
1872 DAV1D_N_SWITCHABLE_FILTERS - 1);
1873 if (DEBUG_BLOCK_INFO)
1874 printf("Post-subpel_filter2[%d,ctx=%d]: r=%d\n",
1875 filter[1], ctx2, ts->msac.rng);
1876 } else {
1877 filter[1] = filter[0];
1878 if (DEBUG_BLOCK_INFO)
1879 printf("Post-subpel_filter[%d,ctx=%d]: r=%d\n",
1880 filter[0], ctx1, ts->msac.rng);
1882 } else {
1883 filter[0] = filter[1] = DAV1D_FILTER_8TAP_REGULAR;
1885 } else {
1886 filter[0] = filter[1] = f->frame_hdr->subpel_filter_mode;
1888 b->filter2d = dav1d_filter_2d[filter[1]][filter[0]];
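// combine the two directional filters into dav1d's single 2D filter enum
// consumed by the motion compensation code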
1890 read_vartx_tree(t, b, bs, bx4, by4);
1892 // reconstruction
1893 if (t->frame_thread.pass == 1) {
1894 f->bd_fn.read_coef_blocks(t, bs, b);
1895 } else {
1896 if (f->bd_fn.recon_b_inter(t, bs, b)) return -1;
1899 if (f->frame_hdr->loopfilter.level_y[0] ||
1900 f->frame_hdr->loopfilter.level_y[1])
1902 const int is_globalmv =
1903 b->inter_mode == (is_comp ? GLOBALMV_GLOBALMV : GLOBALMV);
1904 const uint8_t (*const lf_lvls)[8][2] = (const uint8_t (*)[8][2])
1905 &ts->lflvl[b->seg_id][0][b->ref[0] + 1][!is_globalmv];
1906 const uint16_t tx_split[2] = { b->tx_split0, b->tx_split1 };
1907 enum RectTxfmSize ytx = b->max_ytx, uvtx = b->uvtx;
1908 if (f->frame_hdr->segmentation.lossless[b->seg_id]) {
1909 ytx = (enum RectTxfmSize) TX_4X4;
1910 uvtx = (enum RectTxfmSize) TX_4X4;
1912 dav1d_create_lf_mask_inter(t->lf_mask, f->lf.level, f->b4_stride, lf_lvls,
1913 t->bx, t->by, f->w4, f->h4, b->skip, bs,
1914 ytx, tx_split, uvtx, f->cur.p.layout,
1915 &t->a->tx_lpf_y[bx4], &t->l.tx_lpf_y[by4],
1916 has_chroma ? &t->a->tx_lpf_uv[cbx4] : NULL,
1917 has_chroma ? &t->l.tx_lpf_uv[cby4] : NULL);
1920 // context updates
1921 if (is_comp)
1922 splat_tworef_mv(f->c, t, bs, b, bw4, bh4);
1923 else
1924 splat_oneref_mv(f->c, t, bs, b, bw4, bh4);
1926 #define set_ctx(type, dir, diridx, off, mul, rep_macro) \
1927 rep_macro(type, t->dir seg_pred, off, mul * seg_pred); \
1928 rep_macro(type, t->dir skip_mode, off, mul * b->skip_mode); \
1929 rep_macro(type, t->dir intra, off, 0); \
1930 rep_macro(type, t->dir skip, off, mul * b->skip); \
1931 rep_macro(type, t->dir pal_sz, off, 0); \
1932 /* see aomedia bug 2183 for why this is outside if (has_chroma) */ \
1933 rep_macro(type, t->pal_sz_uv[diridx], off, 0); \
1934 rep_macro(type, t->dir tx_intra, off, mul * b_dim[2 + diridx]); \
1935 rep_macro(type, t->dir comp_type, off, mul * b->comp_type); \
1936 rep_macro(type, t->dir filter[0], off, mul * filter[0]); \
1937 rep_macro(type, t->dir filter[1], off, mul * filter[1]); \
1938 rep_macro(type, t->dir mode, off, mul * b->inter_mode); \
1939 rep_macro(type, t->dir ref[0], off, mul * b->ref[0]); \
1940 rep_macro(type, t->dir ref[1], off, mul * ((uint8_t) b->ref[1]))
1941 case_set(bh4, l., 1, by4);
1942 case_set(bw4, a->, 0, bx4);
1943 #undef set_ctx
1945 if (has_chroma) {
1946 #define set_ctx(type, dir, diridx, off, mul, rep_macro) \
1947 rep_macro(type, t->dir uvmode, off, mul * DC_PRED)
1948 case_set(cbh4, l., 1, cby4);
1949 case_set(cbw4, a->, 0, cbx4);
1950 #undef set_ctx
1954 // update contexts
1955 if (f->frame_hdr->segmentation.enabled &&
1956 f->frame_hdr->segmentation.update_map)
1958 uint8_t *seg_ptr = &f->cur_segmap[t->by * f->b4_stride + t->bx];
1959 #define set_ctx(type, dir, diridx, off, mul, rep_macro) \
1960 for (int y = 0; y < bh4; y++) { \
1961 rep_macro(type, seg_ptr, 0, mul * b->seg_id); \
1962 seg_ptr += f->b4_stride; \
1964 case_set(bw4, NULL, 0, 0);
1965 #undef set_ctx
1967 if (!b->skip) {
1968 uint16_t (*noskip_mask)[2] = &t->lf_mask->noskip_mask[by4 >> 1];
1969 const unsigned mask = (~0U >> (32 - bw4)) << (bx4 & 15);
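// e.g. bw4 == 8, bx4 == 4 -> mask == 0x0ff0: one bit per 4x4 column,
// within the 16-bit half selected by bx_idx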
1970 const int bx_idx = (bx4 & 16) >> 4;
1971 for (int y = 0; y < bh4; y += 2, noskip_mask++) {
1972 (*noskip_mask)[bx_idx] |= mask;
1973 if (bw4 == 32) // this should be mask >> 16, but it's 0xffffffff anyway
1974 (*noskip_mask)[1] |= mask;
1978 if (t->frame_thread.pass == 1 && !b->intra && IS_INTER_OR_SWITCH(f->frame_hdr)) {
1979 const int sby = (t->by - ts->tiling.row_start) >> f->sb_shift;
1980 int (*const lowest_px)[2] = ts->lowest_pixel[sby];
1982 // keep track of motion vectors for each reference
1983 if (b->comp_type == COMP_INTER_NONE) {
1984 // y
1985 if (imin(bw4, bh4) > 1 &&
1986 ((b->inter_mode == GLOBALMV && f->gmv_warp_allowed[b->ref[0]]) ||
1987 (b->motion_mode == MM_WARP && t->warpmv.type > DAV1D_WM_TYPE_TRANSLATION)))
1989 affine_lowest_px_luma(t, &lowest_px[b->ref[0]][0], b_dim,
1990 b->motion_mode == MM_WARP ? &t->warpmv :
1991 &f->frame_hdr->gmv[b->ref[0]]);
1992 } else {
1993 mc_lowest_px(&lowest_px[b->ref[0]][0], t->by, bh4, b->mv[0].y,
1994 0, &f->svc[b->ref[0]][1]);
1995 if (b->motion_mode == MM_OBMC) {
1996 obmc_lowest_px(t, lowest_px, 0, b_dim, bx4, by4, w4, h4);
2000 // uv
2001 if (has_chroma) {
2002 // sub8x8 derivation
2003 int is_sub8x8 = bw4 == ss_hor || bh4 == ss_ver;
2004 refmvs_block *const *r;
2005 if (is_sub8x8) {
2006 assert(ss_hor == 1);
2007 r = &t->rt.r[(t->by & 31) + 5];
2008 if (bw4 == 1) is_sub8x8 &= r[0][t->bx - 1].ref.ref[0] > 0;
2009 if (bh4 == ss_ver) is_sub8x8 &= r[-1][t->bx].ref.ref[0] > 0;
2010 if (bw4 == 1 && bh4 == ss_ver)
2011 is_sub8x8 &= r[-1][t->bx - 1].ref.ref[0] > 0;
2014 // chroma prediction
2015 if (is_sub8x8) {
2016 assert(ss_hor == 1);
2017 if (bw4 == 1 && bh4 == ss_ver) {
2018 const refmvs_block *const rr = &r[-1][t->bx - 1];
2019 mc_lowest_px(&lowest_px[rr->ref.ref[0] - 1][1],
2020 t->by - 1, bh4, rr->mv.mv[0].y, ss_ver,
2021 &f->svc[rr->ref.ref[0] - 1][1]);
2023 if (bw4 == 1) {
2024 const refmvs_block *const rr = &r[0][t->bx - 1];
2025 mc_lowest_px(&lowest_px[rr->ref.ref[0] - 1][1],
2026 t->by, bh4, rr->mv.mv[0].y, ss_ver,
2027 &f->svc[rr->ref.ref[0] - 1][1]);
2029 if (bh4 == ss_ver) {
2030 const refmvs_block *const rr = &r[-1][t->bx];
2031 mc_lowest_px(&lowest_px[rr->ref.ref[0] - 1][1],
2032 t->by - 1, bh4, rr->mv.mv[0].y, ss_ver,
2033 &f->svc[rr->ref.ref[0] - 1][1]);
2035 mc_lowest_px(&lowest_px[b->ref[0]][1], t->by, bh4,
2036 b->mv[0].y, ss_ver, &f->svc[b->ref[0]][1]);
2037 } else {
2038 if (imin(cbw4, cbh4) > 1 &&
2039 ((b->inter_mode == GLOBALMV && f->gmv_warp_allowed[b->ref[0]]) ||
2040 (b->motion_mode == MM_WARP && t->warpmv.type > DAV1D_WM_TYPE_TRANSLATION)))
2042 affine_lowest_px_chroma(t, &lowest_px[b->ref[0]][1], b_dim,
2043 b->motion_mode == MM_WARP ? &t->warpmv :
2044 &f->frame_hdr->gmv[b->ref[0]]);
2045 } else {
2046 mc_lowest_px(&lowest_px[b->ref[0]][1],
2047 t->by & ~ss_ver, bh4 << (bh4 == ss_ver),
2048 b->mv[0].y, ss_ver, &f->svc[b->ref[0]][1]);
2049 if (b->motion_mode == MM_OBMC) {
2050 obmc_lowest_px(t, lowest_px, 1, b_dim, bx4, by4, w4, h4);
2055 } else {
2056 // y
2057 for (int i = 0; i < 2; i++) {
2058 if (b->inter_mode == GLOBALMV_GLOBALMV && f->gmv_warp_allowed[b->ref[i]]) {
2059 affine_lowest_px_luma(t, &lowest_px[b->ref[i]][0], b_dim,
2060 &f->frame_hdr->gmv[b->ref[i]]);
2061 } else {
2062 mc_lowest_px(&lowest_px[b->ref[i]][0], t->by, bh4,
2063 b->mv[i].y, 0, &f->svc[b->ref[i]][1]);
2067 // uv
2068 if (has_chroma) for (int i = 0; i < 2; i++) {
2069 if (b->inter_mode == GLOBALMV_GLOBALMV &&
2070 imin(cbw4, cbh4) > 1 && f->gmv_warp_allowed[b->ref[i]])
2072 affine_lowest_px_chroma(t, &lowest_px[b->ref[i]][1], b_dim,
2073 &f->frame_hdr->gmv[b->ref[i]]);
2074 } else {
2075 mc_lowest_px(&lowest_px[b->ref[i]][1], t->by, bh4,
2076 b->mv[i].y, ss_ver, &f->svc[b->ref[i]][1]);
2082 return 0;
2083 }
2085 #if __has_feature(memory_sanitizer)
2087 #include <sanitizer/msan_interface.h>
2089 static int checked_decode_b(Dav1dTaskContext *const t,
2090 const enum BlockLevel bl,
2091 const enum BlockSize bs,
2092 const enum BlockPartition bp,
2093 const enum EdgeFlags intra_edge_flags)
2095 const Dav1dFrameContext *const f = t->f;
2096 const int err = decode_b(t, bl, bs, bp, intra_edge_flags);
2098 if (err == 0 && !(t->frame_thread.pass & 1)) {
2099 const int ss_ver = f->cur.p.layout == DAV1D_PIXEL_LAYOUT_I420;
2100 const int ss_hor = f->cur.p.layout != DAV1D_PIXEL_LAYOUT_I444;
2101 const uint8_t *const b_dim = dav1d_block_dimensions[bs];
2102 const int bw4 = b_dim[0], bh4 = b_dim[1];
2103 const int w4 = imin(bw4, f->bw - t->bx), h4 = imin(bh4, f->bh - t->by);
2104 const int has_chroma = f->seq_hdr->layout != DAV1D_PIXEL_LAYOUT_I400 &&
2105 (bw4 > ss_hor || t->bx & 1) &&
2106 (bh4 > ss_ver || t->by & 1);
2108 for (int p = 0; p < 1 + 2 * has_chroma; p++) {
2109 const int ss_ver = p && f->cur.p.layout == DAV1D_PIXEL_LAYOUT_I420;
2110 const int ss_hor = p && f->cur.p.layout != DAV1D_PIXEL_LAYOUT_I444;
2111 const ptrdiff_t stride = f->cur.stride[!!p];
2112 const int bx = t->bx & ~ss_hor;
2113 const int by = t->by & ~ss_ver;
2114 const int width = w4 << (2 - ss_hor + (bw4 == ss_hor));
2115 const int height = h4 << (2 - ss_ver + (bh4 == ss_ver));
2117 const uint8_t *data = f->cur.data[p] + (by << (2 - ss_ver)) * stride +
2118 (bx << (2 - ss_hor + !!f->seq_hdr->hbd));
2120 for (int y = 0; y < height; data += stride, y++) {
2121 const size_t line_sz = width << !!f->seq_hdr->hbd;
2122 if (__msan_test_shadow(data, line_sz) != -1) {
2123 fprintf(stderr, "B[%d](%d, %d) w4:%d, h4:%d, row:%d\n",
2124 p, bx, by, w4, h4, y);
2125 __msan_check_mem_is_initialized(data, line_sz);
2131 return err;
2132 }
2134 #define decode_b checked_decode_b
2136 #endif /* defined(__has_feature) */
2138 static int decode_sb(Dav1dTaskContext *const t, const enum BlockLevel bl,
2139 const EdgeNode *const node)
2141 const Dav1dFrameContext *const f = t->f;
2142 Dav1dTileState *const ts = t->ts;
2143 const int hsz = 16 >> bl;
2144 const int have_h_split = f->bw > t->bx + hsz;
2145 const int have_v_split = f->bh > t->by + hsz;
2147 if (!have_h_split && !have_v_split) {
2148 assert(bl < BL_8X8);
2149 return decode_sb(t, bl + 1, INTRA_EDGE_SPLIT(node, 0));
2152 uint16_t *pc;
2153 enum BlockPartition bp;
2154 int ctx, bx8, by8;
2155 if (t->frame_thread.pass != 2) {
2156 if (0 && bl == BL_64X64)
2157 printf("poc=%d,y=%d,x=%d,bl=%d,r=%d\n",
2158 f->frame_hdr->frame_offset, t->by, t->bx, bl, ts->msac.rng);
2159 bx8 = (t->bx & 31) >> 1;
2160 by8 = (t->by & 31) >> 1;
2161 ctx = get_partition_ctx(t->a, &t->l, bl, by8, bx8);
2162 pc = ts->cdf.m.partition[bl][ctx];
2165 if (have_h_split && have_v_split) {
2166 if (t->frame_thread.pass == 2) {
2167 const Av1Block *const b = &f->frame_thread.b[t->by * f->b4_stride + t->bx];
2168 bp = b->bl == bl ? b->bp : PARTITION_SPLIT;
2169 } else {
2170 bp = dav1d_msac_decode_symbol_adapt16(&ts->msac, pc,
2171 dav1d_partition_type_count[bl]);
2172 if (f->cur.p.layout == DAV1D_PIXEL_LAYOUT_I422 &&
2173 (bp == PARTITION_V || bp == PARTITION_V4 ||
2174 bp == PARTITION_T_LEFT_SPLIT || bp == PARTITION_T_RIGHT_SPLIT))
2176 return 1;
2178 if (DEBUG_BLOCK_INFO)
2179 printf("poc=%d,y=%d,x=%d,bl=%d,ctx=%d,bp=%d: r=%d\n",
2180 f->frame_hdr->frame_offset, t->by, t->bx, bl, ctx, bp,
2181 ts->msac.rng);
2183 const uint8_t *const b = dav1d_block_sizes[bl][bp];
2185 switch (bp) {
2186 case PARTITION_NONE:
2187 if (decode_b(t, bl, b[0], PARTITION_NONE, node->o))
2188 return -1;
2189 break;
2190 case PARTITION_H:
2191 if (decode_b(t, bl, b[0], PARTITION_H, node->h[0]))
2192 return -1;
2193 t->by += hsz;
2194 if (decode_b(t, bl, b[0], PARTITION_H, node->h[1]))
2195 return -1;
2196 t->by -= hsz;
2197 break;
2198 case PARTITION_V:
2199 if (decode_b(t, bl, b[0], PARTITION_V, node->v[0]))
2200 return -1;
2201 t->bx += hsz;
2202 if (decode_b(t, bl, b[0], PARTITION_V, node->v[1]))
2203 return -1;
2204 t->bx -= hsz;
2205 break;
2206 case PARTITION_SPLIT:
2207 if (bl == BL_8X8) {
2208 const EdgeTip *const tip = (const EdgeTip *) node;
2209 assert(hsz == 1);
2210 if (decode_b(t, bl, BS_4x4, PARTITION_SPLIT, EDGE_ALL_TR_AND_BL))
2211 return -1;
2212 const enum Filter2d tl_filter = t->tl_4x4_filter;
2213 t->bx++;
2214 if (decode_b(t, bl, BS_4x4, PARTITION_SPLIT, tip->split[0]))
2215 return -1;
2216 t->bx--;
2217 t->by++;
2218 if (decode_b(t, bl, BS_4x4, PARTITION_SPLIT, tip->split[1]))
2219 return -1;
2220 t->bx++;
2221 t->tl_4x4_filter = tl_filter;
2222 if (decode_b(t, bl, BS_4x4, PARTITION_SPLIT, tip->split[2]))
2223 return -1;
2224 t->bx--;
2225 t->by--;
2226 #if ARCH_X86_64
2227 if (t->frame_thread.pass) {
2228 /* In 8-bit mode with 2-pass decoding the coefficient buffer
2229 * can end up misaligned due to skips here. Work around
2230 * the issue by explicitly realigning the buffer. */
2231 const int p = t->frame_thread.pass & 1;
2232 ts->frame_thread[p].cf =
2233 (void*)(((uintptr_t)ts->frame_thread[p].cf + 63) & ~63);
2235 #endif
2236 } else {
2237 if (decode_sb(t, bl + 1, INTRA_EDGE_SPLIT(node, 0)))
2238 return 1;
2239 t->bx += hsz;
2240 if (decode_sb(t, bl + 1, INTRA_EDGE_SPLIT(node, 1)))
2241 return 1;
2242 t->bx -= hsz;
2243 t->by += hsz;
2244 if (decode_sb(t, bl + 1, INTRA_EDGE_SPLIT(node, 2)))
2245 return 1;
2246 t->bx += hsz;
2247 if (decode_sb(t, bl + 1, INTRA_EDGE_SPLIT(node, 3)))
2248 return 1;
2249 t->bx -= hsz;
2250 t->by -= hsz;
2252 break;
2253 case PARTITION_T_TOP_SPLIT: {
2254 if (decode_b(t, bl, b[0], PARTITION_T_TOP_SPLIT, EDGE_ALL_TR_AND_BL))
2255 return -1;
2256 t->bx += hsz;
2257 if (decode_b(t, bl, b[0], PARTITION_T_TOP_SPLIT, node->v[1]))
2258 return -1;
2259 t->bx -= hsz;
2260 t->by += hsz;
2261 if (decode_b(t, bl, b[1], PARTITION_T_TOP_SPLIT, node->h[1]))
2262 return -1;
2263 t->by -= hsz;
2264 break;
2266 case PARTITION_T_BOTTOM_SPLIT: {
2267 if (decode_b(t, bl, b[0], PARTITION_T_BOTTOM_SPLIT, node->h[0]))
2268 return -1;
2269 t->by += hsz;
2270 if (decode_b(t, bl, b[1], PARTITION_T_BOTTOM_SPLIT, node->v[0]))
2271 return -1;
2272 t->bx += hsz;
2273 if (decode_b(t, bl, b[1], PARTITION_T_BOTTOM_SPLIT, 0))
2274 return -1;
2275 t->bx -= hsz;
2276 t->by -= hsz;
2277 break;
2279 case PARTITION_T_LEFT_SPLIT: {
2280 if (decode_b(t, bl, b[0], PARTITION_T_LEFT_SPLIT, EDGE_ALL_TR_AND_BL))
2281 return -1;
2282 t->by += hsz;
2283 if (decode_b(t, bl, b[0], PARTITION_T_LEFT_SPLIT, node->h[1]))
2284 return -1;
2285 t->by -= hsz;
2286 t->bx += hsz;
2287 if (decode_b(t, bl, b[1], PARTITION_T_LEFT_SPLIT, node->v[1]))
2288 return -1;
2289 t->bx -= hsz;
2290 break;
2292 case PARTITION_T_RIGHT_SPLIT: {
2293 if (decode_b(t, bl, b[0], PARTITION_T_RIGHT_SPLIT, node->v[0]))
2294 return -1;
2295 t->bx += hsz;
2296 if (decode_b(t, bl, b[1], PARTITION_T_RIGHT_SPLIT, node->h[0]))
2297 return -1;
2298 t->by += hsz;
2299 if (decode_b(t, bl, b[1], PARTITION_T_RIGHT_SPLIT, 0))
2300 return -1;
2301 t->by -= hsz;
2302 t->bx -= hsz;
2303 break;
2305 case PARTITION_H4: {
2306 const EdgeBranch *const branch = (const EdgeBranch *) node;
2307 if (decode_b(t, bl, b[0], PARTITION_H4, node->h[0]))
2308 return -1;
2309 t->by += hsz >> 1;
2310 if (decode_b(t, bl, b[0], PARTITION_H4, branch->h4))
2311 return -1;
2312 t->by += hsz >> 1;
2313 if (decode_b(t, bl, b[0], PARTITION_H4, EDGE_ALL_LEFT_HAS_BOTTOM))
2314 return -1;
2315 t->by += hsz >> 1;
2316 if (t->by < f->bh)
2317 if (decode_b(t, bl, b[0], PARTITION_H4, node->h[1]))
2318 return -1;
2319 t->by -= hsz * 3 >> 1;
2320 break;
2322 case PARTITION_V4: {
2323 const EdgeBranch *const branch = (const EdgeBranch *) node;
2324 if (decode_b(t, bl, b[0], PARTITION_V4, node->v[0]))
2325 return -1;
2326 t->bx += hsz >> 1;
2327 if (decode_b(t, bl, b[0], PARTITION_V4, branch->v4))
2328 return -1;
2329 t->bx += hsz >> 1;
2330 if (decode_b(t, bl, b[0], PARTITION_V4, EDGE_ALL_TOP_HAS_RIGHT))
2331 return -1;
2332 t->bx += hsz >> 1;
2333 if (t->bx < f->bw)
2334 if (decode_b(t, bl, b[0], PARTITION_V4, node->v[1]))
2335 return -1;
2336 t->bx -= hsz * 3 >> 1;
2337 break;
2339 default: assert(0);
2341 } else if (have_h_split) {
2342 unsigned is_split;
2343 if (t->frame_thread.pass == 2) {
2344 const Av1Block *const b = &f->frame_thread.b[t->by * f->b4_stride + t->bx];
2345 is_split = b->bl != bl;
2346 } else {
2347 is_split = dav1d_msac_decode_bool(&ts->msac,
2348 gather_top_partition_prob(pc, bl));
2349 if (DEBUG_BLOCK_INFO)
2350 printf("poc=%d,y=%d,x=%d,bl=%d,ctx=%d,bp=%d: r=%d\n",
2351 f->frame_hdr->frame_offset, t->by, t->bx, bl, ctx,
2352 is_split ? PARTITION_SPLIT : PARTITION_H, ts->msac.rng);
2355 assert(bl < BL_8X8);
2356 if (is_split) {
2357 bp = PARTITION_SPLIT;
2358 if (decode_sb(t, bl + 1, INTRA_EDGE_SPLIT(node, 0))) return 1;
2359 t->bx += hsz;
2360 if (decode_sb(t, bl + 1, INTRA_EDGE_SPLIT(node, 1))) return 1;
2361 t->bx -= hsz;
2362 } else {
2363 bp = PARTITION_H;
2364 if (decode_b(t, bl, dav1d_block_sizes[bl][PARTITION_H][0],
2365 PARTITION_H, node->h[0]))
2366 return -1;
2368 } else {
2369 assert(have_v_split);
2370 unsigned is_split;
2371 if (t->frame_thread.pass == 2) {
2372 const Av1Block *const b = &f->frame_thread.b[t->by * f->b4_stride + t->bx];
2373 is_split = b->bl != bl;
2374 } else {
2375 is_split = dav1d_msac_decode_bool(&ts->msac,
2376 gather_left_partition_prob(pc, bl));
2377 if (f->cur.p.layout == DAV1D_PIXEL_LAYOUT_I422 && !is_split)
2378 return 1;
2379 if (DEBUG_BLOCK_INFO)
2380 printf("poc=%d,y=%d,x=%d,bl=%d,ctx=%d,bp=%d: r=%d\n",
2381 f->frame_hdr->frame_offset, t->by, t->bx, bl, ctx,
2382 is_split ? PARTITION_SPLIT : PARTITION_V, ts->msac.rng);
2385 assert(bl < BL_8X8);
2386 if (is_split) {
2387 bp = PARTITION_SPLIT;
2388 if (decode_sb(t, bl + 1, INTRA_EDGE_SPLIT(node, 0))) return 1;
2389 t->by += hsz;
2390 if (decode_sb(t, bl + 1, INTRA_EDGE_SPLIT(node, 2))) return 1;
2391 t->by -= hsz;
2392 } else {
2393 bp = PARTITION_V;
2394 if (decode_b(t, bl, dav1d_block_sizes[bl][PARTITION_V][0],
2395 PARTITION_V, node->v[0]))
2396 return -1;
2400 if (t->frame_thread.pass != 2 && (bp != PARTITION_SPLIT || bl == BL_8X8)) {
2401 #define set_ctx(type, dir, diridx, off, mul, rep_macro) \
2402 rep_macro(type, t->a->partition, bx8, mul * dav1d_al_part_ctx[0][bl][bp]); \
2403 rep_macro(type, t->l.partition, by8, mul * dav1d_al_part_ctx[1][bl][bp])
2404 case_set_upto16(hsz,,,);
2405 #undef set_ctx
2408 return 0;
2409 }
2411 static void reset_context(BlockContext *const ctx, const int keyframe, const int pass) {
2412 memset(ctx->intra, keyframe, sizeof(ctx->intra));
2413 memset(ctx->uvmode, DC_PRED, sizeof(ctx->uvmode));
2414 if (keyframe)
2415 memset(ctx->mode, DC_PRED, sizeof(ctx->mode));
2417 if (pass == 2) return;
2419 memset(ctx->partition, 0, sizeof(ctx->partition));
2420 memset(ctx->skip, 0, sizeof(ctx->skip));
2421 memset(ctx->skip_mode, 0, sizeof(ctx->skip_mode));
2422 memset(ctx->tx_lpf_y, 2, sizeof(ctx->tx_lpf_y));
2423 memset(ctx->tx_lpf_uv, 1, sizeof(ctx->tx_lpf_uv));
2424 memset(ctx->tx_intra, -1, sizeof(ctx->tx_intra));
2425 memset(ctx->tx, TX_64X64, sizeof(ctx->tx));
2426 if (!keyframe) {
2427 memset(ctx->ref, -1, sizeof(ctx->ref));
2428 memset(ctx->comp_type, 0, sizeof(ctx->comp_type));
2429 memset(ctx->mode, NEARESTMV, sizeof(ctx->mode));
2431 memset(ctx->lcoef, 0x40, sizeof(ctx->lcoef));
2432 memset(ctx->ccoef, 0x40, sizeof(ctx->ccoef));
2433 memset(ctx->filter, DAV1D_N_SWITCHABLE_FILTERS, sizeof(ctx->filter));
2434 memset(ctx->seg_pred, 0, sizeof(ctx->seg_pred));
2435 memset(ctx->pal_sz, 0, sizeof(ctx->pal_sz));
2436 }
2438 // { Y+U+V, Y+U } * 4
2439 static const uint8_t ss_size_mul[4][2] = {
2440 [DAV1D_PIXEL_LAYOUT_I400] = { 4, 4 },
2441 [DAV1D_PIXEL_LAYOUT_I420] = { 6, 5 },
2442 [DAV1D_PIXEL_LAYOUT_I422] = { 8, 6 },
2443 [DAV1D_PIXEL_LAYOUT_I444] = { 12, 8 },
2444 };
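// e.g. for I420: coefficient units = 4 (Y) + 1 (U) + 1 (V) = 6, and
// palette-index units = 4 (Y) + 1 (U and V share one index map) = 5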
2446 static void setup_tile(Dav1dTileState *const ts,
2447 const Dav1dFrameContext *const f,
2448 const uint8_t *const data, const size_t sz,
2449 const int tile_row, const int tile_col,
2450 const unsigned tile_start_off)
2452 const int col_sb_start = f->frame_hdr->tiling.col_start_sb[tile_col];
2453 const int col_sb128_start = col_sb_start >> !f->seq_hdr->sb128;
2454 const int col_sb_end = f->frame_hdr->tiling.col_start_sb[tile_col + 1];
2455 const int row_sb_start = f->frame_hdr->tiling.row_start_sb[tile_row];
2456 const int row_sb_end = f->frame_hdr->tiling.row_start_sb[tile_row + 1];
2457 const int sb_shift = f->sb_shift;
2459 const uint8_t *const size_mul = ss_size_mul[f->cur.p.layout];
2460 for (int p = 0; p < 2; p++) {
2461 ts->frame_thread[p].pal_idx = f->frame_thread.pal_idx ?
2462 &f->frame_thread.pal_idx[(size_t)tile_start_off * size_mul[1] / 8] :
2463 NULL;
2464 ts->frame_thread[p].cbi = f->frame_thread.cbi ?
2465 &f->frame_thread.cbi[(size_t)tile_start_off * size_mul[0] / 64] :
2466 NULL;
2467 ts->frame_thread[p].cf = f->frame_thread.cf ?
2468 (uint8_t*)f->frame_thread.cf +
2469 (((size_t)tile_start_off * size_mul[0]) >> !f->seq_hdr->hbd) :
2470 NULL;
2473 dav1d_cdf_thread_copy(&ts->cdf, &f->in_cdf);
2474 ts->last_qidx = f->frame_hdr->quant.yac;
2475 ts->last_delta_lf.u32 = 0;
2477 dav1d_msac_init(&ts->msac, data, sz, f->frame_hdr->disable_cdf_update);
2479 ts->tiling.row = tile_row;
2480 ts->tiling.col = tile_col;
2481 ts->tiling.col_start = col_sb_start << sb_shift;
2482 ts->tiling.col_end = imin(col_sb_end << sb_shift, f->bw);
2483 ts->tiling.row_start = row_sb_start << sb_shift;
2484 ts->tiling.row_end = imin(row_sb_end << sb_shift, f->bh);
2486 // Reference Restoration Unit (used for subexp delta coding)
2487 int sb_idx, unit_idx;
2488 if (f->frame_hdr->width[0] != f->frame_hdr->width[1]) {
2489 // vertical components only
2490 sb_idx = (ts->tiling.row_start >> 5) * f->sr_sb128w;
2491 unit_idx = (ts->tiling.row_start & 16) >> 3;
2492 } else {
2493 sb_idx = (ts->tiling.row_start >> 5) * f->sb128w + col_sb128_start;
2494 unit_idx = ((ts->tiling.row_start & 16) >> 3) +
2495 ((ts->tiling.col_start & 16) >> 4);
2497 for (int p = 0; p < 3; p++) {
2498 if (!((f->lf.restore_planes >> p) & 1U))
2499 continue;
2501 if (f->frame_hdr->width[0] != f->frame_hdr->width[1]) {
2502 const int ss_hor = p && f->cur.p.layout != DAV1D_PIXEL_LAYOUT_I444;
2503 const int d = f->frame_hdr->super_res.width_scale_denominator;
2504 const int unit_size_log2 = f->frame_hdr->restoration.unit_size[!!p];
2505 const int rnd = (8 << unit_size_log2) - 1, shift = unit_size_log2 + 3;
2506 const int x = ((4 * ts->tiling.col_start * d >> ss_hor) + rnd) >> shift;
2507 const int px_x = x << (unit_size_log2 + ss_hor);
2508 const int u_idx = unit_idx + ((px_x & 64) >> 6);
2509 const int sb128x = px_x >> 7;
2510 if (sb128x >= f->sr_sb128w) continue;
2511 ts->lr_ref[p] = &f->lf.lr_mask[sb_idx + sb128x].lr[p][u_idx];
2512 } else {
2513 ts->lr_ref[p] = &f->lf.lr_mask[sb_idx].lr[p][unit_idx];
2516 ts->lr_ref[p]->filter_v[0] = 3;
2517 ts->lr_ref[p]->filter_v[1] = -7;
2518 ts->lr_ref[p]->filter_v[2] = 15;
2519 ts->lr_ref[p]->filter_h[0] = 3;
2520 ts->lr_ref[p]->filter_h[1] = -7;
2521 ts->lr_ref[p]->filter_h[2] = 15;
2522 ts->lr_ref[p]->sgr_weights[0] = -32;
2523 ts->lr_ref[p]->sgr_weights[1] = 31;
2526 if (f->c->n_tc > 1) {
2527 for (int p = 0; p < 2; p++)
2528 atomic_init(&ts->progress[p], row_sb_start);
2529 }
2530 }
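// Reads the loop-restoration parameters for one unit. Wiener taps and
// SGR weights are coded as subexp deltas against the previous unit
// (ts->lr_ref), which setup_tile() above seeds with the default values.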
2532 static void read_restoration_info(Dav1dTaskContext *const t,
2533 Av1RestorationUnit *const lr, const int p,
2534 const enum Dav1dRestorationType frame_type)
2536 const Dav1dFrameContext *const f = t->f;
2537 Dav1dTileState *const ts = t->ts;
2539 if (frame_type == DAV1D_RESTORATION_SWITCHABLE) {
2540 const int filter = dav1d_msac_decode_symbol_adapt4(&ts->msac,
2541 ts->cdf.m.restore_switchable, 2);
2542 lr->type = filter + !!filter; /* NONE/WIENER/SGRPROJ */
2543 } else {
2544 const unsigned type =
2545 dav1d_msac_decode_bool_adapt(&ts->msac,
2546 frame_type == DAV1D_RESTORATION_WIENER ?
2547 ts->cdf.m.restore_wiener : ts->cdf.m.restore_sgrproj);
2548 lr->type = type ? frame_type : DAV1D_RESTORATION_NONE;
2551 if (lr->type == DAV1D_RESTORATION_WIENER) {
2552 lr->filter_v[0] = p ? 0 :
2553 dav1d_msac_decode_subexp(&ts->msac,
2554 ts->lr_ref[p]->filter_v[0] + 5, 16, 1) - 5;
2555 lr->filter_v[1] =
2556 dav1d_msac_decode_subexp(&ts->msac,
2557 ts->lr_ref[p]->filter_v[1] + 23, 32, 2) - 23;
2558 lr->filter_v[2] =
2559 dav1d_msac_decode_subexp(&ts->msac,
2560 ts->lr_ref[p]->filter_v[2] + 17, 64, 3) - 17;
2562 lr->filter_h[0] = p ? 0 :
2563 dav1d_msac_decode_subexp(&ts->msac,
2564 ts->lr_ref[p]->filter_h[0] + 5, 16, 1) - 5;
2565 lr->filter_h[1] =
2566 dav1d_msac_decode_subexp(&ts->msac,
2567 ts->lr_ref[p]->filter_h[1] + 23, 32, 2) - 23;
2568 lr->filter_h[2] =
2569 dav1d_msac_decode_subexp(&ts->msac,
2570 ts->lr_ref[p]->filter_h[2] + 17, 64, 3) - 17;
2571 memcpy(lr->sgr_weights, ts->lr_ref[p]->sgr_weights, sizeof(lr->sgr_weights));
2572 ts->lr_ref[p] = lr;
2573 if (DEBUG_BLOCK_INFO)
2574 printf("Post-lr_wiener[pl=%d,v[%d,%d,%d],h[%d,%d,%d]]: r=%d\n",
2575 p, lr->filter_v[0], lr->filter_v[1],
2576 lr->filter_v[2], lr->filter_h[0],
2577 lr->filter_h[1], lr->filter_h[2], ts->msac.rng);
2578 } else if (lr->type == DAV1D_RESTORATION_SGRPROJ) {
2579 const unsigned idx = dav1d_msac_decode_bools(&ts->msac, 4);
2580 const uint16_t *const sgr_params = dav1d_sgr_params[idx];
2581 lr->type += idx;
2582 lr->sgr_weights[0] = sgr_params[0] ? dav1d_msac_decode_subexp(&ts->msac,
2583 ts->lr_ref[p]->sgr_weights[0] + 96, 128, 4) - 96 : 0;
2584 lr->sgr_weights[1] = sgr_params[1] ? dav1d_msac_decode_subexp(&ts->msac,
2585 ts->lr_ref[p]->sgr_weights[1] + 32, 128, 4) - 32 : 95;
2586 memcpy(lr->filter_v, ts->lr_ref[p]->filter_v, sizeof(lr->filter_v));
2587 memcpy(lr->filter_h, ts->lr_ref[p]->filter_h, sizeof(lr->filter_h));
2588 ts->lr_ref[p] = lr;
2589 if (DEBUG_BLOCK_INFO)
2590 printf("Post-lr_sgrproj[pl=%d,idx=%d,w[%d,%d]]: r=%d\n",
2591 p, idx, lr->sgr_weights[0],
2592 lr->sgr_weights[1], ts->msac.rng);
2593 }
2594 }
2596 // modeled after the equivalent function in aomdec:decodeframe.c
2597 static int check_trailing_bits_after_symbol_coder(const MsacContext *const msac) {
2598 // check marker bit (single 1), followed by zeroes
2599 const int n_bits = -(msac->cnt + 14);
2600 assert(n_bits <= 0); // this assumes we errored out when cnt <= -15 in caller
2601 const int n_bytes = (n_bits + 7) >> 3;
2602 const uint8_t *p = &msac->buf_pos[n_bytes];
2603 const int pattern = 128 >> ((n_bits - 1) & 7);
2604 if ((p[-1] & (2 * pattern - 1)) != pattern)
2605 return 1;
2607 // check that the remaining bytes are all zero
2608 for (; p < msac->buf_end; p++)
2609 if (*p)
2610 return 1;
2612 return 0;
2613 }
2615 int dav1d_decode_tile_sbrow(Dav1dTaskContext *const t) {
2616 const Dav1dFrameContext *const f = t->f;
2617 const enum BlockLevel root_bl = f->seq_hdr->sb128 ? BL_128X128 : BL_64X64;
2618 Dav1dTileState *const ts = t->ts;
2619 const Dav1dContext *const c = f->c;
2620 const int sb_step = f->sb_step;
2621 const int tile_row = ts->tiling.row, tile_col = ts->tiling.col;
2622 const int col_sb_start = f->frame_hdr->tiling.col_start_sb[tile_col];
2623 const int col_sb128_start = col_sb_start >> !f->seq_hdr->sb128;
2625 if (IS_INTER_OR_SWITCH(f->frame_hdr) || f->frame_hdr->allow_intrabc) {
2626 dav1d_refmvs_tile_sbrow_init(&t->rt, &f->rf, ts->tiling.col_start,
2627 ts->tiling.col_end, ts->tiling.row_start,
2628 ts->tiling.row_end, t->by >> f->sb_shift,
2629 ts->tiling.row, t->frame_thread.pass);
2632 if (IS_INTER_OR_SWITCH(f->frame_hdr) && c->n_fc > 1) {
2633 const int sby = (t->by - ts->tiling.row_start) >> f->sb_shift;
2634 int (*const lowest_px)[2] = ts->lowest_pixel[sby];
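// lowest_pixel[] tracks, per reference and per plane type (luma/chroma),
// the lowest pixel row this sbrow needs from that reference, so dependent
// frame threads can wait on exactly that much decoded progress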
2635 for (int n = 0; n < 7; n++)
2636 for (int m = 0; m < 2; m++)
2637 lowest_px[n][m] = INT_MIN;
2640 reset_context(&t->l, IS_KEY_OR_INTRA(f->frame_hdr), t->frame_thread.pass);
2641 if (t->frame_thread.pass == 2) {
2642 const int off_2pass = c->n_tc > 1 ? f->sb128w * f->frame_hdr->tiling.rows : 0;
2643 for (t->bx = ts->tiling.col_start,
2644 t->a = f->a + off_2pass + col_sb128_start + tile_row * f->sb128w;
2645 t->bx < ts->tiling.col_end; t->bx += sb_step)
2647 if (atomic_load_explicit(c->flush, memory_order_acquire))
2648 return 1;
2649 if (decode_sb(t, root_bl, dav1d_intra_edge_tree[root_bl]))
2650 return 1;
2651 if (t->bx & 16 || f->seq_hdr->sb128)
2652 t->a++;
2654 f->bd_fn.backup_ipred_edge(t);
2655 return 0;
2658 if (f->c->n_tc > 1 && f->frame_hdr->use_ref_frame_mvs) {
2659 f->c->refmvs_dsp.load_tmvs(&f->rf, ts->tiling.row,
2660 ts->tiling.col_start >> 1, ts->tiling.col_end >> 1,
2661 t->by >> 1, (t->by + sb_step) >> 1);
2663 memset(t->pal_sz_uv[1], 0, sizeof(*t->pal_sz_uv));
2664 const int sb128y = t->by >> 5;
2665 for (t->bx = ts->tiling.col_start, t->a = f->a + col_sb128_start + tile_row * f->sb128w,
2666 t->lf_mask = f->lf.mask + sb128y * f->sb128w + col_sb128_start;
2667 t->bx < ts->tiling.col_end; t->bx += sb_step)
2669 if (atomic_load_explicit(c->flush, memory_order_acquire))
2670 return 1;
2671 if (root_bl == BL_128X128) {
2672 t->cur_sb_cdef_idx_ptr = t->lf_mask->cdef_idx;
2673 t->cur_sb_cdef_idx_ptr[0] = -1;
2674 t->cur_sb_cdef_idx_ptr[1] = -1;
2675 t->cur_sb_cdef_idx_ptr[2] = -1;
2676 t->cur_sb_cdef_idx_ptr[3] = -1;
2677 } else {
2678 t->cur_sb_cdef_idx_ptr =
2679 &t->lf_mask->cdef_idx[((t->bx & 16) >> 4) +
2680 ((t->by & 16) >> 3)];
2681 t->cur_sb_cdef_idx_ptr[0] = -1;
2683 // Restoration filter
2684 for (int p = 0; p < 3; p++) {
2685 if (!((f->lf.restore_planes >> p) & 1U))
2686 continue;
2688 const int ss_ver = p && f->cur.p.layout == DAV1D_PIXEL_LAYOUT_I420;
2689 const int ss_hor = p && f->cur.p.layout != DAV1D_PIXEL_LAYOUT_I444;
2690 const int unit_size_log2 = f->frame_hdr->restoration.unit_size[!!p];
2691 const int y = t->by * 4 >> ss_ver;
2692 const int h = (f->cur.p.h + ss_ver) >> ss_ver;
2694 const int unit_size = 1 << unit_size_log2;
2695 const unsigned mask = unit_size - 1;
2696 if (y & mask) continue;
2697 const int half_unit = unit_size >> 1;
2698 // Round half up at frame boundaries, if there's more than one
2699 // restoration unit
2700 if (y && y + half_unit > h) continue;
2702 const enum Dav1dRestorationType frame_type = f->frame_hdr->restoration.type[p];
2704 if (f->frame_hdr->width[0] != f->frame_hdr->width[1]) {
2705 const int w = (f->sr_cur.p.p.w + ss_hor) >> ss_hor;
2706 const int n_units = imax(1, (w + half_unit) >> unit_size_log2);
2708 const int d = f->frame_hdr->super_res.width_scale_denominator;
2709 const int rnd = unit_size * 8 - 1, shift = unit_size_log2 + 3;
2710 const int x0 = ((4 * t->bx * d >> ss_hor) + rnd) >> shift;
2711 const int x1 = ((4 * (t->bx + sb_step) * d >> ss_hor) + rnd) >> shift;
2713 for (int x = x0; x < imin(x1, n_units); x++) {
2714 const int px_x = x << (unit_size_log2 + ss_hor);
2715 const int sb_idx = (t->by >> 5) * f->sr_sb128w + (px_x >> 7);
2716 const int unit_idx = ((t->by & 16) >> 3) + ((px_x & 64) >> 6);
2717 Av1RestorationUnit *const lr = &f->lf.lr_mask[sb_idx].lr[p][unit_idx];
2719 read_restoration_info(t, lr, p, frame_type);
2721 } else {
2722 const int x = 4 * t->bx >> ss_hor;
2723 if (x & mask) continue;
2724 const int w = (f->cur.p.w + ss_hor) >> ss_hor;
2725 // Round half up at frame boundaries, if there's more than one
2726 // restoration unit
2727 if (x && x + half_unit > w) continue;
2728 const int sb_idx = (t->by >> 5) * f->sr_sb128w + (t->bx >> 5);
2729 const int unit_idx = ((t->by & 16) >> 3) + ((t->bx & 16) >> 4);
2730 Av1RestorationUnit *const lr = &f->lf.lr_mask[sb_idx].lr[p][unit_idx];
2732 read_restoration_info(t, lr, p, frame_type);
2735 if (decode_sb(t, root_bl, dav1d_intra_edge_tree[root_bl]))
2736 return 1;
2737 if (t->bx & 16 || f->seq_hdr->sb128) {
2738 t->a++;
2739 t->lf_mask++;
2743 if (f->seq_hdr->ref_frame_mvs && f->c->n_tc > 1 && IS_INTER_OR_SWITCH(f->frame_hdr)) {
2744 dav1d_refmvs_save_tmvs(&f->c->refmvs_dsp, &t->rt,
2745 ts->tiling.col_start >> 1, ts->tiling.col_end >> 1,
2746 t->by >> 1, (t->by + sb_step) >> 1);
2749 // back up pre-loopfilter pixels for intra prediction of the next sbrow
2750 if (t->frame_thread.pass != 1)
2751 f->bd_fn.backup_ipred_edge(t);
2753 // back up t->a/l.tx_lpf_y/uv at tile boundaries; they are used to "fix"
2754 // up the initial value in neighbouring tiles when running the loopfilter
2755 int align_h = (f->bh + 31) & ~31;
2756 memcpy(&f->lf.tx_lpf_right_edge[0][align_h * tile_col + t->by],
2757 &t->l.tx_lpf_y[t->by & 16], sb_step);
2758 const int ss_ver = f->cur.p.layout == DAV1D_PIXEL_LAYOUT_I420;
2759 align_h >>= ss_ver;
2760 memcpy(&f->lf.tx_lpf_right_edge[1][align_h * tile_col + (t->by >> ss_ver)],
2761 &t->l.tx_lpf_uv[(t->by & 16) >> ss_ver], sb_step >> ss_ver);
2763 // error out on symbol decoder overread
2764 if (ts->msac.cnt <= -15) return 1;
2766 return c->strict_std_compliance &&
2767 (t->by >> f->sb_shift) + 1 >= f->frame_hdr->tiling.row_start_sb[tile_row + 1] &&
2768 check_trailing_bits_after_symbol_coder(&ts->msac);
2769 }
2771 int dav1d_decode_frame_init(Dav1dFrameContext *const f) {
2772 const Dav1dContext *const c = f->c;
2773 int retval = DAV1D_ERR(ENOMEM);
2775 if (f->sbh > f->lf.start_of_tile_row_sz) {
2776 dav1d_free(f->lf.start_of_tile_row);
2777 f->lf.start_of_tile_row = dav1d_malloc(ALLOC_TILE, f->sbh * sizeof(uint8_t));
2778 if (!f->lf.start_of_tile_row) {
2779 f->lf.start_of_tile_row_sz = 0;
2780 goto error;
2782 f->lf.start_of_tile_row_sz = f->sbh;
2784 int sby = 0;
2785 for (int tile_row = 0; tile_row < f->frame_hdr->tiling.rows; tile_row++) {
2786 f->lf.start_of_tile_row[sby++] = tile_row;
2787 while (sby < f->frame_hdr->tiling.row_start_sb[tile_row + 1])
2788 f->lf.start_of_tile_row[sby++] = 0;
2791 const int n_ts = f->frame_hdr->tiling.cols * f->frame_hdr->tiling.rows;
2792 if (n_ts != f->n_ts) {
2793 if (c->n_fc > 1) {
2794 dav1d_free(f->frame_thread.tile_start_off);
2795 f->frame_thread.tile_start_off =
2796 dav1d_malloc(ALLOC_TILE, sizeof(*f->frame_thread.tile_start_off) * n_ts);
2797 if (!f->frame_thread.tile_start_off) {
2798 f->n_ts = 0;
2799 goto error;
2802 dav1d_free_aligned(f->ts);
2803 f->ts = dav1d_alloc_aligned(ALLOC_TILE, sizeof(*f->ts) * n_ts, 32);
2804 if (!f->ts) goto error;
2805 f->n_ts = n_ts;
2808 const int a_sz = f->sb128w * f->frame_hdr->tiling.rows * (1 + (c->n_fc > 1 && c->n_tc > 1));
2809 if (a_sz != f->a_sz) {
2810 dav1d_free(f->a);
2811 f->a = dav1d_malloc(ALLOC_TILE, sizeof(*f->a) * a_sz);
2812 if (!f->a) {
2813 f->a_sz = 0;
2814 goto error;
2816 f->a_sz = a_sz;
2819 const int num_sb128 = f->sb128w * f->sb128h;
2820 const uint8_t *const size_mul = ss_size_mul[f->cur.p.layout];
2821 const int hbd = !!f->seq_hdr->hbd;
2822 if (c->n_fc > 1) {
2823 const unsigned sb_step4 = f->sb_step * 4;
2824 int tile_idx = 0;
2825 for (int tile_row = 0; tile_row < f->frame_hdr->tiling.rows; tile_row++) {
2826 const unsigned row_off = f->frame_hdr->tiling.row_start_sb[tile_row] *
2827 sb_step4 * f->sb128w * 128;
2828 const unsigned b_diff = (f->frame_hdr->tiling.row_start_sb[tile_row + 1] -
2829 f->frame_hdr->tiling.row_start_sb[tile_row]) * sb_step4;
2830 for (int tile_col = 0; tile_col < f->frame_hdr->tiling.cols; tile_col++) {
2831 f->frame_thread.tile_start_off[tile_idx++] = row_off + b_diff *
2832 f->frame_hdr->tiling.col_start_sb[tile_col] * sb_step4;
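// note: tile_start_off[] appears to be in units of luma pixels; it is
// rescaled by ss_size_mul when slicing the per-tile cf/cbi/pal_idx
// pointers in setup_tile()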
2836 const int lowest_pixel_mem_sz = f->frame_hdr->tiling.cols * f->sbh;
2837 if (lowest_pixel_mem_sz != f->tile_thread.lowest_pixel_mem_sz) {
2838 dav1d_free(f->tile_thread.lowest_pixel_mem);
2839 f->tile_thread.lowest_pixel_mem =
2840 dav1d_malloc(ALLOC_TILE, lowest_pixel_mem_sz *
2841 sizeof(*f->tile_thread.lowest_pixel_mem));
2842 if (!f->tile_thread.lowest_pixel_mem) {
2843 f->tile_thread.lowest_pixel_mem_sz = 0;
2844 goto error;
2846 f->tile_thread.lowest_pixel_mem_sz = lowest_pixel_mem_sz;
2848 int (*lowest_pixel_ptr)[7][2] = f->tile_thread.lowest_pixel_mem;
2849 for (int tile_row = 0, tile_row_base = 0; tile_row < f->frame_hdr->tiling.rows;
2850 tile_row++, tile_row_base += f->frame_hdr->tiling.cols)
2852 const int tile_row_sb_h = f->frame_hdr->tiling.row_start_sb[tile_row + 1] -
2853 f->frame_hdr->tiling.row_start_sb[tile_row];
2854 for (int tile_col = 0; tile_col < f->frame_hdr->tiling.cols; tile_col++) {
2855 f->ts[tile_row_base + tile_col].lowest_pixel = lowest_pixel_ptr;
2856 lowest_pixel_ptr += tile_row_sb_h;
2860 const int cbi_sz = num_sb128 * size_mul[0];
2861 if (cbi_sz != f->frame_thread.cbi_sz) {
2862 dav1d_free_aligned(f->frame_thread.cbi);
2863 f->frame_thread.cbi =
2864 dav1d_alloc_aligned(ALLOC_BLOCK, sizeof(*f->frame_thread.cbi) *
2865 cbi_sz * 32 * 32 / 4, 64);
2866 if (!f->frame_thread.cbi) {
2867 f->frame_thread.cbi_sz = 0;
2868 goto error;
2870 f->frame_thread.cbi_sz = cbi_sz;
2873 const int cf_sz = (num_sb128 * size_mul[0]) << hbd;
2874 if (cf_sz != f->frame_thread.cf_sz) {
2875 dav1d_free_aligned(f->frame_thread.cf);
2876 f->frame_thread.cf =
2877 dav1d_alloc_aligned(ALLOC_COEF, (size_t)cf_sz * 128 * 128 / 2, 64);
2878 if (!f->frame_thread.cf) {
2879 f->frame_thread.cf_sz = 0;
2880 goto error;
2882 memset(f->frame_thread.cf, 0, (size_t)cf_sz * 128 * 128 / 2);
2883 f->frame_thread.cf_sz = cf_sz;
2886 if (f->frame_hdr->allow_screen_content_tools) {
2887 const int pal_sz = num_sb128 << hbd;
2888 if (pal_sz != f->frame_thread.pal_sz) {
2889 dav1d_free_aligned(f->frame_thread.pal);
2890 f->frame_thread.pal =
2891 dav1d_alloc_aligned(ALLOC_PAL, sizeof(*f->frame_thread.pal) *
2892 pal_sz * 16 * 16, 64);
2893 if (!f->frame_thread.pal) {
2894 f->frame_thread.pal_sz = 0;
2895 goto error;
2897 f->frame_thread.pal_sz = pal_sz;
2900 const int pal_idx_sz = num_sb128 * size_mul[1];
2901 if (pal_idx_sz != f->frame_thread.pal_idx_sz) {
2902 dav1d_free_aligned(f->frame_thread.pal_idx);
2903 f->frame_thread.pal_idx =
2904 dav1d_alloc_aligned(ALLOC_PAL, sizeof(*f->frame_thread.pal_idx) *
2905 pal_idx_sz * 128 * 128 / 8, 64);
2906 if (!f->frame_thread.pal_idx) {
2907 f->frame_thread.pal_idx_sz = 0;
2908 goto error;
2910 f->frame_thread.pal_idx_sz = pal_idx_sz;
2912 } else if (f->frame_thread.pal) {
2913 dav1d_freep_aligned(&f->frame_thread.pal);
2914 dav1d_freep_aligned(&f->frame_thread.pal_idx);
2915 f->frame_thread.pal_sz = f->frame_thread.pal_idx_sz = 0;
2919 // update allocation of block contexts for above
2920 ptrdiff_t y_stride = f->cur.stride[0], uv_stride = f->cur.stride[1];
2921 const int has_resize = f->frame_hdr->width[0] != f->frame_hdr->width[1];
2922 const int need_cdef_lpf_copy = c->n_tc > 1 && has_resize;
2923 if (y_stride * f->sbh * 4 != f->lf.cdef_buf_plane_sz[0] ||
2924 uv_stride * f->sbh * 8 != f->lf.cdef_buf_plane_sz[1] ||
2925 need_cdef_lpf_copy != f->lf.need_cdef_lpf_copy ||
2926 f->sbh != f->lf.cdef_buf_sbh)
2928 dav1d_free_aligned(f->lf.cdef_line_buf);
2929 size_t alloc_sz = 64;
2930 alloc_sz += (size_t)llabs(y_stride) * 4 * f->sbh << need_cdef_lpf_copy;
2931 alloc_sz += (size_t)llabs(uv_stride) * 8 * f->sbh << need_cdef_lpf_copy;
2932 uint8_t *ptr = f->lf.cdef_line_buf = dav1d_alloc_aligned(ALLOC_CDEF, alloc_sz, 32);
2933 if (!ptr) {
2934 f->lf.cdef_buf_plane_sz[0] = f->lf.cdef_buf_plane_sz[1] = 0;
2935 goto error;
2938 ptr += 32;
2939 if (y_stride < 0) {
2940 f->lf.cdef_line[0][0] = ptr - y_stride * (f->sbh * 4 - 1);
2941 f->lf.cdef_line[1][0] = ptr - y_stride * (f->sbh * 4 - 3);
2942 } else {
2943 f->lf.cdef_line[0][0] = ptr + y_stride * 0;
2944 f->lf.cdef_line[1][0] = ptr + y_stride * 2;
2946 ptr += llabs(y_stride) * f->sbh * 4;
2947 if (uv_stride < 0) {
2948 f->lf.cdef_line[0][1] = ptr - uv_stride * (f->sbh * 8 - 1);
2949 f->lf.cdef_line[0][2] = ptr - uv_stride * (f->sbh * 8 - 3);
2950 f->lf.cdef_line[1][1] = ptr - uv_stride * (f->sbh * 8 - 5);
2951 f->lf.cdef_line[1][2] = ptr - uv_stride * (f->sbh * 8 - 7);
2952 } else {
2953 f->lf.cdef_line[0][1] = ptr + uv_stride * 0;
2954 f->lf.cdef_line[0][2] = ptr + uv_stride * 2;
2955 f->lf.cdef_line[1][1] = ptr + uv_stride * 4;
2956 f->lf.cdef_line[1][2] = ptr + uv_stride * 6;
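// for pictures with negative (bottom-up) strides, each sub-buffer base
// must point at its last line, hence the mirrored offsets above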
2959 if (need_cdef_lpf_copy) {
2960 ptr += llabs(uv_stride) * f->sbh * 8;
2961 if (y_stride < 0)
2962 f->lf.cdef_lpf_line[0] = ptr - y_stride * (f->sbh * 4 - 1);
2963 else
2964 f->lf.cdef_lpf_line[0] = ptr;
2965 ptr += llabs(y_stride) * f->sbh * 4;
2966 if (uv_stride < 0) {
2967 f->lf.cdef_lpf_line[1] = ptr - uv_stride * (f->sbh * 4 - 1);
2968 f->lf.cdef_lpf_line[2] = ptr - uv_stride * (f->sbh * 8 - 1);
2969 } else {
2970 f->lf.cdef_lpf_line[1] = ptr;
2971 f->lf.cdef_lpf_line[2] = ptr + uv_stride * f->sbh * 4;
2975 f->lf.cdef_buf_plane_sz[0] = (int) y_stride * f->sbh * 4;
2976 f->lf.cdef_buf_plane_sz[1] = (int) uv_stride * f->sbh * 8;
2977 f->lf.need_cdef_lpf_copy = need_cdef_lpf_copy;
2978 f->lf.cdef_buf_sbh = f->sbh;
2981 const int sb128 = f->seq_hdr->sb128;
2982 const int num_lines = c->n_tc > 1 ? f->sbh * 4 << sb128 : 12;
2983 y_stride = f->sr_cur.p.stride[0], uv_stride = f->sr_cur.p.stride[1];
2984 if (y_stride * num_lines != f->lf.lr_buf_plane_sz[0] ||
2985 uv_stride * num_lines * 2 != f->lf.lr_buf_plane_sz[1])
2987 dav1d_free_aligned(f->lf.lr_line_buf);
2988 // lr simd may overread the input, so slightly over-allocate the lpf buffer
2989 size_t alloc_sz = 128;
2990 alloc_sz += (size_t)llabs(y_stride) * num_lines;
2991 alloc_sz += (size_t)llabs(uv_stride) * num_lines * 2;
2992 uint8_t *ptr = f->lf.lr_line_buf = dav1d_alloc_aligned(ALLOC_LR, alloc_sz, 64);
2993 if (!ptr) {
2994 f->lf.lr_buf_plane_sz[0] = f->lf.lr_buf_plane_sz[1] = 0;
2995 goto error;
2998 ptr += 64;
2999 if (y_stride < 0)
3000 f->lf.lr_lpf_line[0] = ptr - y_stride * (num_lines - 1);
3001 else
3002 f->lf.lr_lpf_line[0] = ptr;
3003 ptr += llabs(y_stride) * num_lines;
3004 if (uv_stride < 0) {
3005 f->lf.lr_lpf_line[1] = ptr - uv_stride * (num_lines * 1 - 1);
3006 f->lf.lr_lpf_line[2] = ptr - uv_stride * (num_lines * 2 - 1);
3007 } else {
3008 f->lf.lr_lpf_line[1] = ptr;
3009 f->lf.lr_lpf_line[2] = ptr + uv_stride * num_lines;
3012 f->lf.lr_buf_plane_sz[0] = (int) y_stride * num_lines;
3013 f->lf.lr_buf_plane_sz[1] = (int) uv_stride * num_lines * 2;
3016 // update allocation for loopfilter masks
3017 if (num_sb128 != f->lf.mask_sz) {
3018 dav1d_free(f->lf.mask);
3019 dav1d_free(f->lf.level);
3020 f->lf.mask = dav1d_malloc(ALLOC_LF, sizeof(*f->lf.mask) * num_sb128);
3021 // over-allocate by 3 bytes since some of the SIMD implementations
3022 // index this from the level type and can thus over-read by up to 3
3023 f->lf.level = dav1d_malloc(ALLOC_LF, sizeof(*f->lf.level) * num_sb128 * 32 * 32 + 3);
3024 if (!f->lf.mask || !f->lf.level) {
3025 f->lf.mask_sz = 0;
3026 goto error;
3028 if (c->n_fc > 1) {
3029 dav1d_free(f->frame_thread.b);
3030 f->frame_thread.b = dav1d_malloc(ALLOC_BLOCK, sizeof(*f->frame_thread.b) *
3031 num_sb128 * 32 * 32);
3032 if (!f->frame_thread.b) {
3033 f->lf.mask_sz = 0;
3034 goto error;
3037 f->lf.mask_sz = num_sb128;
3040 f->sr_sb128w = (f->sr_cur.p.p.w + 127) >> 7;
3041 const int lr_mask_sz = f->sr_sb128w * f->sb128h;
3042 if (lr_mask_sz != f->lf.lr_mask_sz) {
3043 dav1d_free(f->lf.lr_mask);
3044 f->lf.lr_mask = dav1d_malloc(ALLOC_LR, sizeof(*f->lf.lr_mask) * lr_mask_sz);
3045 if (!f->lf.lr_mask) {
3046 f->lf.lr_mask_sz = 0;
3047 goto error;
3049 f->lf.lr_mask_sz = lr_mask_sz;
3051 f->lf.restore_planes =
3052 ((f->frame_hdr->restoration.type[0] != DAV1D_RESTORATION_NONE) << 0) +
3053 ((f->frame_hdr->restoration.type[1] != DAV1D_RESTORATION_NONE) << 1) +
3054 ((f->frame_hdr->restoration.type[2] != DAV1D_RESTORATION_NONE) << 2);
3055 if (f->frame_hdr->loopfilter.sharpness != f->lf.last_sharpness) {
3056 dav1d_calc_eih(&f->lf.lim_lut, f->frame_hdr->loopfilter.sharpness);
3057 f->lf.last_sharpness = f->frame_hdr->loopfilter.sharpness;
3059 dav1d_calc_lf_values(f->lf.lvl, f->frame_hdr, (int8_t[4]) { 0, 0, 0, 0 });
3060 memset(f->lf.mask, 0, sizeof(*f->lf.mask) * num_sb128);
3062 const int ipred_edge_sz = f->sbh * f->sb128w << hbd;
3063 if (ipred_edge_sz != f->ipred_edge_sz) {
3064 dav1d_free_aligned(f->ipred_edge[0]);
3065 uint8_t *ptr = f->ipred_edge[0] =
3066 dav1d_alloc_aligned(ALLOC_IPRED, ipred_edge_sz * 128 * 3, 64);
3067 if (!ptr) {
3068 f->ipred_edge_sz = 0;
3069 goto error;
3071 f->ipred_edge[1] = ptr + ipred_edge_sz * 128 * 1;
3072 f->ipred_edge[2] = ptr + ipred_edge_sz * 128 * 2;
3073 f->ipred_edge_sz = ipred_edge_sz;
3076 const int re_sz = f->sb128h * f->frame_hdr->tiling.cols;
3077 if (re_sz != f->lf.re_sz) {
3078 dav1d_free(f->lf.tx_lpf_right_edge[0]);
3079 f->lf.tx_lpf_right_edge[0] = dav1d_malloc(ALLOC_LF, re_sz * 32 * 2);
3080 if (!f->lf.tx_lpf_right_edge[0]) {
3081 f->lf.re_sz = 0;
3082 goto error;
3084 f->lf.tx_lpf_right_edge[1] = f->lf.tx_lpf_right_edge[0] + re_sz * 32;
3085 f->lf.re_sz = re_sz;
3088 // init ref mvs
3089 if (IS_INTER_OR_SWITCH(f->frame_hdr) || f->frame_hdr->allow_intrabc) {
3090 const int ret =
3091 dav1d_refmvs_init_frame(&f->rf, f->seq_hdr, f->frame_hdr,
3092 f->refpoc, f->mvs, f->refrefpoc, f->ref_mvs,
3093 f->c->n_tc, f->c->n_fc);
3094 if (ret < 0) goto error;
3097 // setup dequant tables
3098 init_quant_tables(f->seq_hdr, f->frame_hdr, f->frame_hdr->quant.yac, f->dq);
3099 if (f->frame_hdr->quant.qm)
3100 for (int i = 0; i < N_RECT_TX_SIZES; i++) {
3101 f->qm[i][0] = dav1d_qm_tbl[f->frame_hdr->quant.qm_y][0][i];
3102 f->qm[i][1] = dav1d_qm_tbl[f->frame_hdr->quant.qm_u][1][i];
3103 f->qm[i][2] = dav1d_qm_tbl[f->frame_hdr->quant.qm_v][1][i];
3105 else
3106 memset(f->qm, 0, sizeof(f->qm));
3108 // setup jnt_comp weights
3109 if (f->frame_hdr->switchable_comp_refs) {
3110 for (int i = 0; i < 7; i++) {
3111 const unsigned ref0poc = f->refp[i].p.frame_hdr->frame_offset;
3113 for (int j = i + 1; j < 7; j++) {
3114 const unsigned ref1poc = f->refp[j].p.frame_hdr->frame_offset;
3116 const unsigned d1 =
3117 imin(abs(get_poc_diff(f->seq_hdr->order_hint_n_bits, ref0poc,
3118 f->cur.frame_hdr->frame_offset)), 31);
3119 const unsigned d0 =
3120 imin(abs(get_poc_diff(f->seq_hdr->order_hint_n_bits, ref1poc,
3121 f->cur.frame_hdr->frame_offset)), 31);
3122 const int order = d0 <= d1;
3124 static const uint8_t quant_dist_weight[3][2] = {
3125 { 2, 3 }, { 2, 5 }, { 2, 7 }
3127 static const uint8_t quant_dist_lookup_table[4][2] = {
3128 { 9, 7 }, { 11, 5 }, { 12, 4 }, { 13, 3 }
3131 int k;
3132 for (k = 0; k < 3; k++) {
3133 const int c0 = quant_dist_weight[k][order];
3134 const int c1 = quant_dist_weight[k][!order];
3135 const int d0_c0 = d0 * c0;
3136 const int d1_c1 = d1 * c1;
3137 if ((d0 > d1 && d0_c0 < d1_c1) || (d0 <= d1 && d0_c0 > d1_c1)) break;
3140 f->jnt_weights[i][j] = quant_dist_lookup_table[k][order];
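// distance-weighted compound prediction: one of the spec's weight pairs
// {9,7}, {11,5}, {12,4} or {13,3} (out of 16) is picked based on the
// relative temporal distance of the two references to the current frame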
3145 /* Init loopfilter pointers. Incrementing NULL pointers is technically UB,
3146 * so just point the chroma pointers in 4:0:0 to the luma plane here to
3147 * avoid having additional in-loop branches in various places. We never
3148 * dereference those pointers so it doesn't really matter what they
3149 * point at, as long as the pointers are valid. */
3150 const int has_chroma = f->cur.p.layout != DAV1D_PIXEL_LAYOUT_I400;
3151 f->lf.p[0] = f->cur.data[0];
3152 f->lf.p[1] = f->cur.data[has_chroma ? 1 : 0];
3153 f->lf.p[2] = f->cur.data[has_chroma ? 2 : 0];
3154 f->lf.sr_p[0] = f->sr_cur.p.data[0];
3155 f->lf.sr_p[1] = f->sr_cur.p.data[has_chroma ? 1 : 0];
3156 f->lf.sr_p[2] = f->sr_cur.p.data[has_chroma ? 2 : 0];
3158 retval = 0;
3159 error:
3160 return retval;
3161 }
3163 int dav1d_decode_frame_init_cdf(Dav1dFrameContext *const f) {
3164 const Dav1dContext *const c = f->c;
3165 int retval = DAV1D_ERR(EINVAL);
3167 if (f->frame_hdr->refresh_context)
3168 dav1d_cdf_thread_copy(f->out_cdf.data.cdf, &f->in_cdf);
3170 // parse individual tiles per tile group
3171 int tile_row = 0, tile_col = 0;
3172 f->task_thread.update_set = 0;
3173 for (int i = 0; i < f->n_tile_data; i++) {
3174 const uint8_t *data = f->tile[i].data.data;
3175 size_t size = f->tile[i].data.sz;
3177 for (int j = f->tile[i].start; j <= f->tile[i].end; j++) {
3178 size_t tile_sz;
3179 if (j == f->tile[i].end) {
3180 tile_sz = size;
3181 } else {
3182 if (f->frame_hdr->tiling.n_bytes > size) goto error;
3183 tile_sz = 0;
3184 for (unsigned k = 0; k < f->frame_hdr->tiling.n_bytes; k++)
3185 tile_sz |= (unsigned)*data++ << (k * 8);
3186 tile_sz++;
3187 size -= f->frame_hdr->tiling.n_bytes;
3188 if (tile_sz > size) goto error;
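// tile_size_minus_1 is coded as n_bytes little-endian bytes, hence the
// increment above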
3191 setup_tile(&f->ts[j], f, data, tile_sz, tile_row, tile_col++,
3192 c->n_fc > 1 ? f->frame_thread.tile_start_off[j] : 0);
3194 if (tile_col == f->frame_hdr->tiling.cols) {
3195 tile_col = 0;
3196 tile_row++;
3198 if (j == f->frame_hdr->tiling.update && f->frame_hdr->refresh_context)
3199 f->task_thread.update_set = 1;
3200 data += tile_sz;
3201 size -= tile_sz;
3205 if (c->n_tc > 1) {
3206 const int uses_2pass = c->n_fc > 1;
3207 for (int n = 0; n < f->sb128w * f->frame_hdr->tiling.rows * (1 + uses_2pass); n++)
3208 reset_context(&f->a[n], IS_KEY_OR_INTRA(f->frame_hdr),
3209 uses_2pass ? 1 + (n >= f->sb128w * f->frame_hdr->tiling.rows) : 0);
3212 retval = 0;
3213 error:
3214 return retval;
3215 }
3217 int dav1d_decode_frame_main(Dav1dFrameContext *const f) {
3218 const Dav1dContext *const c = f->c;
3219 int retval = DAV1D_ERR(EINVAL);
3221 assert(f->c->n_tc == 1);
3223 Dav1dTaskContext *const t = &c->tc[f - c->fc];
3224 t->f = f;
3225 t->frame_thread.pass = 0;
3227 for (int n = 0; n < f->sb128w * f->frame_hdr->tiling.rows; n++)
3228 reset_context(&f->a[n], IS_KEY_OR_INTRA(f->frame_hdr), 0);

    // no threading - we explicitly interleave tile/sbrow decoding
    // and post-filtering, so that the full process runs in-line
    for (int tile_row = 0; tile_row < f->frame_hdr->tiling.rows; tile_row++) {
        const int sbh_end =
            imin(f->frame_hdr->tiling.row_start_sb[tile_row + 1], f->sbh);
        for (int sby = f->frame_hdr->tiling.row_start_sb[tile_row];
             sby < sbh_end; sby++)
        {
            t->by = sby << (4 + f->seq_hdr->sb128);
            const int by_end = (t->by + f->sb_step) >> 1;
            if (f->frame_hdr->use_ref_frame_mvs) {
                f->c->refmvs_dsp.load_tmvs(&f->rf, tile_row,
                                           0, f->bw >> 1, t->by >> 1, by_end);
            }
            for (int tile_col = 0; tile_col < f->frame_hdr->tiling.cols; tile_col++) {
                t->ts = &f->ts[tile_row * f->frame_hdr->tiling.cols + tile_col];
                if (dav1d_decode_tile_sbrow(t)) goto error;
            }
            if (IS_INTER_OR_SWITCH(f->frame_hdr)) {
                dav1d_refmvs_save_tmvs(&f->c->refmvs_dsp, &t->rt,
                                       0, f->bw >> 1, t->by >> 1, by_end);
            }

            // loopfilter + cdef + restoration
            f->bd_fn.filter_sbrow(f, sby);
        }
    }

    retval = 0;
error:
    return retval;
}

void dav1d_decode_frame_exit(Dav1dFrameContext *const f, int retval) {
    const Dav1dContext *const c = f->c;

    if (f->sr_cur.p.data[0])
        atomic_init(&f->task_thread.error, 0);

    if (c->n_fc > 1 && retval && f->frame_thread.cf) {
        memset(f->frame_thread.cf, 0,
               (size_t)f->frame_thread.cf_sz * 128 * 128 / 2);
    }
    for (int i = 0; i < 7; i++) {
        if (f->refp[i].p.frame_hdr) {
            if (!retval && c->n_fc > 1 && c->strict_std_compliance &&
                atomic_load(&f->refp[i].progress[1]) == FRAME_ERROR)
            {
                retval = DAV1D_ERR(EINVAL);
                atomic_store(&f->task_thread.error, 1);
                atomic_store(&f->sr_cur.progress[1], FRAME_ERROR);
            }
            dav1d_thread_picture_unref(&f->refp[i]);
        }
        dav1d_ref_dec(&f->ref_mvs_ref[i]);
    }

    dav1d_picture_unref_internal(&f->cur);
    dav1d_thread_picture_unref(&f->sr_cur);
    dav1d_cdf_thread_unref(&f->in_cdf);
    if (f->frame_hdr && f->frame_hdr->refresh_context) {
        if (f->out_cdf.progress)
            atomic_store(f->out_cdf.progress, retval == 0 ? 1 : TILE_ERROR);
        dav1d_cdf_thread_unref(&f->out_cdf);
    }
    dav1d_ref_dec(&f->cur_segmap_ref);
    dav1d_ref_dec(&f->prev_segmap_ref);
    dav1d_ref_dec(&f->mvs_ref);
    dav1d_ref_dec(&f->seq_hdr_ref);
    dav1d_ref_dec(&f->frame_hdr_ref);

    for (int i = 0; i < f->n_tile_data; i++)
        dav1d_data_unref_internal(&f->tile[i].data);
    f->task_thread.retval = retval;
}

int dav1d_decode_frame(Dav1dFrameContext *const f) {
    assert(f->c->n_fc == 1);
    // if n_tc > 1 (but n_fc == 1), we could run init/exit in the task
    // threads also. Not sure it makes a measurable difference.
    int res = dav1d_decode_frame_init(f);
    if (!res) res = dav1d_decode_frame_init_cdf(f);
    // wait until all threads have completed
    if (!res) {
        if (f->c->n_tc > 1) {
            res = dav1d_task_create_tile_sbrow(f, 0, 1);
            pthread_mutex_lock(&f->task_thread.ttd->lock);
            pthread_cond_signal(&f->task_thread.ttd->cond);
            if (!res) {
                while (!f->task_thread.done[0] ||
                       atomic_load(&f->task_thread.task_counter) > 0)
                {
                    pthread_cond_wait(&f->task_thread.cond,
                                      &f->task_thread.ttd->lock);
                }
            }
            pthread_mutex_unlock(&f->task_thread.ttd->lock);
            res = f->task_thread.retval;
        } else {
            res = dav1d_decode_frame_main(f);
            if (!res && f->frame_hdr->refresh_context && f->task_thread.update_set) {
                dav1d_cdf_thread_update(f->frame_hdr, f->out_cdf.data.cdf,
                                        &f->ts[f->frame_hdr->tiling.update].cdf);
            }
        }
    }
    dav1d_decode_frame_exit(f, res);
    res = f->task_thread.retval;
    f->n_tile_data = 0;
    return res;
}
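
/* Initial horizontal offset for the superres upscaler, in 14-bit fixed
 * point. `step` is the per-output-pixel source increment and `err` the
 * total rounding error accumulated across the row; x0 centers the sampling
 * positions and splits that error evenly between the two edges (cf. the
 * AV1 superres upscaling derivation; the `+ 128` is half of the 8 bits of
 * extra precision used during filtering). */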
static int get_upscale_x0(const int in_w, const int out_w, const int step) {
    const int err = out_w * step - (in_w << 14);
    const int x0 = (-((out_w - in_w) << 13) + (out_w >> 1)) / out_w + 128 - (err / 2);
    return x0 & 0x3fff;
}

int dav1d_submit_frame(Dav1dContext *const c) {
    Dav1dFrameContext *f;
    int res = -1;

    // wait for c->out_delayed[next] and move into c->out if visible
    Dav1dThreadPicture *out_delayed;
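    /* With frame threading (n_fc > 1), frame contexts are reused
     * round-robin. Before reusing slot `next` we wait for its previous
     * frame to drain; that frame's picture is surfaced through c->out now
     * (if visible) and any decoding error it hit is cached for reporting. */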
    if (c->n_fc > 1) {
        pthread_mutex_lock(&c->task_thread.lock);
        const unsigned next = c->frame_thread.next++;
        if (c->frame_thread.next == c->n_fc)
            c->frame_thread.next = 0;

        f = &c->fc[next];
        while (f->n_tile_data > 0)
            pthread_cond_wait(&f->task_thread.cond,
                              &c->task_thread.lock);
        out_delayed = &c->frame_thread.out_delayed[next];
        if (out_delayed->p.data[0] || atomic_load(&f->task_thread.error)) {
            unsigned first = atomic_load(&c->task_thread.first);
            if (first + 1U < c->n_fc)
                atomic_fetch_add(&c->task_thread.first, 1U);
            else
                atomic_store(&c->task_thread.first, 0);
            atomic_compare_exchange_strong(&c->task_thread.reset_task_cur,
                                           &first, UINT_MAX);
            if (c->task_thread.cur && c->task_thread.cur < c->n_fc)
                c->task_thread.cur--;
        }
        const int error = f->task_thread.retval;
        if (error) {
            f->task_thread.retval = 0;
            c->cached_error = error;
            dav1d_data_props_copy(&c->cached_error_props, &out_delayed->p.m);
            dav1d_thread_picture_unref(out_delayed);
        } else if (out_delayed->p.data[0]) {
            const unsigned progress = atomic_load_explicit(&out_delayed->progress[1],
                                                           memory_order_relaxed);
            if ((out_delayed->visible || c->output_invisible_frames) &&
                progress != FRAME_ERROR)
            {
                dav1d_thread_picture_ref(&c->out, out_delayed);
                c->event_flags |= dav1d_picture_get_event_flags(out_delayed);
            }
            dav1d_thread_picture_unref(out_delayed);
        }
    } else {
        f = c->fc;
    }

    f->seq_hdr = c->seq_hdr;
    f->seq_hdr_ref = c->seq_hdr_ref;
    dav1d_ref_inc(f->seq_hdr_ref);
    f->frame_hdr = c->frame_hdr;
    f->frame_hdr_ref = c->frame_hdr_ref;
    c->frame_hdr = NULL;
    c->frame_hdr_ref = NULL;
    f->dsp = &c->dsp[f->seq_hdr->hbd];

    const int bpc = 8 + 2 * f->seq_hdr->hbd;
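
    /* seq_hdr->hbd is 0, 1 or 2 for 8-, 10- or 12-bit streams, so bpc is
     * 8, 10 or 12. The per-bitdepth DSP function tables below are filled
     * in lazily on first use; 10- and 12-bit share the 16bpc code path. */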
    if (!f->dsp->ipred.intra_pred[DC_PRED]) {
        Dav1dDSPContext *const dsp = &c->dsp[f->seq_hdr->hbd];

        switch (bpc) {
#define assign_bitdepth_case(bd) \
            dav1d_cdef_dsp_init_##bd##bpc(&dsp->cdef); \
            dav1d_intra_pred_dsp_init_##bd##bpc(&dsp->ipred); \
            dav1d_itx_dsp_init_##bd##bpc(&dsp->itx, bpc); \
            dav1d_loop_filter_dsp_init_##bd##bpc(&dsp->lf); \
            dav1d_loop_restoration_dsp_init_##bd##bpc(&dsp->lr, bpc); \
            dav1d_mc_dsp_init_##bd##bpc(&dsp->mc); \
            dav1d_film_grain_dsp_init_##bd##bpc(&dsp->fg); \
            break
#if CONFIG_8BPC
        case 8:
            assign_bitdepth_case(8);
#endif
#if CONFIG_16BPC
        case 10:
        case 12:
            assign_bitdepth_case(16);
#endif
#undef assign_bitdepth_case
        default:
            dav1d_log(c, "Compiled without support for %d-bit decoding\n",
                      8 + 2 * f->seq_hdr->hbd);
            res = DAV1D_ERR(ENOPROTOOPT);
            goto error;
        }
    }

#define assign_bitdepth_case(bd) \
        f->bd_fn.recon_b_inter = dav1d_recon_b_inter_##bd##bpc; \
        f->bd_fn.recon_b_intra = dav1d_recon_b_intra_##bd##bpc; \
        f->bd_fn.filter_sbrow = dav1d_filter_sbrow_##bd##bpc; \
        f->bd_fn.filter_sbrow_deblock_cols = dav1d_filter_sbrow_deblock_cols_##bd##bpc; \
        f->bd_fn.filter_sbrow_deblock_rows = dav1d_filter_sbrow_deblock_rows_##bd##bpc; \
        f->bd_fn.filter_sbrow_cdef = dav1d_filter_sbrow_cdef_##bd##bpc; \
        f->bd_fn.filter_sbrow_resize = dav1d_filter_sbrow_resize_##bd##bpc; \
        f->bd_fn.filter_sbrow_lr = dav1d_filter_sbrow_lr_##bd##bpc; \
        f->bd_fn.backup_ipred_edge = dav1d_backup_ipred_edge_##bd##bpc; \
        f->bd_fn.read_coef_blocks = dav1d_read_coef_blocks_##bd##bpc; \
        f->bd_fn.copy_pal_block_y = dav1d_copy_pal_block_y_##bd##bpc; \
        f->bd_fn.copy_pal_block_uv = dav1d_copy_pal_block_uv_##bd##bpc; \
        f->bd_fn.read_pal_plane = dav1d_read_pal_plane_##bd##bpc; \
        f->bd_fn.read_pal_uv = dav1d_read_pal_uv_##bd##bpc
    if (!f->seq_hdr->hbd) {
#if CONFIG_8BPC
        assign_bitdepth_case(8);
#endif
    } else {
#if CONFIG_16BPC
        assign_bitdepth_case(16);
#endif
    }
#undef assign_bitdepth_case

    int ref_coded_width[7];
    if (IS_INTER_OR_SWITCH(f->frame_hdr)) {
        if (f->frame_hdr->primary_ref_frame != DAV1D_PRIMARY_REF_NONE) {
            const int pri_ref = f->frame_hdr->refidx[f->frame_hdr->primary_ref_frame];
            if (!c->refs[pri_ref].p.p.data[0]) {
                res = DAV1D_ERR(EINVAL);
                goto error;
            }
        }
        for (int i = 0; i < 7; i++) {
            const int refidx = f->frame_hdr->refidx[i];
            if (!c->refs[refidx].p.p.data[0] ||
                f->frame_hdr->width[0] * 2 < c->refs[refidx].p.p.p.w ||
                f->frame_hdr->height * 2 < c->refs[refidx].p.p.p.h ||
                f->frame_hdr->width[0] > c->refs[refidx].p.p.p.w * 16 ||
                f->frame_hdr->height > c->refs[refidx].p.p.p.h * 16 ||
                f->seq_hdr->layout != c->refs[refidx].p.p.p.layout ||
                bpc != c->refs[refidx].p.p.p.bpc)
            {
                for (int j = 0; j < i; j++)
                    dav1d_thread_picture_unref(&f->refp[j]);
                res = DAV1D_ERR(EINVAL);
                goto error;
            }
            dav1d_thread_picture_ref(&f->refp[i], &c->refs[refidx].p);
            ref_coded_width[i] = c->refs[refidx].p.p.frame_hdr->width[0];
            if (f->frame_hdr->width[0] != c->refs[refidx].p.p.p.w ||
                f->frame_hdr->height != c->refs[refidx].p.p.p.h)
            {
#define scale_fac(ref_sz, this_sz) \
    ((((ref_sz) << 14) + ((this_sz) >> 1)) / (this_sz))
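                /* 14-bit fixed-point ratio with round-to-nearest: e.g. a
                 * 1920-wide reference for a 960-wide frame gives
                 * ((1920 << 14) + 480) / 960 == 32768, i.e. 2.0. */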
                f->svc[i][0].scale = scale_fac(c->refs[refidx].p.p.p.w,
                                               f->frame_hdr->width[0]);
                f->svc[i][1].scale = scale_fac(c->refs[refidx].p.p.p.h,
                                               f->frame_hdr->height);
                f->svc[i][0].step = (f->svc[i][0].scale + 8) >> 4;
                f->svc[i][1].step = (f->svc[i][1].scale + 8) >> 4;
            } else {
                f->svc[i][0].scale = f->svc[i][1].scale = 0;
            }
            f->gmv_warp_allowed[i] = f->frame_hdr->gmv[i].type > DAV1D_WM_TYPE_TRANSLATION &&
                                     !f->frame_hdr->force_integer_mv &&
                                     !dav1d_get_shear_params(&f->frame_hdr->gmv[i]) &&
                                     !f->svc[i][0].scale;
        }
    }

    // setup entropy
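    /* Frames without a primary reference start from the default CDF
     * tables (selected by base quantizer index); otherwise the entropy
     * state is inherited from the primary reference frame. */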
    if (f->frame_hdr->primary_ref_frame == DAV1D_PRIMARY_REF_NONE) {
        dav1d_cdf_thread_init_static(&f->in_cdf, f->frame_hdr->quant.yac);
    } else {
        const int pri_ref = f->frame_hdr->refidx[f->frame_hdr->primary_ref_frame];
        dav1d_cdf_thread_ref(&f->in_cdf, &c->cdf[pri_ref]);
    }
    if (f->frame_hdr->refresh_context) {
        res = dav1d_cdf_thread_alloc(c, &f->out_cdf, c->n_fc > 1);
        if (res < 0) goto error;
    }

    // FIXME qsort so tiles are in order (for frame threading)
    if (f->n_tile_data_alloc < c->n_tile_data) {
        dav1d_free(f->tile);
        assert(c->n_tile_data < INT_MAX / (int)sizeof(*f->tile));
        f->tile = dav1d_malloc(ALLOC_TILE, c->n_tile_data * sizeof(*f->tile));
        if (!f->tile) {
            f->n_tile_data_alloc = f->n_tile_data = 0;
            res = DAV1D_ERR(ENOMEM);
            goto error;
        }
        f->n_tile_data_alloc = c->n_tile_data;
    }
    memcpy(f->tile, c->tile, c->n_tile_data * sizeof(*f->tile));
    memset(c->tile, 0, c->n_tile_data * sizeof(*c->tile));
    f->n_tile_data = c->n_tile_data;
    c->n_tile_data = 0;

    // allocate frame
    res = dav1d_thread_picture_alloc(c, f, bpc);
    if (res < 0) goto error;

    if (f->frame_hdr->width[0] != f->frame_hdr->width[1]) {
        res = dav1d_picture_alloc_copy(c, &f->cur, f->frame_hdr->width[0], &f->sr_cur.p);
        if (res < 0) goto error;
    } else {
        dav1d_picture_ref(&f->cur, &f->sr_cur.p);
    }
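
    /* Superres: when the coded width (width[0]) differs from the final
     * upscaled width (width[1]), f->cur holds the narrower coded frame
     * while f->sr_cur is the full-width output, and each plane gets a
     * 14-bit fixed-point horizontal step and start offset for the
     * upscaler (see get_upscale_x0() above). */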
    if (f->frame_hdr->width[0] != f->frame_hdr->width[1]) {
        f->resize_step[0] = scale_fac(f->cur.p.w, f->sr_cur.p.p.w);
        const int ss_hor = f->cur.p.layout != DAV1D_PIXEL_LAYOUT_I444;
        const int in_cw = (f->cur.p.w + ss_hor) >> ss_hor;
        const int out_cw = (f->sr_cur.p.p.w + ss_hor) >> ss_hor;
        f->resize_step[1] = scale_fac(in_cw, out_cw);
#undef scale_fac
        f->resize_start[0] = get_upscale_x0(f->cur.p.w, f->sr_cur.p.p.w, f->resize_step[0]);
        f->resize_start[1] = get_upscale_x0(in_cw, out_cw, f->resize_step[1]);
    }

    // move f->cur into output queue
    if (c->n_fc == 1) {
        if (f->frame_hdr->show_frame || c->output_invisible_frames) {
            dav1d_thread_picture_ref(&c->out, &f->sr_cur);
            c->event_flags |= dav1d_picture_get_event_flags(&f->sr_cur);
        }
    } else {
        dav1d_thread_picture_ref(out_delayed, &f->sr_cur);
    }
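
    /* Frame dimensions in various block units: w4/h4 in 4x4 blocks, bw/bh
     * in 4x4 blocks rounded up to a whole 8x8 block, sb128w/sb128h in
     * 128-pixel superblocks, and b4_stride is bw aligned to 32 (one 128px
     * superblock). sb_shift/sb_step reflect the 64/128px superblock size
     * signalled in the sequence header. */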
    f->w4 = (f->frame_hdr->width[0] + 3) >> 2;
    f->h4 = (f->frame_hdr->height + 3) >> 2;
    f->bw = ((f->frame_hdr->width[0] + 7) >> 3) << 1;
    f->bh = ((f->frame_hdr->height + 7) >> 3) << 1;
    f->sb128w = (f->bw + 31) >> 5;
    f->sb128h = (f->bh + 31) >> 5;
    f->sb_shift = 4 + f->seq_hdr->sb128;
    f->sb_step = 16 << f->seq_hdr->sb128;
    f->sbh = (f->bh + f->sb_step - 1) >> f->sb_shift;
    f->b4_stride = (f->bw + 31) & ~31;
    f->bitdepth_max = (1 << f->cur.p.bpc) - 1;
    atomic_init(&f->task_thread.error, 0);
    const int uses_2pass = c->n_fc > 1;
    const int cols = f->frame_hdr->tiling.cols;
    const int rows = f->frame_hdr->tiling.rows;
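    /* Outstanding-work counter: roughly one unit per tile plus one
     * post-filter unit per superblock row, doubled when the two-pass
     * frame-threading mode is in use. */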
    atomic_store(&f->task_thread.task_counter,
                 (cols * rows + f->sbh) << uses_2pass);

    // ref_mvs
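    /* Motion vectors are stored at 8x8 granularity, hence the buffer is
     * (b4_stride / 2) entries wide by (sb128h * 16) rows high. */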
    if (IS_INTER_OR_SWITCH(f->frame_hdr) || f->frame_hdr->allow_intrabc) {
        f->mvs_ref = dav1d_ref_create_using_pool(c->refmvs_pool,
            sizeof(*f->mvs) * f->sb128h * 16 * (f->b4_stride >> 1));
        if (!f->mvs_ref) {
            res = DAV1D_ERR(ENOMEM);
            goto error;
        }
        f->mvs = f->mvs_ref->data;
        if (!f->frame_hdr->allow_intrabc) {
            for (int i = 0; i < 7; i++)
                f->refpoc[i] = f->refp[i].p.frame_hdr->frame_offset;
        } else {
            memset(f->refpoc, 0, sizeof(f->refpoc));
        }
        if (f->frame_hdr->use_ref_frame_mvs) {
            for (int i = 0; i < 7; i++) {
                const int refidx = f->frame_hdr->refidx[i];
                const int ref_w = ((ref_coded_width[i] + 7) >> 3) << 1;
                const int ref_h = ((f->refp[i].p.p.h + 7) >> 3) << 1;
                if (c->refs[refidx].refmvs != NULL &&
                    ref_w == f->bw && ref_h == f->bh)
                {
                    f->ref_mvs_ref[i] = c->refs[refidx].refmvs;
                    dav1d_ref_inc(f->ref_mvs_ref[i]);
                    f->ref_mvs[i] = c->refs[refidx].refmvs->data;
                } else {
                    f->ref_mvs[i] = NULL;
                    f->ref_mvs_ref[i] = NULL;
                }
                memcpy(f->refrefpoc[i], c->refs[refidx].refpoc,
                       sizeof(*f->refrefpoc));
            }
        } else {
            memset(f->ref_mvs_ref, 0, sizeof(f->ref_mvs_ref));
        }
    } else {
        f->mvs_ref = NULL;
        memset(f->ref_mvs_ref, 0, sizeof(f->ref_mvs_ref));
    }

    // segmap
    if (f->frame_hdr->segmentation.enabled) {
        // By default, the previous segmentation map is not initialised.
        f->prev_segmap_ref = NULL;
        f->prev_segmap = NULL;

        // We might need a previous frame's segmentation map. This
        // happens if there is either no update or a temporal update.
        if (f->frame_hdr->segmentation.temporal || !f->frame_hdr->segmentation.update_map) {
            const int pri_ref = f->frame_hdr->primary_ref_frame;
            assert(pri_ref != DAV1D_PRIMARY_REF_NONE);
            const int ref_w = ((ref_coded_width[pri_ref] + 7) >> 3) << 1;
            const int ref_h = ((f->refp[pri_ref].p.p.h + 7) >> 3) << 1;
            if (ref_w == f->bw && ref_h == f->bh) {
                f->prev_segmap_ref = c->refs[f->frame_hdr->refidx[pri_ref]].segmap;
                if (f->prev_segmap_ref) {
                    dav1d_ref_inc(f->prev_segmap_ref);
                    f->prev_segmap = f->prev_segmap_ref->data;
                }
            }
        }

        if (f->frame_hdr->segmentation.update_map) {
            // We're updating an existing map, but need somewhere to
            // put the new values. Allocate them here (the data
            // actually gets set elsewhere).
            f->cur_segmap_ref = dav1d_ref_create_using_pool(c->segmap_pool,
                sizeof(*f->cur_segmap) * f->b4_stride * 32 * f->sb128h);
            if (!f->cur_segmap_ref) {
                dav1d_ref_dec(&f->prev_segmap_ref);
                res = DAV1D_ERR(ENOMEM);
                goto error;
            }
            f->cur_segmap = f->cur_segmap_ref->data;
        } else if (f->prev_segmap_ref) {
            // We're not updating an existing map, and we have a valid
            // reference. Use that.
            f->cur_segmap_ref = f->prev_segmap_ref;
            dav1d_ref_inc(f->cur_segmap_ref);
            f->cur_segmap = f->prev_segmap_ref->data;
        } else {
            // We need to make a new map. Allocate one here and zero it out.
            const size_t segmap_size = sizeof(*f->cur_segmap) * f->b4_stride * 32 * f->sb128h;
            f->cur_segmap_ref = dav1d_ref_create_using_pool(c->segmap_pool, segmap_size);
            if (!f->cur_segmap_ref) {
                res = DAV1D_ERR(ENOMEM);
                goto error;
            }
            f->cur_segmap = f->cur_segmap_ref->data;
            memset(f->cur_segmap, 0, segmap_size);
        }
    } else {
        f->cur_segmap = NULL;
        f->cur_segmap_ref = NULL;
        f->prev_segmap_ref = NULL;
    }

    // update references etc.
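    /* Each bit set in refresh_frame_flags makes reference slot i adopt the
     * new frame's picture, CDFs (post-update if refresh_context, else the
     * unmodified input CDFs), segmentation map and motion vectors. */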
    const unsigned refresh_frame_flags = f->frame_hdr->refresh_frame_flags;
    for (int i = 0; i < 8; i++) {
        if (refresh_frame_flags & (1 << i)) {
            if (c->refs[i].p.p.frame_hdr)
                dav1d_thread_picture_unref(&c->refs[i].p);
            dav1d_thread_picture_ref(&c->refs[i].p, &f->sr_cur);

            dav1d_cdf_thread_unref(&c->cdf[i]);
            if (f->frame_hdr->refresh_context) {
                dav1d_cdf_thread_ref(&c->cdf[i], &f->out_cdf);
            } else {
                dav1d_cdf_thread_ref(&c->cdf[i], &f->in_cdf);
            }

            dav1d_ref_dec(&c->refs[i].segmap);
            c->refs[i].segmap = f->cur_segmap_ref;
            if (f->cur_segmap_ref)
                dav1d_ref_inc(f->cur_segmap_ref);
            dav1d_ref_dec(&c->refs[i].refmvs);
            if (!f->frame_hdr->allow_intrabc) {
                c->refs[i].refmvs = f->mvs_ref;
                if (f->mvs_ref)
                    dav1d_ref_inc(f->mvs_ref);
            }
            memcpy(c->refs[i].refpoc, f->refpoc, sizeof(f->refpoc));
        }
    }

    if (c->n_fc == 1) {
        if ((res = dav1d_decode_frame(f)) < 0) {
            dav1d_thread_picture_unref(&c->out);
            for (int i = 0; i < 8; i++) {
                if (refresh_frame_flags & (1 << i)) {
                    if (c->refs[i].p.p.frame_hdr)
                        dav1d_thread_picture_unref(&c->refs[i].p);
                    dav1d_cdf_thread_unref(&c->cdf[i]);
                    dav1d_ref_dec(&c->refs[i].segmap);
                    dav1d_ref_dec(&c->refs[i].refmvs);
                }
            }
            goto error;
        }
    } else {
        dav1d_task_frame_init(f);
        pthread_mutex_unlock(&c->task_thread.lock);
    }

    return 0;
error:
    atomic_init(&f->task_thread.error, 1);
    dav1d_cdf_thread_unref(&f->in_cdf);
    if (f->frame_hdr->refresh_context)
        dav1d_cdf_thread_unref(&f->out_cdf);
    for (int i = 0; i < 7; i++) {
        if (f->refp[i].p.frame_hdr)
            dav1d_thread_picture_unref(&f->refp[i]);
        dav1d_ref_dec(&f->ref_mvs_ref[i]);
    }
    if (c->n_fc == 1)
        dav1d_thread_picture_unref(&c->out);
    else
        dav1d_thread_picture_unref(out_delayed);
    dav1d_picture_unref_internal(&f->cur);
    dav1d_thread_picture_unref(&f->sr_cur);
    dav1d_ref_dec(&f->mvs_ref);
    dav1d_ref_dec(&f->seq_hdr_ref);
    dav1d_ref_dec(&f->frame_hdr_ref);
    dav1d_data_props_copy(&c->cached_error_props, &c->in.m);

    for (int i = 0; i < f->n_tile_data; i++)
        dav1d_data_unref_internal(&f->tile[i].data);
    f->n_tile_data = 0;

    if (c->n_fc > 1)
        pthread_mutex_unlock(&c->task_thread.lock);

    return res;
}