/*
 * Copyright © 2018-2021, VideoLAN and dav1d authors
 * Copyright © 2018, Two Orioles, LLC
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are met:
 *
 * 1. Redistributions of source code must retain the above copyright notice, this
 *    list of conditions and the following disclaimer.
 *
 * 2. Redistributions in binary form must reproduce the above copyright notice,
 *    this list of conditions and the following disclaimer in the documentation
 *    and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
 * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
 * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
 * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

#include "dav1d/data.h"

#include "common/frame.h"
#include "common/intops.h"

#include "src/decode.h"
#include "src/dequant_tables.h"
#include "src/filmgrain.h"
#include "src/recon.h"
#include "src/tables.h"
#include "src/thread_task.h"
#include "src/warpmv.h"

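// Fills dq[seg][plane][dc/ac] with dequantizer values for each segment's
// effective qindex: the per-segment delta_q is applied to qidx when
// segmentation is enabled, the per-plane DC/AC deltas from the frame header
// are added, and the results are looked up in dav1d_dq_tbl for the stream's
// bit depth.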
static void init_quant_tables(const Dav1dSequenceHeader *const seq_hdr,
                              const Dav1dFrameHeader *const frame_hdr,
                              const int qidx, uint16_t (*dq)[3][2])

    for (int i = 0; i < (frame_hdr->segmentation.enabled ? 8 : 1); i++) {
        const int yac = frame_hdr->segmentation.enabled ?
            iclip_u8(qidx + frame_hdr->segmentation.seg_data.d[i].delta_q) : qidx;
        const int ydc = iclip_u8(yac + frame_hdr->quant.ydc_delta);
        const int uac = iclip_u8(yac + frame_hdr->quant.uac_delta);
        const int udc = iclip_u8(yac + frame_hdr->quant.udc_delta);
        const int vac = iclip_u8(yac + frame_hdr->quant.vac_delta);
        const int vdc = iclip_u8(yac + frame_hdr->quant.vdc_delta);

        dq[i][0][0] = dav1d_dq_tbl[seq_hdr->hbd][ydc][0];
        dq[i][0][1] = dav1d_dq_tbl[seq_hdr->hbd][yac][1];
        dq[i][1][0] = dav1d_dq_tbl[seq_hdr->hbd][udc][0];
        dq[i][1][1] = dav1d_dq_tbl[seq_hdr->hbd][uac][1];
        dq[i][2][0] = dav1d_dq_tbl[seq_hdr->hbd][vdc][0];
        dq[i][2][1] = dav1d_dq_tbl[seq_hdr->hbd][vac][1];

static int read_mv_component_diff(MsacContext *const msac,
                                  CdfMvComponent *const mv_comp,

    const int sign = dav1d_msac_decode_bool_adapt(msac, mv_comp->sign);
    const int cl = dav1d_msac_decode_symbol_adapt16(msac, mv_comp->classes, 10);
    int up, fp = 3, hp = 1;

        up = dav1d_msac_decode_bool_adapt(msac, mv_comp->class0);
        if (mv_prec >= 0) { // !force_integer_mv
            fp = dav1d_msac_decode_symbol_adapt4(msac, mv_comp->class0_fp[up], 3);
            if (mv_prec > 0) // allow_high_precision_mv
                hp = dav1d_msac_decode_bool_adapt(msac, mv_comp->class0_hp);

        for (int n = 0; n < cl; n++)
            up |= dav1d_msac_decode_bool_adapt(msac, mv_comp->classN[n]) << n;
        if (mv_prec >= 0) { // !force_integer_mv
            fp = dav1d_msac_decode_symbol_adapt4(msac, mv_comp->classN_fp, 3);
            if (mv_prec > 0) // allow_high_precision_mv
                hp = dav1d_msac_decode_bool_adapt(msac, mv_comp->classN_hp);

    const int diff = ((up << 3) | (fp << 1) | hp) + 1;

    return sign ? -diff : diff;

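// Decodes an MV residual on top of the predicted MV in *ref_mv: the joint
// symbol selects which components are present, then each component's class,
// integer, fractional and high-precision bits are read by
// read_mv_component_diff(). mv_prec < 0 means force_integer_mv, > 0 means
// allow_high_precision_mv.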
static void read_mv_residual(Dav1dTileState *const ts, mv *const ref_mv,

    MsacContext *const msac = &ts->msac;
    const enum MVJoint mv_joint =
        dav1d_msac_decode_symbol_adapt4(msac, ts->cdf.mv.joint, N_MV_JOINTS - 1);
    if (mv_joint & MV_JOINT_V)
        ref_mv->y += read_mv_component_diff(msac, &ts->cdf.mv.comp[0], mv_prec);
    if (mv_joint & MV_JOINT_H)
        ref_mv->x += read_mv_component_diff(msac, &ts->cdf.mv.comp[1], mv_prec);

static void read_tx_tree(Dav1dTaskContext *const t,
                         const enum RectTxfmSize from,
                         const int depth, uint16_t *const masks,
                         const int x_off, const int y_off)

    const Dav1dFrameContext *const f = t->f;
    const int bx4 = t->bx & 31, by4 = t->by & 31;
    const TxfmInfo *const t_dim = &dav1d_txfm_dimensions[from];
    const int txw = t_dim->lw, txh = t_dim->lh;

    if (depth < 2 && from > (int) TX_4X4) {
        const int cat = 2 * (TX_64X64 - t_dim->max) - depth;
        const int a = t->a->tx[bx4] < txw;
        const int l = t->l.tx[by4] < txh;

        is_split = dav1d_msac_decode_bool_adapt(&t->ts->msac,
                       t->ts->cdf.m.txpart[cat][a + l]);

            masks[depth] |= 1 << (y_off * 4 + x_off);

    if (is_split && t_dim->max > TX_8X8) {
        const enum RectTxfmSize sub = t_dim->sub;
        const TxfmInfo *const sub_t_dim = &dav1d_txfm_dimensions[sub];
        const int txsw = sub_t_dim->w, txsh = sub_t_dim->h;

        read_tx_tree(t, sub, depth + 1, masks, x_off * 2 + 0, y_off * 2 + 0);

        if (txw >= txh && t->bx < f->bw)
            read_tx_tree(t, sub, depth + 1, masks, x_off * 2 + 1, y_off * 2 + 0);

        if (txh >= txw && t->by < f->bh) {
            read_tx_tree(t, sub, depth + 1, masks, x_off * 2 + 0, y_off * 2 + 1);

            if (txw >= txh && t->bx < f->bw)
                read_tx_tree(t, sub, depth + 1, masks,
                             x_off * 2 + 1, y_off * 2 + 1);

#define set_ctx(type, dir, diridx, off, mul, rep_macro) \
    rep_macro(type, t->dir tx, off, is_split ? TX_4X4 : mul * txh)
        case_set_upto16(t_dim->h, l., 1, by4);

#define set_ctx(type, dir, diridx, off, mul, rep_macro) \
    rep_macro(type, t->dir tx, off, is_split ? TX_4X4 : mul * txw)
        case_set_upto16(t_dim->w, a->, 0, bx4);

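// Inverse of the interleaved delta coding used for segment IDs: maps the
// decoded difference back to a segment ID close to the prediction `ref`
// (0 <= result < max). For example, with ref = 2 and max = 8, diff values
// 0, 1, 2, 3, 4 map back to 2, 3, 1, 4, 0 (for diff > 0, odd values land
// above the prediction and even values below it).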
static int neg_deinterleave(int diff, int ref, int max) {
    if (!ref) return diff;
    if (ref >= (max - 1)) return max - diff - 1;

        if (diff <= 2 * ref) {

                return ref + ((diff + 1) >> 1);

                return ref - (diff >> 1);

        if (diff <= 2 * (max - ref - 1)) {

                return ref + ((diff + 1) >> 1);

                return ref - (diff >> 1);

        return max - (diff + 1);

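// Scans the top and left edges of the block for neighbours coded with the
// single reference `ref`. masks[0] collects matches along the top edge and
// masks[1] along the left edge, one bit per 4x4 unit; bit 32 of masks[0] and
// masks[1] marks a matching top-right and top-left neighbour, respectively.
// The scan stops after 8 matches.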
static void find_matching_ref(const Dav1dTaskContext *const t,
                              const enum EdgeFlags intra_edge_flags,
                              const int bw4, const int bh4,
                              const int w4, const int h4,
                              const int have_left, const int have_top,
                              const int ref, uint64_t masks[2])

    /*const*/ refmvs_block *const *r = &t->rt.r[(t->by & 31) + 5];

    int have_topleft = have_top && have_left;
    int have_topright = imax(bw4, bh4) < 32 &&
                        have_top && t->bx + bw4 < t->ts->tiling.col_end &&
                        (intra_edge_flags & EDGE_I444_TOP_HAS_RIGHT);

#define bs(rp) dav1d_block_dimensions[(rp)->bs]
#define matches(rp) ((rp)->ref.ref[0] == ref + 1 && (rp)->ref.ref[1] == -1)

        const refmvs_block *r2 = &r[-1][t->bx];

        const int off = t->bx & (aw4 - 1);
        if (off) have_topleft = 0;
        if (aw4 - off > bw4) have_topright = 0;

        unsigned mask = 1 << aw4;
        for (int x = aw4; x < w4; x += aw4) {

                if (++count >= 8) return;

        /*const*/ refmvs_block *const *r2 = r;
        if (matches(&r2[0][t->bx - 1])) {

            if (++count >= 8) return;

        int lh4 = bs(&r2[0][t->bx - 1])[1];

        if (t->by & (lh4 - 1)) have_topleft = 0;

        unsigned mask = 1 << lh4;
        for (int y = lh4; y < h4; y += lh4) {

            if (matches(&r2[0][t->bx - 1])) {

                if (++count >= 8) return;

            lh4 = bs(&r2[0][t->bx - 1])[1];

    if (have_topleft && matches(&r[-1][t->bx - 1])) {
        masks[1] |= 1ULL << 32;
        if (++count >= 8) return;

    if (have_topright && matches(&r[-1][t->bx + bw4])) {
        masks[0] |= 1ULL << 32;

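// Derives a local warp (affine) model for the block: using the masks produced
// by find_matching_ref(), it collects up to 8 (source, projected) point pairs
// from matching top/left neighbours, discards samples whose MV differs from
// the block MV by more than a threshold, and fits an affine model with
// dav1d_find_affine_int(); if the fit or its shear parameters are invalid,
// the model falls back to DAV1D_WM_TYPE_IDENTITY.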
static void derive_warpmv(const Dav1dTaskContext *const t,
                          const int bw4, const int bh4,
                          const uint64_t masks[2], const union mv mv,
                          Dav1dWarpedMotionParams *const wmp)

    int pts[8][2 /* in, out */][2 /* x, y */], np = 0;
    /*const*/ refmvs_block *const *r = &t->rt.r[(t->by & 31) + 5];

#define add_sample(dx, dy, sx, sy, rp) do { \
    pts[np][0][0] = 16 * (2 * dx + sx * bs(rp)[0]) - 8; \
    pts[np][0][1] = 16 * (2 * dy + sy * bs(rp)[1]) - 8; \
    pts[np][1][0] = pts[np][0][0] + (rp)->mv.mv[0].x; \
    pts[np][1][1] = pts[np][0][1] + (rp)->mv.mv[0].y; \

    // use masks[] to find the projectable motion vectors in the edges
    if ((unsigned) masks[0] == 1 && !(masks[1] >> 32)) {
        const int off = t->bx & (bs(&r[-1][t->bx])[0] - 1);
        add_sample(-off, 0, 1, -1, &r[-1][t->bx]);
    } else for (unsigned off = 0, xmask = (uint32_t) masks[0]; np < 8 && xmask;) { // top
        const int tz = ctz(xmask);

        add_sample(off, 0, 1, -1, &r[-1][t->bx + off]);

    if (np < 8 && masks[1] == 1) {
        const int off = t->by & (bs(&r[0][t->bx - 1])[1] - 1);
        add_sample(0, -off, -1, 1, &r[-off][t->bx - 1]);
    } else for (unsigned off = 0, ymask = (uint32_t) masks[1]; np < 8 && ymask;) { // left
        const int tz = ctz(ymask);

        add_sample(0, off, -1, 1, &r[off][t->bx - 1]);

    if (np < 8 && masks[1] >> 32) // top/left
        add_sample(0, 0, -1, -1, &r[-1][t->bx - 1]);
    if (np < 8 && masks[0] >> 32) // top/right
        add_sample(bw4, 0, 1, -1, &r[-1][t->bx + bw4]);
    assert(np > 0 && np <= 8);

    // select according to motion vector difference against a threshold

    const int thresh = 4 * iclip(imax(bw4, bh4), 4, 28);
    for (int i = 0; i < np; i++) {
        mvd[i] = abs(pts[i][1][0] - pts[i][0][0] - mv.x) +
                 abs(pts[i][1][1] - pts[i][0][1] - mv.y);

    } else for (int i = 0, j = np - 1, k = 0; k < np - ret; k++, i++, j--) {
        while (mvd[i] != -1) i++;
        while (mvd[j] == -1) j--;

        // replace the discarded samples;

        memcpy(pts[i], pts[j], sizeof(*pts));

    if (!dav1d_find_affine_int(pts, ret, bw4, bh4, mv, wmp, t->bx, t->by) &&
        !dav1d_get_shear_params(wmp))

        wmp->type = DAV1D_WM_TYPE_AFFINE;

        wmp->type = DAV1D_WM_TYPE_IDENTITY;

static inline int findoddzero(const uint8_t *buf, int len) {
    for (int n = 0; n < len; n++)
        if (!buf[n * 2]) return 1;

// meant to be SIMD'able, so that theoretical complexity of this function
// times block size goes from w4*h4 to w4+h4-1
// a and b are previous two lines containing (a) top/left entries or (b)
// top/left entries, with a[0] being either the first top or first left entry,
// depending on top_offset being 1 or 0, and b being the first top/left entry
// for whichever has one. left_offset indicates whether the (len-1)th entry
// has a left neighbour.
// output is order[] and ctx for each member of this diagonal.
static void order_palette(const uint8_t *pal_idx, const ptrdiff_t stride,
                          const int i, const int first, const int last,
                          uint8_t (*const order)[8], uint8_t *const ctx)

    int have_top = i > first;

    pal_idx += first + (i - first) * stride;
    for (int j = first, n = 0; j >= last; have_top = 1, j--, n++, pal_idx += stride - 1) {
        const int have_left = j > 0;

        assert(have_left || have_top);

#define add(v_in) do { \
    const int v = v_in; \
    assert((unsigned)v < 8U); \
    order[n][o_idx++] = v; \

            add(pal_idx[-stride]);
        } else if (!have_top) {

            const int l = pal_idx[-1], t = pal_idx[-stride], tl = pal_idx[-(stride + 1)];
            const int same_t_l = t == l;
            const int same_t_tl = t == tl;
            const int same_l_tl = l == tl;
            const int same_all = same_t_l & same_t_tl & same_l_tl;

            } else if (same_t_l) {

            } else if (same_t_tl | same_l_tl) {

                add(same_t_tl ? l : t);

        for (unsigned m = 1, bit = 0; m < 0x100; m <<= 1, bit++)

                order[n][o_idx++] = bit;

static void read_pal_indices(Dav1dTaskContext *const t,
                             uint8_t *const pal_idx,
                             const Av1Block *const b, const int pl,
                             const int w4, const int h4,
                             const int bw4, const int bh4)

    Dav1dTileState *const ts = t->ts;
    const ptrdiff_t stride = bw4 * 4;

    pixel *const pal_tmp = t->scratch.pal_idx_uv;
    pal_tmp[0] = dav1d_msac_decode_uniform(&ts->msac, b->pal_sz[pl]);
    uint16_t (*const color_map_cdf)[8] =
        ts->cdf.m.color_map[pl][b->pal_sz[pl] - 2];
    uint8_t (*const order)[8] = t->scratch.pal_order;
    uint8_t *const ctx = t->scratch.pal_ctx;
    for (int i = 1; i < 4 * (w4 + h4) - 1; i++) {
        // top/left-to-bottom/right diagonals ("wave-front")
        const int first = imin(i, w4 * 4 - 1);
        const int last = imax(0, i - h4 * 4 + 1);
        order_palette(pal_tmp, stride, i, first, last, order, ctx);
        for (int j = first, m = 0; j >= last; j--, m++) {
            const int color_idx = dav1d_msac_decode_symbol_adapt8(&ts->msac,
                                      color_map_cdf[ctx[m]], b->pal_sz[pl] - 1);
            pal_tmp[(i - j) * stride + j] = order[m][color_idx];

    t->c->pal_dsp.pal_idx_finish(pal_idx, pal_tmp, bw4 * 4, bh4 * 4,

static void read_vartx_tree(Dav1dTaskContext *const t,
                            Av1Block *const b, const enum BlockSize bs,
                            const int bx4, const int by4)

    const Dav1dFrameContext *const f = t->f;
    const uint8_t *const b_dim = dav1d_block_dimensions[bs];
    const int bw4 = b_dim[0], bh4 = b_dim[1];

    // var-tx tree coding
    uint16_t tx_split[2] = { 0 };
    b->max_ytx = dav1d_max_txfm_size_for_bs[bs][0];
    if (!b->skip && (f->frame_hdr->segmentation.lossless[b->seg_id] ||
                     b->max_ytx == TX_4X4))

        b->max_ytx = b->uvtx = TX_4X4;
        if (f->frame_hdr->txfm_mode == DAV1D_TX_SWITCHABLE) {
#define set_ctx(type, dir, diridx, off, mul, rep_macro) \
    rep_macro(type, t->dir tx, off, TX_4X4)
            case_set(bh4, l., 1, by4);
            case_set(bw4, a->, 0, bx4);

    } else if (f->frame_hdr->txfm_mode != DAV1D_TX_SWITCHABLE || b->skip) {
        if (f->frame_hdr->txfm_mode == DAV1D_TX_SWITCHABLE) {
#define set_ctx(type, dir, diridx, off, mul, rep_macro) \
    rep_macro(type, t->dir tx, off, mul * b_dim[2 + diridx])
            case_set(bh4, l., 1, by4);
            case_set(bw4, a->, 0, bx4);

        b->uvtx = dav1d_max_txfm_size_for_bs[bs][f->cur.p.layout];

        assert(bw4 <= 16 || bh4 <= 16 || b->max_ytx == TX_64X64);
        int y, x, y_off, x_off;
        const TxfmInfo *const ytx = &dav1d_txfm_dimensions[b->max_ytx];
        for (y = 0, y_off = 0; y < bh4; y += ytx->h, y_off++) {
            for (x = 0, x_off = 0; x < bw4; x += ytx->w, x_off++) {
                read_tx_tree(t, b->max_ytx, 0, tx_split, x_off, y_off);
                // contexts are updated inside read_tx_tree()

        if (DEBUG_BLOCK_INFO)
            printf("Post-vartxtree[%x/%x]: r=%d\n",
                   tx_split[0], tx_split[1], t->ts->msac.rng);
        b->uvtx = dav1d_max_txfm_size_for_bs[bs][f->cur.p.layout];

    assert(!(tx_split[0] & ~0x33));
    b->tx_split0 = (uint8_t)tx_split[0];
    b->tx_split1 = tx_split[1];

static inline unsigned get_prev_frame_segid(const Dav1dFrameContext *const f,
                                            const int by, const int bx,
                                            const int w4, int h4,
                                            const uint8_t *ref_seg_map,
                                            const ptrdiff_t stride)

    assert(f->frame_hdr->primary_ref_frame != DAV1D_PRIMARY_REF_NONE);

    ref_seg_map += by * stride + bx;

        for (int x = 0; x < w4; x++)
            seg_id = imin(seg_id, ref_seg_map[x]);
        ref_seg_map += stride;
    } while (--h4 > 0 && seg_id);

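// The splat_* helpers below store the decoded block's reference/MV information
// into the rows of the refmvs grid covered by the block (via
// c->refmvs_dsp.splat_mv), so that later blocks can use it for MV prediction.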
static inline void splat_oneref_mv(const Dav1dContext *const c,
                                   Dav1dTaskContext *const t,
                                   const enum BlockSize bs,
                                   const Av1Block *const b,
                                   const int bw4, const int bh4)

    const enum InterPredMode mode = b->inter_mode;
    const refmvs_block ALIGN(tmpl, 16) = (refmvs_block) {
        .ref.ref = { b->ref[0] + 1, b->interintra_type ? 0 : -1 },
        .mv.mv[0] = b->mv[0],

        .mf = (mode == GLOBALMV && imin(bw4, bh4) >= 2) | ((mode == NEWMV) * 2),

    c->refmvs_dsp.splat_mv(&t->rt.r[(t->by & 31) + 5], &tmpl, t->bx, bw4, bh4);

static inline void splat_intrabc_mv(const Dav1dContext *const c,
                                    Dav1dTaskContext *const t,
                                    const enum BlockSize bs,
                                    const Av1Block *const b,
                                    const int bw4, const int bh4)

    const refmvs_block ALIGN(tmpl, 16) = (refmvs_block) {
        .ref.ref = { 0, -1 },
        .mv.mv[0] = b->mv[0],

    c->refmvs_dsp.splat_mv(&t->rt.r[(t->by & 31) + 5], &tmpl, t->bx, bw4, bh4);

static inline void splat_tworef_mv(const Dav1dContext *const c,
                                   Dav1dTaskContext *const t,
                                   const enum BlockSize bs,
                                   const Av1Block *const b,
                                   const int bw4, const int bh4)

    assert(bw4 >= 2 && bh4 >= 2);
    const enum CompInterPredMode mode = b->inter_mode;
    const refmvs_block ALIGN(tmpl, 16) = (refmvs_block) {
        .ref.ref = { b->ref[0] + 1, b->ref[1] + 1 },
        .mv.mv = { b->mv[0], b->mv[1] },

        .mf = (mode == GLOBALMV_GLOBALMV) | !!((1 << mode) & (0xbc)) * 2,

    c->refmvs_dsp.splat_mv(&t->rt.r[(t->by & 31) + 5], &tmpl, t->bx, bw4, bh4);

static inline void splat_intraref(const Dav1dContext *const c,
                                  Dav1dTaskContext *const t,
                                  const enum BlockSize bs,
                                  const int bw4, const int bh4)

    const refmvs_block ALIGN(tmpl, 16) = (refmvs_block) {
        .ref.ref = { 0, -1 },
        .mv.mv[0].n = INVALID_MV,

    c->refmvs_dsp.splat_mv(&t->rt.r[(t->by & 31) + 5], &tmpl, t->bx, bw4, bh4);

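// The *_lowest_px helpers compute, for each reference frame, the lowest pixel
// row that inter prediction of this block can touch (accounting for subpel
// filter taps, reference scaling, warp, and OBMC overlap). Frame threading
// uses these values to know how far the reference frames must be decoded
// before this block can be reconstructed.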
static void mc_lowest_px(int *const dst, const int by4, const int bh4,
                         const int mvy, const int ss_ver,
                         const struct ScalableMotionParams *const smp)

    const int v_mul = 4 >> ss_ver;

        const int my = mvy >> (3 + ss_ver), dy = mvy & (15 >> !ss_ver);
        *dst = imax(*dst, (by4 + bh4) * v_mul + my + 4 * !!dy);

        int y = (by4 * v_mul << 4) + mvy * (1 << !ss_ver);
        const int64_t tmp = (int64_t)(y) * smp->scale + (smp->scale - 0x4000) * 8;
        y = apply_sign64((int)((llabs(tmp) + 128) >> 8), tmp) + 32;
        const int bottom = ((y + (bh4 * v_mul - 1) * smp->step) >> 10) + 1 + 4;
        *dst = imax(*dst, bottom);

static ALWAYS_INLINE
void affine_lowest_px(Dav1dTaskContext *const t, int *const dst,
                      const uint8_t *const b_dim,
                      const Dav1dWarpedMotionParams *const wmp,
                      const int ss_ver, const int ss_hor)

    const int h_mul = 4 >> ss_hor, v_mul = 4 >> ss_ver;
    assert(!((b_dim[0] * h_mul) & 7) && !((b_dim[1] * v_mul) & 7));
    const int32_t *const mat = wmp->matrix;
    const int y = b_dim[1] * v_mul - 8; // lowest y

    const int src_y = t->by * 4 + ((y + 4) << ss_ver);
    const int64_t mat5_y = (int64_t) mat[5] * src_y + mat[1];
    // check left- and right-most blocks
    for (int x = 0; x < b_dim[0] * h_mul; x += imax(8, b_dim[0] * h_mul - 8)) {
        // calculate transformation relative to center of 8x8 block in

        const int src_x = t->bx * 4 + ((x + 4) << ss_hor);
        const int64_t mvy = ((int64_t) mat[4] * src_x + mat5_y) >> ss_ver;
        const int dy = (int) (mvy >> 16) - 4;
        *dst = imax(*dst, dy + 4 + 8);

static NOINLINE
void affine_lowest_px_luma(Dav1dTaskContext *const t, int *const dst,
                           const uint8_t *const b_dim,
                           const Dav1dWarpedMotionParams *const wmp)

    affine_lowest_px(t, dst, b_dim, wmp, 0, 0);

static NOINLINE
void affine_lowest_px_chroma(Dav1dTaskContext *const t, int *const dst,
                             const uint8_t *const b_dim,
                             const Dav1dWarpedMotionParams *const wmp)

    const Dav1dFrameContext *const f = t->f;
    assert(f->cur.p.layout != DAV1D_PIXEL_LAYOUT_I400);
    if (f->cur.p.layout == DAV1D_PIXEL_LAYOUT_I444)
        affine_lowest_px_luma(t, dst, b_dim, wmp);

        affine_lowest_px(t, dst, b_dim, wmp, f->cur.p.layout & DAV1D_PIXEL_LAYOUT_I420, 1);

static void obmc_lowest_px(Dav1dTaskContext *const t,
                           int (*const dst)[2], const int is_chroma,
                           const uint8_t *const b_dim,
                           const int bx4, const int by4, const int w4, const int h4)

    assert(!(t->bx & 1) && !(t->by & 1));
    const Dav1dFrameContext *const f = t->f;
    /*const*/ refmvs_block **r = &t->rt.r[(t->by & 31) + 5];
    const int ss_ver = is_chroma && f->cur.p.layout == DAV1D_PIXEL_LAYOUT_I420;
    const int ss_hor = is_chroma && f->cur.p.layout != DAV1D_PIXEL_LAYOUT_I444;
    const int h_mul = 4 >> ss_hor, v_mul = 4 >> ss_ver;

    if (t->by > t->ts->tiling.row_start &&
        (!is_chroma || b_dim[0] * h_mul + b_dim[1] * v_mul >= 16))

        for (int i = 0, x = 0; x < w4 && i < imin(b_dim[2], 4); ) {
            // only odd blocks are considered for overlap handling, hence +1
            const refmvs_block *const a_r = &r[-1][t->bx + x + 1];
            const uint8_t *const a_b_dim = dav1d_block_dimensions[a_r->bs];

            if (a_r->ref.ref[0] > 0) {
                const int oh4 = imin(b_dim[1], 16) >> 1;
                mc_lowest_px(&dst[a_r->ref.ref[0] - 1][is_chroma], t->by,
                             (oh4 * 3 + 3) >> 2, a_r->mv.mv[0].y, ss_ver,
                             &f->svc[a_r->ref.ref[0] - 1][1]);

            x += imax(a_b_dim[0], 2);

    if (t->bx > t->ts->tiling.col_start)
        for (int i = 0, y = 0; y < h4 && i < imin(b_dim[3], 4); ) {
            // only odd blocks are considered for overlap handling, hence +1
            const refmvs_block *const l_r = &r[y + 1][t->bx - 1];
            const uint8_t *const l_b_dim = dav1d_block_dimensions[l_r->bs];

            if (l_r->ref.ref[0] > 0) {
                const int oh4 = iclip(l_b_dim[1], 2, b_dim[1]);
                mc_lowest_px(&dst[l_r->ref.ref[0] - 1][is_chroma],
                             t->by + y, oh4, l_r->mv.mv[0].y, ss_ver,
                             &f->svc[l_r->ref.ref[0] - 1][1]);

            y += imax(l_b_dim[1], 2);

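// decode_b() parses one coding block: segment ID, skip flags, quantizer and
// loop-filter deltas, intra or inter mode information (including palette,
// CDEF index, reference frames, MVs and motion mode), then calls into the
// bitdepth-specific reconstruction functions and updates the left/above
// context and refmvs state used by subsequent blocks.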
static int decode_b(Dav1dTaskContext *const t,
                    const enum BlockLevel bl,
                    const enum BlockSize bs,
                    const enum BlockPartition bp,
                    const enum EdgeFlags intra_edge_flags)

    Dav1dTileState *const ts = t->ts;
    const Dav1dFrameContext *const f = t->f;
    Av1Block b_mem, *const b = t->frame_thread.pass ?
        &f->frame_thread.b[t->by * f->b4_stride + t->bx] : &b_mem;
    const uint8_t *const b_dim = dav1d_block_dimensions[bs];
    const int bx4 = t->bx & 31, by4 = t->by & 31;
    const int ss_ver = f->cur.p.layout == DAV1D_PIXEL_LAYOUT_I420;
    const int ss_hor = f->cur.p.layout != DAV1D_PIXEL_LAYOUT_I444;
    const int cbx4 = bx4 >> ss_hor, cby4 = by4 >> ss_ver;
    const int bw4 = b_dim[0], bh4 = b_dim[1];
    const int w4 = imin(bw4, f->bw - t->bx), h4 = imin(bh4, f->bh - t->by);
    const int cbw4 = (bw4 + ss_hor) >> ss_hor, cbh4 = (bh4 + ss_ver) >> ss_ver;
    const int have_left = t->bx > ts->tiling.col_start;
    const int have_top = t->by > ts->tiling.row_start;
    const int has_chroma = f->cur.p.layout != DAV1D_PIXEL_LAYOUT_I400 &&
                           (bw4 > ss_hor || t->bx & 1) &&
                           (bh4 > ss_ver || t->by & 1);

    if (t->frame_thread.pass == 2) {

            f->bd_fn.recon_b_intra(t, bs, intra_edge_flags, b);

            const enum IntraPredMode y_mode_nofilt =
                b->y_mode == FILTER_PRED ? DC_PRED : b->y_mode;
#define set_ctx(type, dir, diridx, off, mul, rep_macro) \
    rep_macro(type, t->dir mode, off, mul * y_mode_nofilt); \
    rep_macro(type, t->dir intra, off, mul)
            case_set(bh4, l., 1, by4);
            case_set(bw4, a->, 0, bx4);

            if (IS_INTER_OR_SWITCH(f->frame_hdr)) {
                refmvs_block *const r = &t->rt.r[(t->by & 31) + 5 + bh4 - 1][t->bx];
                for (int x = 0; x < bw4; x++) {

                refmvs_block *const *rr = &t->rt.r[(t->by & 31) + 5];
                for (int y = 0; y < bh4 - 1; y++) {
                    rr[y][t->bx + bw4 - 1].ref.ref[0] = 0;
                    rr[y][t->bx + bw4 - 1].bs = bs;

#define set_ctx(type, dir, diridx, off, mul, rep_macro) \
    rep_macro(type, t->dir uvmode, off, mul * b->uv_mode)
                case_set(cbh4, l., 1, cby4);
                case_set(cbw4, a->, 0, cbx4);

            if (IS_INTER_OR_SWITCH(f->frame_hdr) /* not intrabc */ &&
                b->comp_type == COMP_INTER_NONE && b->motion_mode == MM_WARP)

                if (b->matrix[0] == SHRT_MIN) {
                    t->warpmv.type = DAV1D_WM_TYPE_IDENTITY;

                    t->warpmv.type = DAV1D_WM_TYPE_AFFINE;
                    t->warpmv.matrix[2] = b->matrix[0] + 0x10000;
                    t->warpmv.matrix[3] = b->matrix[1];
                    t->warpmv.matrix[4] = b->matrix[2];
                    t->warpmv.matrix[5] = b->matrix[3] + 0x10000;
                    dav1d_set_affine_mv2d(bw4, bh4, b->mv2d, &t->warpmv,

                    dav1d_get_shear_params(&t->warpmv);
#define signabs(v) v < 0 ? '-' : ' ', abs(v)
                    if (DEBUG_BLOCK_INFO)
                        printf("[ %c%x %c%x %c%x\n %c%x %c%x %c%x ]\n"
                               "alpha=%c%x, beta=%c%x, gamma=%c%x, delta=%c%x, mv=y:%d,x:%d\n",
                               signabs(t->warpmv.matrix[0]),
                               signabs(t->warpmv.matrix[1]),
                               signabs(t->warpmv.matrix[2]),
                               signabs(t->warpmv.matrix[3]),
                               signabs(t->warpmv.matrix[4]),
                               signabs(t->warpmv.matrix[5]),
                               signabs(t->warpmv.u.p.alpha),
                               signabs(t->warpmv.u.p.beta),
                               signabs(t->warpmv.u.p.gamma),
                               signabs(t->warpmv.u.p.delta),
                               b->mv2d.y, b->mv2d.x);

            if (f->bd_fn.recon_b_inter(t, bs, b)) return -1;

            const uint8_t *const filter = dav1d_filter_dir[b->filter2d];
#define set_ctx(type, dir, diridx, off, mul, rep_macro) \
    rep_macro(type, t->dir filter[0], off, mul * filter[0]); \
    rep_macro(type, t->dir filter[1], off, mul * filter[1]); \
    rep_macro(type, t->dir intra, off, 0)
            case_set(bh4, l., 1, by4);
            case_set(bw4, a->, 0, bx4);

            if (IS_INTER_OR_SWITCH(f->frame_hdr)) {
                refmvs_block *const r = &t->rt.r[(t->by & 31) + 5 + bh4 - 1][t->bx];
                for (int x = 0; x < bw4; x++) {
                    r[x].ref.ref[0] = b->ref[0] + 1;
                    r[x].mv.mv[0] = b->mv[0];

                refmvs_block *const *rr = &t->rt.r[(t->by & 31) + 5];
                for (int y = 0; y < bh4 - 1; y++) {
                    rr[y][t->bx + bw4 - 1].ref.ref[0] = b->ref[0] + 1;
                    rr[y][t->bx + bw4 - 1].mv.mv[0] = b->mv[0];
                    rr[y][t->bx + bw4 - 1].bs = bs;

#define set_ctx(type, dir, diridx, off, mul, rep_macro) \
    rep_macro(type, t->dir uvmode, off, mul * DC_PRED)
            case_set(cbh4, l., 1, cby4);
            case_set(cbw4, a->, 0, cbx4);

    const int cw4 = (w4 + ss_hor) >> ss_hor, ch4 = (h4 + ss_ver) >> ss_ver;

    const Dav1dSegmentationData *seg = NULL;

    // segment_id (if seg_feature for skip/ref/gmv is enabled)

    if (f->frame_hdr->segmentation.enabled) {
        if (!f->frame_hdr->segmentation.update_map) {
            if (f->prev_segmap) {
                unsigned seg_id = get_prev_frame_segid(f, t->by, t->bx, w4, h4,

                if (seg_id >= 8) return -1;

            seg = &f->frame_hdr->segmentation.seg_data.d[b->seg_id];
        } else if (f->frame_hdr->segmentation.seg_data.preskip) {
            if (f->frame_hdr->segmentation.temporal &&
                (seg_pred = dav1d_msac_decode_bool_adapt(&ts->msac,
                                ts->cdf.m.seg_pred[t->a->seg_pred[bx4] +
                                t->l.seg_pred[by4]])))

                // temporal predicted seg_id
                if (f->prev_segmap) {
                    unsigned seg_id = get_prev_frame_segid(f, t->by, t->bx,

                    if (seg_id >= 8) return -1;

                const unsigned pred_seg_id =
                    get_cur_frame_segid(t->by, t->bx, have_top, have_left,
                                        &seg_ctx, f->cur_segmap, f->b4_stride);
                const unsigned diff = dav1d_msac_decode_symbol_adapt8(&ts->msac,
                                          ts->cdf.m.seg_id[seg_ctx],
                                          DAV1D_MAX_SEGMENTS - 1);
                const unsigned last_active_seg_id =
                    f->frame_hdr->segmentation.seg_data.last_active_segid;
                b->seg_id = neg_deinterleave(diff, pred_seg_id,
                                             last_active_seg_id + 1);
                if (b->seg_id > last_active_seg_id) b->seg_id = 0; // error?
                if (b->seg_id >= DAV1D_MAX_SEGMENTS) b->seg_id = 0; // error?

            if (DEBUG_BLOCK_INFO)
                printf("Post-segid[preskip;%d]: r=%d\n",
                       b->seg_id, ts->msac.rng);

            seg = &f->frame_hdr->segmentation.seg_data.d[b->seg_id];

    if ((!seg || (!seg->globalmv && seg->ref == -1 && !seg->skip)) &&
        f->frame_hdr->skip_mode_enabled && imin(bw4, bh4) > 1)

        const int smctx = t->a->skip_mode[bx4] + t->l.skip_mode[by4];
        b->skip_mode = dav1d_msac_decode_bool_adapt(&ts->msac,
                           ts->cdf.m.skip_mode[smctx]);
        if (DEBUG_BLOCK_INFO)
            printf("Post-skipmode[%d]: r=%d\n", b->skip_mode, ts->msac.rng);

    if (b->skip_mode || (seg && seg->skip)) {

        const int sctx = t->a->skip[bx4] + t->l.skip[by4];
        b->skip = dav1d_msac_decode_bool_adapt(&ts->msac, ts->cdf.m.skip[sctx]);
        if (DEBUG_BLOCK_INFO)
            printf("Post-skip[%d]: r=%d\n", b->skip, ts->msac.rng);

    if (f->frame_hdr->segmentation.enabled &&
        f->frame_hdr->segmentation.update_map &&
        !f->frame_hdr->segmentation.seg_data.preskip)

        if (!b->skip && f->frame_hdr->segmentation.temporal &&
            (seg_pred = dav1d_msac_decode_bool_adapt(&ts->msac,
                            ts->cdf.m.seg_pred[t->a->seg_pred[bx4] +
                            t->l.seg_pred[by4]])))

            // temporal predicted seg_id
            if (f->prev_segmap) {
                unsigned seg_id = get_prev_frame_segid(f, t->by, t->bx, w4, h4,

                if (seg_id >= 8) return -1;

            const unsigned pred_seg_id =
                get_cur_frame_segid(t->by, t->bx, have_top, have_left,
                                    &seg_ctx, f->cur_segmap, f->b4_stride);

                b->seg_id = pred_seg_id;

                const unsigned diff = dav1d_msac_decode_symbol_adapt8(&ts->msac,
                                          ts->cdf.m.seg_id[seg_ctx],
                                          DAV1D_MAX_SEGMENTS - 1);
                const unsigned last_active_seg_id =
                    f->frame_hdr->segmentation.seg_data.last_active_segid;
                b->seg_id = neg_deinterleave(diff, pred_seg_id,
                                             last_active_seg_id + 1);
                if (b->seg_id > last_active_seg_id) b->seg_id = 0; // error?

            if (b->seg_id >= DAV1D_MAX_SEGMENTS) b->seg_id = 0; // error?

        seg = &f->frame_hdr->segmentation.seg_data.d[b->seg_id];

        if (DEBUG_BLOCK_INFO)
            printf("Post-segid[postskip;%d]: r=%d\n",
                   b->seg_id, ts->msac.rng);

        const int idx = f->seq_hdr->sb128 ? ((t->bx & 16) >> 4) +
                                            ((t->by & 16) >> 3) : 0;
        if (t->cur_sb_cdef_idx_ptr[idx] == -1) {
            const int v = dav1d_msac_decode_bools(&ts->msac,
                              f->frame_hdr->cdef.n_bits);
            t->cur_sb_cdef_idx_ptr[idx] = v;
            if (bw4 > 16) t->cur_sb_cdef_idx_ptr[idx + 1] = v;
            if (bh4 > 16) t->cur_sb_cdef_idx_ptr[idx + 2] = v;
            if (bw4 == 32 && bh4 == 32) t->cur_sb_cdef_idx_ptr[idx + 3] = v;

            if (DEBUG_BLOCK_INFO)
                printf("Post-cdef_idx[%d]: r=%d\n",
                       *t->cur_sb_cdef_idx_ptr, ts->msac.rng);

    if (!(t->bx & (31 >> !f->seq_hdr->sb128)) &&
        !(t->by & (31 >> !f->seq_hdr->sb128)))

        const int prev_qidx = ts->last_qidx;
        const int have_delta_q = f->frame_hdr->delta.q.present &&
            (bs != (f->seq_hdr->sb128 ? BS_128x128 : BS_64x64) || !b->skip);

        uint32_t prev_delta_lf = ts->last_delta_lf.u32;

            int delta_q = dav1d_msac_decode_symbol_adapt4(&ts->msac,
                              ts->cdf.m.delta_q, 3);

                const int n_bits = 1 + dav1d_msac_decode_bools(&ts->msac, 3);
                delta_q = dav1d_msac_decode_bools(&ts->msac, n_bits) +

            if (dav1d_msac_decode_bool_equi(&ts->msac)) delta_q = -delta_q;
            delta_q *= 1 << f->frame_hdr->delta.q.res_log2;

            ts->last_qidx = iclip(ts->last_qidx + delta_q, 1, 255);
            if (have_delta_q && DEBUG_BLOCK_INFO)
                printf("Post-delta_q[%d->%d]: r=%d\n",
                       delta_q, ts->last_qidx, ts->msac.rng);

            if (f->frame_hdr->delta.lf.present) {
                const int n_lfs = f->frame_hdr->delta.lf.multi ?
                    f->cur.p.layout != DAV1D_PIXEL_LAYOUT_I400 ? 4 : 2 : 1;

                for (int i = 0; i < n_lfs; i++) {
                    int delta_lf = dav1d_msac_decode_symbol_adapt4(&ts->msac,
                        ts->cdf.m.delta_lf[i + f->frame_hdr->delta.lf.multi], 3);
                    if (delta_lf == 3) {
                        const int n_bits = 1 + dav1d_msac_decode_bools(&ts->msac, 3);
                        delta_lf = dav1d_msac_decode_bools(&ts->msac, n_bits) +

                    if (dav1d_msac_decode_bool_equi(&ts->msac))
                        delta_lf = -delta_lf;
                    delta_lf *= 1 << f->frame_hdr->delta.lf.res_log2;

                    ts->last_delta_lf.i8[i] =
                        iclip(ts->last_delta_lf.i8[i] + delta_lf, -63, 63);
                    if (have_delta_q && DEBUG_BLOCK_INFO)
                        printf("Post-delta_lf[%d:%d]: r=%d\n", i, delta_lf,

        if (ts->last_qidx == f->frame_hdr->quant.yac) {
            // assign frame-wide q values to this sb

        } else if (ts->last_qidx != prev_qidx) {
            // find sb-specific quant parameters
            init_quant_tables(f->seq_hdr, f->frame_hdr, ts->last_qidx, ts->dqmem);

        if (!ts->last_delta_lf.u32) {
            // assign frame-wide lf values to this sb
            ts->lflvl = f->lf.lvl;
        } else if (ts->last_delta_lf.u32 != prev_delta_lf) {
            // find sb-specific lf lvl parameters
            ts->lflvl = ts->lflvlmem;
            dav1d_calc_lf_values(ts->lflvlmem, f->frame_hdr, ts->last_delta_lf.i8);

    } else if (IS_INTER_OR_SWITCH(f->frame_hdr)) {
        if (seg && (seg->ref >= 0 || seg->globalmv)) {
            b->intra = !seg->ref;

            const int ictx = get_intra_ctx(t->a, &t->l, by4, bx4,
                                           have_top, have_left);
            b->intra = !dav1d_msac_decode_bool_adapt(&ts->msac,
                            ts->cdf.m.intra[ictx]);
            if (DEBUG_BLOCK_INFO)
                printf("Post-intra[%d]: r=%d\n", b->intra, ts->msac.rng);

    } else if (f->frame_hdr->allow_intrabc) {
        b->intra = !dav1d_msac_decode_bool_adapt(&ts->msac, ts->cdf.m.intrabc);
        if (DEBUG_BLOCK_INFO)
            printf("Post-intrabcflag[%d]: r=%d\n", b->intra, ts->msac.rng);

    // intra/inter-specific stuff

        uint16_t *const ymode_cdf = IS_INTER_OR_SWITCH(f->frame_hdr) ?
            ts->cdf.m.y_mode[dav1d_ymode_size_context[bs]] :
            ts->cdf.kfym[dav1d_intra_mode_context[t->a->mode[bx4]]]
                        [dav1d_intra_mode_context[t->l.mode[by4]]];
        b->y_mode = dav1d_msac_decode_symbol_adapt16(&ts->msac, ymode_cdf,
                                                     N_INTRA_PRED_MODES - 1);
        if (DEBUG_BLOCK_INFO)
            printf("Post-ymode[%d]: r=%d\n", b->y_mode, ts->msac.rng);

        if (b_dim[2] + b_dim[3] >= 2 && b->y_mode >= VERT_PRED &&
            b->y_mode <= VERT_LEFT_PRED)

            uint16_t *const acdf = ts->cdf.m.angle_delta[b->y_mode - VERT_PRED];
            const int angle = dav1d_msac_decode_symbol_adapt8(&ts->msac, acdf, 6);
            b->y_angle = angle - 3;

            const int cfl_allowed = f->frame_hdr->segmentation.lossless[b->seg_id] ?
                cbw4 == 1 && cbh4 == 1 : !!(cfl_allowed_mask & (1 << bs));
            uint16_t *const uvmode_cdf = ts->cdf.m.uv_mode[cfl_allowed][b->y_mode];
            b->uv_mode = dav1d_msac_decode_symbol_adapt16(&ts->msac, uvmode_cdf,
                             N_UV_INTRA_PRED_MODES - 1 - !cfl_allowed);
            if (DEBUG_BLOCK_INFO)
                printf("Post-uvmode[%d]: r=%d\n", b->uv_mode, ts->msac.rng);

            if (b->uv_mode == CFL_PRED) {
#define SIGN(a) (!!(a) + ((a) > 0))
                const int sign = dav1d_msac_decode_symbol_adapt8(&ts->msac,
                                     ts->cdf.m.cfl_sign, 7) + 1;
                const int sign_u = sign * 0x56 >> 8, sign_v = sign - sign_u * 3;
                assert(sign_u == sign / 3);

                    const int ctx = (sign_u == 2) * 3 + sign_v;
                    b->cfl_alpha[0] = dav1d_msac_decode_symbol_adapt16(&ts->msac,
                                          ts->cdf.m.cfl_alpha[ctx], 15) + 1;
                    if (sign_u == 1) b->cfl_alpha[0] = -b->cfl_alpha[0];

                    b->cfl_alpha[0] = 0;

                    const int ctx = (sign_v == 2) * 3 + sign_u;
                    b->cfl_alpha[1] = dav1d_msac_decode_symbol_adapt16(&ts->msac,
                                          ts->cdf.m.cfl_alpha[ctx], 15) + 1;
                    if (sign_v == 1) b->cfl_alpha[1] = -b->cfl_alpha[1];

                    b->cfl_alpha[1] = 0;

                if (DEBUG_BLOCK_INFO)
                    printf("Post-uvalphas[%d/%d]: r=%d\n",
                           b->cfl_alpha[0], b->cfl_alpha[1], ts->msac.rng);
            } else if (b_dim[2] + b_dim[3] >= 2 && b->uv_mode >= VERT_PRED &&
                       b->uv_mode <= VERT_LEFT_PRED)

                uint16_t *const acdf = ts->cdf.m.angle_delta[b->uv_mode - VERT_PRED];
                const int angle = dav1d_msac_decode_symbol_adapt8(&ts->msac, acdf, 6);
                b->uv_angle = angle - 3;

        b->pal_sz[0] = b->pal_sz[1] = 0;
        if (f->frame_hdr->allow_screen_content_tools &&
            imax(bw4, bh4) <= 16 && bw4 + bh4 >= 4)

            const int sz_ctx = b_dim[2] + b_dim[3] - 2;
            if (b->y_mode == DC_PRED) {
                const int pal_ctx = (t->a->pal_sz[bx4] > 0) + (t->l.pal_sz[by4] > 0);
                const int use_y_pal = dav1d_msac_decode_bool_adapt(&ts->msac,
                                          ts->cdf.m.pal_y[sz_ctx][pal_ctx]);
                if (DEBUG_BLOCK_INFO)
                    printf("Post-y_pal[%d]: r=%d\n", use_y_pal, ts->msac.rng);

                    f->bd_fn.read_pal_plane(t, b, 0, sz_ctx, bx4, by4);

            if (has_chroma && b->uv_mode == DC_PRED) {
                const int pal_ctx = b->pal_sz[0] > 0;
                const int use_uv_pal = dav1d_msac_decode_bool_adapt(&ts->msac,
                                           ts->cdf.m.pal_uv[pal_ctx]);
                if (DEBUG_BLOCK_INFO)
                    printf("Post-uv_pal[%d]: r=%d\n", use_uv_pal, ts->msac.rng);
                if (use_uv_pal) // see aomedia bug 2183 for why we use luma coordinates
                    f->bd_fn.read_pal_uv(t, b, sz_ctx, bx4, by4);

        if (b->y_mode == DC_PRED && !b->pal_sz[0] &&
            imax(b_dim[2], b_dim[3]) <= 3 && f->seq_hdr->filter_intra)

            const int is_filter = dav1d_msac_decode_bool_adapt(&ts->msac,
                                      ts->cdf.m.use_filter_intra[bs]);

                b->y_mode = FILTER_PRED;
                b->y_angle = dav1d_msac_decode_symbol_adapt4(&ts->msac,
                                 ts->cdf.m.filter_intra, 4);

            if (DEBUG_BLOCK_INFO)
                printf("Post-filterintramode[%d/%d]: r=%d\n",
                       b->y_mode, b->y_angle, ts->msac.rng);

            if (t->frame_thread.pass) {
                const int p = t->frame_thread.pass & 1;
                assert(ts->frame_thread[p].pal_idx);
                pal_idx = ts->frame_thread[p].pal_idx;
                ts->frame_thread[p].pal_idx += bw4 * bh4 * 8;

                pal_idx = t->scratch.pal_idx_y;
            read_pal_indices(t, pal_idx, b, 0, w4, h4, bw4, bh4);
            if (DEBUG_BLOCK_INFO)
                printf("Post-y-pal-indices: r=%d\n", ts->msac.rng);

        if (has_chroma && b->pal_sz[1]) {

            if (t->frame_thread.pass) {
                const int p = t->frame_thread.pass & 1;
                assert(ts->frame_thread[p].pal_idx);
                pal_idx = ts->frame_thread[p].pal_idx;
                ts->frame_thread[p].pal_idx += cbw4 * cbh4 * 8;

                pal_idx = t->scratch.pal_idx_uv;
            read_pal_indices(t, pal_idx, b, 1, cw4, ch4, cbw4, cbh4);
            if (DEBUG_BLOCK_INFO)
                printf("Post-uv-pal-indices: r=%d\n", ts->msac.rng);

        const TxfmInfo *t_dim;
        if (f->frame_hdr->segmentation.lossless[b->seg_id]) {
            b->tx = b->uvtx = (int) TX_4X4;
            t_dim = &dav1d_txfm_dimensions[TX_4X4];

            b->tx = dav1d_max_txfm_size_for_bs[bs][0];
            b->uvtx = dav1d_max_txfm_size_for_bs[bs][f->cur.p.layout];
            t_dim = &dav1d_txfm_dimensions[b->tx];
            if (f->frame_hdr->txfm_mode == DAV1D_TX_SWITCHABLE && t_dim->max > TX_4X4) {
                const int tctx = get_tx_ctx(t->a, &t->l, t_dim, by4, bx4);
                uint16_t *const tx_cdf = ts->cdf.m.txsz[t_dim->max - 1][tctx];
                int depth = dav1d_msac_decode_symbol_adapt4(&ts->msac, tx_cdf,
                                imin(t_dim->max, 2));

                    t_dim = &dav1d_txfm_dimensions[b->tx];

        if (DEBUG_BLOCK_INFO)
            printf("Post-tx[%d]: r=%d\n", b->tx, ts->msac.rng);

        if (t->frame_thread.pass == 1) {
            f->bd_fn.read_coef_blocks(t, bs, b);

            f->bd_fn.recon_b_intra(t, bs, intra_edge_flags, b);

        if (f->frame_hdr->loopfilter.level_y[0] ||
            f->frame_hdr->loopfilter.level_y[1])

            dav1d_create_lf_mask_intra(t->lf_mask, f->lf.level, f->b4_stride,
                                       (const uint8_t (*)[8][2])
                                       &ts->lflvl[b->seg_id][0][0][0],
                                       t->bx, t->by, f->w4, f->h4, bs,
                                       b->tx, b->uvtx, f->cur.p.layout,
                                       &t->a->tx_lpf_y[bx4], &t->l.tx_lpf_y[by4],
                                       has_chroma ? &t->a->tx_lpf_uv[cbx4] : NULL,
                                       has_chroma ? &t->l.tx_lpf_uv[cby4] : NULL);

#define set_ctx(type, dir, diridx, off, mul, rep_macro) \
    rep_macro(type, t->dir tx_intra, off, mul * (((uint8_t *) &t_dim->lw)[diridx])); \
    rep_macro(type, t->dir tx, off, mul * (((uint8_t *) &t_dim->lw)[diridx])); \
    rep_macro(type, t->dir mode, off, mul * y_mode_nofilt); \
    rep_macro(type, t->dir pal_sz, off, mul * b->pal_sz[0]); \
    rep_macro(type, t->dir seg_pred, off, mul * seg_pred); \
    rep_macro(type, t->dir skip_mode, off, 0); \
    rep_macro(type, t->dir intra, off, mul); \
    rep_macro(type, t->dir skip, off, mul * b->skip); \
    /* see aomedia bug 2183 for why we use luma coordinates here */ \
    rep_macro(type, t->pal_sz_uv[diridx], off, mul * (has_chroma ? b->pal_sz[1] : 0)); \
    if (IS_INTER_OR_SWITCH(f->frame_hdr)) { \
        rep_macro(type, t->dir comp_type, off, mul * COMP_INTER_NONE); \
        rep_macro(type, t->dir ref[0], off, mul * ((uint8_t) -1)); \
        rep_macro(type, t->dir ref[1], off, mul * ((uint8_t) -1)); \
        rep_macro(type, t->dir filter[0], off, mul * DAV1D_N_SWITCHABLE_FILTERS); \
        rep_macro(type, t->dir filter[1], off, mul * DAV1D_N_SWITCHABLE_FILTERS); \

        const enum IntraPredMode y_mode_nofilt =
            b->y_mode == FILTER_PRED ? DC_PRED : b->y_mode;
        case_set(bh4, l., 1, by4);
        case_set(bw4, a->, 0, bx4);

            f->bd_fn.copy_pal_block_y(t, bx4, by4, bw4, bh4);

#define set_ctx(type, dir, diridx, off, mul, rep_macro) \
    rep_macro(type, t->dir uvmode, off, mul * b->uv_mode)
            case_set(cbh4, l., 1, cby4);
            case_set(cbw4, a->, 0, cbx4);

                f->bd_fn.copy_pal_block_uv(t, bx4, by4, bw4, bh4);

        if (IS_INTER_OR_SWITCH(f->frame_hdr) || f->frame_hdr->allow_intrabc)
            splat_intraref(f->c, t, bs, bw4, bh4);
    } else if (IS_KEY_OR_INTRA(f->frame_hdr)) {

        refmvs_candidate mvstack[8];

        dav1d_refmvs_find(&t->rt, mvstack, &n_mvs, &ctx,
                          (union refmvs_refpair) { .ref = { 0, -1 }},
                          bs, intra_edge_flags, t->by, t->bx);

        if (mvstack[0].mv.mv[0].n)
            b->mv[0] = mvstack[0].mv.mv[0];
        else if (mvstack[1].mv.mv[0].n)
            b->mv[0] = mvstack[1].mv.mv[0];

            if (t->by - (16 << f->seq_hdr->sb128) < ts->tiling.row_start) {

                b->mv[0].x = -(512 << f->seq_hdr->sb128) - 2048;

                b->mv[0].y = -(512 << f->seq_hdr->sb128);

        const union mv ref = b->mv[0];
        read_mv_residual(ts, &b->mv[0], -1);

        // clip intrabc motion vector to decoded parts of current tile
        int border_left = ts->tiling.col_start * 4;
        int border_top = ts->tiling.row_start * 4;

            if (bw4 < 2 && ss_hor)

            if (bh4 < 2 && ss_ver)

        int src_left = t->bx * 4 + (b->mv[0].x >> 3);
        int src_top = t->by * 4 + (b->mv[0].y >> 3);
        int src_right = src_left + bw4 * 4;
        int src_bottom = src_top + bh4 * 4;
        const int border_right = ((ts->tiling.col_end + (bw4 - 1)) & ~(bw4 - 1)) * 4;

        // check against left or right tile boundary and adjust if necessary
        if (src_left < border_left) {
            src_right += border_left - src_left;
            src_left += border_left - src_left;
        } else if (src_right > border_right) {
            src_left -= src_right - border_right;
            src_right -= src_right - border_right;

        // check against top tile boundary and adjust if necessary
        if (src_top < border_top) {
            src_bottom += border_top - src_top;
            src_top += border_top - src_top;

        const int sbx = (t->bx >> (4 + f->seq_hdr->sb128)) << (6 + f->seq_hdr->sb128);
        const int sby = (t->by >> (4 + f->seq_hdr->sb128)) << (6 + f->seq_hdr->sb128);
        const int sb_size = 1 << (6 + f->seq_hdr->sb128);
        // check for overlap with current superblock
        if (src_bottom > sby && src_right > sbx) {
            if (src_top - border_top >= src_bottom - sby) {
                // if possible move src up into the previous suberblock row
                src_top -= src_bottom - sby;
                src_bottom -= src_bottom - sby;
            } else if (src_left - border_left >= src_right - sbx) {
                // if possible move src left into the previous suberblock
                src_left -= src_right - sbx;
                src_right -= src_right - sbx;

        // move src up if it is below current superblock row
        if (src_bottom > sby + sb_size) {
            src_top -= src_bottom - (sby + sb_size);
            src_bottom -= src_bottom - (sby + sb_size);

        // error out if mv still overlaps with the current superblock
        if (src_bottom > sby && src_right > sbx)

        b->mv[0].x = (src_left - t->bx * 4) * 8;
        b->mv[0].y = (src_top - t->by * 4) * 8;

        if (DEBUG_BLOCK_INFO)
            printf("Post-dmv[%d/%d,ref=%d/%d|%d/%d]: r=%d\n",
                   b->mv[0].y, b->mv[0].x, ref.y, ref.x,
                   mvstack[0].mv.mv[0].y, mvstack[0].mv.mv[0].x, ts->msac.rng);
        read_vartx_tree(t, b, bs, bx4, by4);

        if (t->frame_thread.pass == 1) {
            f->bd_fn.read_coef_blocks(t, bs, b);
            b->filter2d = FILTER_2D_BILINEAR;

            if (f->bd_fn.recon_b_inter(t, bs, b)) return -1;

        splat_intrabc_mv(f->c, t, bs, b, bw4, bh4);

#define set_ctx(type, dir, diridx, off, mul, rep_macro) \
    rep_macro(type, t->dir tx_intra, off, mul * b_dim[2 + diridx]); \
    rep_macro(type, t->dir mode, off, mul * DC_PRED); \
    rep_macro(type, t->dir pal_sz, off, 0); \
    /* see aomedia bug 2183 for why this is outside if (has_chroma) */ \
    rep_macro(type, t->pal_sz_uv[diridx], off, 0); \
    rep_macro(type, t->dir seg_pred, off, mul * seg_pred); \
    rep_macro(type, t->dir skip_mode, off, 0); \
    rep_macro(type, t->dir intra, off, 0); \
    rep_macro(type, t->dir skip, off, mul * b->skip)
        case_set(bh4, l., 1, by4);
        case_set(bw4, a->, 0, bx4);

#define set_ctx(type, dir, diridx, off, mul, rep_macro) \
    rep_macro(type, t->dir uvmode, off, mul * DC_PRED)
            case_set(cbh4, l., 1, cby4);
            case_set(cbw4, a->, 0, cbx4);

        // inter-specific mode/mv coding
        int is_comp, has_subpel_filter;

        } else if ((!seg || (seg->ref == -1 && !seg->globalmv && !seg->skip)) &&
                   f->frame_hdr->switchable_comp_refs && imin(bw4, bh4) > 1)

            const int ctx = get_comp_ctx(t->a, &t->l, by4, bx4,
                                         have_top, have_left);
            is_comp = dav1d_msac_decode_bool_adapt(&ts->msac,
                          ts->cdf.m.comp[ctx]);
            if (DEBUG_BLOCK_INFO)
                printf("Post-compflag[%d]: r=%d\n", is_comp, ts->msac.rng);

            b->ref[0] = f->frame_hdr->skip_mode_refs[0];
            b->ref[1] = f->frame_hdr->skip_mode_refs[1];
            b->comp_type = COMP_INTER_AVG;
            b->inter_mode = NEARESTMV_NEARESTMV;
            b->drl_idx = NEAREST_DRL;
            has_subpel_filter = 0;

            refmvs_candidate mvstack[8];

            dav1d_refmvs_find(&t->rt, mvstack, &n_mvs, &ctx,
                              (union refmvs_refpair) { .ref = {
                                  b->ref[0] + 1, b->ref[1] + 1 }},
                              bs, intra_edge_flags, t->by, t->bx);

            b->mv[0] = mvstack[0].mv.mv[0];
            b->mv[1] = mvstack[0].mv.mv[1];
            fix_mv_precision(f->frame_hdr, &b->mv[0]);
            fix_mv_precision(f->frame_hdr, &b->mv[1]);
            if (DEBUG_BLOCK_INFO)
                printf("Post-skipmodeblock[mv=1:y=%d,x=%d,2:y=%d,x=%d,refs=%d+%d\n",
                       b->mv[0].y, b->mv[0].x, b->mv[1].y, b->mv[1].x,
                       b->ref[0], b->ref[1]);
        } else if (is_comp) {
            const int dir_ctx = get_comp_dir_ctx(t->a, &t->l, by4, bx4,
                                                 have_top, have_left);
            if (dav1d_msac_decode_bool_adapt(&ts->msac,
                    ts->cdf.m.comp_dir[dir_ctx]))

                // bidir - first reference (fw)
                const int ctx1 = av1_get_fwd_ref_ctx(t->a, &t->l, by4, bx4,
                                                     have_top, have_left);
                if (dav1d_msac_decode_bool_adapt(&ts->msac,
                        ts->cdf.m.comp_fwd_ref[0][ctx1]))

                    const int ctx2 = av1_get_fwd_ref_2_ctx(t->a, &t->l, by4, bx4,
                                                           have_top, have_left);
                    b->ref[0] = 2 + dav1d_msac_decode_bool_adapt(&ts->msac,
                                        ts->cdf.m.comp_fwd_ref[2][ctx2]);

                    const int ctx2 = av1_get_fwd_ref_1_ctx(t->a, &t->l, by4, bx4,
                                                           have_top, have_left);
                    b->ref[0] = dav1d_msac_decode_bool_adapt(&ts->msac,
                                    ts->cdf.m.comp_fwd_ref[1][ctx2]);

                // second reference (bw)
                const int ctx3 = av1_get_bwd_ref_ctx(t->a, &t->l, by4, bx4,
                                                     have_top, have_left);
                if (dav1d_msac_decode_bool_adapt(&ts->msac,
                        ts->cdf.m.comp_bwd_ref[0][ctx3]))

                    const int ctx4 = av1_get_bwd_ref_1_ctx(t->a, &t->l, by4, bx4,
                                                           have_top, have_left);
                    b->ref[1] = 4 + dav1d_msac_decode_bool_adapt(&ts->msac,
                                        ts->cdf.m.comp_bwd_ref[1][ctx4]);

                const int uctx_p = av1_get_uni_p_ctx(t->a, &t->l, by4, bx4,
                                                     have_top, have_left);
                if (dav1d_msac_decode_bool_adapt(&ts->msac,
                        ts->cdf.m.comp_uni_ref[0][uctx_p]))

                    const int uctx_p1 = av1_get_uni_p1_ctx(t->a, &t->l, by4, bx4,
                                                           have_top, have_left);

                    b->ref[1] = 1 + dav1d_msac_decode_bool_adapt(&ts->msac,
                                        ts->cdf.m.comp_uni_ref[1][uctx_p1]);
                    if (b->ref[1] == 2) {
                        const int uctx_p2 = av1_get_uni_p2_ctx(t->a, &t->l, by4, bx4,
                                                               have_top, have_left);
                        b->ref[1] += dav1d_msac_decode_bool_adapt(&ts->msac,
                                         ts->cdf.m.comp_uni_ref[2][uctx_p2]);

            if (DEBUG_BLOCK_INFO)
                printf("Post-refs[%d/%d]: r=%d\n",
                       b->ref[0], b->ref[1], ts->msac.rng);

            refmvs_candidate mvstack[8];

            dav1d_refmvs_find(&t->rt, mvstack, &n_mvs, &ctx,
                              (union refmvs_refpair) { .ref = {
                                  b->ref[0] + 1, b->ref[1] + 1 }},
                              bs, intra_edge_flags, t->by, t->bx);

            b->inter_mode = dav1d_msac_decode_symbol_adapt8(&ts->msac,
                                ts->cdf.m.comp_inter_mode[ctx],
                                N_COMP_INTER_PRED_MODES - 1);
            if (DEBUG_BLOCK_INFO)
                printf("Post-compintermode[%d,ctx=%d,n_mvs=%d]: r=%d\n",
                       b->inter_mode, ctx, n_mvs, ts->msac.rng);

            const uint8_t *const im = dav1d_comp_inter_pred_modes[b->inter_mode];
            b->drl_idx = NEAREST_DRL;
            if (b->inter_mode == NEWMV_NEWMV) {
                if (n_mvs > 1) { // NEARER, NEAR or NEARISH
                    const int drl_ctx_v1 = get_drl_context(mvstack, 0);
                    b->drl_idx += dav1d_msac_decode_bool_adapt(&ts->msac,
                                      ts->cdf.m.drl_bit[drl_ctx_v1]);
                    if (b->drl_idx == NEARER_DRL && n_mvs > 2) {
                        const int drl_ctx_v2 = get_drl_context(mvstack, 1);
                        b->drl_idx += dav1d_msac_decode_bool_adapt(&ts->msac,
                                          ts->cdf.m.drl_bit[drl_ctx_v2]);

                    if (DEBUG_BLOCK_INFO)
                        printf("Post-drlidx[%d,n_mvs=%d]: r=%d\n",
                               b->drl_idx, n_mvs, ts->msac.rng);

            } else if (im[0] == NEARMV || im[1] == NEARMV) {
                b->drl_idx = NEARER_DRL;
                if (n_mvs > 2) { // NEAR or NEARISH
                    const int drl_ctx_v2 = get_drl_context(mvstack, 1);
                    b->drl_idx += dav1d_msac_decode_bool_adapt(&ts->msac,
                                      ts->cdf.m.drl_bit[drl_ctx_v2]);
                    if (b->drl_idx == NEAR_DRL && n_mvs > 3) {
                        const int drl_ctx_v3 = get_drl_context(mvstack, 2);
                        b->drl_idx += dav1d_msac_decode_bool_adapt(&ts->msac,
                                          ts->cdf.m.drl_bit[drl_ctx_v3]);

                    if (DEBUG_BLOCK_INFO)
                        printf("Post-drlidx[%d,n_mvs=%d]: r=%d\n",
                               b->drl_idx, n_mvs, ts->msac.rng);

            assert(b->drl_idx >= NEAREST_DRL && b->drl_idx <= NEARISH_DRL);

#define assign_comp_mv(idx) \
    switch (im[idx]) { \

        b->mv[idx] = mvstack[b->drl_idx].mv.mv[idx]; \
        fix_mv_precision(f->frame_hdr, &b->mv[idx]); \

        has_subpel_filter |= \
            f->frame_hdr->gmv[b->ref[idx]].type == DAV1D_WM_TYPE_TRANSLATION; \
        b->mv[idx] = get_gmv_2d(&f->frame_hdr->gmv[b->ref[idx]], \
                                t->bx, t->by, bw4, bh4, f->frame_hdr); \

        b->mv[idx] = mvstack[b->drl_idx].mv.mv[idx]; \
        const int mv_prec = f->frame_hdr->hp - f->frame_hdr->force_integer_mv; \
        read_mv_residual(ts, &b->mv[idx], mv_prec); \

            has_subpel_filter = imin(bw4, bh4) == 1 ||
                                b->inter_mode != GLOBALMV_GLOBALMV;

#undef assign_comp_mv
            if (DEBUG_BLOCK_INFO)
                printf("Post-residual_mv[1:y=%d,x=%d,2:y=%d,x=%d]: r=%d\n",
                       b->mv[0].y, b->mv[0].x, b->mv[1].y, b->mv[1].x,

            // jnt_comp vs. seg vs. wedge
            int is_segwedge = 0;
            if (f->seq_hdr->masked_compound) {
                const int mask_ctx = get_mask_comp_ctx(t->a, &t->l, by4, bx4);

                is_segwedge = dav1d_msac_decode_bool_adapt(&ts->msac,
                                  ts->cdf.m.mask_comp[mask_ctx]);
                if (DEBUG_BLOCK_INFO)
                    printf("Post-segwedge_vs_jntavg[%d,ctx=%d]: r=%d\n",
                           is_segwedge, mask_ctx, ts->msac.rng);

                if (f->seq_hdr->jnt_comp) {

                        get_jnt_comp_ctx(f->seq_hdr->order_hint_n_bits,
                                         f->cur.frame_hdr->frame_offset,
                                         f->refp[b->ref[0]].p.frame_hdr->frame_offset,
                                         f->refp[b->ref[1]].p.frame_hdr->frame_offset,
                                         t->a, &t->l, by4, bx4);
                    b->comp_type = COMP_INTER_WEIGHTED_AVG +
                        dav1d_msac_decode_bool_adapt(&ts->msac,
                            ts->cdf.m.jnt_comp[jnt_ctx]);
                    if (DEBUG_BLOCK_INFO)
                        printf("Post-jnt_comp[%d,ctx=%d[ac:%d,ar:%d,lc:%d,lr:%d]]: r=%d\n",
                               b->comp_type == COMP_INTER_AVG,
                               jnt_ctx, t->a->comp_type[bx4], t->a->ref[0][bx4],
                               t->l.comp_type[by4], t->l.ref[0][by4],

                    b->comp_type = COMP_INTER_AVG;

                if (wedge_allowed_mask & (1 << bs)) {
                    const int ctx = dav1d_wedge_ctx_lut[bs];
                    b->comp_type = COMP_INTER_WEDGE -
                        dav1d_msac_decode_bool_adapt(&ts->msac,
                            ts->cdf.m.wedge_comp[ctx]);
                    if (b->comp_type == COMP_INTER_WEDGE)
                        b->wedge_idx = dav1d_msac_decode_symbol_adapt16(&ts->msac,
                                           ts->cdf.m.wedge_idx[ctx], 15);

                    b->comp_type = COMP_INTER_SEG;

                b->mask_sign = dav1d_msac_decode_bool_equi(&ts->msac);
                if (DEBUG_BLOCK_INFO)
                    printf("Post-seg/wedge[%d,wedge_idx=%d,sign=%d]: r=%d\n",
                           b->comp_type == COMP_INTER_WEDGE,
                           b->wedge_idx, b->mask_sign, ts->msac.rng);

                b->comp_type = COMP_INTER_NONE;

1634 if (seg
&& seg
->ref
> 0) {
1635 b
->ref
[0] = seg
->ref
- 1;
1636 } else if (seg
&& (seg
->globalmv
|| seg
->skip
)) {
1639 const int ctx1
= av1_get_ref_ctx(t
->a
, &t
->l
, by4
, bx4
,
1640 have_top
, have_left
);
1641 if (dav1d_msac_decode_bool_adapt(&ts
->msac
,
1642 ts
->cdf
.m
.ref
[0][ctx1
]))
1644 const int ctx2
= av1_get_ref_2_ctx(t
->a
, &t
->l
, by4
, bx4
,
1645 have_top
, have_left
);
1646 if (dav1d_msac_decode_bool_adapt(&ts
->msac
,
1647 ts
->cdf
.m
.ref
[1][ctx2
]))
1651 const int ctx3
= av1_get_ref_6_ctx(t
->a
, &t
->l
, by4
, bx4
,
1652 have_top
, have_left
);
1653 b
->ref
[0] = 4 + dav1d_msac_decode_bool_adapt(&ts
->msac
,
1654 ts
->cdf
.m
.ref
[5][ctx3
]);
1657 const int ctx2
= av1_get_ref_3_ctx(t
->a
, &t
->l
, by4
, bx4
,
1658 have_top
, have_left
);
1659 if (dav1d_msac_decode_bool_adapt(&ts
->msac
,
1660 ts
->cdf
.m
.ref
[2][ctx2
]))
1662 const int ctx3
= av1_get_ref_5_ctx(t
->a
, &t
->l
, by4
, bx4
,
1663 have_top
, have_left
);
1664 b
->ref
[0] = 2 + dav1d_msac_decode_bool_adapt(&ts
->msac
,
1665 ts
->cdf
.m
.ref
[4][ctx3
]);
1667 const int ctx3
= av1_get_ref_4_ctx(t
->a
, &t
->l
, by4
, bx4
,
1668 have_top
, have_left
);
1669 b
->ref
[0] = dav1d_msac_decode_bool_adapt(&ts
->msac
,
1670 ts
->cdf
.m
.ref
[3][ctx3
]);
1673 if (DEBUG_BLOCK_INFO
)
1674 printf("Post-ref[%d]: r=%d\n", b
->ref
[0], ts
->msac
.rng
);
1678 refmvs_candidate mvstack
[8];
1680 dav1d_refmvs_find(&t
->rt
, mvstack
, &n_mvs
, &ctx
,
1681 (union refmvs_refpair
) { .ref
= { b
->ref
[0] + 1, -1 }},
1682 bs
, intra_edge_flags
, t
->by
, t
->bx
);
1684 // mode parsing and mv derivation from ref_mvs
1685 if ((seg
&& (seg
->skip
|| seg
->globalmv
)) ||
1686 dav1d_msac_decode_bool_adapt(&ts
->msac
,
1687 ts
->cdf
.m
.newmv_mode
[ctx
& 7]))
1689 if ((seg
&& (seg
->skip
|| seg
->globalmv
)) ||
1690 !dav1d_msac_decode_bool_adapt(&ts
->msac
,
1691 ts
->cdf
.m
.globalmv_mode
[(ctx
>> 3) & 1]))
1693 b
->inter_mode
= GLOBALMV
;
1694 b
->mv
[0] = get_gmv_2d(&f
->frame_hdr
->gmv
[b
->ref
[0]],
1695 t
->bx
, t
->by
, bw4
, bh4
, f
->frame_hdr
);
1696 has_subpel_filter
= imin(bw4
, bh4
) == 1 ||
1697 f
->frame_hdr
->gmv
[b
->ref
[0]].type
== DAV1D_WM_TYPE_TRANSLATION
;
1699 has_subpel_filter
= 1;
1700 if (dav1d_msac_decode_bool_adapt(&ts
->msac
,
1701 ts
->cdf
.m
.refmv_mode
[(ctx
>> 4) & 15]))
1702 { // NEAREST, NEARER, NEAR or NEARISH
1703 b
->inter_mode
= NEARMV
;
1704 b
->drl_idx
= NEARER_DRL
;
1705 if (n_mvs
> 2) { // NEARER, NEAR or NEARISH
1706 const int drl_ctx_v2
= get_drl_context(mvstack
, 1);
1707 b
->drl_idx
+= dav1d_msac_decode_bool_adapt(&ts
->msac
,
1708 ts
->cdf
.m
.drl_bit
[drl_ctx_v2
]);
1709 if (b
->drl_idx
== NEAR_DRL
&& n_mvs
> 3) { // NEAR or NEARISH
1710 const int drl_ctx_v3
=
1711 get_drl_context(mvstack
, 2);
1712 b
->drl_idx
+= dav1d_msac_decode_bool_adapt(&ts
->msac
,
1713 ts
->cdf
.m
.drl_bit
[drl_ctx_v3
]);
1717 b
->inter_mode
= NEARESTMV
;
1718 b
->drl_idx
= NEAREST_DRL
;
1720 assert(b
->drl_idx
>= NEAREST_DRL
&& b
->drl_idx
<= NEARISH_DRL
);
1721 b
->mv
[0] = mvstack
[b
->drl_idx
].mv
.mv
[0];
1722 if (b
->drl_idx
< NEAR_DRL
)
1723 fix_mv_precision(f
->frame_hdr
, &b
->mv
[0]);
1726 if (DEBUG_BLOCK_INFO
)
1727 printf("Post-intermode[%d,drl=%d,mv=y:%d,x:%d,n_mvs=%d]: r=%d\n",
1728 b
->inter_mode
, b
->drl_idx
, b
->mv
[0].y
, b
->mv
[0].x
, n_mvs
,
1731 has_subpel_filter
= 1;
1732 b
->inter_mode
= NEWMV
;
1733 b
->drl_idx
= NEAREST_DRL
;
1734 if (n_mvs
> 1) { // NEARER, NEAR or NEARISH
1735 const int drl_ctx_v1
= get_drl_context(mvstack
, 0);
1736 b
->drl_idx
+= dav1d_msac_decode_bool_adapt(&ts
->msac
,
1737 ts
->cdf
.m
.drl_bit
[drl_ctx_v1
]);
1738 if (b
->drl_idx
== NEARER_DRL
&& n_mvs
> 2) { // NEAR or NEARISH
1739 const int drl_ctx_v2
= get_drl_context(mvstack
, 1);
1740 b
->drl_idx
+= dav1d_msac_decode_bool_adapt(&ts
->msac
,
1741 ts
->cdf
.m
.drl_bit
[drl_ctx_v2
]);
1744 assert(b
->drl_idx
>= NEAREST_DRL
&& b
->drl_idx
<= NEARISH_DRL
);
1746 b
->mv
[0] = mvstack
[b
->drl_idx
].mv
.mv
[0];
1748 assert(!b
->drl_idx
);
1749 b
->mv
[0] = mvstack
[0].mv
.mv
[0];
1750 fix_mv_precision(f
->frame_hdr
, &b
->mv
[0]);
1752 if (DEBUG_BLOCK_INFO
)
1753 printf("Post-intermode[%d,drl=%d]: r=%d\n",
1754 b
->inter_mode
, b
->drl_idx
, ts
->msac
.rng
);
            const int mv_prec = f->frame_hdr->hp - f->frame_hdr->force_integer_mv;
            read_mv_residual(ts, &b->mv[0], mv_prec);
            if (DEBUG_BLOCK_INFO)
                printf("Post-residualmv[mv=y:%d,x:%d]: r=%d\n",
                       b->mv[0].y, b->mv[0].x, ts->msac.rng);
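            /* Illustrative note (not part of the original source): mv_prec
             * folds the two frame-header flags into one signed selector for
             * read_mv_residual(): hp=0/force_integer_mv=1 gives -1 (full-pel
             * only, no fractional bits coded), hp=0/force_integer_mv=0 gives
             * 0 (quarter-pel: the fp bits are coded, the hp bit is implied),
             * and hp=1/force_integer_mv=0 gives 1 (eighth-pel: fp and hp bits
             * are both coded). This assumes the header parser leaves hp at 0
             * whenever force_integer_mv is set, as the AV1 spec requires. */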
1763 const int ii_sz_grp
= dav1d_ymode_size_context
[bs
];
1764 if (f
->seq_hdr
->inter_intra
&&
1765 interintra_allowed_mask
& (1 << bs
) &&
1766 dav1d_msac_decode_bool_adapt(&ts
->msac
,
1767 ts
->cdf
.m
.interintra
[ii_sz_grp
]))
1769 b
->interintra_mode
= dav1d_msac_decode_symbol_adapt4(&ts
->msac
,
1770 ts
->cdf
.m
.interintra_mode
[ii_sz_grp
],
1771 N_INTER_INTRA_PRED_MODES
- 1);
1772 const int wedge_ctx
= dav1d_wedge_ctx_lut
[bs
];
1773 b
->interintra_type
= INTER_INTRA_BLEND
+
1774 dav1d_msac_decode_bool_adapt(&ts
->msac
,
1775 ts
->cdf
.m
.interintra_wedge
[wedge_ctx
]);
1776 if (b
->interintra_type
== INTER_INTRA_WEDGE
)
1777 b
->wedge_idx
= dav1d_msac_decode_symbol_adapt16(&ts
->msac
,
1778 ts
->cdf
.m
.wedge_idx
[wedge_ctx
], 15);
1780 b
->interintra_type
= INTER_INTRA_NONE
;
1782 if (DEBUG_BLOCK_INFO
&& f
->seq_hdr
->inter_intra
&&
1783 interintra_allowed_mask
& (1 << bs
))
1785 printf("Post-interintra[t=%d,m=%d,w=%d]: r=%d\n",
1786 b
->interintra_type
, b
->interintra_mode
,
1787 b
->wedge_idx
, ts
->msac
.rng
);
1791 if (f
->frame_hdr
->switchable_motion_mode
&&
1792 b
->interintra_type
== INTER_INTRA_NONE
&& imin(bw4
, bh4
) >= 2 &&
1793 // is not warped global motion
1794 !(!f
->frame_hdr
->force_integer_mv
&& b
->inter_mode
== GLOBALMV
&&
1795 f
->frame_hdr
->gmv
[b
->ref
[0]].type
> DAV1D_WM_TYPE_TRANSLATION
) &&
1796 // has overlappable neighbours
1797 ((have_left
&& findoddzero(&t
->l
.intra
[by4
+ 1], h4
>> 1)) ||
1798 (have_top
&& findoddzero(&t
->a
->intra
[bx4
+ 1], w4
>> 1))))
1800 // reaching here means the block allows obmc - check warp by
1801 // finding matching-ref blocks in top/left edges
1802 uint64_t mask
[2] = { 0, 0 };
1803 find_matching_ref(t
, intra_edge_flags
, bw4
, bh4
, w4
, h4
,
1804 have_left
, have_top
, b
->ref
[0], mask
);
1805 const int allow_warp
= !f
->svc
[b
->ref
[0]][0].scale
&&
1806 !f
->frame_hdr
->force_integer_mv
&&
1807 f
->frame_hdr
->warp_motion
&& (mask
[0] | mask
[1]);
1809 b
->motion_mode
= allow_warp
?
1810 dav1d_msac_decode_symbol_adapt4(&ts
->msac
,
1811 ts
->cdf
.m
.motion_mode
[bs
], 2) :
1812 dav1d_msac_decode_bool_adapt(&ts
->msac
, ts
->cdf
.m
.obmc
[bs
]);
1813 if (b
->motion_mode
== MM_WARP
) {
1814 has_subpel_filter
= 0;
1815 derive_warpmv(t
, bw4
, bh4
, mask
, b
->mv
[0], &t
->warpmv
);
1816 #define signabs(v) v < 0 ? '-' : ' ', abs(v)
1817 if (DEBUG_BLOCK_INFO
)
1818 printf("[ %c%x %c%x %c%x\n %c%x %c%x %c%x ]\n"
1819 "alpha=%c%x, beta=%c%x, gamma=%c%x, delta=%c%x, "
1821 signabs(t
->warpmv
.matrix
[0]),
1822 signabs(t
->warpmv
.matrix
[1]),
1823 signabs(t
->warpmv
.matrix
[2]),
1824 signabs(t
->warpmv
.matrix
[3]),
1825 signabs(t
->warpmv
.matrix
[4]),
1826 signabs(t
->warpmv
.matrix
[5]),
1827 signabs(t
->warpmv
.u
.p
.alpha
),
1828 signabs(t
->warpmv
.u
.p
.beta
),
1829 signabs(t
->warpmv
.u
.p
.gamma
),
1830 signabs(t
->warpmv
.u
.p
.delta
),
1831 b
->mv
[0].y
, b
->mv
[0].x
);
                if (t->frame_thread.pass) {
                    if (t->warpmv.type == DAV1D_WM_TYPE_AFFINE) {
                        b->matrix[0] = t->warpmv.matrix[2] - 0x10000;
                        b->matrix[1] = t->warpmv.matrix[3];
                        b->matrix[2] = t->warpmv.matrix[4];
                        b->matrix[3] = t->warpmv.matrix[5] - 0x10000;
                    } else {
                        b->matrix[0] = SHRT_MIN;
                    }
                }
            }

            if (DEBUG_BLOCK_INFO)
                printf("Post-motionmode[%d]: r=%d [mask: 0x%" PRIx64 "/0x%"
                       PRIx64 "]\n", b->motion_mode, ts->msac.rng, mask[0],
                       mask[1]);
        } else {
            b->motion_mode = MM_TRANSLATION;
        }
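        /* Illustrative note (not part of the original source): the warp
         * matrix entries above are 16.16 fixed-point values whose diagonal
         * terms stay close to 1.0 (0x10000), so storing the delta from the
         * identity matrix fits the 16-bit per-block fields, e.g. an entry of
         * 0x10123 is stored as 0x0123 and can be recovered by adding 0x10000
         * back. SHRT_MIN appears to act as the "no affine matrix stored"
         * sentinel for the non-affine case; that reading is an assumption,
         * not a statement taken from the original code. */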
        enum Dav1dFilterMode filter[2];
        if (f->frame_hdr->subpel_filter_mode == DAV1D_FILTER_SWITCHABLE) {
            if (has_subpel_filter) {
                const int comp = b->comp_type != COMP_INTER_NONE;
                const int ctx1 = get_filter_ctx(t->a, &t->l, comp, 0, b->ref[0],
                                                by4, bx4);
                filter[0] = dav1d_msac_decode_symbol_adapt4(&ts->msac,
                                ts->cdf.m.filter[0][ctx1],
                                DAV1D_N_SWITCHABLE_FILTERS - 1);
                if (f->seq_hdr->dual_filter) {
                    const int ctx2 = get_filter_ctx(t->a, &t->l, comp, 1,
                                                    b->ref[0], by4, bx4);
                    if (DEBUG_BLOCK_INFO)
                        printf("Post-subpel_filter1[%d,ctx=%d]: r=%d\n",
                               filter[0], ctx1, ts->msac.rng);
                    filter[1] = dav1d_msac_decode_symbol_adapt4(&ts->msac,
                                    ts->cdf.m.filter[1][ctx2],
                                    DAV1D_N_SWITCHABLE_FILTERS - 1);
                    if (DEBUG_BLOCK_INFO)
                        printf("Post-subpel_filter2[%d,ctx=%d]: r=%d\n",
                               filter[1], ctx2, ts->msac.rng);
                } else {
                    filter[1] = filter[0];
                    if (DEBUG_BLOCK_INFO)
                        printf("Post-subpel_filter[%d,ctx=%d]: r=%d\n",
                               filter[0], ctx1, ts->msac.rng);
                }
            } else {
                filter[0] = filter[1] = DAV1D_FILTER_8TAP_REGULAR;
            }
        } else {
            filter[0] = filter[1] = f->frame_hdr->subpel_filter_mode;
        }
        b->filter2d = dav1d_filter_2d[filter[1]][filter[0]];
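        /* Illustrative note (not part of the original source): the two
         * independently coded directional filter choices are folded into a
         * single 2-D filter enum via the dav1d_filter_2d lookup, so later
         * reconstruction code can dispatch on one value instead of carrying
         * the (filter[0], filter[1]) pair around. */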
1890 read_vartx_tree(t
, b
, bs
, bx4
, by4
);
1893 if (t
->frame_thread
.pass
== 1) {
1894 f
->bd_fn
.read_coef_blocks(t
, bs
, b
);
1896 if (f
->bd_fn
.recon_b_inter(t
, bs
, b
)) return -1;
1899 if (f
->frame_hdr
->loopfilter
.level_y
[0] ||
1900 f
->frame_hdr
->loopfilter
.level_y
[1])
1902 const int is_globalmv
=
1903 b
->inter_mode
== (is_comp
? GLOBALMV_GLOBALMV
: GLOBALMV
);
1904 const uint8_t (*const lf_lvls
)[8][2] = (const uint8_t (*)[8][2])
1905 &ts
->lflvl
[b
->seg_id
][0][b
->ref
[0] + 1][!is_globalmv
];
1906 const uint16_t tx_split
[2] = { b
->tx_split0
, b
->tx_split1
};
1907 enum RectTxfmSize ytx
= b
->max_ytx
, uvtx
= b
->uvtx
;
1908 if (f
->frame_hdr
->segmentation
.lossless
[b
->seg_id
]) {
1909 ytx
= (enum RectTxfmSize
) TX_4X4
;
1910 uvtx
= (enum RectTxfmSize
) TX_4X4
;
1912 dav1d_create_lf_mask_inter(t
->lf_mask
, f
->lf
.level
, f
->b4_stride
, lf_lvls
,
1913 t
->bx
, t
->by
, f
->w4
, f
->h4
, b
->skip
, bs
,
1914 ytx
, tx_split
, uvtx
, f
->cur
.p
.layout
,
1915 &t
->a
->tx_lpf_y
[bx4
], &t
->l
.tx_lpf_y
[by4
],
1916 has_chroma
? &t
->a
->tx_lpf_uv
[cbx4
] : NULL
,
1917 has_chroma
? &t
->l
.tx_lpf_uv
[cby4
] : NULL
);
1922 splat_tworef_mv(f
->c
, t
, bs
, b
, bw4
, bh4
);
1924 splat_oneref_mv(f
->c
, t
, bs
, b
, bw4
, bh4
);
1926 #define set_ctx(type, dir, diridx, off, mul, rep_macro) \
1927 rep_macro(type, t->dir seg_pred, off, mul * seg_pred); \
1928 rep_macro(type, t->dir skip_mode, off, mul * b->skip_mode); \
1929 rep_macro(type, t->dir intra, off, 0); \
1930 rep_macro(type, t->dir skip, off, mul * b->skip); \
1931 rep_macro(type, t->dir pal_sz, off, 0); \
1932 /* see aomedia bug 2183 for why this is outside if (has_chroma) */ \
1933 rep_macro(type, t->pal_sz_uv[diridx], off, 0); \
1934 rep_macro(type, t->dir tx_intra, off, mul * b_dim[2 + diridx]); \
1935 rep_macro(type, t->dir comp_type, off, mul * b->comp_type); \
1936 rep_macro(type, t->dir filter[0], off, mul * filter[0]); \
1937 rep_macro(type, t->dir filter[1], off, mul * filter[1]); \
1938 rep_macro(type, t->dir mode, off, mul * b->inter_mode); \
1939 rep_macro(type, t->dir ref[0], off, mul * b->ref[0]); \
1940 rep_macro(type, t->dir ref[1], off, mul * ((uint8_t) b->ref[1]))
1941 case_set(bh4
, l
., 1, by4
);
1942 case_set(bw4
, a
->, 0, bx4
);
1946 #define set_ctx(type, dir, diridx, off, mul, rep_macro) \
1947 rep_macro(type, t->dir uvmode, off, mul * DC_PRED)
1948 case_set(cbh4
, l
., 1, cby4
);
1949 case_set(cbw4
, a
->, 0, cbx4
);
1955 if (f
->frame_hdr
->segmentation
.enabled
&&
1956 f
->frame_hdr
->segmentation
.update_map
)
1958 uint8_t *seg_ptr
= &f
->cur_segmap
[t
->by
* f
->b4_stride
+ t
->bx
];
1959 #define set_ctx(type, dir, diridx, off, mul, rep_macro) \
1960 for (int y = 0; y < bh4; y++) { \
1961 rep_macro(type, seg_ptr, 0, mul * b->seg_id); \
1962 seg_ptr += f->b4_stride; \
1964 case_set(bw4
, NULL
, 0, 0);
        uint16_t (*noskip_mask)[2] = &t->lf_mask->noskip_mask[by4 >> 1];
        const unsigned mask = (~0U >> (32 - bw4)) << (bx4 & 15);
        const int bx_idx = (bx4 & 16) >> 4;
        for (int y = 0; y < bh4; y += 2, noskip_mask++) {
            (*noskip_mask)[bx_idx] |= mask;
            if (bw4 == 32) // this should be mask >> 16, but it's 0xffffffff anyway
                (*noskip_mask)[1] |= mask;
        }
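        /* Illustrative note (not part of the original source): the mask is a
         * run of bw4 one-bits shifted to the block's x offset within its
         * 16-unit (64-pixel) half of the 128-pixel superblock row, and bx_idx
         * selects which uint16_t half receives it. For example, bw4 = 4 and
         * bx4 = 18: (~0U >> 28) = 0xf, shifted by (18 & 15) = 2, gives 0x3c
         * in the second half (bx_idx = 1). */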
1978 if (t
->frame_thread
.pass
== 1 && !b
->intra
&& IS_INTER_OR_SWITCH(f
->frame_hdr
)) {
1979 const int sby
= (t
->by
- ts
->tiling
.row_start
) >> f
->sb_shift
;
1980 int (*const lowest_px
)[2] = ts
->lowest_pixel
[sby
];
1982 // keep track of motion vectors for each reference
1983 if (b
->comp_type
== COMP_INTER_NONE
) {
1985 if (imin(bw4
, bh4
) > 1 &&
1986 ((b
->inter_mode
== GLOBALMV
&& f
->gmv_warp_allowed
[b
->ref
[0]]) ||
1987 (b
->motion_mode
== MM_WARP
&& t
->warpmv
.type
> DAV1D_WM_TYPE_TRANSLATION
)))
1989 affine_lowest_px_luma(t
, &lowest_px
[b
->ref
[0]][0], b_dim
,
1990 b
->motion_mode
== MM_WARP
? &t
->warpmv
:
1991 &f
->frame_hdr
->gmv
[b
->ref
[0]]);
1993 mc_lowest_px(&lowest_px
[b
->ref
[0]][0], t
->by
, bh4
, b
->mv
[0].y
,
1994 0, &f
->svc
[b
->ref
[0]][1]);
1995 if (b
->motion_mode
== MM_OBMC
) {
1996 obmc_lowest_px(t
, lowest_px
, 0, b_dim
, bx4
, by4
, w4
, h4
);
2002 // sub8x8 derivation
2003 int is_sub8x8
= bw4
== ss_hor
|| bh4
== ss_ver
;
2004 refmvs_block
*const *r
;
2006 assert(ss_hor
== 1);
2007 r
= &t
->rt
.r
[(t
->by
& 31) + 5];
2008 if (bw4
== 1) is_sub8x8
&= r
[0][t
->bx
- 1].ref
.ref
[0] > 0;
2009 if (bh4
== ss_ver
) is_sub8x8
&= r
[-1][t
->bx
].ref
.ref
[0] > 0;
2010 if (bw4
== 1 && bh4
== ss_ver
)
2011 is_sub8x8
&= r
[-1][t
->bx
- 1].ref
.ref
[0] > 0;
2014 // chroma prediction
2016 assert(ss_hor
== 1);
2017 if (bw4
== 1 && bh4
== ss_ver
) {
2018 const refmvs_block
*const rr
= &r
[-1][t
->bx
- 1];
2019 mc_lowest_px(&lowest_px
[rr
->ref
.ref
[0] - 1][1],
2020 t
->by
- 1, bh4
, rr
->mv
.mv
[0].y
, ss_ver
,
2021 &f
->svc
[rr
->ref
.ref
[0] - 1][1]);
2024 const refmvs_block
*const rr
= &r
[0][t
->bx
- 1];
2025 mc_lowest_px(&lowest_px
[rr
->ref
.ref
[0] - 1][1],
2026 t
->by
, bh4
, rr
->mv
.mv
[0].y
, ss_ver
,
2027 &f
->svc
[rr
->ref
.ref
[0] - 1][1]);
2029 if (bh4
== ss_ver
) {
2030 const refmvs_block
*const rr
= &r
[-1][t
->bx
];
2031 mc_lowest_px(&lowest_px
[rr
->ref
.ref
[0] - 1][1],
2032 t
->by
- 1, bh4
, rr
->mv
.mv
[0].y
, ss_ver
,
2033 &f
->svc
[rr
->ref
.ref
[0] - 1][1]);
2035 mc_lowest_px(&lowest_px
[b
->ref
[0]][1], t
->by
, bh4
,
2036 b
->mv
[0].y
, ss_ver
, &f
->svc
[b
->ref
[0]][1]);
2038 if (imin(cbw4
, cbh4
) > 1 &&
2039 ((b
->inter_mode
== GLOBALMV
&& f
->gmv_warp_allowed
[b
->ref
[0]]) ||
2040 (b
->motion_mode
== MM_WARP
&& t
->warpmv
.type
> DAV1D_WM_TYPE_TRANSLATION
)))
2042 affine_lowest_px_chroma(t
, &lowest_px
[b
->ref
[0]][1], b_dim
,
2043 b
->motion_mode
== MM_WARP
? &t
->warpmv
:
2044 &f
->frame_hdr
->gmv
[b
->ref
[0]]);
2046 mc_lowest_px(&lowest_px
[b
->ref
[0]][1],
2047 t
->by
& ~ss_ver
, bh4
<< (bh4
== ss_ver
),
2048 b
->mv
[0].y
, ss_ver
, &f
->svc
[b
->ref
[0]][1]);
2049 if (b
->motion_mode
== MM_OBMC
) {
2050 obmc_lowest_px(t
, lowest_px
, 1, b_dim
, bx4
, by4
, w4
, h4
);
2057 for (int i
= 0; i
< 2; i
++) {
2058 if (b
->inter_mode
== GLOBALMV_GLOBALMV
&& f
->gmv_warp_allowed
[b
->ref
[i
]]) {
2059 affine_lowest_px_luma(t
, &lowest_px
[b
->ref
[i
]][0], b_dim
,
2060 &f
->frame_hdr
->gmv
[b
->ref
[i
]]);
2062 mc_lowest_px(&lowest_px
[b
->ref
[i
]][0], t
->by
, bh4
,
2063 b
->mv
[i
].y
, 0, &f
->svc
[b
->ref
[i
]][1]);
2068 if (has_chroma
) for (int i
= 0; i
< 2; i
++) {
2069 if (b
->inter_mode
== GLOBALMV_GLOBALMV
&&
2070 imin(cbw4
, cbh4
) > 1 && f
->gmv_warp_allowed
[b
->ref
[i
]])
2072 affine_lowest_px_chroma(t
, &lowest_px
[b
->ref
[i
]][1], b_dim
,
2073 &f
->frame_hdr
->gmv
[b
->ref
[i
]]);
2075 mc_lowest_px(&lowest_px
[b
->ref
[i
]][1], t
->by
, bh4
,
2076 b
->mv
[i
].y
, ss_ver
, &f
->svc
[b
->ref
[i
]][1]);
#if __has_feature(memory_sanitizer)

#include <sanitizer/msan_interface.h>

static int checked_decode_b(Dav1dTaskContext *const t,
                            const enum BlockLevel bl,
                            const enum BlockSize bs,
                            const enum BlockPartition bp,
                            const enum EdgeFlags intra_edge_flags)
{
    const Dav1dFrameContext *const f = t->f;
    const int err = decode_b(t, bl, bs, bp, intra_edge_flags);

    if (err == 0 && !(t->frame_thread.pass & 1)) {
        const int ss_ver = f->cur.p.layout == DAV1D_PIXEL_LAYOUT_I420;
        const int ss_hor = f->cur.p.layout != DAV1D_PIXEL_LAYOUT_I444;
        const uint8_t *const b_dim = dav1d_block_dimensions[bs];
        const int bw4 = b_dim[0], bh4 = b_dim[1];
        const int w4 = imin(bw4, f->bw - t->bx), h4 = imin(bh4, f->bh - t->by);
        const int has_chroma = f->seq_hdr->layout != DAV1D_PIXEL_LAYOUT_I400 &&
                               (bw4 > ss_hor || t->bx & 1) &&
                               (bh4 > ss_ver || t->by & 1);

        for (int p = 0; p < 1 + 2 * has_chroma; p++) {
            const int ss_ver = p && f->cur.p.layout == DAV1D_PIXEL_LAYOUT_I420;
            const int ss_hor = p && f->cur.p.layout != DAV1D_PIXEL_LAYOUT_I444;
            const ptrdiff_t stride = f->cur.stride[!!p];
            const int bx = t->bx & ~ss_hor;
            const int by = t->by & ~ss_ver;
            const int width  = w4 << (2 - ss_hor + (bw4 == ss_hor));
            const int height = h4 << (2 - ss_ver + (bh4 == ss_ver));

            const uint8_t *data = f->cur.data[p] + (by << (2 - ss_ver)) * stride +
                                  (bx << (2 - ss_hor + !!f->seq_hdr->hbd));

            for (int y = 0; y < height; data += stride, y++) {
                const size_t line_sz = width << !!f->seq_hdr->hbd;
                if (__msan_test_shadow(data, line_sz) != -1) {
                    fprintf(stderr, "B[%d](%d, %d) w4:%d, h4:%d, row:%d\n",
                            p, bx, by, w4, h4, y);
                    __msan_check_mem_is_initialized(data, line_sz);
                }
            }
        }
    }

    return err;
}

#define decode_b checked_decode_b

#endif
#endif /* defined(__has_feature) */
2138 static int decode_sb(Dav1dTaskContext
*const t
, const enum BlockLevel bl
,
2139 const EdgeNode
*const node
)
2141 const Dav1dFrameContext
*const f
= t
->f
;
2142 Dav1dTileState
*const ts
= t
->ts
;
2143 const int hsz
= 16 >> bl
;
2144 const int have_h_split
= f
->bw
> t
->bx
+ hsz
;
2145 const int have_v_split
= f
->bh
> t
->by
+ hsz
;
2147 if (!have_h_split
&& !have_v_split
) {
2148 assert(bl
< BL_8X8
);
2149 return decode_sb(t
, bl
+ 1, INTRA_EDGE_SPLIT(node
, 0));
2153 enum BlockPartition bp
;
2155 if (t
->frame_thread
.pass
!= 2) {
2156 if (0 && bl
== BL_64X64
)
2157 printf("poc=%d,y=%d,x=%d,bl=%d,r=%d\n",
2158 f
->frame_hdr
->frame_offset
, t
->by
, t
->bx
, bl
, ts
->msac
.rng
);
2159 bx8
= (t
->bx
& 31) >> 1;
2160 by8
= (t
->by
& 31) >> 1;
2161 ctx
= get_partition_ctx(t
->a
, &t
->l
, bl
, by8
, bx8
);
2162 pc
= ts
->cdf
.m
.partition
[bl
][ctx
];
2165 if (have_h_split
&& have_v_split
) {
2166 if (t
->frame_thread
.pass
== 2) {
2167 const Av1Block
*const b
= &f
->frame_thread
.b
[t
->by
* f
->b4_stride
+ t
->bx
];
2168 bp
= b
->bl
== bl
? b
->bp
: PARTITION_SPLIT
;
2170 bp
= dav1d_msac_decode_symbol_adapt16(&ts
->msac
, pc
,
2171 dav1d_partition_type_count
[bl
]);
2172 if (f
->cur
.p
.layout
== DAV1D_PIXEL_LAYOUT_I422
&&
2173 (bp
== PARTITION_V
|| bp
== PARTITION_V4
||
2174 bp
== PARTITION_T_LEFT_SPLIT
|| bp
== PARTITION_T_RIGHT_SPLIT
))
2178 if (DEBUG_BLOCK_INFO
)
2179 printf("poc=%d,y=%d,x=%d,bl=%d,ctx=%d,bp=%d: r=%d\n",
2180 f
->frame_hdr
->frame_offset
, t
->by
, t
->bx
, bl
, ctx
, bp
,
2183 const uint8_t *const b
= dav1d_block_sizes
[bl
][bp
];
2186 case PARTITION_NONE
:
2187 if (decode_b(t
, bl
, b
[0], PARTITION_NONE
, node
->o
))
2191 if (decode_b(t
, bl
, b
[0], PARTITION_H
, node
->h
[0]))
2194 if (decode_b(t
, bl
, b
[0], PARTITION_H
, node
->h
[1]))
2199 if (decode_b(t
, bl
, b
[0], PARTITION_V
, node
->v
[0]))
2202 if (decode_b(t
, bl
, b
[0], PARTITION_V
, node
->v
[1]))
2206 case PARTITION_SPLIT
:
2208 const EdgeTip
*const tip
= (const EdgeTip
*) node
;
2210 if (decode_b(t
, bl
, BS_4x4
, PARTITION_SPLIT
, EDGE_ALL_TR_AND_BL
))
2212 const enum Filter2d tl_filter
= t
->tl_4x4_filter
;
2214 if (decode_b(t
, bl
, BS_4x4
, PARTITION_SPLIT
, tip
->split
[0]))
2218 if (decode_b(t
, bl
, BS_4x4
, PARTITION_SPLIT
, tip
->split
[1]))
2221 t
->tl_4x4_filter
= tl_filter
;
2222 if (decode_b(t
, bl
, BS_4x4
, PARTITION_SPLIT
, tip
->split
[2]))
2227 if (t
->frame_thread
.pass
) {
2228 /* In 8-bit mode with 2-pass decoding the coefficient buffer
2229 * can end up misaligned due to skips here. Work around
2230 * the issue by explicitly realigning the buffer. */
2231 const int p
= t
->frame_thread
.pass
& 1;
2232 ts
->frame_thread
[p
].cf
=
2233 (void*)(((uintptr_t)ts
->frame_thread
[p
].cf
+ 63) & ~63);
2237 if (decode_sb(t
, bl
+ 1, INTRA_EDGE_SPLIT(node
, 0)))
2240 if (decode_sb(t
, bl
+ 1, INTRA_EDGE_SPLIT(node
, 1)))
2244 if (decode_sb(t
, bl
+ 1, INTRA_EDGE_SPLIT(node
, 2)))
2247 if (decode_sb(t
, bl
+ 1, INTRA_EDGE_SPLIT(node
, 3)))
2253 case PARTITION_T_TOP_SPLIT
: {
2254 if (decode_b(t
, bl
, b
[0], PARTITION_T_TOP_SPLIT
, EDGE_ALL_TR_AND_BL
))
2257 if (decode_b(t
, bl
, b
[0], PARTITION_T_TOP_SPLIT
, node
->v
[1]))
2261 if (decode_b(t
, bl
, b
[1], PARTITION_T_TOP_SPLIT
, node
->h
[1]))
2266 case PARTITION_T_BOTTOM_SPLIT
: {
2267 if (decode_b(t
, bl
, b
[0], PARTITION_T_BOTTOM_SPLIT
, node
->h
[0]))
2270 if (decode_b(t
, bl
, b
[1], PARTITION_T_BOTTOM_SPLIT
, node
->v
[0]))
2273 if (decode_b(t
, bl
, b
[1], PARTITION_T_BOTTOM_SPLIT
, 0))
2279 case PARTITION_T_LEFT_SPLIT
: {
2280 if (decode_b(t
, bl
, b
[0], PARTITION_T_LEFT_SPLIT
, EDGE_ALL_TR_AND_BL
))
2283 if (decode_b(t
, bl
, b
[0], PARTITION_T_LEFT_SPLIT
, node
->h
[1]))
2287 if (decode_b(t
, bl
, b
[1], PARTITION_T_LEFT_SPLIT
, node
->v
[1]))
2292 case PARTITION_T_RIGHT_SPLIT
: {
2293 if (decode_b(t
, bl
, b
[0], PARTITION_T_RIGHT_SPLIT
, node
->v
[0]))
2296 if (decode_b(t
, bl
, b
[1], PARTITION_T_RIGHT_SPLIT
, node
->h
[0]))
2299 if (decode_b(t
, bl
, b
[1], PARTITION_T_RIGHT_SPLIT
, 0))
2305 case PARTITION_H4
: {
2306 const EdgeBranch
*const branch
= (const EdgeBranch
*) node
;
2307 if (decode_b(t
, bl
, b
[0], PARTITION_H4
, node
->h
[0]))
2310 if (decode_b(t
, bl
, b
[0], PARTITION_H4
, branch
->h4
))
2313 if (decode_b(t
, bl
, b
[0], PARTITION_H4
, EDGE_ALL_LEFT_HAS_BOTTOM
))
2317 if (decode_b(t
, bl
, b
[0], PARTITION_H4
, node
->h
[1]))
2319 t
->by
-= hsz
* 3 >> 1;
2322 case PARTITION_V4
: {
2323 const EdgeBranch
*const branch
= (const EdgeBranch
*) node
;
2324 if (decode_b(t
, bl
, b
[0], PARTITION_V4
, node
->v
[0]))
2327 if (decode_b(t
, bl
, b
[0], PARTITION_V4
, branch
->v4
))
2330 if (decode_b(t
, bl
, b
[0], PARTITION_V4
, EDGE_ALL_TOP_HAS_RIGHT
))
2334 if (decode_b(t
, bl
, b
[0], PARTITION_V4
, node
->v
[1]))
2336 t
->bx
-= hsz
* 3 >> 1;
2341 } else if (have_h_split
) {
2343 if (t
->frame_thread
.pass
== 2) {
2344 const Av1Block
*const b
= &f
->frame_thread
.b
[t
->by
* f
->b4_stride
+ t
->bx
];
2345 is_split
= b
->bl
!= bl
;
2347 is_split
= dav1d_msac_decode_bool(&ts
->msac
,
2348 gather_top_partition_prob(pc
, bl
));
2349 if (DEBUG_BLOCK_INFO
)
2350 printf("poc=%d,y=%d,x=%d,bl=%d,ctx=%d,bp=%d: r=%d\n",
2351 f
->frame_hdr
->frame_offset
, t
->by
, t
->bx
, bl
, ctx
,
2352 is_split
? PARTITION_SPLIT
: PARTITION_H
, ts
->msac
.rng
);
2355 assert(bl
< BL_8X8
);
2357 bp
= PARTITION_SPLIT
;
2358 if (decode_sb(t
, bl
+ 1, INTRA_EDGE_SPLIT(node
, 0))) return 1;
2360 if (decode_sb(t
, bl
+ 1, INTRA_EDGE_SPLIT(node
, 1))) return 1;
2364 if (decode_b(t
, bl
, dav1d_block_sizes
[bl
][PARTITION_H
][0],
2365 PARTITION_H
, node
->h
[0]))
2369 assert(have_v_split
);
2371 if (t
->frame_thread
.pass
== 2) {
2372 const Av1Block
*const b
= &f
->frame_thread
.b
[t
->by
* f
->b4_stride
+ t
->bx
];
2373 is_split
= b
->bl
!= bl
;
2375 is_split
= dav1d_msac_decode_bool(&ts
->msac
,
2376 gather_left_partition_prob(pc
, bl
));
2377 if (f
->cur
.p
.layout
== DAV1D_PIXEL_LAYOUT_I422
&& !is_split
)
2379 if (DEBUG_BLOCK_INFO
)
2380 printf("poc=%d,y=%d,x=%d,bl=%d,ctx=%d,bp=%d: r=%d\n",
2381 f
->frame_hdr
->frame_offset
, t
->by
, t
->bx
, bl
, ctx
,
2382 is_split
? PARTITION_SPLIT
: PARTITION_V
, ts
->msac
.rng
);
2385 assert(bl
< BL_8X8
);
2387 bp
= PARTITION_SPLIT
;
2388 if (decode_sb(t
, bl
+ 1, INTRA_EDGE_SPLIT(node
, 0))) return 1;
2390 if (decode_sb(t
, bl
+ 1, INTRA_EDGE_SPLIT(node
, 2))) return 1;
2394 if (decode_b(t
, bl
, dav1d_block_sizes
[bl
][PARTITION_V
][0],
2395 PARTITION_V
, node
->v
[0]))
2400 if (t
->frame_thread
.pass
!= 2 && (bp
!= PARTITION_SPLIT
|| bl
== BL_8X8
)) {
2401 #define set_ctx(type, dir, diridx, off, mul, rep_macro) \
2402 rep_macro(type, t->a->partition, bx8, mul * dav1d_al_part_ctx[0][bl][bp]); \
2403 rep_macro(type, t->l.partition, by8, mul * dav1d_al_part_ctx[1][bl][bp])
2404 case_set_upto16(hsz
,,,);
static void reset_context(BlockContext *const ctx, const int keyframe, const int pass) {
    memset(ctx->intra, keyframe, sizeof(ctx->intra));
    memset(ctx->uvmode, DC_PRED, sizeof(ctx->uvmode));
    if (keyframe)
        memset(ctx->mode, DC_PRED, sizeof(ctx->mode));

    if (pass == 2) return;

    memset(ctx->partition, 0, sizeof(ctx->partition));
    memset(ctx->skip, 0, sizeof(ctx->skip));
    memset(ctx->skip_mode, 0, sizeof(ctx->skip_mode));
    memset(ctx->tx_lpf_y, 2, sizeof(ctx->tx_lpf_y));
    memset(ctx->tx_lpf_uv, 1, sizeof(ctx->tx_lpf_uv));
    memset(ctx->tx_intra, -1, sizeof(ctx->tx_intra));
    memset(ctx->tx, TX_64X64, sizeof(ctx->tx));
    if (!keyframe) {
        memset(ctx->ref, -1, sizeof(ctx->ref));
        memset(ctx->comp_type, 0, sizeof(ctx->comp_type));
        memset(ctx->mode, NEARESTMV, sizeof(ctx->mode));
    }
    memset(ctx->lcoef, 0x40, sizeof(ctx->lcoef));
    memset(ctx->ccoef, 0x40, sizeof(ctx->ccoef));
    memset(ctx->filter, DAV1D_N_SWITCHABLE_FILTERS, sizeof(ctx->filter));
    memset(ctx->seg_pred, 0, sizeof(ctx->seg_pred));
    memset(ctx->pal_sz, 0, sizeof(ctx->pal_sz));
}
// { Y+U+V, Y+U } * 4
static const uint8_t ss_size_mul[4][2] = {
    [DAV1D_PIXEL_LAYOUT_I400] = {  4, 4 },
    [DAV1D_PIXEL_LAYOUT_I420] = {  6, 5 },
    [DAV1D_PIXEL_LAYOUT_I422] = {  8, 6 },
    [DAV1D_PIXEL_LAYOUT_I444] = { 12, 8 },
};
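/* Illustrative sketch (not part of dav1d): the table entries above are
 * plane-size multipliers in quarters of the luma area ("* 4" in the comment),
 * e.g. 4:2:0 is 4 (Y) + 1 (U) + 1 (V) = 6 in the first column and 4 + 1 = 5
 * in the second. A toy recomputation of the first column from the chroma
 * subsampling factors; the function name is hypothetical and exists only for
 * illustration. */
static int sketch_size_mul_yuv(const int ss_hor, const int ss_ver,
                               const int has_chroma)
{
    const int luma = 4;                                          /* luma, in quarter-areas */
    const int chroma = has_chroma ? 4 >> (ss_hor + ss_ver) : 0;  /* one chroma plane */
    return luma + 2 * chroma;                                    /* Y + U + V */
}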
2446 static void setup_tile(Dav1dTileState
*const ts
,
2447 const Dav1dFrameContext
*const f
,
2448 const uint8_t *const data
, const size_t sz
,
2449 const int tile_row
, const int tile_col
,
2450 const unsigned tile_start_off
)
2452 const int col_sb_start
= f
->frame_hdr
->tiling
.col_start_sb
[tile_col
];
2453 const int col_sb128_start
= col_sb_start
>> !f
->seq_hdr
->sb128
;
2454 const int col_sb_end
= f
->frame_hdr
->tiling
.col_start_sb
[tile_col
+ 1];
2455 const int row_sb_start
= f
->frame_hdr
->tiling
.row_start_sb
[tile_row
];
2456 const int row_sb_end
= f
->frame_hdr
->tiling
.row_start_sb
[tile_row
+ 1];
2457 const int sb_shift
= f
->sb_shift
;
2459 const uint8_t *const size_mul
= ss_size_mul
[f
->cur
.p
.layout
];
2460 for (int p
= 0; p
< 2; p
++) {
2461 ts
->frame_thread
[p
].pal_idx
= f
->frame_thread
.pal_idx
?
2462 &f
->frame_thread
.pal_idx
[(size_t)tile_start_off
* size_mul
[1] / 8] :
2464 ts
->frame_thread
[p
].cbi
= f
->frame_thread
.cbi
?
2465 &f
->frame_thread
.cbi
[(size_t)tile_start_off
* size_mul
[0] / 64] :
2467 ts
->frame_thread
[p
].cf
= f
->frame_thread
.cf
?
2468 (uint8_t*)f
->frame_thread
.cf
+
2469 (((size_t)tile_start_off
* size_mul
[0]) >> !f
->seq_hdr
->hbd
) :
2473 dav1d_cdf_thread_copy(&ts
->cdf
, &f
->in_cdf
);
2474 ts
->last_qidx
= f
->frame_hdr
->quant
.yac
;
2475 ts
->last_delta_lf
.u32
= 0;
2477 dav1d_msac_init(&ts
->msac
, data
, sz
, f
->frame_hdr
->disable_cdf_update
);
2479 ts
->tiling
.row
= tile_row
;
2480 ts
->tiling
.col
= tile_col
;
2481 ts
->tiling
.col_start
= col_sb_start
<< sb_shift
;
2482 ts
->tiling
.col_end
= imin(col_sb_end
<< sb_shift
, f
->bw
);
2483 ts
->tiling
.row_start
= row_sb_start
<< sb_shift
;
2484 ts
->tiling
.row_end
= imin(row_sb_end
<< sb_shift
, f
->bh
);
2486 // Reference Restoration Unit (used for exp coding)
2487 int sb_idx
, unit_idx
;
2488 if (f
->frame_hdr
->width
[0] != f
->frame_hdr
->width
[1]) {
2489 // vertical components only
2490 sb_idx
= (ts
->tiling
.row_start
>> 5) * f
->sr_sb128w
;
2491 unit_idx
= (ts
->tiling
.row_start
& 16) >> 3;
2493 sb_idx
= (ts
->tiling
.row_start
>> 5) * f
->sb128w
+ col_sb128_start
;
2494 unit_idx
= ((ts
->tiling
.row_start
& 16) >> 3) +
2495 ((ts
->tiling
.col_start
& 16) >> 4);
2497 for (int p
= 0; p
< 3; p
++) {
2498 if (!((f
->lf
.restore_planes
>> p
) & 1U))
2501 if (f
->frame_hdr
->width
[0] != f
->frame_hdr
->width
[1]) {
2502 const int ss_hor
= p
&& f
->cur
.p
.layout
!= DAV1D_PIXEL_LAYOUT_I444
;
2503 const int d
= f
->frame_hdr
->super_res
.width_scale_denominator
;
2504 const int unit_size_log2
= f
->frame_hdr
->restoration
.unit_size
[!!p
];
2505 const int rnd
= (8 << unit_size_log2
) - 1, shift
= unit_size_log2
+ 3;
2506 const int x
= ((4 * ts
->tiling
.col_start
* d
>> ss_hor
) + rnd
) >> shift
;
2507 const int px_x
= x
<< (unit_size_log2
+ ss_hor
);
2508 const int u_idx
= unit_idx
+ ((px_x
& 64) >> 6);
2509 const int sb128x
= px_x
>> 7;
2510 if (sb128x
>= f
->sr_sb128w
) continue;
2511 ts
->lr_ref
[p
] = &f
->lf
.lr_mask
[sb_idx
+ sb128x
].lr
[p
][u_idx
];
2513 ts
->lr_ref
[p
] = &f
->lf
.lr_mask
[sb_idx
].lr
[p
][unit_idx
];
2516 ts
->lr_ref
[p
]->filter_v
[0] = 3;
2517 ts
->lr_ref
[p
]->filter_v
[1] = -7;
2518 ts
->lr_ref
[p
]->filter_v
[2] = 15;
2519 ts
->lr_ref
[p
]->filter_h
[0] = 3;
2520 ts
->lr_ref
[p
]->filter_h
[1] = -7;
2521 ts
->lr_ref
[p
]->filter_h
[2] = 15;
2522 ts
->lr_ref
[p
]->sgr_weights
[0] = -32;
2523 ts
->lr_ref
[p
]->sgr_weights
[1] = 31;
2526 if (f
->c
->n_tc
> 1) {
2527 for (int p
= 0; p
< 2; p
++)
2528 atomic_init(&ts
->progress
[p
], row_sb_start
);
static void read_restoration_info(Dav1dTaskContext *const t,
                                  Av1RestorationUnit *const lr, const int p,
                                  const enum Dav1dRestorationType frame_type)
{
    const Dav1dFrameContext *const f = t->f;
    Dav1dTileState *const ts = t->ts;

    if (frame_type == DAV1D_RESTORATION_SWITCHABLE) {
        const int filter = dav1d_msac_decode_symbol_adapt4(&ts->msac,
                               ts->cdf.m.restore_switchable, 2);
        lr->type = filter + !!filter; /* NONE/WIENER/SGRPROJ */
    } else {
        const unsigned type =
            dav1d_msac_decode_bool_adapt(&ts->msac,
                frame_type == DAV1D_RESTORATION_WIENER ?
                ts->cdf.m.restore_wiener : ts->cdf.m.restore_sgrproj);
        lr->type = type ? frame_type : DAV1D_RESTORATION_NONE;
    }

    if (lr->type == DAV1D_RESTORATION_WIENER) {
        lr->filter_v[0] = p ? 0 :
            dav1d_msac_decode_subexp(&ts->msac,
                ts->lr_ref[p]->filter_v[0] + 5, 16, 1) - 5;
        lr->filter_v[1] =
            dav1d_msac_decode_subexp(&ts->msac,
                ts->lr_ref[p]->filter_v[1] + 23, 32, 2) - 23;
        lr->filter_v[2] =
            dav1d_msac_decode_subexp(&ts->msac,
                ts->lr_ref[p]->filter_v[2] + 17, 64, 3) - 17;

        lr->filter_h[0] = p ? 0 :
            dav1d_msac_decode_subexp(&ts->msac,
                ts->lr_ref[p]->filter_h[0] + 5, 16, 1) - 5;
        lr->filter_h[1] =
            dav1d_msac_decode_subexp(&ts->msac,
                ts->lr_ref[p]->filter_h[1] + 23, 32, 2) - 23;
        lr->filter_h[2] =
            dav1d_msac_decode_subexp(&ts->msac,
                ts->lr_ref[p]->filter_h[2] + 17, 64, 3) - 17;
        memcpy(lr->sgr_weights, ts->lr_ref[p]->sgr_weights, sizeof(lr->sgr_weights));
        ts->lr_ref[p] = lr;
        if (DEBUG_BLOCK_INFO)
            printf("Post-lr_wiener[pl=%d,v[%d,%d,%d],h[%d,%d,%d]]: r=%d\n",
                   p, lr->filter_v[0], lr->filter_v[1],
                   lr->filter_v[2], lr->filter_h[0],
                   lr->filter_h[1], lr->filter_h[2], ts->msac.rng);
    } else if (lr->type == DAV1D_RESTORATION_SGRPROJ) {
        const unsigned idx = dav1d_msac_decode_bools(&ts->msac, 4);
        const uint16_t *const sgr_params = dav1d_sgr_params[idx];
        lr->sgr_weights[0] = sgr_params[0] ? dav1d_msac_decode_subexp(&ts->msac,
            ts->lr_ref[p]->sgr_weights[0] + 96, 128, 4) - 96 : 0;
        lr->sgr_weights[1] = sgr_params[1] ? dav1d_msac_decode_subexp(&ts->msac,
            ts->lr_ref[p]->sgr_weights[1] + 32, 128, 4) - 32 : 95;
        memcpy(lr->filter_v, ts->lr_ref[p]->filter_v, sizeof(lr->filter_v));
        memcpy(lr->filter_h, ts->lr_ref[p]->filter_h, sizeof(lr->filter_h));
        ts->lr_ref[p] = lr;
        if (DEBUG_BLOCK_INFO)
            printf("Post-lr_sgrproj[pl=%d,idx=%d,w[%d,%d]]: r=%d\n",
                   p, idx, lr->sgr_weights[0],
                   lr->sgr_weights[1], ts->msac.rng);
    }
}
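/* Illustrative sketch (not part of dav1d): in the switchable branch of
 * read_restoration_info() above, the decoded symbol is 0, 1 or 2, but the
 * Dav1dRestorationType enum places SWITCHABLE between NONE and WIENER, so
 * "filter + !!filter" skips over it: 0 -> DAV1D_RESTORATION_NONE (0),
 * 1 -> DAV1D_RESTORATION_WIENER (2), 2 -> DAV1D_RESTORATION_SGRPROJ (3).
 * The helper name below is hypothetical. */
static int sketch_switchable_to_restoration_type(const int filter /* 0..2 */) {
    return filter + !!filter;
}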
// modeled after the equivalent function in aomdec:decodeframe.c
static int check_trailing_bits_after_symbol_coder(const MsacContext *const msac) {
    // check marker bit (single 1), followed by zeroes
    const int n_bits = -(msac->cnt + 14);
    assert(n_bits <= 0); // this assumes we errored out when cnt <= -15 in caller
    const int n_bytes = (n_bits + 7) >> 3;
    const uint8_t *p = &msac->buf_pos[n_bytes];
    const int pattern = 128 >> ((n_bits - 1) & 7);
    if ((p[-1] & (2 * pattern - 1)) != pattern)
        return 1;

    // check remainder zero bytes
    for (; p < msac->buf_end; p++)
        if (*p)
            return 1;

    return 0;
}
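/* Illustrative sketch (not part of dav1d): what the function above verifies,
 * restated on a plain byte buffer. After the last arithmetic-coder symbol the
 * tile data must contain a single marker '1' bit followed by nothing but '0'
 * bits. "last_byte" and "used_bits" are hypothetical simplifications of the
 * msac->cnt / buf_pos bookkeeping used above. */
static int sketch_trailing_bits_ok(const uint8_t *const buf, const size_t sz,
                                   const size_t last_byte, /* byte holding the final payload bits */
                                   const int used_bits)    /* payload bits within it, 0..7 */
{
    if (last_byte >= sz) return 0;
    const uint8_t pad = buf[last_byte] & (0xff >> used_bits); /* padding bits of that byte */
    if (pad != (0x80 >> used_bits)) return 0;                 /* single marker '1', then zeros */
    for (size_t i = last_byte + 1; i < sz; i++)               /* any remaining bytes: all zero */
        if (buf[i]) return 0;
    return 1;
}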
2615 int dav1d_decode_tile_sbrow(Dav1dTaskContext
*const t
) {
2616 const Dav1dFrameContext
*const f
= t
->f
;
2617 const enum BlockLevel root_bl
= f
->seq_hdr
->sb128
? BL_128X128
: BL_64X64
;
2618 Dav1dTileState
*const ts
= t
->ts
;
2619 const Dav1dContext
*const c
= f
->c
;
2620 const int sb_step
= f
->sb_step
;
2621 const int tile_row
= ts
->tiling
.row
, tile_col
= ts
->tiling
.col
;
2622 const int col_sb_start
= f
->frame_hdr
->tiling
.col_start_sb
[tile_col
];
2623 const int col_sb128_start
= col_sb_start
>> !f
->seq_hdr
->sb128
;
2625 if (IS_INTER_OR_SWITCH(f
->frame_hdr
) || f
->frame_hdr
->allow_intrabc
) {
2626 dav1d_refmvs_tile_sbrow_init(&t
->rt
, &f
->rf
, ts
->tiling
.col_start
,
2627 ts
->tiling
.col_end
, ts
->tiling
.row_start
,
2628 ts
->tiling
.row_end
, t
->by
>> f
->sb_shift
,
2629 ts
->tiling
.row
, t
->frame_thread
.pass
);
2632 if (IS_INTER_OR_SWITCH(f
->frame_hdr
) && c
->n_fc
> 1) {
2633 const int sby
= (t
->by
- ts
->tiling
.row_start
) >> f
->sb_shift
;
2634 int (*const lowest_px
)[2] = ts
->lowest_pixel
[sby
];
2635 for (int n
= 0; n
< 7; n
++)
2636 for (int m
= 0; m
< 2; m
++)
2637 lowest_px
[n
][m
] = INT_MIN
;
2640 reset_context(&t
->l
, IS_KEY_OR_INTRA(f
->frame_hdr
), t
->frame_thread
.pass
);
2641 if (t
->frame_thread
.pass
== 2) {
2642 const int off_2pass
= c
->n_tc
> 1 ? f
->sb128w
* f
->frame_hdr
->tiling
.rows
: 0;
2643 for (t
->bx
= ts
->tiling
.col_start
,
2644 t
->a
= f
->a
+ off_2pass
+ col_sb128_start
+ tile_row
* f
->sb128w
;
2645 t
->bx
< ts
->tiling
.col_end
; t
->bx
+= sb_step
)
2647 if (atomic_load_explicit(c
->flush
, memory_order_acquire
))
2649 if (decode_sb(t
, root_bl
, dav1d_intra_edge_tree
[root_bl
]))
2651 if (t
->bx
& 16 || f
->seq_hdr
->sb128
)
2654 f
->bd_fn
.backup_ipred_edge(t
);
2658 if (f
->c
->n_tc
> 1 && f
->frame_hdr
->use_ref_frame_mvs
) {
2659 f
->c
->refmvs_dsp
.load_tmvs(&f
->rf
, ts
->tiling
.row
,
2660 ts
->tiling
.col_start
>> 1, ts
->tiling
.col_end
>> 1,
2661 t
->by
>> 1, (t
->by
+ sb_step
) >> 1);
2663 memset(t
->pal_sz_uv
[1], 0, sizeof(*t
->pal_sz_uv
));
2664 const int sb128y
= t
->by
>> 5;
2665 for (t
->bx
= ts
->tiling
.col_start
, t
->a
= f
->a
+ col_sb128_start
+ tile_row
* f
->sb128w
,
2666 t
->lf_mask
= f
->lf
.mask
+ sb128y
* f
->sb128w
+ col_sb128_start
;
2667 t
->bx
< ts
->tiling
.col_end
; t
->bx
+= sb_step
)
2669 if (atomic_load_explicit(c
->flush
, memory_order_acquire
))
2671 if (root_bl
== BL_128X128
) {
2672 t
->cur_sb_cdef_idx_ptr
= t
->lf_mask
->cdef_idx
;
2673 t
->cur_sb_cdef_idx_ptr
[0] = -1;
2674 t
->cur_sb_cdef_idx_ptr
[1] = -1;
2675 t
->cur_sb_cdef_idx_ptr
[2] = -1;
2676 t
->cur_sb_cdef_idx_ptr
[3] = -1;
2678 t
->cur_sb_cdef_idx_ptr
=
2679 &t
->lf_mask
->cdef_idx
[((t
->bx
& 16) >> 4) +
2680 ((t
->by
& 16) >> 3)];
2681 t
->cur_sb_cdef_idx_ptr
[0] = -1;
2683 // Restoration filter
2684 for (int p
= 0; p
< 3; p
++) {
2685 if (!((f
->lf
.restore_planes
>> p
) & 1U))
2688 const int ss_ver
= p
&& f
->cur
.p
.layout
== DAV1D_PIXEL_LAYOUT_I420
;
2689 const int ss_hor
= p
&& f
->cur
.p
.layout
!= DAV1D_PIXEL_LAYOUT_I444
;
2690 const int unit_size_log2
= f
->frame_hdr
->restoration
.unit_size
[!!p
];
2691 const int y
= t
->by
* 4 >> ss_ver
;
2692 const int h
= (f
->cur
.p
.h
+ ss_ver
) >> ss_ver
;
2694 const int unit_size
= 1 << unit_size_log2
;
2695 const unsigned mask
= unit_size
- 1;
2696 if (y
& mask
) continue;
2697 const int half_unit
= unit_size
>> 1;
2698 // Round half up at frame boundaries, if there's more than one
2700 if (y
&& y
+ half_unit
> h
) continue;
2702 const enum Dav1dRestorationType frame_type
= f
->frame_hdr
->restoration
.type
[p
];
2704 if (f
->frame_hdr
->width
[0] != f
->frame_hdr
->width
[1]) {
2705 const int w
= (f
->sr_cur
.p
.p
.w
+ ss_hor
) >> ss_hor
;
2706 const int n_units
= imax(1, (w
+ half_unit
) >> unit_size_log2
);
2708 const int d
= f
->frame_hdr
->super_res
.width_scale_denominator
;
2709 const int rnd
= unit_size
* 8 - 1, shift
= unit_size_log2
+ 3;
2710 const int x0
= ((4 * t
->bx
* d
>> ss_hor
) + rnd
) >> shift
;
2711 const int x1
= ((4 * (t
->bx
+ sb_step
) * d
>> ss_hor
) + rnd
) >> shift
;
2713 for (int x
= x0
; x
< imin(x1
, n_units
); x
++) {
2714 const int px_x
= x
<< (unit_size_log2
+ ss_hor
);
2715 const int sb_idx
= (t
->by
>> 5) * f
->sr_sb128w
+ (px_x
>> 7);
2716 const int unit_idx
= ((t
->by
& 16) >> 3) + ((px_x
& 64) >> 6);
2717 Av1RestorationUnit
*const lr
= &f
->lf
.lr_mask
[sb_idx
].lr
[p
][unit_idx
];
2719 read_restoration_info(t
, lr
, p
, frame_type
);
2722 const int x
= 4 * t
->bx
>> ss_hor
;
2723 if (x
& mask
) continue;
2724 const int w
= (f
->cur
.p
.w
+ ss_hor
) >> ss_hor
;
2725 // Round half up at frame boundaries, if there's more than one
2727 if (x
&& x
+ half_unit
> w
) continue;
2728 const int sb_idx
= (t
->by
>> 5) * f
->sr_sb128w
+ (t
->bx
>> 5);
2729 const int unit_idx
= ((t
->by
& 16) >> 3) + ((t
->bx
& 16) >> 4);
2730 Av1RestorationUnit
*const lr
= &f
->lf
.lr_mask
[sb_idx
].lr
[p
][unit_idx
];
2732 read_restoration_info(t
, lr
, p
, frame_type
);
2735 if (decode_sb(t
, root_bl
, dav1d_intra_edge_tree
[root_bl
]))
2737 if (t
->bx
& 16 || f
->seq_hdr
->sb128
) {
2743 if (f
->seq_hdr
->ref_frame_mvs
&& f
->c
->n_tc
> 1 && IS_INTER_OR_SWITCH(f
->frame_hdr
)) {
2744 dav1d_refmvs_save_tmvs(&f
->c
->refmvs_dsp
, &t
->rt
,
2745 ts
->tiling
.col_start
>> 1, ts
->tiling
.col_end
>> 1,
2746 t
->by
>> 1, (t
->by
+ sb_step
) >> 1);
    // backup pre-loopfilter pixels for intra prediction of the next sbrow
    if (t->frame_thread.pass != 1)
        f->bd_fn.backup_ipred_edge(t);

    // backup t->a/l.tx_lpf_y/uv at tile boundaries to use them to "fix"
    // up the initial value in neighbour tiles when running the loopfilter
    int align_h = (f->bh + 31) & ~31;
    memcpy(&f->lf.tx_lpf_right_edge[0][align_h * tile_col + t->by],
           &t->l.tx_lpf_y[t->by & 16], sb_step);
    const int ss_ver = f->cur.p.layout == DAV1D_PIXEL_LAYOUT_I420;
    align_h >>= ss_ver;
    memcpy(&f->lf.tx_lpf_right_edge[1][align_h * tile_col + (t->by >> ss_ver)],
           &t->l.tx_lpf_uv[(t->by & 16) >> ss_ver], sb_step >> ss_ver);
    // error out on symbol decoder overread
    if (ts->msac.cnt <= -15) return 1;

    return c->strict_std_compliance &&
           (t->by >> f->sb_shift) + 1 >= f->frame_hdr->tiling.row_start_sb[tile_row + 1] &&
           check_trailing_bits_after_symbol_coder(&ts->msac);
}
2771 int dav1d_decode_frame_init(Dav1dFrameContext
*const f
) {
2772 const Dav1dContext
*const c
= f
->c
;
2773 int retval
= DAV1D_ERR(ENOMEM
);
2775 if (f
->sbh
> f
->lf
.start_of_tile_row_sz
) {
2776 dav1d_free(f
->lf
.start_of_tile_row
);
2777 f
->lf
.start_of_tile_row
= dav1d_malloc(ALLOC_TILE
, f
->sbh
* sizeof(uint8_t));
2778 if (!f
->lf
.start_of_tile_row
) {
2779 f
->lf
.start_of_tile_row_sz
= 0;
2782 f
->lf
.start_of_tile_row_sz
= f
->sbh
;
2785 for (int tile_row
= 0; tile_row
< f
->frame_hdr
->tiling
.rows
; tile_row
++) {
2786 f
->lf
.start_of_tile_row
[sby
++] = tile_row
;
2787 while (sby
< f
->frame_hdr
->tiling
.row_start_sb
[tile_row
+ 1])
2788 f
->lf
.start_of_tile_row
[sby
++] = 0;
2791 const int n_ts
= f
->frame_hdr
->tiling
.cols
* f
->frame_hdr
->tiling
.rows
;
2792 if (n_ts
!= f
->n_ts
) {
2794 dav1d_free(f
->frame_thread
.tile_start_off
);
2795 f
->frame_thread
.tile_start_off
=
2796 dav1d_malloc(ALLOC_TILE
, sizeof(*f
->frame_thread
.tile_start_off
) * n_ts
);
2797 if (!f
->frame_thread
.tile_start_off
) {
2802 dav1d_free_aligned(f
->ts
);
2803 f
->ts
= dav1d_alloc_aligned(ALLOC_TILE
, sizeof(*f
->ts
) * n_ts
, 32);
2804 if (!f
->ts
) goto error
;
2808 const int a_sz
= f
->sb128w
* f
->frame_hdr
->tiling
.rows
* (1 + (c
->n_fc
> 1 && c
->n_tc
> 1));
2809 if (a_sz
!= f
->a_sz
) {
2811 f
->a
= dav1d_malloc(ALLOC_TILE
, sizeof(*f
->a
) * a_sz
);
2819 const int num_sb128
= f
->sb128w
* f
->sb128h
;
2820 const uint8_t *const size_mul
= ss_size_mul
[f
->cur
.p
.layout
];
2821 const int hbd
= !!f
->seq_hdr
->hbd
;
2823 const unsigned sb_step4
= f
->sb_step
* 4;
2825 for (int tile_row
= 0; tile_row
< f
->frame_hdr
->tiling
.rows
; tile_row
++) {
2826 const unsigned row_off
= f
->frame_hdr
->tiling
.row_start_sb
[tile_row
] *
2827 sb_step4
* f
->sb128w
* 128;
2828 const unsigned b_diff
= (f
->frame_hdr
->tiling
.row_start_sb
[tile_row
+ 1] -
2829 f
->frame_hdr
->tiling
.row_start_sb
[tile_row
]) * sb_step4
;
2830 for (int tile_col
= 0; tile_col
< f
->frame_hdr
->tiling
.cols
; tile_col
++) {
2831 f
->frame_thread
.tile_start_off
[tile_idx
++] = row_off
+ b_diff
*
2832 f
->frame_hdr
->tiling
.col_start_sb
[tile_col
] * sb_step4
;
2836 const int lowest_pixel_mem_sz
= f
->frame_hdr
->tiling
.cols
* f
->sbh
;
2837 if (lowest_pixel_mem_sz
!= f
->tile_thread
.lowest_pixel_mem_sz
) {
2838 dav1d_free(f
->tile_thread
.lowest_pixel_mem
);
2839 f
->tile_thread
.lowest_pixel_mem
=
2840 dav1d_malloc(ALLOC_TILE
, lowest_pixel_mem_sz
*
2841 sizeof(*f
->tile_thread
.lowest_pixel_mem
));
2842 if (!f
->tile_thread
.lowest_pixel_mem
) {
2843 f
->tile_thread
.lowest_pixel_mem_sz
= 0;
2846 f
->tile_thread
.lowest_pixel_mem_sz
= lowest_pixel_mem_sz
;
2848 int (*lowest_pixel_ptr
)[7][2] = f
->tile_thread
.lowest_pixel_mem
;
2849 for (int tile_row
= 0, tile_row_base
= 0; tile_row
< f
->frame_hdr
->tiling
.rows
;
2850 tile_row
++, tile_row_base
+= f
->frame_hdr
->tiling
.cols
)
2852 const int tile_row_sb_h
= f
->frame_hdr
->tiling
.row_start_sb
[tile_row
+ 1] -
2853 f
->frame_hdr
->tiling
.row_start_sb
[tile_row
];
2854 for (int tile_col
= 0; tile_col
< f
->frame_hdr
->tiling
.cols
; tile_col
++) {
2855 f
->ts
[tile_row_base
+ tile_col
].lowest_pixel
= lowest_pixel_ptr
;
2856 lowest_pixel_ptr
+= tile_row_sb_h
;
2860 const int cbi_sz
= num_sb128
* size_mul
[0];
2861 if (cbi_sz
!= f
->frame_thread
.cbi_sz
) {
2862 dav1d_free_aligned(f
->frame_thread
.cbi
);
2863 f
->frame_thread
.cbi
=
2864 dav1d_alloc_aligned(ALLOC_BLOCK
, sizeof(*f
->frame_thread
.cbi
) *
2865 cbi_sz
* 32 * 32 / 4, 64);
2866 if (!f
->frame_thread
.cbi
) {
2867 f
->frame_thread
.cbi_sz
= 0;
2870 f
->frame_thread
.cbi_sz
= cbi_sz
;
2873 const int cf_sz
= (num_sb128
* size_mul
[0]) << hbd
;
2874 if (cf_sz
!= f
->frame_thread
.cf_sz
) {
2875 dav1d_free_aligned(f
->frame_thread
.cf
);
2876 f
->frame_thread
.cf
=
2877 dav1d_alloc_aligned(ALLOC_COEF
, (size_t)cf_sz
* 128 * 128 / 2, 64);
2878 if (!f
->frame_thread
.cf
) {
2879 f
->frame_thread
.cf_sz
= 0;
2882 memset(f
->frame_thread
.cf
, 0, (size_t)cf_sz
* 128 * 128 / 2);
2883 f
->frame_thread
.cf_sz
= cf_sz
;
2886 if (f
->frame_hdr
->allow_screen_content_tools
) {
2887 const int pal_sz
= num_sb128
<< hbd
;
2888 if (pal_sz
!= f
->frame_thread
.pal_sz
) {
2889 dav1d_free_aligned(f
->frame_thread
.pal
);
2890 f
->frame_thread
.pal
=
2891 dav1d_alloc_aligned(ALLOC_PAL
, sizeof(*f
->frame_thread
.pal
) *
2892 pal_sz
* 16 * 16, 64);
2893 if (!f
->frame_thread
.pal
) {
2894 f
->frame_thread
.pal_sz
= 0;
2897 f
->frame_thread
.pal_sz
= pal_sz
;
2900 const int pal_idx_sz
= num_sb128
* size_mul
[1];
2901 if (pal_idx_sz
!= f
->frame_thread
.pal_idx_sz
) {
2902 dav1d_free_aligned(f
->frame_thread
.pal_idx
);
2903 f
->frame_thread
.pal_idx
=
2904 dav1d_alloc_aligned(ALLOC_PAL
, sizeof(*f
->frame_thread
.pal_idx
) *
2905 pal_idx_sz
* 128 * 128 / 8, 64);
2906 if (!f
->frame_thread
.pal_idx
) {
2907 f
->frame_thread
.pal_idx_sz
= 0;
2910 f
->frame_thread
.pal_idx_sz
= pal_idx_sz
;
2912 } else if (f
->frame_thread
.pal
) {
2913 dav1d_freep_aligned(&f
->frame_thread
.pal
);
2914 dav1d_freep_aligned(&f
->frame_thread
.pal_idx
);
2915 f
->frame_thread
.pal_sz
= f
->frame_thread
.pal_idx_sz
= 0;
2919 // update allocation of block contexts for above
2920 ptrdiff_t y_stride
= f
->cur
.stride
[0], uv_stride
= f
->cur
.stride
[1];
2921 const int has_resize
= f
->frame_hdr
->width
[0] != f
->frame_hdr
->width
[1];
2922 const int need_cdef_lpf_copy
= c
->n_tc
> 1 && has_resize
;
2923 if (y_stride
* f
->sbh
* 4 != f
->lf
.cdef_buf_plane_sz
[0] ||
2924 uv_stride
* f
->sbh
* 8 != f
->lf
.cdef_buf_plane_sz
[1] ||
2925 need_cdef_lpf_copy
!= f
->lf
.need_cdef_lpf_copy
||
2926 f
->sbh
!= f
->lf
.cdef_buf_sbh
)
2928 dav1d_free_aligned(f
->lf
.cdef_line_buf
);
2929 size_t alloc_sz
= 64;
2930 alloc_sz
+= (size_t)llabs(y_stride
) * 4 * f
->sbh
<< need_cdef_lpf_copy
;
2931 alloc_sz
+= (size_t)llabs(uv_stride
) * 8 * f
->sbh
<< need_cdef_lpf_copy
;
2932 uint8_t *ptr
= f
->lf
.cdef_line_buf
= dav1d_alloc_aligned(ALLOC_CDEF
, alloc_sz
, 32);
2934 f
->lf
.cdef_buf_plane_sz
[0] = f
->lf
.cdef_buf_plane_sz
[1] = 0;
        if (y_stride < 0) {
            f->lf.cdef_line[0][0] = ptr - y_stride * (f->sbh * 4 - 1);
            f->lf.cdef_line[1][0] = ptr - y_stride * (f->sbh * 4 - 3);
        } else {
            f->lf.cdef_line[0][0] = ptr + y_stride * 0;
            f->lf.cdef_line[1][0] = ptr + y_stride * 2;
        }
        ptr += llabs(y_stride) * f->sbh * 4;
        if (uv_stride < 0) {
            f->lf.cdef_line[0][1] = ptr - uv_stride * (f->sbh * 8 - 1);
            f->lf.cdef_line[0][2] = ptr - uv_stride * (f->sbh * 8 - 3);
            f->lf.cdef_line[1][1] = ptr - uv_stride * (f->sbh * 8 - 5);
            f->lf.cdef_line[1][2] = ptr - uv_stride * (f->sbh * 8 - 7);
        } else {
            f->lf.cdef_line[0][1] = ptr + uv_stride * 0;
            f->lf.cdef_line[0][2] = ptr + uv_stride * 2;
            f->lf.cdef_line[1][1] = ptr + uv_stride * 4;
            f->lf.cdef_line[1][2] = ptr + uv_stride * 6;
        }
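        /* Illustrative note (not part of the original source): with a
         * negative picture stride, row y of an n-row scratch area lives at
         * base + y * stride, i.e. at decreasing addresses, so the base
         * pointer has to be anchored at the highest-addressed row
         * (ptr - stride * (n - 1)) for all n rows to stay inside the
         * allocation; the positive-stride branches can simply start at
         * ptr. */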
2959 if (need_cdef_lpf_copy
) {
2960 ptr
+= llabs(uv_stride
) * f
->sbh
* 8;
2962 f
->lf
.cdef_lpf_line
[0] = ptr
- y_stride
* (f
->sbh
* 4 - 1);
2964 f
->lf
.cdef_lpf_line
[0] = ptr
;
2965 ptr
+= llabs(y_stride
) * f
->sbh
* 4;
2966 if (uv_stride
< 0) {
2967 f
->lf
.cdef_lpf_line
[1] = ptr
- uv_stride
* (f
->sbh
* 4 - 1);
2968 f
->lf
.cdef_lpf_line
[2] = ptr
- uv_stride
* (f
->sbh
* 8 - 1);
2970 f
->lf
.cdef_lpf_line
[1] = ptr
;
2971 f
->lf
.cdef_lpf_line
[2] = ptr
+ uv_stride
* f
->sbh
* 4;
2975 f
->lf
.cdef_buf_plane_sz
[0] = (int) y_stride
* f
->sbh
* 4;
2976 f
->lf
.cdef_buf_plane_sz
[1] = (int) uv_stride
* f
->sbh
* 8;
2977 f
->lf
.need_cdef_lpf_copy
= need_cdef_lpf_copy
;
2978 f
->lf
.cdef_buf_sbh
= f
->sbh
;
2981 const int sb128
= f
->seq_hdr
->sb128
;
2982 const int num_lines
= c
->n_tc
> 1 ? f
->sbh
* 4 << sb128
: 12;
2983 y_stride
= f
->sr_cur
.p
.stride
[0], uv_stride
= f
->sr_cur
.p
.stride
[1];
2984 if (y_stride
* num_lines
!= f
->lf
.lr_buf_plane_sz
[0] ||
2985 uv_stride
* num_lines
* 2 != f
->lf
.lr_buf_plane_sz
[1])
2987 dav1d_free_aligned(f
->lf
.lr_line_buf
);
2988 // lr simd may overread the input, so slightly over-allocate the lpf buffer
2989 size_t alloc_sz
= 128;
2990 alloc_sz
+= (size_t)llabs(y_stride
) * num_lines
;
2991 alloc_sz
+= (size_t)llabs(uv_stride
) * num_lines
* 2;
2992 uint8_t *ptr
= f
->lf
.lr_line_buf
= dav1d_alloc_aligned(ALLOC_LR
, alloc_sz
, 64);
2994 f
->lf
.lr_buf_plane_sz
[0] = f
->lf
.lr_buf_plane_sz
[1] = 0;
3000 f
->lf
.lr_lpf_line
[0] = ptr
- y_stride
* (num_lines
- 1);
3002 f
->lf
.lr_lpf_line
[0] = ptr
;
3003 ptr
+= llabs(y_stride
) * num_lines
;
3004 if (uv_stride
< 0) {
3005 f
->lf
.lr_lpf_line
[1] = ptr
- uv_stride
* (num_lines
* 1 - 1);
3006 f
->lf
.lr_lpf_line
[2] = ptr
- uv_stride
* (num_lines
* 2 - 1);
3008 f
->lf
.lr_lpf_line
[1] = ptr
;
3009 f
->lf
.lr_lpf_line
[2] = ptr
+ uv_stride
* num_lines
;
3012 f
->lf
.lr_buf_plane_sz
[0] = (int) y_stride
* num_lines
;
3013 f
->lf
.lr_buf_plane_sz
[1] = (int) uv_stride
* num_lines
* 2;
3016 // update allocation for loopfilter masks
3017 if (num_sb128
!= f
->lf
.mask_sz
) {
3018 dav1d_free(f
->lf
.mask
);
3019 dav1d_free(f
->lf
.level
);
3020 f
->lf
.mask
= dav1d_malloc(ALLOC_LF
, sizeof(*f
->lf
.mask
) * num_sb128
);
3021 // over-allocate by 3 bytes since some of the SIMD implementations
3022 // index this from the level type and can thus over-read by up to 3
3023 f
->lf
.level
= dav1d_malloc(ALLOC_LF
, sizeof(*f
->lf
.level
) * num_sb128
* 32 * 32 + 3);
3024 if (!f
->lf
.mask
|| !f
->lf
.level
) {
3029 dav1d_free(f
->frame_thread
.b
);
3030 f
->frame_thread
.b
= dav1d_malloc(ALLOC_BLOCK
, sizeof(*f
->frame_thread
.b
) *
3031 num_sb128
* 32 * 32);
3032 if (!f
->frame_thread
.b
) {
3037 f
->lf
.mask_sz
= num_sb128
;
3040 f
->sr_sb128w
= (f
->sr_cur
.p
.p
.w
+ 127) >> 7;
3041 const int lr_mask_sz
= f
->sr_sb128w
* f
->sb128h
;
3042 if (lr_mask_sz
!= f
->lf
.lr_mask_sz
) {
3043 dav1d_free(f
->lf
.lr_mask
);
3044 f
->lf
.lr_mask
= dav1d_malloc(ALLOC_LR
, sizeof(*f
->lf
.lr_mask
) * lr_mask_sz
);
3045 if (!f
->lf
.lr_mask
) {
3046 f
->lf
.lr_mask_sz
= 0;
3049 f
->lf
.lr_mask_sz
= lr_mask_sz
;
3051 f
->lf
.restore_planes
=
3052 ((f
->frame_hdr
->restoration
.type
[0] != DAV1D_RESTORATION_NONE
) << 0) +
3053 ((f
->frame_hdr
->restoration
.type
[1] != DAV1D_RESTORATION_NONE
) << 1) +
3054 ((f
->frame_hdr
->restoration
.type
[2] != DAV1D_RESTORATION_NONE
) << 2);
3055 if (f
->frame_hdr
->loopfilter
.sharpness
!= f
->lf
.last_sharpness
) {
3056 dav1d_calc_eih(&f
->lf
.lim_lut
, f
->frame_hdr
->loopfilter
.sharpness
);
3057 f
->lf
.last_sharpness
= f
->frame_hdr
->loopfilter
.sharpness
;
3059 dav1d_calc_lf_values(f
->lf
.lvl
, f
->frame_hdr
, (int8_t[4]) { 0, 0, 0, 0 });
3060 memset(f
->lf
.mask
, 0, sizeof(*f
->lf
.mask
) * num_sb128
);
3062 const int ipred_edge_sz
= f
->sbh
* f
->sb128w
<< hbd
;
3063 if (ipred_edge_sz
!= f
->ipred_edge_sz
) {
3064 dav1d_free_aligned(f
->ipred_edge
[0]);
3065 uint8_t *ptr
= f
->ipred_edge
[0] =
3066 dav1d_alloc_aligned(ALLOC_IPRED
, ipred_edge_sz
* 128 * 3, 64);
3068 f
->ipred_edge_sz
= 0;
3071 f
->ipred_edge
[1] = ptr
+ ipred_edge_sz
* 128 * 1;
3072 f
->ipred_edge
[2] = ptr
+ ipred_edge_sz
* 128 * 2;
3073 f
->ipred_edge_sz
= ipred_edge_sz
;
3076 const int re_sz
= f
->sb128h
* f
->frame_hdr
->tiling
.cols
;
3077 if (re_sz
!= f
->lf
.re_sz
) {
3078 dav1d_free(f
->lf
.tx_lpf_right_edge
[0]);
3079 f
->lf
.tx_lpf_right_edge
[0] = dav1d_malloc(ALLOC_LF
, re_sz
* 32 * 2);
3080 if (!f
->lf
.tx_lpf_right_edge
[0]) {
3084 f
->lf
.tx_lpf_right_edge
[1] = f
->lf
.tx_lpf_right_edge
[0] + re_sz
* 32;
3085 f
->lf
.re_sz
= re_sz
;
3089 if (IS_INTER_OR_SWITCH(f
->frame_hdr
) || f
->frame_hdr
->allow_intrabc
) {
3091 dav1d_refmvs_init_frame(&f
->rf
, f
->seq_hdr
, f
->frame_hdr
,
3092 f
->refpoc
, f
->mvs
, f
->refrefpoc
, f
->ref_mvs
,
3093 f
->c
->n_tc
, f
->c
->n_fc
);
3094 if (ret
< 0) goto error
;
3097 // setup dequant tables
3098 init_quant_tables(f
->seq_hdr
, f
->frame_hdr
, f
->frame_hdr
->quant
.yac
, f
->dq
);
3099 if (f
->frame_hdr
->quant
.qm
)
3100 for (int i
= 0; i
< N_RECT_TX_SIZES
; i
++) {
3101 f
->qm
[i
][0] = dav1d_qm_tbl
[f
->frame_hdr
->quant
.qm_y
][0][i
];
3102 f
->qm
[i
][1] = dav1d_qm_tbl
[f
->frame_hdr
->quant
.qm_u
][1][i
];
3103 f
->qm
[i
][2] = dav1d_qm_tbl
[f
->frame_hdr
->quant
.qm_v
][1][i
];
3106 memset(f
->qm
, 0, sizeof(f
->qm
));
3108 // setup jnt_comp weights
3109 if (f
->frame_hdr
->switchable_comp_refs
) {
3110 for (int i
= 0; i
< 7; i
++) {
3111 const unsigned ref0poc
= f
->refp
[i
].p
.frame_hdr
->frame_offset
;
3113 for (int j
= i
+ 1; j
< 7; j
++) {
3114 const unsigned ref1poc
= f
->refp
[j
].p
.frame_hdr
->frame_offset
;
3117 imin(abs(get_poc_diff(f
->seq_hdr
->order_hint_n_bits
, ref0poc
,
3118 f
->cur
.frame_hdr
->frame_offset
)), 31);
3120 imin(abs(get_poc_diff(f
->seq_hdr
->order_hint_n_bits
, ref1poc
,
3121 f
->cur
.frame_hdr
->frame_offset
)), 31);
3122 const int order
= d0
<= d1
;
3124 static const uint8_t quant_dist_weight
[3][2] = {
3125 { 2, 3 }, { 2, 5 }, { 2, 7 }
3127 static const uint8_t quant_dist_lookup_table
[4][2] = {
3128 { 9, 7 }, { 11, 5 }, { 12, 4 }, { 13, 3 }
3132 for (k
= 0; k
< 3; k
++) {
3133 const int c0
= quant_dist_weight
[k
][order
];
3134 const int c1
= quant_dist_weight
[k
][!order
];
3135 const int d0_c0
= d0
* c0
;
3136 const int d1_c1
= d1
* c1
;
3137 if ((d0
> d1
&& d0_c0
< d1_c1
) || (d0
<= d1
&& d0_c0
> d1_c1
)) break;
3140 f
->jnt_weights
[i
][j
] = quant_dist_lookup_table
[k
][order
];
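                /* Illustrative note (not part of the original source): the
                 * loop above picks the first distance-ratio bucket k that
                 * the pair (d0, d1) no longer satisfies, then reads that
                 * bucket's weight. E.g. for clipped distances d0 = 2,
                 * d1 = 5 (order = 1): k = 0 and k = 1 keep going
                 * (2*3 <= 5*2, 2*5 <= 5*2), k = 2 breaks (2*7 > 5*2), so
                 * the coded weight is quant_dist_lookup_table[2][1] = 4. */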
3145 /* Init loopfilter pointers. Increasing NULL pointers is technically UB,
3146 * so just point the chroma pointers in 4:0:0 to the luma plane here to
3147 * avoid having additional in-loop branches in various places. We never
3148 * dereference those pointers so it doesn't really matter what they
3149 * point at, as long as the pointers are valid. */
3150 const int has_chroma
= f
->cur
.p
.layout
!= DAV1D_PIXEL_LAYOUT_I400
;
3151 f
->lf
.p
[0] = f
->cur
.data
[0];
3152 f
->lf
.p
[1] = f
->cur
.data
[has_chroma
? 1 : 0];
3153 f
->lf
.p
[2] = f
->cur
.data
[has_chroma
? 2 : 0];
3154 f
->lf
.sr_p
[0] = f
->sr_cur
.p
.data
[0];
3155 f
->lf
.sr_p
[1] = f
->sr_cur
.p
.data
[has_chroma
? 1 : 0];
3156 f
->lf
.sr_p
[2] = f
->sr_cur
.p
.data
[has_chroma
? 2 : 0];
int dav1d_decode_frame_init_cdf(Dav1dFrameContext *const f) {
    const Dav1dContext *const c = f->c;
    int retval = DAV1D_ERR(EINVAL);

    if (f->frame_hdr->refresh_context)
        dav1d_cdf_thread_copy(f->out_cdf.data.cdf, &f->in_cdf);

    // parse individual tiles per tile group
    int tile_row = 0, tile_col = 0;
    f->task_thread.update_set = 0;
    for (int i = 0; i < f->n_tile_data; i++) {
        const uint8_t *data = f->tile[i].data.data;
        size_t size = f->tile[i].data.sz;

        for (int j = f->tile[i].start; j <= f->tile[i].end; j++) {
            size_t tile_sz;
            if (j == f->tile[i].end) {
                tile_sz = size;
            } else {
                if (f->frame_hdr->tiling.n_bytes > size) goto error;
                tile_sz = 0;
                for (unsigned k = 0; k < f->frame_hdr->tiling.n_bytes; k++)
                    tile_sz |= (unsigned)*data++ << (k * 8);
                tile_sz++;
                size -= f->frame_hdr->tiling.n_bytes;
                if (tile_sz > size) goto error;
            }

            setup_tile(&f->ts[j], f, data, tile_sz, tile_row, tile_col++,
                       c->n_fc > 1 ? f->frame_thread.tile_start_off[j] : 0);

            if (tile_col == f->frame_hdr->tiling.cols) {
                tile_col = 0;
                tile_row++;
            }
            if (j == f->frame_hdr->tiling.update && f->frame_hdr->refresh_context)
                f->task_thread.update_set = 1;
            data += tile_sz;
            size -= tile_sz;
        }
    }

    if (c->n_tc > 1) {
        const int uses_2pass = c->n_fc > 1;
        for (int n = 0; n < f->sb128w * f->frame_hdr->tiling.rows * (1 + uses_2pass); n++)
            reset_context(&f->a[n], IS_KEY_OR_INTRA(f->frame_hdr),
                          uses_2pass ? 1 + (n >= f->sb128w * f->frame_hdr->tiling.rows) : 0);
    }

    retval = 0;
error:
    return retval;
}
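/* Illustrative sketch (not part of dav1d): the per-tile size prefix parsed in
 * dav1d_decode_frame_init_cdf() above is a little-endian integer of
 * frame_hdr->tiling.n_bytes bytes (1..4), holding the tile size minus one.
 * A standalone version of that accumulation, assuming the caller has already
 * checked that n_bytes bytes are available; the function name is
 * hypothetical. */
static unsigned sketch_read_le(const uint8_t *const data, const unsigned n_bytes) {
    unsigned v = 0;
    for (unsigned k = 0; k < n_bytes; k++)
        v |= (unsigned)data[k] << (k * 8); /* byte k contributes bits 8k..8k+7 */
    return v;
}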
int dav1d_decode_frame_main(Dav1dFrameContext *const f) {
    const Dav1dContext *const c = f->c;
    int retval = DAV1D_ERR(EINVAL);

    assert(f->c->n_tc == 1);

    Dav1dTaskContext *const t = &c->tc[f - c->fc];
    t->f = f;
    t->frame_thread.pass = 0;

    for (int n = 0; n < f->sb128w * f->frame_hdr->tiling.rows; n++)
        reset_context(&f->a[n], IS_KEY_OR_INTRA(f->frame_hdr), 0);

    // no threading - we explicitly interleave tile/sbrow decoding
    // and post-filtering, so that the full process runs in-line
    for (int tile_row = 0; tile_row < f->frame_hdr->tiling.rows; tile_row++) {
        const int sbh_end =
            imin(f->frame_hdr->tiling.row_start_sb[tile_row + 1], f->sbh);
        for (int sby = f->frame_hdr->tiling.row_start_sb[tile_row];
             sby < sbh_end; sby++)
        {
            t->by = sby << (4 + f->seq_hdr->sb128);
            const int by_end = (t->by + f->sb_step) >> 1;
            if (f->frame_hdr->use_ref_frame_mvs) {
                f->c->refmvs_dsp.load_tmvs(&f->rf, tile_row,
                                           0, f->bw >> 1, t->by >> 1, by_end);
            }
            for (int tile_col = 0; tile_col < f->frame_hdr->tiling.cols; tile_col++) {
                t->ts = &f->ts[tile_row * f->frame_hdr->tiling.cols + tile_col];
                if (dav1d_decode_tile_sbrow(t)) goto error;
            }
            if (IS_INTER_OR_SWITCH(f->frame_hdr)) {
                dav1d_refmvs_save_tmvs(&f->c->refmvs_dsp, &t->rt,
                                       0, f->bw >> 1, t->by >> 1, by_end);
            }

            // loopfilter + cdef + restoration
            f->bd_fn.filter_sbrow(f, sby);
        }
    }

    retval = 0;
error:
    return retval;
}
void dav1d_decode_frame_exit(Dav1dFrameContext *const f, int retval) {
    const Dav1dContext *const c = f->c;

    if (f->sr_cur.p.data[0])
        atomic_init(&f->task_thread.error, 0);

    if (c->n_fc > 1 && retval && f->frame_thread.cf) {
        memset(f->frame_thread.cf, 0,
               (size_t)f->frame_thread.cf_sz * 128 * 128 / 2);
    }
    for (int i = 0; i < 7; i++) {
        if (f->refp[i].p.frame_hdr) {
            if (!retval && c->n_fc > 1 && c->strict_std_compliance &&
                atomic_load(&f->refp[i].progress[1]) == FRAME_ERROR)
            {
                retval = DAV1D_ERR(EINVAL);
                atomic_store(&f->task_thread.error, 1);
                atomic_store(&f->sr_cur.progress[1], FRAME_ERROR);
            }
            dav1d_thread_picture_unref(&f->refp[i]);
        }
        dav1d_ref_dec(&f->ref_mvs_ref[i]);
    }

    dav1d_picture_unref_internal(&f->cur);
    dav1d_thread_picture_unref(&f->sr_cur);
    dav1d_cdf_thread_unref(&f->in_cdf);
    if (f->frame_hdr && f->frame_hdr->refresh_context) {
        if (f->out_cdf.progress)
            atomic_store(f->out_cdf.progress, retval == 0 ? 1 : TILE_ERROR);
        dav1d_cdf_thread_unref(&f->out_cdf);
    }
    dav1d_ref_dec(&f->cur_segmap_ref);
    dav1d_ref_dec(&f->prev_segmap_ref);
    dav1d_ref_dec(&f->mvs_ref);
    dav1d_ref_dec(&f->seq_hdr_ref);
    dav1d_ref_dec(&f->frame_hdr_ref);

    for (int i = 0; i < f->n_tile_data; i++)
        dav1d_data_unref_internal(&f->tile[i].data);
    f->task_thread.retval = retval;
}
int dav1d_decode_frame(Dav1dFrameContext *const f) {
    assert(f->c->n_fc == 1);
    // if n_tc > 1 (but n_fc == 1), we could run init/exit in the task
    // threads also. Not sure it makes a measurable difference.
    int res = dav1d_decode_frame_init(f);
    if (!res) res = dav1d_decode_frame_init_cdf(f);
    // wait until all threads have completed
    if (!res) {
        if (f->c->n_tc > 1) {
            res = dav1d_task_create_tile_sbrow(f, 0, 1);
            pthread_mutex_lock(&f->task_thread.ttd->lock);
            pthread_cond_signal(&f->task_thread.ttd->cond);
            if (!res) {
                while (!f->task_thread.done[0] ||
                       atomic_load(&f->task_thread.task_counter) > 0)
                {
                    pthread_cond_wait(&f->task_thread.cond,
                                      &f->task_thread.ttd->lock);
                }
            }
            pthread_mutex_unlock(&f->task_thread.ttd->lock);
            res = f->task_thread.retval;
        } else {
            res = dav1d_decode_frame_main(f);
            if (!res && f->frame_hdr->refresh_context && f->task_thread.update_set) {
                dav1d_cdf_thread_update(f->frame_hdr, f->out_cdf.data.cdf,
                                        &f->ts[f->frame_hdr->tiling.update].cdf);
            }
        }
    }
    dav1d_decode_frame_exit(f, res);
    res = f->task_thread.retval;
    f->n_tile_data = 0;
    return res;
}
static int get_upscale_x0(const int in_w, const int out_w, const int step) {
    const int err = out_w * step - (in_w << 14);
    const int x0 = (-((out_w - in_w) << 13) + (out_w >> 1)) / out_w + 128 - (err / 2);
    return x0 & 0x3fff;
}
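/* Illustrative sketch (not part of dav1d): the horizontal super-res upscaler
 * works in 14-bit fixed point, advancing the source position by "step"
 * (roughly in_w / out_w scaled by 2^14) for every output pixel;
 * get_upscale_x0() chooses the starting position so the sampling grid is
 * centred and the accumulated rounding error "err" is split between the two
 * edges. With hypothetical names, the per-pixel source position is then: */
static int sketch_src_pos_q14(const int x0, const int step, const int out_x) {
    return x0 + out_x * step; /* integer sample: >> 14, subpel phase: & 0x3fff */
}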
int dav1d_submit_frame(Dav1dContext *const c) {
    Dav1dFrameContext *f;
    int res = -1;

    // wait for c->out_delayed[next] and move into c->out if visible
    Dav1dThreadPicture *out_delayed;
    if (c->n_fc > 1) {
        pthread_mutex_lock(&c->task_thread.lock);
        const unsigned next = c->frame_thread.next++;
        if (c->frame_thread.next == c->n_fc)
            c->frame_thread.next = 0;

        f = &c->fc[next];
        while (f->n_tile_data > 0)
            pthread_cond_wait(&f->task_thread.cond,
                              &c->task_thread.lock);
        out_delayed = &c->frame_thread.out_delayed[next];
        if (out_delayed->p.data[0] || atomic_load(&f->task_thread.error)) {
            unsigned first = atomic_load(&c->task_thread.first);
            if (first + 1U < c->n_fc)
                atomic_fetch_add(&c->task_thread.first, 1U);
            else
                atomic_store(&c->task_thread.first, 0);
            atomic_compare_exchange_strong(&c->task_thread.reset_task_cur,
                                           &first, UINT_MAX);
            if (c->task_thread.cur && c->task_thread.cur < c->n_fc)
                c->task_thread.cur--;
        }
        const int error = f->task_thread.retval;
        if (error) {
            f->task_thread.retval = 0;
            c->cached_error = error;
            dav1d_data_props_copy(&c->cached_error_props, &out_delayed->p.m);
            dav1d_thread_picture_unref(out_delayed);
        } else if (out_delayed->p.data[0]) {
            const unsigned progress = atomic_load_explicit(&out_delayed->progress[1],
                                                           memory_order_relaxed);
            if ((out_delayed->visible || c->output_invisible_frames) &&
                progress != FRAME_ERROR)
            {
                dav1d_thread_picture_ref(&c->out, out_delayed);
                c->event_flags |= dav1d_picture_get_event_flags(out_delayed);
            }
            dav1d_thread_picture_unref(out_delayed);
        }
    } else {
        f = c->fc;
    }

    f->seq_hdr = c->seq_hdr;
    f->seq_hdr_ref = c->seq_hdr_ref;
    dav1d_ref_inc(f->seq_hdr_ref);
    f->frame_hdr = c->frame_hdr;
    f->frame_hdr_ref = c->frame_hdr_ref;
    c->frame_hdr = NULL;
    c->frame_hdr_ref = NULL;
    f->dsp = &c->dsp[f->seq_hdr->hbd];

    const int bpc = 8 + 2 * f->seq_hdr->hbd;

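    // On the first frame decoded at a given bitdepth, the matching DSP
    // function tables still need to be initialised; the DC_PRED intra
    // predictor pointer doubles as an "already initialised" check.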
    if (!f->dsp->ipred.intra_pred[DC_PRED]) {
        Dav1dDSPContext *const dsp = &c->dsp[f->seq_hdr->hbd];

#define assign_bitdepth_case(bd) \
        dav1d_cdef_dsp_init_##bd##bpc(&dsp->cdef); \
        dav1d_intra_pred_dsp_init_##bd##bpc(&dsp->ipred); \
        dav1d_itx_dsp_init_##bd##bpc(&dsp->itx, bpc); \
        dav1d_loop_filter_dsp_init_##bd##bpc(&dsp->lf); \
        dav1d_loop_restoration_dsp_init_##bd##bpc(&dsp->lr, bpc); \
        dav1d_mc_dsp_init_##bd##bpc(&dsp->mc); \
        dav1d_film_grain_dsp_init_##bd##bpc(&dsp->fg); \
        break

        switch (bpc) {
#if CONFIG_8BPC
        case 8:
            assign_bitdepth_case(8);
#endif
#if CONFIG_16BPC
        case 10:
        case 12:
            assign_bitdepth_case(16);
#endif
#undef assign_bitdepth_case
        default:
            dav1d_log(c, "Compiled without support for %d-bit decoding\n",
                      8 + 2 * f->seq_hdr->hbd);
            res = DAV1D_ERR(ENOPROTOOPT);
            goto error;
        }
    }

#define assign_bitdepth_case(bd) \
        f->bd_fn.recon_b_inter = dav1d_recon_b_inter_##bd##bpc; \
        f->bd_fn.recon_b_intra = dav1d_recon_b_intra_##bd##bpc; \
        f->bd_fn.filter_sbrow = dav1d_filter_sbrow_##bd##bpc; \
        f->bd_fn.filter_sbrow_deblock_cols = dav1d_filter_sbrow_deblock_cols_##bd##bpc; \
        f->bd_fn.filter_sbrow_deblock_rows = dav1d_filter_sbrow_deblock_rows_##bd##bpc; \
        f->bd_fn.filter_sbrow_cdef = dav1d_filter_sbrow_cdef_##bd##bpc; \
        f->bd_fn.filter_sbrow_resize = dav1d_filter_sbrow_resize_##bd##bpc; \
        f->bd_fn.filter_sbrow_lr = dav1d_filter_sbrow_lr_##bd##bpc; \
        f->bd_fn.backup_ipred_edge = dav1d_backup_ipred_edge_##bd##bpc; \
        f->bd_fn.read_coef_blocks = dav1d_read_coef_blocks_##bd##bpc; \
        f->bd_fn.copy_pal_block_y = dav1d_copy_pal_block_y_##bd##bpc; \
        f->bd_fn.copy_pal_block_uv = dav1d_copy_pal_block_uv_##bd##bpc; \
        f->bd_fn.read_pal_plane = dav1d_read_pal_plane_##bd##bpc; \
        f->bd_fn.read_pal_uv = dav1d_read_pal_uv_##bd##bpc
    if (!f->seq_hdr->hbd) {
#if CONFIG_8BPC
        assign_bitdepth_case(8);
#endif
    } else {
#if CONFIG_16BPC
        assign_bitdepth_case(16);
#endif
    }
#undef assign_bitdepth_case

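    // For inter (or switch) frames, take references to the 7 reference
    // pictures and validate them: each reference must be present, share the
    // current frame's layout and bitdepth, and be no more than twice and no
    // less than 1/16th of the current frame's size (the AV1 scaled-reference
    // limits); otherwise the frame is rejected as invalid.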
    int ref_coded_width[7];
    if (IS_INTER_OR_SWITCH(f->frame_hdr)) {
        if (f->frame_hdr->primary_ref_frame != DAV1D_PRIMARY_REF_NONE) {
            const int pri_ref = f->frame_hdr->refidx[f->frame_hdr->primary_ref_frame];
            if (!c->refs[pri_ref].p.p.data[0]) {
                res = DAV1D_ERR(EINVAL);
                goto error;
            }
        }
        for (int i = 0; i < 7; i++) {
            const int refidx = f->frame_hdr->refidx[i];
            if (!c->refs[refidx].p.p.data[0] ||
                f->frame_hdr->width[0] * 2 < c->refs[refidx].p.p.p.w ||
                f->frame_hdr->height * 2 < c->refs[refidx].p.p.p.h ||
                f->frame_hdr->width[0] > c->refs[refidx].p.p.p.w * 16 ||
                f->frame_hdr->height > c->refs[refidx].p.p.p.h * 16 ||
                f->seq_hdr->layout != c->refs[refidx].p.p.p.layout ||
                bpc != c->refs[refidx].p.p.p.bpc)
            {
                for (int j = 0; j < i; j++)
                    dav1d_thread_picture_unref(&f->refp[j]);
                res = DAV1D_ERR(EINVAL);
                goto error;
            }
            dav1d_thread_picture_ref(&f->refp[i], &c->refs[refidx].p);
            ref_coded_width[i] = c->refs[refidx].p.p.frame_hdr->width[0];
            if (f->frame_hdr->width[0] != c->refs[refidx].p.p.p.w ||
                f->frame_hdr->height != c->refs[refidx].p.p.p.h)
            {
#define scale_fac(ref_sz, this_sz) \
    ((((ref_sz) << 14) + ((this_sz) >> 1)) / (this_sz))
                f->svc[i][0].scale = scale_fac(c->refs[refidx].p.p.p.w,
                                               f->frame_hdr->width[0]);
                f->svc[i][1].scale = scale_fac(c->refs[refidx].p.p.p.h,
                                               f->frame_hdr->height);
                f->svc[i][0].step = (f->svc[i][0].scale + 8) >> 4;
                f->svc[i][1].step = (f->svc[i][1].scale + 8) >> 4;
            } else {
                f->svc[i][0].scale = f->svc[i][1].scale = 0;
            }
            f->gmv_warp_allowed[i] = f->frame_hdr->gmv[i].type > DAV1D_WM_TYPE_TRANSLATION &&
                                     !f->frame_hdr->force_integer_mv &&
                                     !dav1d_get_shear_params(&f->frame_hdr->gmv[i]) &&
                                     !f->svc[i][0].scale;
        }
    }

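    // Entropy context: start from the static default CDFs when there is no
    // primary reference, otherwise inherit the CDFs stored with the primary
    // reference frame; allocate an output CDF buffer if this frame will
    // refresh the context for future frames.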
    if (f->frame_hdr->primary_ref_frame == DAV1D_PRIMARY_REF_NONE) {
        dav1d_cdf_thread_init_static(&f->in_cdf, f->frame_hdr->quant.yac);
    } else {
        const int pri_ref = f->frame_hdr->refidx[f->frame_hdr->primary_ref_frame];
        dav1d_cdf_thread_ref(&f->in_cdf, &c->cdf[pri_ref]);
    }
    if (f->frame_hdr->refresh_context) {
        res = dav1d_cdf_thread_alloc(c, &f->out_cdf, c->n_fc > 1);
        if (res < 0) goto error;
    }

    // FIXME qsort so tiles are in order (for frame threading)
    if (f->n_tile_data_alloc < c->n_tile_data) {
        dav1d_free(f->tile);
        assert(c->n_tile_data < INT_MAX / (int)sizeof(*f->tile));
        f->tile = dav1d_malloc(ALLOC_TILE, c->n_tile_data * sizeof(*f->tile));
        if (!f->tile) {
            f->n_tile_data_alloc = f->n_tile_data = 0;
            res = DAV1D_ERR(ENOMEM);
            goto error;
        }
        f->n_tile_data_alloc = c->n_tile_data;
    }
    memcpy(f->tile, c->tile, c->n_tile_data * sizeof(*f->tile));
    memset(c->tile, 0, c->n_tile_data * sizeof(*c->tile));
    f->n_tile_data = c->n_tile_data;
    c->n_tile_data = 0;

    res = dav1d_thread_picture_alloc(c, f, bpc);
    if (res < 0) goto error;

    if (f->frame_hdr->width[0] != f->frame_hdr->width[1]) {
        res = dav1d_picture_alloc_copy(c, &f->cur, f->frame_hdr->width[0], &f->sr_cur.p);
        if (res < 0) goto error;
    } else {
        dav1d_picture_ref(&f->cur, &f->sr_cur.p);
    }

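    // Super-resolution: when the coded width differs from the output width,
    // derive the horizontal scaling steps for luma and chroma (14-bit fixed
    // point, via the scale_fac() macro above) and the matching start offsets.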
    if (f->frame_hdr->width[0] != f->frame_hdr->width[1]) {
        f->resize_step[0] = scale_fac(f->cur.p.w, f->sr_cur.p.p.w);
        const int ss_hor = f->cur.p.layout != DAV1D_PIXEL_LAYOUT_I444;
        const int in_cw = (f->cur.p.w + ss_hor) >> ss_hor;
        const int out_cw = (f->sr_cur.p.p.w + ss_hor) >> ss_hor;
        f->resize_step[1] = scale_fac(in_cw, out_cw);
#undef scale_fac
        f->resize_start[0] = get_upscale_x0(f->cur.p.w, f->sr_cur.p.p.w, f->resize_step[0]);
        f->resize_start[1] = get_upscale_x0(in_cw, out_cw, f->resize_step[1]);
    }

    // move f->cur into output queue
    if (c->n_fc == 1) {
        if (f->frame_hdr->show_frame || c->output_invisible_frames) {
            dav1d_thread_picture_ref(&c->out, &f->sr_cur);
            c->event_flags |= dav1d_picture_get_event_flags(&f->sr_cur);
        }
    } else {
        dav1d_thread_picture_ref(out_delayed, &f->sr_cur);
    }

    f->w4 = (f->frame_hdr->width[0] + 3) >> 2;
    f->h4 = (f->frame_hdr->height + 3) >> 2;
    f->bw = ((f->frame_hdr->width[0] + 7) >> 3) << 1;
    f->bh = ((f->frame_hdr->height + 7) >> 3) << 1;
    f->sb128w = (f->bw + 31) >> 5;
    f->sb128h = (f->bh + 31) >> 5;
    f->sb_shift = 4 + f->seq_hdr->sb128;
    f->sb_step = 16 << f->seq_hdr->sb128;
    f->sbh = (f->bh + f->sb_step - 1) >> f->sb_shift;
    f->b4_stride = (f->bw + 31) & ~31;
    f->bitdepth_max = (1 << f->cur.p.bpc) - 1;
    atomic_init(&f->task_thread.error, 0);
    const int uses_2pass = c->n_fc > 1;
    const int cols = f->frame_hdr->tiling.cols;
    const int rows = f->frame_hdr->tiling.rows;
    atomic_store(&f->task_thread.task_counter,
                 (cols * rows + f->sbh) << uses_2pass);

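    // Motion vector buffers: inter (or intra-block-copy) frames need a
    // temporal MV buffer for the current frame, and may reuse the reference
    // frames' MV buffers for temporal MV prediction when their block
    // dimensions match the current frame's.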
    if (IS_INTER_OR_SWITCH(f->frame_hdr) || f->frame_hdr->allow_intrabc) {
        f->mvs_ref = dav1d_ref_create_using_pool(c->refmvs_pool,
            sizeof(*f->mvs) * f->sb128h * 16 * (f->b4_stride >> 1));
        if (!f->mvs_ref) {
            res = DAV1D_ERR(ENOMEM);
            goto error;
        }
        f->mvs = f->mvs_ref->data;
        if (!f->frame_hdr->allow_intrabc) {
            for (int i = 0; i < 7; i++)
                f->refpoc[i] = f->refp[i].p.frame_hdr->frame_offset;
        } else {
            memset(f->refpoc, 0, sizeof(f->refpoc));
        }
        if (f->frame_hdr->use_ref_frame_mvs) {
            for (int i = 0; i < 7; i++) {
                const int refidx = f->frame_hdr->refidx[i];
                const int ref_w = ((ref_coded_width[i] + 7) >> 3) << 1;
                const int ref_h = ((f->refp[i].p.p.h + 7) >> 3) << 1;
                if (c->refs[refidx].refmvs != NULL &&
                    ref_w == f->bw && ref_h == f->bh)
                {
                    f->ref_mvs_ref[i] = c->refs[refidx].refmvs;
                    dav1d_ref_inc(f->ref_mvs_ref[i]);
                    f->ref_mvs[i] = c->refs[refidx].refmvs->data;
                } else {
                    f->ref_mvs[i] = NULL;
                    f->ref_mvs_ref[i] = NULL;
                }
                memcpy(f->refrefpoc[i], c->refs[refidx].refpoc,
                       sizeof(*f->refrefpoc));
            }
        } else {
            memset(f->ref_mvs_ref, 0, sizeof(f->ref_mvs_ref));
        }
    } else {
        f->mvs_ref = NULL;
        memset(f->ref_mvs_ref, 0, sizeof(f->ref_mvs_ref));
    }

    if (f->frame_hdr->segmentation.enabled) {
        // By default, the previous segmentation map is not initialised.
        f->prev_segmap_ref = NULL;
        f->prev_segmap = NULL;

        // We might need a previous frame's segmentation map. This
        // happens if there is either no update or a temporal update.
        if (f->frame_hdr->segmentation.temporal || !f->frame_hdr->segmentation.update_map) {
            const int pri_ref = f->frame_hdr->primary_ref_frame;
            assert(pri_ref != DAV1D_PRIMARY_REF_NONE);
            const int ref_w = ((ref_coded_width[pri_ref] + 7) >> 3) << 1;
            const int ref_h = ((f->refp[pri_ref].p.p.h + 7) >> 3) << 1;
            if (ref_w == f->bw && ref_h == f->bh) {
                f->prev_segmap_ref = c->refs[f->frame_hdr->refidx[pri_ref]].segmap;
                if (f->prev_segmap_ref) {
                    dav1d_ref_inc(f->prev_segmap_ref);
                    f->prev_segmap = f->prev_segmap_ref->data;
                }
            }
        }

        if (f->frame_hdr->segmentation.update_map) {
            // We're updating an existing map, but need somewhere to
            // put the new values. Allocate them here (the data
            // actually gets set elsewhere)
            f->cur_segmap_ref = dav1d_ref_create_using_pool(c->segmap_pool,
                sizeof(*f->cur_segmap) * f->b4_stride * 32 * f->sb128h);
            if (!f->cur_segmap_ref) {
                dav1d_ref_dec(&f->prev_segmap_ref);
                res = DAV1D_ERR(ENOMEM);
                goto error;
            }
            f->cur_segmap = f->cur_segmap_ref->data;
        } else if (f->prev_segmap_ref) {
            // We're not updating an existing map, and we have a valid
            // reference. Use that.
            f->cur_segmap_ref = f->prev_segmap_ref;
            dav1d_ref_inc(f->cur_segmap_ref);
            f->cur_segmap = f->prev_segmap_ref->data;
        } else {
            // We need to make a new map. Allocate one here and zero it out.
            const size_t segmap_size = sizeof(*f->cur_segmap) * f->b4_stride * 32 * f->sb128h;
            f->cur_segmap_ref = dav1d_ref_create_using_pool(c->segmap_pool, segmap_size);
            if (!f->cur_segmap_ref) {
                res = DAV1D_ERR(ENOMEM);
                goto error;
            }
            f->cur_segmap = f->cur_segmap_ref->data;
            memset(f->cur_segmap, 0, segmap_size);
        }
    } else {
        f->cur_segmap = NULL;
        f->cur_segmap_ref = NULL;
        f->prev_segmap_ref = NULL;
    }

    // update references etc.
    const unsigned refresh_frame_flags = f->frame_hdr->refresh_frame_flags;
    for (int i = 0; i < 8; i++) {
        if (refresh_frame_flags & (1 << i)) {
            if (c->refs[i].p.p.frame_hdr)
                dav1d_thread_picture_unref(&c->refs[i].p);
            dav1d_thread_picture_ref(&c->refs[i].p, &f->sr_cur);

            dav1d_cdf_thread_unref(&c->cdf[i]);
            if (f->frame_hdr->refresh_context) {
                dav1d_cdf_thread_ref(&c->cdf[i], &f->out_cdf);
            } else {
                dav1d_cdf_thread_ref(&c->cdf[i], &f->in_cdf);
            }

            dav1d_ref_dec(&c->refs[i].segmap);
            c->refs[i].segmap = f->cur_segmap_ref;
            if (f->cur_segmap_ref)
                dav1d_ref_inc(f->cur_segmap_ref);
            dav1d_ref_dec(&c->refs[i].refmvs);
            if (!f->frame_hdr->allow_intrabc) {
                c->refs[i].refmvs = f->mvs_ref;
                if (f->mvs_ref)
                    dav1d_ref_inc(f->mvs_ref);
            }
            memcpy(c->refs[i].refpoc, f->refpoc, sizeof(f->refpoc));
        }
    }

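    // With all per-frame state in place, decode immediately when frame
    // threading is disabled (rolling back the reference slots on failure),
    // or hand the frame over to the task threads and return.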
    if (c->n_fc == 1) {
        if ((res = dav1d_decode_frame(f)) < 0) {
            dav1d_thread_picture_unref(&c->out);
            for (int i = 0; i < 8; i++) {
                if (refresh_frame_flags & (1 << i)) {
                    if (c->refs[i].p.p.frame_hdr)
                        dav1d_thread_picture_unref(&c->refs[i].p);
                    dav1d_cdf_thread_unref(&c->cdf[i]);
                    dav1d_ref_dec(&c->refs[i].segmap);
                    dav1d_ref_dec(&c->refs[i].refmvs);
                }
            }
            goto error;
        }
    } else {
        dav1d_task_frame_init(f);
        pthread_mutex_unlock(&c->task_thread.lock);
    }

    return 0;

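// Failure path: flag the error for the task threads and drop every reference
// acquired above (CDFs, reference pictures, MV buffers, headers, tile data)
// before propagating the error code.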
error:
    atomic_init(&f->task_thread.error, 1);
    dav1d_cdf_thread_unref(&f->in_cdf);
    if (f->frame_hdr->refresh_context)
        dav1d_cdf_thread_unref(&f->out_cdf);
    for (int i = 0; i < 7; i++) {
        if (f->refp[i].p.frame_hdr)
            dav1d_thread_picture_unref(&f->refp[i]);
        dav1d_ref_dec(&f->ref_mvs_ref[i]);
    }
    dav1d_thread_picture_unref(&c->out);
    if (c->n_fc > 1)
        dav1d_thread_picture_unref(out_delayed);
    dav1d_picture_unref_internal(&f->cur);
    dav1d_thread_picture_unref(&f->sr_cur);
    dav1d_ref_dec(&f->mvs_ref);
    dav1d_ref_dec(&f->seq_hdr_ref);
    dav1d_ref_dec(&f->frame_hdr_ref);
    dav1d_data_props_copy(&c->cached_error_props, &c->in.m);

    for (int i = 0; i < f->n_tile_data; i++)
        dav1d_data_unref_internal(&f->tile[i].data);
    f->n_tile_data = 0;

    if (c->n_fc > 1)
        pthread_mutex_unlock(&c->task_thread.lock);

    return res;
}