/*
 * Copyright © 2018, VideoLAN and dav1d authors
 * Copyright © 2018, Two Orioles, LLC
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are met:
 *
 * 1. Redistributions of source code must retain the above copyright notice, this
 *    list of conditions and the following disclaimer.
 *
 * 2. Redistributions in binary form must reproduce the above copyright notice,
 *    this list of conditions and the following disclaimer in the documentation
 *    and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
 * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
 * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
 * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

#include "config.h"

#include "common/frame.h"

#include "src/thread_task.h"
#include "src/fg_apply.h"

// This function resets the cur pointer to the first frame theoretically
// executable after a task completed (i.e. each time we update some progress or
// insert some tasks in the queue).
// When frame_idx is set, it can be either from a completed task, or from tasks
// inserted in the queue, in which case we have to make sure the cur pointer
// isn't past this insert.
// The special case where frame_idx is UINT_MAX is to handle the reset after
// completing a task and locklessly signaling progress. In this case we don't
// enter a critical section, which is needed for this function, so we set an
// atomic for a delayed handling, happening here. Meaning we can call this
// function without any actual update other than what's in the atomic, hence
// this special case.
static inline int reset_task_cur(const Dav1dContext *const c,
                                 struct TaskThreadData *const ttd,
                                 unsigned frame_idx)
{
    const unsigned first = atomic_load(&ttd->first);
    unsigned reset_frame_idx = atomic_exchange(&ttd->reset_task_cur, UINT_MAX);
    if (reset_frame_idx < first) {
        if (frame_idx == UINT_MAX) return 0;
        reset_frame_idx = UINT_MAX;
    }
    if (!ttd->cur && c->fc[first].task_thread.task_cur_prev == NULL)
        return 0;
    if (reset_frame_idx != UINT_MAX) {
        if (frame_idx == UINT_MAX) {
            if (reset_frame_idx > first + ttd->cur)
                return 0;
            ttd->cur = reset_frame_idx - first;
            goto cur_found;
        }
    } else if (frame_idx == UINT_MAX)
        return 0;
    if (frame_idx < first) frame_idx += c->n_fc;
    const unsigned min_frame_idx = umin(reset_frame_idx, frame_idx);
    const unsigned cur_frame_idx = first + ttd->cur;
    if (ttd->cur < c->n_fc && cur_frame_idx < min_frame_idx)
        return 0;
    for (ttd->cur = min_frame_idx - first; ttd->cur < c->n_fc; ttd->cur++)
        if (c->fc[(first + ttd->cur) % c->n_fc].task_thread.task_head)
            break;
cur_found:
    for (unsigned i = ttd->cur; i < c->n_fc; i++)
        c->fc[(first + i) % c->n_fc].task_thread.task_cur_prev = NULL;
    return 1;
}

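// Lockless variant used when ttd->lock is not held: it only records the
// lowest frame index that needs a reset in ttd->reset_task_cur; the next
// worker that holds the lock performs the actual reset in reset_task_cur().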
static inline void reset_task_cur_async(struct TaskThreadData *const ttd,
                                        unsigned frame_idx, unsigned n_frames)
{
    const unsigned first = atomic_load(&ttd->first);
    if (frame_idx < first) frame_idx += n_frames;
    unsigned last_idx = frame_idx;
    do {
        frame_idx = last_idx;
        last_idx = atomic_exchange(&ttd->reset_task_cur, frame_idx);
    } while (last_idx < frame_idx);
    if (frame_idx == first && atomic_load(&ttd->first) != first) {
        unsigned expected = frame_idx;
        atomic_compare_exchange_strong(&ttd->reset_task_cur, &expected, UINT_MAX);
    }
}

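// Splice the pre-linked chain [first..last] into f's task queue between the
// existing nodes a (NULL means insert at the head) and b (NULL means append
// at the tail), update the scheduler's cur pointer and optionally wake a
// worker.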
static void insert_tasks_between(Dav1dFrameContext *const f,
                                 Dav1dTask *const first, Dav1dTask *const last,
                                 Dav1dTask *const a, Dav1dTask *const b,
                                 const int cond_signal)
{
    struct TaskThreadData *const ttd = f->task_thread.ttd;
    if (atomic_load(f->c->flush)) return;
    assert(!a || a->next == b);
    if (!a) f->task_thread.task_head = first;
    else a->next = first;
    if (!b) f->task_thread.task_tail = last;
    last->next = b;
    reset_task_cur(f->c, ttd, first->frame_idx);
    if (cond_signal && !atomic_fetch_or(&ttd->cond_signaled, 1))
        pthread_cond_signal(&ttd->cond);
}

static void insert_tasks(Dav1dFrameContext *const f,
                         Dav1dTask *const first, Dav1dTask *const last,
                         const int cond_signal)
{
    // insert task back into task queue
    Dav1dTask *t_ptr, *prev_t = NULL;
    for (t_ptr = f->task_thread.task_head;
         t_ptr; prev_t = t_ptr, t_ptr = t_ptr->next)
    {
        // entropy coding precedes other steps
        if (t_ptr->type == DAV1D_TASK_TYPE_TILE_ENTROPY) {
            if (first->type > DAV1D_TASK_TYPE_TILE_ENTROPY) continue;
            // both are entropy
            if (first->sby > t_ptr->sby) continue;
            if (first->sby < t_ptr->sby) {
                insert_tasks_between(f, first, last, prev_t, t_ptr, cond_signal);
                return;
            }
            // same sby
        } else {
            if (first->type == DAV1D_TASK_TYPE_TILE_ENTROPY) {
                insert_tasks_between(f, first, last, prev_t, t_ptr, cond_signal);
                return;
            }
            if (first->sby > t_ptr->sby) continue;
            if (first->sby < t_ptr->sby) {
                insert_tasks_between(f, first, last, prev_t, t_ptr, cond_signal);
                return;
            }
            // same sby
            if (first->type > t_ptr->type) continue;
            if (first->type < t_ptr->type) {
                insert_tasks_between(f, first, last, prev_t, t_ptr, cond_signal);
                return;
            }
            // same task type
        }

        // sort by tile-id
        assert(first->type == DAV1D_TASK_TYPE_TILE_RECONSTRUCTION ||
               first->type == DAV1D_TASK_TYPE_TILE_ENTROPY);
        assert(first->type == t_ptr->type);
        assert(t_ptr->sby == first->sby);
        const int p = first->type == DAV1D_TASK_TYPE_TILE_ENTROPY;
        const int t_tile_idx = (int) (first - f->task_thread.tile_tasks[p]);
        const int p_tile_idx = (int) (t_ptr - f->task_thread.tile_tasks[p]);
        assert(t_tile_idx != p_tile_idx);
        if (t_tile_idx > p_tile_idx) continue;
        insert_tasks_between(f, first, last, prev_t, t_ptr, cond_signal);
        return;
    }
    // append at the end
    insert_tasks_between(f, first, last, prev_t, NULL, cond_signal);
}

static inline void insert_task(Dav1dFrameContext *const f,
                               Dav1dTask *const t, const int cond_signal)
{
    insert_tasks(f, t, t, cond_signal);
}

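// Append t to the frame's pending-task list; the list is merged into the
// ordered task queue by a worker holding ttd->lock (see merge_pending_frame).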
static inline void add_pending(Dav1dFrameContext *const f, Dav1dTask *const t) {
    pthread_mutex_lock(&f->task_thread.pending_tasks.lock);
    t->next = NULL;
    if (!f->task_thread.pending_tasks.head)
        f->task_thread.pending_tasks.head = t;
    else
        f->task_thread.pending_tasks.tail->next = t;
    f->task_thread.pending_tasks.tail = t;
    atomic_store(&f->task_thread.pending_tasks.merge, 1);
    pthread_mutex_unlock(&f->task_thread.pending_tasks.lock);
}

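// Move all tasks from the frame's pending list into its ordered task queue.
// Returns non-zero if there was anything to merge.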
static inline int merge_pending_frame(Dav1dFrameContext *const f) {
    int const merge = atomic_load(&f->task_thread.pending_tasks.merge);
    if (merge) {
        pthread_mutex_lock(&f->task_thread.pending_tasks.lock);
        Dav1dTask *t = f->task_thread.pending_tasks.head;
        f->task_thread.pending_tasks.head = NULL;
        f->task_thread.pending_tasks.tail = NULL;
        atomic_store(&f->task_thread.pending_tasks.merge, 0);
        pthread_mutex_unlock(&f->task_thread.pending_tasks.lock);
        while (t) {
            Dav1dTask *const tmp = t->next;
            insert_task(f, t, 0);
            t = tmp;
        }
    }
    return merge;
}

static inline int merge_pending(const Dav1dContext *const c) {
    int res = 0;
    for (unsigned i = 0; i < c->n_fc; i++)
        res |= merge_pending_frame(&c->fc[i]);
    return res;
}

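// Create the first post-filter/progress task of a pass and return it through
// *res_t, (re)allocating the per-frame task and progress arrays as needed.
// Returns 0 on success, -1 on allocation failure.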
static int create_filter_sbrow(Dav1dFrameContext *const f,
                               const int pass, Dav1dTask **res_t)
{
    const int has_deblock = f->frame_hdr->loopfilter.level_y[0] ||
                            f->frame_hdr->loopfilter.level_y[1];
    const int has_cdef = f->seq_hdr->cdef;
    const int has_resize = f->frame_hdr->width[0] != f->frame_hdr->width[1];
    const int has_lr = f->lf.restore_planes;

    Dav1dTask *tasks = f->task_thread.tasks;
    const int uses_2pass = f->c->n_fc > 1;
    int num_tasks = f->sbh * (1 + uses_2pass);
    if (num_tasks > f->task_thread.num_tasks) {
        const size_t size = sizeof(Dav1dTask) * num_tasks;
        tasks = dav1d_realloc(ALLOC_COMMON_CTX, f->task_thread.tasks, size);
        if (!tasks) return -1;
        memset(tasks, 0, size);
        f->task_thread.tasks = tasks;
        f->task_thread.num_tasks = num_tasks;
    }
    tasks += f->sbh * (pass & 1);

    if (pass & 1) {
        f->frame_thread.entropy_progress = 0;
    } else {
        const int prog_sz = ((f->sbh + 31) & ~31) >> 5;
        if (prog_sz > f->frame_thread.prog_sz) {
            atomic_uint *const prog = dav1d_realloc(ALLOC_COMMON_CTX, f->frame_thread.frame_progress,
                                                    2 * prog_sz * sizeof(*prog));
            if (!prog) return -1;
            f->frame_thread.frame_progress = prog;
            f->frame_thread.copy_lpf_progress = prog + prog_sz;
        }
        f->frame_thread.prog_sz = prog_sz;
        memset(f->frame_thread.frame_progress, 0, prog_sz * sizeof(atomic_uint));
        memset(f->frame_thread.copy_lpf_progress, 0, prog_sz * sizeof(atomic_uint));
        atomic_store(&f->frame_thread.deblock_progress, 0);
    }
    f->frame_thread.next_tile_row[pass & 1] = 0;

    Dav1dTask *t = &tasks[0];
    t->sby = 0;
    t->recon_progress = 1;
    t->deblock_progress = 0;
    t->type = pass == 1 ? DAV1D_TASK_TYPE_ENTROPY_PROGRESS :
              has_deblock ? DAV1D_TASK_TYPE_DEBLOCK_COLS :
              has_cdef || has_lr /* i.e. LR backup */ ? DAV1D_TASK_TYPE_DEBLOCK_ROWS :
              has_resize ? DAV1D_TASK_TYPE_SUPER_RESOLUTION :
                           DAV1D_TASK_TYPE_RECONSTRUCTION_PROGRESS;
    t->frame_idx = (int)(f - f->c->fc);

    *res_t = t;
    return 0;
}

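// Create the per-tile sbrow tasks for a pass (entropy for pass 1,
// reconstruction otherwise), chain them together with the filter task from
// create_filter_sbrow(), and add the whole chain to the frame's pending list.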
int dav1d_task_create_tile_sbrow(Dav1dFrameContext *const f, const int pass,
                                 const int cond_signal)
{
    Dav1dTask *tasks = f->task_thread.tile_tasks[0];
    const int uses_2pass = f->c->n_fc > 1;
    const int num_tasks = f->frame_hdr->tiling.cols * f->frame_hdr->tiling.rows;
    if (pass < 2) {
        int alloc_num_tasks = num_tasks * (1 + uses_2pass);
        if (alloc_num_tasks > f->task_thread.num_tile_tasks) {
            const size_t size = sizeof(Dav1dTask) * alloc_num_tasks;
            tasks = dav1d_realloc(ALLOC_COMMON_CTX, f->task_thread.tile_tasks[0], size);
            if (!tasks) return -1;
            memset(tasks, 0, size);
            f->task_thread.tile_tasks[0] = tasks;
            f->task_thread.num_tile_tasks = alloc_num_tasks;
        }
        f->task_thread.tile_tasks[1] = tasks + num_tasks;
    }
    tasks += num_tasks * (pass & 1);

    Dav1dTask *pf_t;
    if (create_filter_sbrow(f, pass, &pf_t))
        return -1;

    Dav1dTask *prev_t = NULL;
    for (int tile_idx = 0; tile_idx < num_tasks; tile_idx++) {
        Dav1dTileState *const ts = &f->ts[tile_idx];
        Dav1dTask *t = &tasks[tile_idx];
        t->sby = ts->tiling.row_start >> f->sb_shift;
        if (pf_t && t->sby) {
            prev_t->next = pf_t;
            prev_t = pf_t;
            pf_t = NULL;
        }
        t->recon_progress = 0;
        t->deblock_progress = 0;
        t->deps_skip = 0;
        t->type = pass != 1 ? DAV1D_TASK_TYPE_TILE_RECONSTRUCTION :
                              DAV1D_TASK_TYPE_TILE_ENTROPY;
        t->frame_idx = (int)(f - f->c->fc);
        if (prev_t) prev_t->next = t;
        prev_t = t;
    }
    if (pf_t) {
        prev_t->next = pf_t;
        prev_t = pf_t;
    }
    prev_t->next = NULL;

    atomic_store(&f->task_thread.done[pass & 1], 0);

    // XXX in theory this could be done locklessly, since at this point there
    // are no tasks in the frameQ, so no other runner should be using this
    // lock, but we must add both passes at once
    pthread_mutex_lock(&f->task_thread.pending_tasks.lock);
    assert(f->task_thread.pending_tasks.head == NULL || pass == 2);
    if (!f->task_thread.pending_tasks.head)
        f->task_thread.pending_tasks.head = &tasks[0];
    else
        f->task_thread.pending_tasks.tail->next = &tasks[0];
    f->task_thread.pending_tasks.tail = prev_t;
    atomic_store(&f->task_thread.pending_tasks.merge, 1);
    atomic_store(&f->task_thread.init_done, 1);
    pthread_mutex_unlock(&f->task_thread.pending_tasks.lock);

    return 0;
}

void dav1d_task_frame_init(Dav1dFrameContext *const f) {
    const Dav1dContext *const c = f->c;

    atomic_store(&f->task_thread.init_done, 0);
    // schedule init task, which will schedule the remaining tasks
    Dav1dTask *const t = &f->task_thread.init_task;
    t->type = DAV1D_TASK_TYPE_INIT;
    t->frame_idx = (int)(f - c->fc);
    t->sby = 0;
    t->recon_progress = t->deblock_progress = 0;
    insert_task(f, t, 1);
}

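// Run standalone film-grain synthesis from `in` into `out` on the worker
// pool and block until all rows have been processed.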
void dav1d_task_delayed_fg(Dav1dContext *const c, Dav1dPicture *const out,
                           const Dav1dPicture *const in)
{
    struct TaskThreadData *const ttd = &c->task_thread;
    ttd->delayed_fg.in = in;
    ttd->delayed_fg.out = out;
    ttd->delayed_fg.type = DAV1D_TASK_TYPE_FG_PREP;
    atomic_init(&ttd->delayed_fg.progress[0], 0);
    atomic_init(&ttd->delayed_fg.progress[1], 0);
    pthread_mutex_lock(&ttd->lock);
    ttd->delayed_fg.exec = 1;
    ttd->delayed_fg.finished = 0;
    pthread_cond_signal(&ttd->cond);
    do {
        pthread_cond_wait(&ttd->delayed_fg.cond, &ttd->lock);
    } while (!ttd->delayed_fg.finished);
    pthread_mutex_unlock(&ttd->lock);
}

static inline int ensure_progress(struct TaskThreadData *const ttd,
                                  Dav1dFrameContext *const f,
                                  Dav1dTask *const t, const enum TaskType type,
                                  atomic_int *const state, int *const target)
{
    // deblock_rows (non-LR portion) depends on deblock of previous sbrow,
    // so ensure that completed. if not, re-add to task-queue; else, fall-through
    int p1 = atomic_load(state);
    if (p1 < t->sby) {
        t->type = type;
        t->recon_progress = t->deblock_progress = 0;
        *target = t->sby;
        add_pending(f, t);
        pthread_mutex_lock(&ttd->lock);
        return 1;
    }
    return 0;
}

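// Check a tile task's dependencies: progress of the previous sbrow in this
// tile, first-pass progress in frame-threading mode, and the required rows of
// the reference frames for inter frames. Returns 1 if the task cannot run
// yet, 0 if it can.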
static inline int check_tile(Dav1dTask *const t, Dav1dFrameContext *const f,
                             const int frame_mt)
{
    const int tp = t->type == DAV1D_TASK_TYPE_TILE_ENTROPY;
    const int tile_idx = (int)(t - f->task_thread.tile_tasks[tp]);
    Dav1dTileState *const ts = &f->ts[tile_idx];
    const int p1 = atomic_load(&ts->progress[tp]);
    if (p1 < t->sby) return 1;
    int error = p1 == TILE_ERROR;
    error |= atomic_fetch_or(&f->task_thread.error, error);
    if (!error && frame_mt && !tp) {
        const int p2 = atomic_load(&ts->progress[1]);
        if (p2 <= t->sby) return 1;
        error = p2 == TILE_ERROR;
        error |= atomic_fetch_or(&f->task_thread.error, error);
    }
    if (!error && frame_mt && !IS_KEY_OR_INTRA(f->frame_hdr)) {
        // check reference state
        const Dav1dThreadPicture *p = &f->sr_cur;
        const int ss_ver = p->p.p.layout == DAV1D_PIXEL_LAYOUT_I420;
        const unsigned p_b = (t->sby + 1) << (f->sb_shift + 2);
        const int tile_sby = t->sby - (ts->tiling.row_start >> f->sb_shift);
        const int (*const lowest_px)[2] = ts->lowest_pixel[tile_sby];
        for (int n = t->deps_skip; n < 7; n++, t->deps_skip++) {
            unsigned lowest;
            if (tp) {
                // if temporal mv refs are disabled, we only need this
                // for the primary ref; if segmentation is disabled, we
                // don't even need that
                lowest = p_b;
            } else {
                // +8 is postfilter-induced delay
                const int y = lowest_px[n][0] == INT_MIN ? INT_MIN :
                              lowest_px[n][0] + 8;
                const int uv = lowest_px[n][1] == INT_MIN ? INT_MIN :
                               lowest_px[n][1] * (1 << ss_ver) + 8;
                const int max = imax(y, uv);
                if (max == INT_MIN) continue;
                lowest = iclip(max, 1, f->refp[n].p.p.h);
            }
            const unsigned p3 = atomic_load(&f->refp[n].progress[!tp]);
            if (p3 < lowest) return 1;
            atomic_fetch_or(&f->task_thread.error, p3 == FRAME_ERROR);
        }
    }
    return 0;
}

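// Translate the per-sbrow frame_progress bitmap into the index of the last
// sbrow of the contiguous completed prefix (-1 if the first sbrow isn't done).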
static inline int get_frame_progress(const Dav1dContext *const c,
                                     const Dav1dFrameContext *const f)
{
    unsigned frame_prog = c->n_fc > 1 ? atomic_load(&f->sr_cur.progress[1]) : 0;
    if (frame_prog >= FRAME_ERROR)
        return f->sbh - 1;
    int idx = frame_prog >> (f->sb_shift + 7);
    int prog;
    do {
        atomic_uint *state = &f->frame_thread.frame_progress[idx];
        const unsigned val = ~atomic_load(state);
        prog = val ? ctz(val) : 32;
        if (prog != 32) break;
        prog = 0;
    } while (++idx < f->frame_thread.prog_sz);
    return ((idx << 5) | prog) - 1;
}

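// Mark the frame as failed: set the error flag, clear the task counter, mark
// both passes as done, publish error progress, finish the frame with the
// given error and wake anyone waiting on it.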
static inline void abort_frame(Dav1dFrameContext *const f, const int error) {
    atomic_store(&f->task_thread.error, error == DAV1D_ERR(EINVAL) ? 1 : -1);
    atomic_store(&f->task_thread.task_counter, 0);
    atomic_store(&f->task_thread.done[0], 1);
    atomic_store(&f->task_thread.done[1], 1);
    atomic_store(&f->sr_cur.progress[0], FRAME_ERROR);
    atomic_store(&f->sr_cur.progress[1], FRAME_ERROR);
    dav1d_decode_frame_exit(f, error);
    f->n_tile_data = 0;
    pthread_cond_signal(&f->task_thread.cond);
}

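// Execute the delayed film-grain work (called with ttd->lock held): one
// FG_PREP step (grain LUT and scaling setup) followed by FG_APPLY steps that
// apply grain row by row and can be shared across several workers.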
static inline void delayed_fg_task(const Dav1dContext *const c,
                                   struct TaskThreadData *const ttd)
{
    const Dav1dPicture *const in = ttd->delayed_fg.in;
    Dav1dPicture *const out = ttd->delayed_fg.out;
#if CONFIG_16BPC
    int off;
    if (out->p.bpc != 8)
        off = (out->p.bpc >> 1) - 4;
#endif
    switch (ttd->delayed_fg.type) {
    case DAV1D_TASK_TYPE_FG_PREP:
        ttd->delayed_fg.exec = 0;
        if (atomic_load(&ttd->cond_signaled))
            pthread_cond_signal(&ttd->cond);
        pthread_mutex_unlock(&ttd->lock);
        switch (out->p.bpc) {
#if CONFIG_8BPC
        case 8:
            dav1d_prep_grain_8bpc(&c->dsp[0].fg, out, in,
                                  ttd->delayed_fg.scaling_8bpc,
                                  ttd->delayed_fg.grain_lut_8bpc);
            break;
#endif
#if CONFIG_16BPC
        case 10:
        case 12:
            dav1d_prep_grain_16bpc(&c->dsp[off].fg, out, in,
                                   ttd->delayed_fg.scaling_16bpc,
                                   ttd->delayed_fg.grain_lut_16bpc);
            break;
#endif
        default: abort();
        }
        ttd->delayed_fg.type = DAV1D_TASK_TYPE_FG_APPLY;
        pthread_mutex_lock(&ttd->lock);
        ttd->delayed_fg.exec = 1;
        // fall-through
    case DAV1D_TASK_TYPE_FG_APPLY:;
        int row = atomic_fetch_add(&ttd->delayed_fg.progress[0], 1);
        pthread_mutex_unlock(&ttd->lock);
        int progmax = (out->p.h + FG_BLOCK_SIZE - 1) / FG_BLOCK_SIZE;
        while (row < progmax) {
            if (row + 1 < progmax)
                pthread_cond_signal(&ttd->cond);
            else {
                pthread_mutex_lock(&ttd->lock);
                ttd->delayed_fg.exec = 0;
                pthread_mutex_unlock(&ttd->lock);
            }
            switch (out->p.bpc) {
#if CONFIG_8BPC
            case 8:
                dav1d_apply_grain_row_8bpc(&c->dsp[0].fg, out, in,
                                           ttd->delayed_fg.scaling_8bpc,
                                           ttd->delayed_fg.grain_lut_8bpc, row);
                break;
#endif
#if CONFIG_16BPC
            case 10:
            case 12:
                dav1d_apply_grain_row_16bpc(&c->dsp[off].fg, out, in,
                                            ttd->delayed_fg.scaling_16bpc,
                                            ttd->delayed_fg.grain_lut_16bpc, row);
                break;
#endif
            default: abort();
            }
            row = atomic_fetch_add(&ttd->delayed_fg.progress[0], 1);
            atomic_fetch_add(&ttd->delayed_fg.progress[1], 1);
        }
        pthread_mutex_lock(&ttd->lock);
        ttd->delayed_fg.exec = 0;
        int done = atomic_fetch_add(&ttd->delayed_fg.progress[1], 1) + 1;
        progmax = atomic_load(&ttd->delayed_fg.progress[0]);
        // signal for completion only once the last runner reaches this
        if (done >= progmax) {
            ttd->delayed_fg.finished = 1;
            pthread_cond_signal(&ttd->delayed_fg.cond);
        }
        break;
    default: abort();
    }
}

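// Worker thread entry point. Each iteration picks the highest-priority
// runnable task (delayed film grain first, then frame init tasks, then
// decoding/filter tasks in frame order), runs it with ttd->lock dropped where
// possible, and signals progress; the thread parks on ttd->cond when nothing
// is runnable.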
void *dav1d_worker_task(void *data) {
    Dav1dTaskContext *const tc = data;
    const Dav1dContext *const c = tc->c;
    struct TaskThreadData *const ttd = tc->task_thread.ttd;

    dav1d_set_thread_name("dav1d-worker");

    pthread_mutex_lock(&ttd->lock);
    for (;;) {
        if (tc->task_thread.die) break;
        if (atomic_load(c->flush)) goto park;

        merge_pending(c);
        if (ttd->delayed_fg.exec) { // run delayed film grain first
            delayed_fg_task(c, ttd);
            continue;
        }
        Dav1dFrameContext *f;
        Dav1dTask *t, *prev_t = NULL;
        if (c->n_fc > 1) { // run init tasks second
            for (unsigned i = 0; i < c->n_fc; i++) {
                const unsigned first = atomic_load(&ttd->first);
                f = &c->fc[(first + i) % c->n_fc];
                if (atomic_load(&f->task_thread.init_done)) continue;
                t = f->task_thread.task_head;
                if (!t) continue;
                if (t->type == DAV1D_TASK_TYPE_INIT) goto found;
                if (t->type == DAV1D_TASK_TYPE_INIT_CDF) {
                    // XXX This can be a simple else, if adding tasks of both
                    // passes at once (in dav1d_task_create_tile_sbrow).
                    // Adding the tasks to the pending Q can result in a
                    // thread merging them before setting init_done.
                    // We will need to set init_done before adding to the
                    // pending Q, so maybe return the tasks, set init_done,
                    // and add to pending Q only then.
                    const int p1 = f->in_cdf.progress ?
                        atomic_load(f->in_cdf.progress) : 1;
                    if (p1) {
                        atomic_fetch_or(&f->task_thread.error, p1 == TILE_ERROR);
                        goto found;
                    }
                }
            }
        }
        while (ttd->cur < c->n_fc) { // run decoding tasks last
            const unsigned first = atomic_load(&ttd->first);
            f = &c->fc[(first + ttd->cur) % c->n_fc];
            merge_pending_frame(f);
            prev_t = f->task_thread.task_cur_prev;
            t = prev_t ? prev_t->next : f->task_thread.task_head;
            while (t) {
                if (t->type == DAV1D_TASK_TYPE_INIT_CDF) goto next;
                else if (t->type == DAV1D_TASK_TYPE_TILE_ENTROPY ||
                         t->type == DAV1D_TASK_TYPE_TILE_RECONSTRUCTION)
                {
                    // if not bottom sbrow of tile, this task will be re-added
                    // after it's finished
                    if (!check_tile(t, f, c->n_fc > 1))
                        goto found;
                } else if (t->recon_progress) {
                    const int p = t->type == DAV1D_TASK_TYPE_ENTROPY_PROGRESS;
                    int error = atomic_load(&f->task_thread.error);
                    assert(!atomic_load(&f->task_thread.done[p]) || error);
                    const int tile_row_base = f->frame_hdr->tiling.cols *
                                              f->frame_thread.next_tile_row[p];
                    if (p) {
                        atomic_int *const prog = &f->frame_thread.entropy_progress;
                        const int p1 = atomic_load(prog);
                        if (p1 < t->sby) goto next;
                        atomic_fetch_or(&f->task_thread.error, p1 == TILE_ERROR);
                    }
                    for (int tc = 0; tc < f->frame_hdr->tiling.cols; tc++) {
                        Dav1dTileState *const ts = &f->ts[tile_row_base + tc];
                        const int p2 = atomic_load(&ts->progress[p]);
                        if (p2 < t->recon_progress) goto next;
                        atomic_fetch_or(&f->task_thread.error, p2 == TILE_ERROR);
                    }
                    if (t->sby + 1 < f->sbh) {
                        // add sby+1 to list to replace this one
                        Dav1dTask *next_t = &t[1];
                        *next_t = *t;
                        next_t->sby++;
                        const int ntr = f->frame_thread.next_tile_row[p] + 1;
                        const int start = f->frame_hdr->tiling.row_start_sb[ntr];
                        if (next_t->sby == start)
                            f->frame_thread.next_tile_row[p] = ntr;
                        next_t->recon_progress = next_t->sby + 1;
                        insert_task(f, next_t, 0);
                    }
                    goto found;
                } else if (t->type == DAV1D_TASK_TYPE_CDEF) {
                    atomic_uint *prog = f->frame_thread.copy_lpf_progress;
                    const int p1 = atomic_load(&prog[(t->sby - 1) >> 5]);
                    if (p1 & (1U << ((t->sby - 1) & 31)))
                        goto found;
                } else {
                    assert(t->deblock_progress);
                    const int p1 = atomic_load(&f->frame_thread.deblock_progress);
                    if (p1 >= t->deblock_progress) {
                        atomic_fetch_or(&f->task_thread.error, p1 == TILE_ERROR);
                        goto found;
                    }
                }
            next:
                prev_t = t;
                t = t->next;
                f->task_thread.task_cur_prev = prev_t;
            }
            ttd->cur++;
        }
        if (reset_task_cur(c, ttd, UINT_MAX)) continue;
        if (merge_pending(c)) continue;
    park:
        tc->task_thread.flushed = 1;
        pthread_cond_signal(&tc->task_thread.td.cond);
        // we want to be woken up next time progress is signaled
        atomic_store(&ttd->cond_signaled, 0);
        pthread_cond_wait(&ttd->cond, &ttd->lock);
        tc->task_thread.flushed = 0;
        reset_task_cur(c, ttd, UINT_MAX);
        continue;

    found:
        // remove t from list
        if (prev_t) prev_t->next = t->next;
        else f->task_thread.task_head = t->next;
        if (!t->next) f->task_thread.task_tail = prev_t;
        if (t->type > DAV1D_TASK_TYPE_INIT_CDF && !f->task_thread.task_head)
            ttd->cur++;
        t->next = NULL;
        // we don't need to check cond_signaled here, since we found a task
        // after the last signal so we want to re-signal the next waiting thread
        // and again won't need to signal after that
        atomic_store(&ttd->cond_signaled, 1);
        pthread_cond_signal(&ttd->cond);
        pthread_mutex_unlock(&ttd->lock);
    found_unlocked:;
        const int flush = atomic_load(c->flush);
        int error = atomic_fetch_or(&f->task_thread.error, flush) | flush;

        // run it
        tc->f = f;
        int sby = t->sby;
        switch (t->type) {
        case DAV1D_TASK_TYPE_INIT: {
            assert(c->n_fc > 1);
            int res = dav1d_decode_frame_init(f);
            int p1 = f->in_cdf.progress ? atomic_load(f->in_cdf.progress) : 1;
            if (res || p1 == TILE_ERROR) {
                pthread_mutex_lock(&ttd->lock);
                abort_frame(f, res ? res : DAV1D_ERR(EINVAL));
                reset_task_cur(c, ttd, t->frame_idx);
            } else {
                t->type = DAV1D_TASK_TYPE_INIT_CDF;
                if (p1) goto found_unlocked;
                add_pending(f, t);
                pthread_mutex_lock(&ttd->lock);
            }
            continue;
        }
        case DAV1D_TASK_TYPE_INIT_CDF: {
            assert(c->n_fc > 1);
            int res = DAV1D_ERR(EINVAL);
            if (!atomic_load(&f->task_thread.error))
                res = dav1d_decode_frame_init_cdf(f);
            if (f->frame_hdr->refresh_context && !f->task_thread.update_set) {
                atomic_store(f->out_cdf.progress, res < 0 ? TILE_ERROR : 1);
            }
            if (!res) {
                assert(c->n_fc > 1);
                for (int p = 1; p <= 2; p++) {
                    const int res = dav1d_task_create_tile_sbrow(f, p, 0);
                    if (res) {
                        pthread_mutex_lock(&ttd->lock);
                        // memory allocation failed
                        atomic_store(&f->task_thread.done[2 - p], 1);
                        atomic_store(&f->task_thread.error, -1);
                        atomic_fetch_sub(&f->task_thread.task_counter,
                                         f->frame_hdr->tiling.cols *
                                         f->frame_hdr->tiling.rows + f->sbh);
                        atomic_store(&f->sr_cur.progress[p - 1], FRAME_ERROR);
                        if (p == 2 && atomic_load(&f->task_thread.done[1])) {
                            assert(!atomic_load(&f->task_thread.task_counter));
                            dav1d_decode_frame_exit(f, DAV1D_ERR(ENOMEM));
                            f->n_tile_data = 0;
                            pthread_cond_signal(&f->task_thread.cond);
                        } else {
                            pthread_mutex_unlock(&ttd->lock);
                        }
                    }
                }
                pthread_mutex_lock(&ttd->lock);
            } else {
                pthread_mutex_lock(&ttd->lock);
                abort_frame(f, res);
                reset_task_cur(c, ttd, t->frame_idx);
                atomic_store(&f->task_thread.init_done, 1);
            }
            continue;
        }
        case DAV1D_TASK_TYPE_TILE_ENTROPY:
        case DAV1D_TASK_TYPE_TILE_RECONSTRUCTION: {
            const int p = t->type == DAV1D_TASK_TYPE_TILE_ENTROPY;
            const int tile_idx = (int)(t - f->task_thread.tile_tasks[p]);
            Dav1dTileState *const ts = &f->ts[tile_idx];

            tc->ts = ts;
            tc->by = sby << f->sb_shift;
            const int uses_2pass = c->n_fc > 1;
            tc->frame_thread.pass = !uses_2pass ? 0 :
                1 + (t->type == DAV1D_TASK_TYPE_TILE_RECONSTRUCTION);
            if (!error) error = dav1d_decode_tile_sbrow(tc);
            const int progress = error ? TILE_ERROR : 1 + sby;

            // signal progress
            atomic_fetch_or(&f->task_thread.error, error);
            if (((sby + 1) << f->sb_shift) < ts->tiling.row_end) {
                t->sby++;
                t->deps_skip = 0;
                if (!check_tile(t, f, uses_2pass)) {
                    atomic_store(&ts->progress[p], progress);
                    reset_task_cur_async(ttd, t->frame_idx, c->n_fc);
                    if (!atomic_fetch_or(&ttd->cond_signaled, 1))
                        pthread_cond_signal(&ttd->cond);
                    goto found_unlocked;
                }
                atomic_store(&ts->progress[p], progress);
                add_pending(f, t);
                pthread_mutex_lock(&ttd->lock);
            } else {
                pthread_mutex_lock(&ttd->lock);
                atomic_store(&ts->progress[p], progress);
                reset_task_cur(c, ttd, t->frame_idx);
                error = atomic_load(&f->task_thread.error);
                if (f->frame_hdr->refresh_context &&
                    tc->frame_thread.pass <= 1 && f->task_thread.update_set &&
                    f->frame_hdr->tiling.update == tile_idx)
                {
                    if (!error)
                        dav1d_cdf_thread_update(f->frame_hdr, f->out_cdf.data.cdf,
                                                &f->ts[f->frame_hdr->tiling.update].cdf);
                    if (c->n_fc > 1)
                        atomic_store(f->out_cdf.progress, error ? TILE_ERROR : 1);
                }
                if (atomic_fetch_sub(&f->task_thread.task_counter, 1) - 1 == 0 &&
                    atomic_load(&f->task_thread.done[0]) &&
                    (!uses_2pass || atomic_load(&f->task_thread.done[1])))
                {
                    error = atomic_load(&f->task_thread.error);
                    dav1d_decode_frame_exit(f, error == 1 ? DAV1D_ERR(EINVAL) :
                                            error ? DAV1D_ERR(ENOMEM) : 0);
                    f->n_tile_data = 0;
                    pthread_cond_signal(&f->task_thread.cond);
                }
                assert(atomic_load(&f->task_thread.task_counter) >= 0);
                if (!atomic_fetch_or(&ttd->cond_signaled, 1))
                    pthread_cond_signal(&ttd->cond);
            }
            continue;
        }
        case DAV1D_TASK_TYPE_DEBLOCK_COLS:
            if (!atomic_load(&f->task_thread.error))
                f->bd_fn.filter_sbrow_deblock_cols(f, sby);
            if (ensure_progress(ttd, f, t, DAV1D_TASK_TYPE_DEBLOCK_ROWS,
                                &f->frame_thread.deblock_progress,
                                &t->deblock_progress)) continue;
            // fall-through
        case DAV1D_TASK_TYPE_DEBLOCK_ROWS:
            if (!atomic_load(&f->task_thread.error))
                f->bd_fn.filter_sbrow_deblock_rows(f, sby);
            // signal deblock progress
            if (f->frame_hdr->loopfilter.level_y[0] ||
                f->frame_hdr->loopfilter.level_y[1])
            {
                error = atomic_load(&f->task_thread.error);
                atomic_store(&f->frame_thread.deblock_progress,
                             error ? TILE_ERROR : sby + 1);
                reset_task_cur_async(ttd, t->frame_idx, c->n_fc);
                if (!atomic_fetch_or(&ttd->cond_signaled, 1))
                    pthread_cond_signal(&ttd->cond);
            } else if (f->seq_hdr->cdef || f->lf.restore_planes) {
                atomic_fetch_or(&f->frame_thread.copy_lpf_progress[sby >> 5],
                                1U << (sby & 31));
                // CDEF needs the top buffer to be saved by lr_copy_lpf of the
                // previous sbrow
                if (sby) {
                    int prog = atomic_load(&f->frame_thread.copy_lpf_progress[(sby - 1) >> 5]);
                    if (~prog & (1U << ((sby - 1) & 31))) {
                        t->type = DAV1D_TASK_TYPE_CDEF;
                        t->recon_progress = t->deblock_progress = 0;
                        add_pending(f, t);
                        pthread_mutex_lock(&ttd->lock);
                        continue;
                    }
                }
            }
            // fall-through
        case DAV1D_TASK_TYPE_CDEF:
            if (f->seq_hdr->cdef) {
                if (!atomic_load(&f->task_thread.error))
                    f->bd_fn.filter_sbrow_cdef(tc, sby);
                reset_task_cur_async(ttd, t->frame_idx, c->n_fc);
                if (!atomic_fetch_or(&ttd->cond_signaled, 1))
                    pthread_cond_signal(&ttd->cond);
            }
            // fall-through
        case DAV1D_TASK_TYPE_SUPER_RESOLUTION:
            if (f->frame_hdr->width[0] != f->frame_hdr->width[1])
                if (!atomic_load(&f->task_thread.error))
                    f->bd_fn.filter_sbrow_resize(f, sby);
            // fall-through
        case DAV1D_TASK_TYPE_LOOP_RESTORATION:
            if (!atomic_load(&f->task_thread.error) && f->lf.restore_planes)
                f->bd_fn.filter_sbrow_lr(f, sby);
            // fall-through
        case DAV1D_TASK_TYPE_RECONSTRUCTION_PROGRESS:
            // dummy to cover for no post-filters
        case DAV1D_TASK_TYPE_ENTROPY_PROGRESS:
            // dummy to convert tile progress to frame
            break;
        default: abort();
        }
        // if task completed [typically LR], signal picture progress as per below
        const int uses_2pass = c->n_fc > 1;
        const int sbh = f->sbh;
        const int sbsz = f->sb_step * 4;
        if (t->type == DAV1D_TASK_TYPE_ENTROPY_PROGRESS) {
            error = atomic_load(&f->task_thread.error);
            const unsigned y = sby + 1 == sbh ? UINT_MAX : (unsigned)(sby + 1) * sbsz;
            assert(c->n_fc > 1);
            if (f->sr_cur.p.data[0] /* upon flush, this can be free'ed already */)
                atomic_store(&f->sr_cur.progress[0], error ? FRAME_ERROR : y);
            atomic_store(&f->frame_thread.entropy_progress,
                         error ? TILE_ERROR : sby + 1);
            if (sby + 1 == sbh)
                atomic_store(&f->task_thread.done[1], 1);
            pthread_mutex_lock(&ttd->lock);
            const int num_tasks = atomic_fetch_sub(&f->task_thread.task_counter, 1) - 1;
            if (sby + 1 < sbh && num_tasks) {
                reset_task_cur(c, ttd, t->frame_idx);
                continue;
            }
            if (!num_tasks && atomic_load(&f->task_thread.done[0]) &&
                atomic_load(&f->task_thread.done[1]))
            {
                error = atomic_load(&f->task_thread.error);
                dav1d_decode_frame_exit(f, error == 1 ? DAV1D_ERR(EINVAL) :
                                        error ? DAV1D_ERR(ENOMEM) : 0);
                f->n_tile_data = 0;
                pthread_cond_signal(&f->task_thread.cond);
            }
            reset_task_cur(c, ttd, t->frame_idx);
            continue;
        }
        // t->type != DAV1D_TASK_TYPE_ENTROPY_PROGRESS
        atomic_fetch_or(&f->frame_thread.frame_progress[sby >> 5],
                        1U << (sby & 31));
        pthread_mutex_lock(&f->task_thread.lock);
        sby = get_frame_progress(c, f);
        error = atomic_load(&f->task_thread.error);
        const unsigned y = sby + 1 == sbh ? UINT_MAX : (unsigned)(sby + 1) * sbsz;
        if (c->n_fc > 1 && f->sr_cur.p.data[0] /* upon flush, this can be free'ed already */)
            atomic_store(&f->sr_cur.progress[1], error ? FRAME_ERROR : y);
        pthread_mutex_unlock(&f->task_thread.lock);
        if (sby + 1 == sbh)
            atomic_store(&f->task_thread.done[0], 1);
        pthread_mutex_lock(&ttd->lock);
        const int num_tasks = atomic_fetch_sub(&f->task_thread.task_counter, 1) - 1;
        if (sby + 1 < sbh && num_tasks) {
            reset_task_cur(c, ttd, t->frame_idx);
            continue;
        }
        if (!num_tasks && atomic_load(&f->task_thread.done[0]) &&
            (!uses_2pass || atomic_load(&f->task_thread.done[1])))
        {
            error = atomic_load(&f->task_thread.error);
            dav1d_decode_frame_exit(f, error == 1 ? DAV1D_ERR(EINVAL) :
                                    error ? DAV1D_ERR(ENOMEM) : 0);
            f->n_tile_data = 0;
            pthread_cond_signal(&f->task_thread.cond);
        }
        reset_task_cur(c, ttd, t->frame_idx);
    }
    pthread_mutex_unlock(&ttd->lock);

    return NULL;
}