/*
 *  Copyright (c) 2014 The WebM project authors. All Rights Reserved.
 *
 *  Use of this source code is governed by a BSD-style license
 *  that can be found in the LICENSE file in the root of the source
 *  tree. An additional intellectual property rights grant can be found
 *  in the file PATENTS. All contributing project authors may
 *  be found in the AUTHORS file in the root of the source tree.
 */
11 #include "vp9/encoder/vp9_encodeframe.h"
12 #include "vp9/encoder/vp9_encoder.h"
13 #include "vp9/encoder/vp9_ethread.h"
15 static void accumulate_rd_opt(ThreadData
*td
, ThreadData
*td_t
) {
18 for (i
= 0; i
< REFERENCE_MODES
; i
++)
19 td
->rd_counts
.comp_pred_diff
[i
] += td_t
->rd_counts
.comp_pred_diff
[i
];
21 for (i
= 0; i
< SWITCHABLE_FILTER_CONTEXTS
; i
++)
22 td
->rd_counts
.filter_diff
[i
] += td_t
->rd_counts
.filter_diff
[i
];
24 for (i
= 0; i
< TX_MODES
; i
++)
25 td
->rd_counts
.tx_select_diff
[i
] += td_t
->rd_counts
.tx_select_diff
[i
];
27 for (i
= 0; i
< TX_SIZES
; i
++)
28 for (j
= 0; j
< PLANE_TYPES
; j
++)
29 for (k
= 0; k
< REF_TYPES
; k
++)
30 for (l
= 0; l
< COEF_BANDS
; l
++)
31 for (m
= 0; m
< COEFF_CONTEXTS
; m
++)
32 for (n
= 0; n
< ENTROPY_TOKENS
; n
++)
33 td
->rd_counts
.coef_counts
[i
][j
][k
][l
][m
][n
] +=
34 td_t
->rd_counts
.coef_counts
[i
][j
][k
][l
][m
][n
];
37 static int enc_worker_hook(EncWorkerData
*const thread_data
, void *unused
) {
38 VP9_COMP
*const cpi
= thread_data
->cpi
;
39 const VP9_COMMON
*const cm
= &cpi
->common
;
40 const int tile_cols
= 1 << cm
->log2_tile_cols
;
41 const int tile_rows
= 1 << cm
->log2_tile_rows
;
46 for (t
= thread_data
->start
; t
< tile_rows
* tile_cols
;
47 t
+= cpi
->num_workers
) {
48 int tile_row
= t
/ tile_cols
;
49 int tile_col
= t
% tile_cols
;
51 vp9_encode_tile(cpi
, thread_data
->td
, tile_row
, tile_col
);
57 static int get_max_tile_cols(VP9_COMP
*cpi
) {
58 const int aligned_width
= ALIGN_POWER_OF_TWO(cpi
->oxcf
.width
, MI_SIZE_LOG2
);
59 int mi_cols
= aligned_width
>> MI_SIZE_LOG2
;
60 int min_log2_tile_cols
, max_log2_tile_cols
;
63 vp9_get_tile_n_bits(mi_cols
, &min_log2_tile_cols
, &max_log2_tile_cols
);
64 log2_tile_cols
= clamp(cpi
->oxcf
.tile_columns
,
65 min_log2_tile_cols
, max_log2_tile_cols
);
66 return (1 << log2_tile_cols
);
69 void vp9_encode_tiles_mt(VP9_COMP
*cpi
) {
70 VP9_COMMON
*const cm
= &cpi
->common
;
71 const int tile_cols
= 1 << cm
->log2_tile_cols
;
72 const VPxWorkerInterface
*const winterface
= vpx_get_worker_interface();
73 const int num_workers
= MIN(cpi
->oxcf
.max_threads
, tile_cols
);
76 vp9_init_tile_data(cpi
);
78 // Only run once to create threads and allocate thread data.
79 if (cpi
->num_workers
== 0) {
80 int allocated_workers
= num_workers
;
82 // While using SVC, we need to allocate threads according to the highest
85 int max_tile_cols
= get_max_tile_cols(cpi
);
86 allocated_workers
= MIN(cpi
->oxcf
.max_threads
, max_tile_cols
);
89 CHECK_MEM_ERROR(cm
, cpi
->workers
,
90 vpx_malloc(allocated_workers
* sizeof(*cpi
->workers
)));
92 CHECK_MEM_ERROR(cm
, cpi
->tile_thr_data
,
93 vpx_calloc(allocated_workers
,
94 sizeof(*cpi
->tile_thr_data
)));
96 for (i
= 0; i
< allocated_workers
; i
++) {
97 VPxWorker
*const worker
= &cpi
->workers
[i
];
98 EncWorkerData
*thread_data
= &cpi
->tile_thr_data
[i
];
101 winterface
->init(worker
);
103 if (i
< allocated_workers
- 1) {
104 thread_data
->cpi
= cpi
;
106 // Allocate thread data.
107 CHECK_MEM_ERROR(cm
, thread_data
->td
,
108 vpx_memalign(32, sizeof(*thread_data
->td
)));
109 vp9_zero(*thread_data
->td
);
112 thread_data
->td
->leaf_tree
= NULL
;
113 thread_data
->td
->pc_tree
= NULL
;
114 vp9_setup_pc_tree(cm
, thread_data
->td
);
116 // Allocate frame counters in thread data.
117 CHECK_MEM_ERROR(cm
, thread_data
->td
->counts
,
118 vpx_calloc(1, sizeof(*thread_data
->td
->counts
)));
121 if (!winterface
->reset(worker
))
122 vpx_internal_error(&cm
->error
, VPX_CODEC_ERROR
,
123 "Tile encoder thread creation failed");
125 // Main thread acts as a worker and uses the thread data in cpi.
126 thread_data
->cpi
= cpi
;
127 thread_data
->td
= &cpi
->td
;
130 winterface
->sync(worker
);
134 for (i
= 0; i
< num_workers
; i
++) {
135 VPxWorker
*const worker
= &cpi
->workers
[i
];
136 EncWorkerData
*thread_data
;
138 worker
->hook
= (VPxWorkerHook
)enc_worker_hook
;
139 worker
->data1
= &cpi
->tile_thr_data
[i
];
140 worker
->data2
= NULL
;
141 thread_data
= (EncWorkerData
*)worker
->data1
;
143 // Before encoding a frame, copy the thread data from cpi.
144 if (thread_data
->td
!= &cpi
->td
) {
145 thread_data
->td
->mb
= cpi
->td
.mb
;
146 thread_data
->td
->rd_counts
= cpi
->td
.rd_counts
;
148 if (thread_data
->td
->counts
!= &cpi
->common
.counts
) {
149 memcpy(thread_data
->td
->counts
, &cpi
->common
.counts
,
150 sizeof(cpi
->common
.counts
));
153 // Handle use_nonrd_pick_mode case.
154 if (cpi
->sf
.use_nonrd_pick_mode
) {
155 MACROBLOCK
*const x
= &thread_data
->td
->mb
;
156 MACROBLOCKD
*const xd
= &x
->e_mbd
;
157 struct macroblock_plane
*const p
= x
->plane
;
158 struct macroblockd_plane
*const pd
= xd
->plane
;
159 PICK_MODE_CONTEXT
*ctx
= &thread_data
->td
->pc_root
->none
;
162 for (j
= 0; j
< MAX_MB_PLANE
; ++j
) {
163 p
[j
].coeff
= ctx
->coeff_pbuf
[j
][0];
164 p
[j
].qcoeff
= ctx
->qcoeff_pbuf
[j
][0];
165 pd
[j
].dqcoeff
= ctx
->dqcoeff_pbuf
[j
][0];
166 p
[j
].eobs
= ctx
->eobs_pbuf
[j
][0];
172 for (i
= 0; i
< num_workers
; i
++) {
173 VPxWorker
*const worker
= &cpi
->workers
[i
];
174 EncWorkerData
*const thread_data
= (EncWorkerData
*)worker
->data1
;
176 // Set the starting tile for each thread.
177 thread_data
->start
= i
;
179 if (i
== cpi
->num_workers
- 1)
180 winterface
->execute(worker
);
182 winterface
->launch(worker
);
186 for (i
= 0; i
< num_workers
; i
++) {
187 VPxWorker
*const worker
= &cpi
->workers
[i
];
188 winterface
->sync(worker
);
191 for (i
= 0; i
< num_workers
; i
++) {
192 VPxWorker
*const worker
= &cpi
->workers
[i
];
193 EncWorkerData
*const thread_data
= (EncWorkerData
*)worker
->data1
;
195 // Accumulate counters.
196 if (i
< cpi
->num_workers
- 1) {
197 vp9_accumulate_frame_counts(cm
, thread_data
->td
->counts
, 0);
198 accumulate_rd_opt(&cpi
->td
, thread_data
->td
);