/*
 *  Copyright (c) 2014 The WebM project authors. All Rights Reserved.
 *
 *  Use of this source code is governed by a BSD-style license
 *  that can be found in the LICENSE file in the root of the source
 *  tree. An additional intellectual property rights grant can be found
 *  in the file PATENTS.  All contributing project authors may
 *  be found in the AUTHORS file in the root of the source tree.
 */
11 #include "vp9/encoder/vp9_encodeframe.h"
12 #include "vp9/encoder/vp9_encoder.h"
13 #include "vp9/encoder/vp9_ethread.h"
15 static void accumulate_frame_counts(VP9_COMMON
*cm
, ThreadData
*td
) {
18 for (i
= 0; i
< BLOCK_SIZE_GROUPS
; i
++)
19 for (j
= 0; j
< INTRA_MODES
; j
++)
20 cm
->counts
.y_mode
[i
][j
] += td
->counts
->y_mode
[i
][j
];
22 for (i
= 0; i
< INTRA_MODES
; i
++)
23 for (j
= 0; j
< INTRA_MODES
; j
++)
24 cm
->counts
.uv_mode
[i
][j
] += td
->counts
->uv_mode
[i
][j
];
26 for (i
= 0; i
< PARTITION_CONTEXTS
; i
++)
27 for (j
= 0; j
< PARTITION_TYPES
; j
++)
28 cm
->counts
.partition
[i
][j
] += td
->counts
->partition
[i
][j
];
30 for (i
= 0; i
< TX_SIZES
; i
++)
31 for (j
= 0; j
< PLANE_TYPES
; j
++)
32 for (k
= 0; k
< REF_TYPES
; k
++)
33 for (l
= 0; l
< COEF_BANDS
; l
++)
34 for (m
= 0; m
< COEFF_CONTEXTS
; m
++)
35 cm
->counts
.eob_branch
[i
][j
][k
][l
][m
] +=
36 td
->counts
->eob_branch
[i
][j
][k
][l
][m
];
37 // cm->counts.coef is only updated at frame level, so not need
38 // to accumulate it here.
39 // for (n = 0; n < UNCONSTRAINED_NODES + 1; n++)
40 // cm->counts.coef[i][j][k][l][m][n] +=
41 // td->counts->coef[i][j][k][l][m][n];
43 for (i
= 0; i
< SWITCHABLE_FILTER_CONTEXTS
; i
++)
44 for (j
= 0; j
< SWITCHABLE_FILTERS
; j
++)
45 cm
->counts
.switchable_interp
[i
][j
] += td
->counts
->switchable_interp
[i
][j
];
47 for (i
= 0; i
< INTER_MODE_CONTEXTS
; i
++)
48 for (j
= 0; j
< INTER_MODES
; j
++)
49 cm
->counts
.inter_mode
[i
][j
] += td
->counts
->inter_mode
[i
][j
];
51 for (i
= 0; i
< INTRA_INTER_CONTEXTS
; i
++)
52 for (j
= 0; j
< 2; j
++)
53 cm
->counts
.intra_inter
[i
][j
] += td
->counts
->intra_inter
[i
][j
];
55 for (i
= 0; i
< COMP_INTER_CONTEXTS
; i
++)
56 for (j
= 0; j
< 2; j
++)
57 cm
->counts
.comp_inter
[i
][j
] += td
->counts
->comp_inter
[i
][j
];
59 for (i
= 0; i
< REF_CONTEXTS
; i
++)
60 for (j
= 0; j
< 2; j
++)
61 for (k
= 0; k
< 2; k
++)
62 cm
->counts
.single_ref
[i
][j
][k
] += td
->counts
->single_ref
[i
][j
][k
];
64 for (i
= 0; i
< REF_CONTEXTS
; i
++)
65 for (j
= 0; j
< 2; j
++)
66 cm
->counts
.comp_ref
[i
][j
] += td
->counts
->comp_ref
[i
][j
];
68 for (i
= 0; i
< TX_SIZE_CONTEXTS
; i
++) {
69 for (j
= 0; j
< TX_SIZES
; j
++)
70 cm
->counts
.tx
.p32x32
[i
][j
] += td
->counts
->tx
.p32x32
[i
][j
];
72 for (j
= 0; j
< TX_SIZES
- 1; j
++)
73 cm
->counts
.tx
.p16x16
[i
][j
] += td
->counts
->tx
.p16x16
[i
][j
];
75 for (j
= 0; j
< TX_SIZES
- 2; j
++)
76 cm
->counts
.tx
.p8x8
[i
][j
] += td
->counts
->tx
.p8x8
[i
][j
];
79 for (i
= 0; i
< SKIP_CONTEXTS
; i
++)
80 for (j
= 0; j
< 2; j
++)
81 cm
->counts
.skip
[i
][j
] += td
->counts
->skip
[i
][j
];
83 for (i
= 0; i
< MV_JOINTS
; i
++)
84 cm
->counts
.mv
.joints
[i
] += td
->counts
->mv
.joints
[i
];
86 for (k
= 0; k
< 2; k
++) {
87 nmv_component_counts
*comps
= &cm
->counts
.mv
.comps
[k
];
88 nmv_component_counts
*comps_t
= &td
->counts
->mv
.comps
[k
];
90 for (i
= 0; i
< 2; i
++) {
91 comps
->sign
[i
] += comps_t
->sign
[i
];
92 comps
->class0_hp
[i
] += comps_t
->class0_hp
[i
];
93 comps
->hp
[i
] += comps_t
->hp
[i
];
96 for (i
= 0; i
< MV_CLASSES
; i
++)
97 comps
->classes
[i
] += comps_t
->classes
[i
];
99 for (i
= 0; i
< CLASS0_SIZE
; i
++) {
100 comps
->class0
[i
] += comps_t
->class0
[i
];
101 for (j
= 0; j
< MV_FP_SIZE
; j
++)
102 comps
->class0_fp
[i
][j
] += comps_t
->class0_fp
[i
][j
];
105 for (i
= 0; i
< MV_OFFSET_BITS
; i
++)
106 for (j
= 0; j
< 2; j
++)
107 comps
->bits
[i
][j
] += comps_t
->bits
[i
][j
];
109 for (i
= 0; i
< MV_FP_SIZE
; i
++)
110 comps
->fp
[i
] += comps_t
->fp
[i
];
114 static void accumulate_rd_opt(ThreadData
*td
, ThreadData
*td_t
) {
115 int i
, j
, k
, l
, m
, n
;
117 for (i
= 0; i
< REFERENCE_MODES
; i
++)
118 td
->rd_counts
.comp_pred_diff
[i
] += td_t
->rd_counts
.comp_pred_diff
[i
];
120 for (i
= 0; i
< SWITCHABLE_FILTER_CONTEXTS
; i
++)
121 td
->rd_counts
.filter_diff
[i
] += td_t
->rd_counts
.filter_diff
[i
];
123 for (i
= 0; i
< TX_MODES
; i
++)
124 td
->rd_counts
.tx_select_diff
[i
] += td_t
->rd_counts
.tx_select_diff
[i
];
126 for (i
= 0; i
< TX_SIZES
; i
++)
127 for (j
= 0; j
< PLANE_TYPES
; j
++)
128 for (k
= 0; k
< REF_TYPES
; k
++)
129 for (l
= 0; l
< COEF_BANDS
; l
++)
130 for (m
= 0; m
< COEFF_CONTEXTS
; m
++)
131 for (n
= 0; n
< ENTROPY_TOKENS
; n
++)
132 td
->rd_counts
.coef_counts
[i
][j
][k
][l
][m
][n
] +=
133 td_t
->rd_counts
.coef_counts
[i
][j
][k
][l
][m
][n
];
136 static int enc_worker_hook(EncWorkerData
*const thread_data
, void *unused
) {
137 VP9_COMP
*const cpi
= thread_data
->cpi
;
138 const VP9_COMMON
*const cm
= &cpi
->common
;
139 const int tile_cols
= 1 << cm
->log2_tile_cols
;
140 const int tile_rows
= 1 << cm
->log2_tile_rows
;
145 for (t
= thread_data
->start
; t
< tile_rows
* tile_cols
;
146 t
+= cpi
->num_workers
) {
147 int tile_row
= t
/ tile_cols
;
148 int tile_col
= t
% tile_cols
;
150 vp9_encode_tile(cpi
, thread_data
->td
, tile_row
, tile_col
);
156 void vp9_encode_tiles_mt(VP9_COMP
*cpi
) {
157 VP9_COMMON
*const cm
= &cpi
->common
;
158 const int tile_cols
= 1 << cm
->log2_tile_cols
;
159 const VP9WorkerInterface
*const winterface
= vp9_get_worker_interface();
160 const int num_workers
= MIN(cpi
->oxcf
.max_threads
, tile_cols
);
163 vp9_init_tile_data(cpi
);
165 // Only run once to create threads and allocate thread data.
166 if (cpi
->num_workers
== 0) {
167 CHECK_MEM_ERROR(cm
, cpi
->workers
,
168 vpx_malloc(num_workers
* sizeof(*cpi
->workers
)));
170 CHECK_MEM_ERROR(cm
, cpi
->tile_thr_data
,
171 vpx_calloc(num_workers
, sizeof(*cpi
->tile_thr_data
)));
173 for (i
= 0; i
< num_workers
; i
++) {
174 VP9Worker
*const worker
= &cpi
->workers
[i
];
175 EncWorkerData
*thread_data
= &cpi
->tile_thr_data
[i
];
178 winterface
->init(worker
);
180 if (i
< num_workers
- 1) {
181 thread_data
->cpi
= cpi
;
183 // Allocate thread data.
184 CHECK_MEM_ERROR(cm
, thread_data
->td
,
185 vpx_memalign(32, sizeof(*thread_data
->td
)));
186 vp9_zero(*thread_data
->td
);
189 thread_data
->td
->leaf_tree
= NULL
;
190 thread_data
->td
->pc_tree
= NULL
;
191 vp9_setup_pc_tree(cm
, thread_data
->td
);
193 // Allocate frame counters in thread data.
194 CHECK_MEM_ERROR(cm
, thread_data
->td
->counts
,
195 vpx_calloc(1, sizeof(*thread_data
->td
->counts
)));
198 if (!winterface
->reset(worker
))
199 vpx_internal_error(&cm
->error
, VPX_CODEC_ERROR
,
200 "Tile encoder thread creation failed");
202 // Main thread acts as a worker and uses the thread data in cpi.
203 thread_data
->cpi
= cpi
;
204 thread_data
->td
= &cpi
->td
;
207 winterface
->sync(worker
);
211 for (i
= 0; i
< num_workers
; i
++) {
212 VP9Worker
*const worker
= &cpi
->workers
[i
];
213 EncWorkerData
*thread_data
;
215 worker
->hook
= (VP9WorkerHook
)enc_worker_hook
;
216 worker
->data1
= &cpi
->tile_thr_data
[i
];
217 worker
->data2
= NULL
;
218 thread_data
= (EncWorkerData
*)worker
->data1
;
220 // Before encoding a frame, copy the thread data from cpi.
221 thread_data
->td
->mb
= cpi
->td
.mb
;
222 thread_data
->td
->rd_counts
= cpi
->td
.rd_counts
;
223 vpx_memcpy(thread_data
->td
->counts
, &cpi
->common
.counts
,
224 sizeof(cpi
->common
.counts
));
226 // Handle use_nonrd_pick_mode case.
227 if (cpi
->sf
.use_nonrd_pick_mode
) {
228 MACROBLOCK
*const x
= &thread_data
->td
->mb
;
229 MACROBLOCKD
*const xd
= &x
->e_mbd
;
230 struct macroblock_plane
*const p
= x
->plane
;
231 struct macroblockd_plane
*const pd
= xd
->plane
;
232 PICK_MODE_CONTEXT
*ctx
= &thread_data
->td
->pc_root
->none
;
235 for (j
= 0; j
< MAX_MB_PLANE
; ++j
) {
236 p
[j
].coeff
= ctx
->coeff_pbuf
[j
][0];
237 p
[j
].qcoeff
= ctx
->qcoeff_pbuf
[j
][0];
238 pd
[j
].dqcoeff
= ctx
->dqcoeff_pbuf
[j
][0];
239 p
[j
].eobs
= ctx
->eobs_pbuf
[j
][0];
245 for (i
= 0; i
< num_workers
; i
++) {
246 VP9Worker
*const worker
= &cpi
->workers
[i
];
247 EncWorkerData
*const thread_data
= (EncWorkerData
*)worker
->data1
;
249 // Set the starting tile for each thread.
250 thread_data
->start
= i
;
252 if (i
== num_workers
- 1)
253 winterface
->execute(worker
);
255 winterface
->launch(worker
);
259 for (i
= 0; i
< num_workers
; i
++) {
260 VP9Worker
*const worker
= &cpi
->workers
[i
];
261 winterface
->sync(worker
);
264 for (i
= 0; i
< num_workers
; i
++) {
265 VP9Worker
*const worker
= &cpi
->workers
[i
];
266 EncWorkerData
*const thread_data
= (EncWorkerData
*)worker
->data1
;
268 // Accumulate counters.
269 if (i
< num_workers
- 1) {
270 accumulate_frame_counts(&cpi
->common
, thread_data
->td
);
271 accumulate_rd_opt(&cpi
->td
, thread_data
->td
);