3 Copyright (c) 2003-2015 HandBrake Team
4 This file is part of the HandBrake source code
5 Homepage: <http://handbrake.fr/>.
6 It may be used under the terms of the GNU General Public License v2.
7 For full terms see the file COPYING file or visit http://www.gnu.org/licenses/gpl-2.0.html
9 Authors: Peng Gao <peng@multicorewareinc.com> <http://www.multicorewareinc.com/>
10 Li Cao <li@multicorewareinc.com> <http://www.multicorewareinc.com/>
17 #include "oclnv12toyuv.h"
20 * It creates are opencl bufs w is input frame width, h is input frame height
22 static int hb_nv12toyuv_create_cl_buf( KernelEnv
*kenv
, int w
, int h
, hb_va_dxva2_t
*dxva2
);
25 * It creates are opencl kernel. kernel name is nv12toyuv
27 static int hb_nv12toyuv_create_cl_kernel( KernelEnv
*kenv
, hb_va_dxva2_t
*dxva2
);
30 * It set opencl arg, input data,output data, input width, output height
32 static int hb_nv12toyuv_setkernelarg( KernelEnv
*kenv
, int w
, int h
, hb_va_dxva2_t
*dxva2
);
35 * It initialize nv12 to yuv kernel.
37 static int hb_init_nv12toyuv_ocl( KernelEnv
*kenv
, int w
, int h
, hb_va_dxva2_t
*dxva2
);
40 * Run nv12 to yuv kernel.
42 static int hb_nv12toyuv( void **userdata
, KernelEnv
*kenv
);
45 * register nv12 to yuv kernel.
47 static int hb_nv12toyuv_reg_kernel( void );
50 * It creates are opencl bufs w is input frame width, h is input frame height
52 static int hb_nv12toyuv_create_cl_buf( KernelEnv
*kenv
, int w
, int h
, hb_va_dxva2_t
*dxva2
)
56 hb_error("hb_nv12toyuv_create_cl_kernel: OpenCL support not available");
60 cl_int status
= CL_SUCCESS
;
61 int in_bytes
= w
*h
*3/2;
62 HB_OCL_BUF_CREATE(hb_ocl
, dxva2
->cl_mem_nv12
, CL_MEM_READ_ONLY
|CL_MEM_ALLOC_HOST_PTR
, in_bytes
);
63 HB_OCL_BUF_CREATE(hb_ocl
, dxva2
->cl_mem_yuv
, CL_MEM_READ_WRITE
|CL_MEM_ALLOC_HOST_PTR
, in_bytes
);
68 * It creates are opencl kernel. kernel name is nv12toyuv
70 static int hb_nv12toyuv_create_cl_kernel( KernelEnv
*kenv
, hb_va_dxva2_t
*dxva2
)
74 hb_error("hb_nv12toyuv_create_cl_kernel: OpenCL support not available");
79 dxva2
->nv12toyuv
= hb_ocl
->clCreateKernel(kenv
->program
, "nv12toyuv", &ret
);
84 * It set opencl arg, input data,output data, input width, output height
86 static int hb_nv12toyuv_setkernelarg( KernelEnv
*kenv
, int w
, int h
, hb_va_dxva2_t
*dxva2
)
89 kenv
->kernel
= dxva2
->nv12toyuv
;
93 hb_error("hb_nv12toyuv_setkernelarg: OpenCL support not available");
97 HB_OCL_CHECK(hb_ocl
->clSetKernelArg
, kenv
->kernel
, arg
++, sizeof(cl_mem
), &dxva2
->cl_mem_nv12
);
98 HB_OCL_CHECK(hb_ocl
->clSetKernelArg
, kenv
->kernel
, arg
++, sizeof(cl_mem
), &dxva2
->cl_mem_yuv
);
99 HB_OCL_CHECK(hb_ocl
->clSetKernelArg
, kenv
->kernel
, arg
++, sizeof(int), &w
);
100 HB_OCL_CHECK(hb_ocl
->clSetKernelArg
, kenv
->kernel
, arg
++, sizeof(int), &h
);
105 * It initialize nv12 to yuv kernel.
107 static int hb_init_nv12toyuv_ocl( KernelEnv
*kenv
, int w
, int h
, hb_va_dxva2_t
*dxva2
)
109 if( !dxva2
->nv12toyuv
)
111 if( hb_nv12toyuv_create_cl_buf( kenv
, w
, h
, dxva2
) )
113 hb_log( "OpenCL: nv12toyuv_create_cl_buf fail" );
116 if (!dxva2
->nv12toyuv_tmp_in
)
118 dxva2
->nv12toyuv_tmp_in
= malloc (w
*h
*3/2);
121 if (!dxva2
->nv12toyuv_tmp_out
)
123 dxva2
->nv12toyuv_tmp_out
= malloc (w
*h
*3/2);
126 hb_nv12toyuv_create_cl_kernel( kenv
, dxva2
);
139 static uint8_t *copy_plane( uint8_t *dst
, uint8_t* src
, int dstride
, int sstride
,
142 if ( dstride
== sstride
)
144 memcpy( dst
, src
, dstride
* h
);
145 return dst
+ dstride
* h
;
148 int lbytes
= dstride
<= sstride
? dstride
: sstride
;
151 memcpy( dst
, src
, lbytes
);
160 * Run nv12 to yuv kernel.
162 static int hb_nv12toyuv( void **userdata
, KernelEnv
*kenv
)
165 int w
= (int)userdata
[0];
166 int h
= (int)userdata
[1];
167 uint8_t *bufi1
= userdata
[2];
168 int *crop
= userdata
[3];
169 hb_va_dxva2_t
*dxva2
= userdata
[4];
171 uint8_t *bufi2
= userdata
[5];
172 int p
= (int)userdata
[6];
173 int decomb
= (int)userdata
[7];
174 int detelecine
= (int)userdata
[8];
176 if( hb_init_nv12toyuv_ocl( kenv
, w
, h
, dxva2
) )
181 if( hb_nv12toyuv_setkernelarg( kenv
, w
, h
, dxva2
) )
188 hb_error("hb_nv12toyuv: OpenCL support not available");
192 int in_bytes
= w
*h
*3/2;
195 void *data
= hb_ocl
->clEnqueueMapBuffer(kenv
->command_queue
,
197 CL_MAP_WRITE_INVALIDATE_REGION
,
198 CL_TRUE
, 0, in_bytes
, 0, NULL
, NULL
, NULL
);
200 for ( i
= 0; i
< dxva2
->height
; i
++ )
202 memcpy( data
+ i
* dxva2
->width
, bufi1
+ i
* p
, dxva2
->width
);
203 if ( i
< dxva2
->height
>> 1 )
205 memcpy( data
+ ( dxva2
->width
* dxva2
->height
) + i
* dxva2
->width
, bufi2
+ i
* p
, dxva2
->width
);
208 hb_ocl
->clEnqueueUnmapMemObject(kenv
->command_queue
, dxva2
->cl_mem_nv12
,
209 data
, 0, NULL
, NULL
);
213 uint8_t *tmp
= (uint8_t*)malloc( dxva2
->width
* dxva2
->height
* 3 / 2 );
214 for( i
= 0; i
< dxva2
->height
; i
++ )
216 memcpy( tmp
+ i
* dxva2
->width
, bufi1
+ i
* p
, dxva2
->width
);
217 if( i
< dxva2
->height
>> 1 )
219 memcpy( tmp
+ (dxva2
->width
* dxva2
->height
) + i
* dxva2
->width
, bufi2
+ i
* p
, dxva2
->width
);
222 HB_OCL_CHECK(hb_ocl
->clEnqueueWriteBuffer
, kenv
->command_queue
,
223 dxva2
->cl_mem_nv12
, CL_TRUE
, 0, in_bytes
, tmp
, 0, NULL
, NULL
);
227 size_t gdim
[2] = {w
>>1, h
>>1};
228 HB_OCL_CHECK(hb_ocl
->clEnqueueNDRangeKernel
, kenv
->command_queue
,
229 kenv
->kernel
, 2, NULL
, gdim
, NULL
, 0, NULL
, NULL
);
231 if( (crop
[0] || crop
[1] || crop
[2] || crop
[3]) && (decomb
== 0) && (detelecine
== 0) )
235 hb_ocl
->clEnqueueReadBuffer(kenv
->command_queue
, dxva2
->cl_mem_yuv
,
236 CL_TRUE
, 0, in_bytes
, dxva2
->nv12toyuv_tmp_out
,
238 hb_buffer_t
*in
= hb_video_buffer_init( w
, h
);
240 int wmp
= in
->plane
[0].stride
;
241 int hmp
= in
->plane
[0].height
;
242 copy_plane( in
->plane
[0].data
, dxva2
->nv12toyuv_tmp_out
, wmp
, w
, hmp
);
243 wmp
= in
->plane
[1].stride
;
244 hmp
= in
->plane
[1].height
;
245 copy_plane( in
->plane
[1].data
, dxva2
->nv12toyuv_tmp_out
+ w
* h
, wmp
, w
>>1, hmp
);
246 wmp
= in
->plane
[2].stride
;
247 hmp
= in
->plane
[2].height
;
248 copy_plane( in
->plane
[2].data
, dxva2
->nv12toyuv_tmp_out
+ w
* h
+( ( w
* h
)>>2 ), wmp
, w
>>1, hmp
);
250 hb_avpicture_fill( &pic_in
, in
);
251 av_picture_crop( &pic_crop
, &pic_in
, in
->f
.fmt
, crop
[0], crop
[2] );
252 int i
, ww
= w
- ( crop
[2] + crop
[3] ), hh
= h
- ( crop
[0] + crop
[1] );
253 for( i
= 0; i
< hh
>> 1; i
++ )
255 memcpy( dxva2
->nv12toyuv_tmp_in
+ ( ( i
<< 1 ) + 0 ) * ww
, pic_crop
.data
[0]+ ( ( i
<< 1 ) + 0 ) * pic_crop
.linesize
[0], ww
);
256 memcpy( dxva2
->nv12toyuv_tmp_in
+ ( ( i
<< 1 ) + 1 ) * ww
, pic_crop
.data
[0]+ ( ( i
<< 1 ) + 1 ) * pic_crop
.linesize
[0], ww
);
257 memcpy( dxva2
->nv12toyuv_tmp_in
+ ( ww
* hh
) + i
* ( ww
>> 1 ), pic_crop
.data
[1] + i
* pic_crop
.linesize
[1], ww
>> 1 );
258 memcpy( dxva2
->nv12toyuv_tmp_in
+ ( ww
* hh
) + ( ( ww
* hh
)>>2 ) + i
* ( ww
>> 1 ), pic_crop
.data
[2] + i
* pic_crop
.linesize
[2], ww
>> 1 );
263 void *data
= hb_ocl
->clEnqueueMapBuffer(kenv
->command_queue
,
265 CL_MAP_WRITE_INVALIDATE_REGION
,
266 CL_TRUE
, 0, ww
* hh
* 3 / 2, 0,
268 memcpy( data
, dxva2
->nv12toyuv_tmp_in
, ww
* hh
* 3 / 2 );
269 hb_ocl
->clEnqueueUnmapMemObject(kenv
->command_queue
,
270 dxva2
->cl_mem_yuv
, data
, 0, NULL
, NULL
);
274 HB_OCL_CHECK(hb_ocl
->clEnqueueWriteBuffer
, kenv
->command_queue
,
275 dxva2
->cl_mem_yuv
, CL_TRUE
, 0, in_bytes
,
276 dxva2
->nv12toyuv_tmp_in
, 0, NULL
, NULL
);
279 hb_buffer_close( &in
);
284 * register nv12 to yuv kernel.
286 static int hb_nv12toyuv_reg_kernel( void )
288 int st
= hb_register_kernel_wrapper( "nv12toyuv", hb_nv12toyuv
);
291 hb_log( "OpenCL: register kernel[%s] failed", "nv12toyuv" );
297 * nv12 to yuv interface
298 * bufi is input frame of nv12, w is input frame width, h is input frame height
300 int hb_ocl_nv12toyuv( uint8_t *bufi
[], int p
, int w
, int h
, int *crop
, hb_va_dxva2_t
*dxva2
, int decomb
, int detelecine
)
303 userdata
[0] = (void*)w
;
304 userdata
[1] = (void*)h
;
305 userdata
[2] = bufi
[0];
308 userdata
[5] = bufi
[1];
309 userdata
[6] = (void*)p
;
310 userdata
[7] = decomb
;
311 userdata
[8] = detelecine
;
313 if( hb_nv12toyuv_reg_kernel() )
318 if( hb_run_kernel( "nv12toyuv", userdata
) )
320 hb_log( "OpenCL: run kernel[nv12toyuv] failed" );