WinGui: Fix another instance of the Caliburn vs Json.net sillyness where objects...
[HandBrake.git] / libhb / oclnv12toyuv.c
blob6f2cf66e5efa1d61d5ad9e1a2b6d8f612b9fd738
1 /* oclnv12toyuv.c
3 Copyright (c) 2003-2015 HandBrake Team
4 This file is part of the HandBrake source code
5 Homepage: <http://handbrake.fr/>.
6 It may be used under the terms of the GNU General Public License v2.
7 For full terms see the file COPYING file or visit http://www.gnu.org/licenses/gpl-2.0.html
9 Authors: Peng Gao <peng@multicorewareinc.com> <http://www.multicorewareinc.com/>
10 Li Cao <li@multicorewareinc.com> <http://www.multicorewareinc.com/>
13 #ifdef USE_HWD
15 #include "opencl.h"
16 #include "vadxva2.h"
17 #include "oclnv12toyuv.h"
19 /**
20 * It creates are opencl bufs w is input frame width, h is input frame height
22 static int hb_nv12toyuv_create_cl_buf( KernelEnv *kenv, int w, int h, hb_va_dxva2_t *dxva2 );
24 /**
25 * It creates are opencl kernel. kernel name is nv12toyuv
27 static int hb_nv12toyuv_create_cl_kernel( KernelEnv *kenv, hb_va_dxva2_t *dxva2 );
29 /**
30 * It set opencl arg, input data,output data, input width, output height
32 static int hb_nv12toyuv_setkernelarg( KernelEnv *kenv, int w, int h, hb_va_dxva2_t *dxva2 );
34 /**
35 * It initialize nv12 to yuv kernel.
37 static int hb_init_nv12toyuv_ocl( KernelEnv *kenv, int w, int h, hb_va_dxva2_t *dxva2 );
39 /**
40 * Run nv12 to yuv kernel.
42 static int hb_nv12toyuv( void **userdata, KernelEnv *kenv );
44 /**
45 * register nv12 to yuv kernel.
47 static int hb_nv12toyuv_reg_kernel( void );
49 /**
50 * It creates are opencl bufs w is input frame width, h is input frame height
52 static int hb_nv12toyuv_create_cl_buf( KernelEnv *kenv, int w, int h, hb_va_dxva2_t *dxva2 )
54 if (hb_ocl == NULL)
56 hb_error("hb_nv12toyuv_create_cl_kernel: OpenCL support not available");
57 return 1;
60 cl_int status = CL_SUCCESS;
61 int in_bytes = w*h*3/2;
62 HB_OCL_BUF_CREATE(hb_ocl, dxva2->cl_mem_nv12, CL_MEM_READ_ONLY|CL_MEM_ALLOC_HOST_PTR, in_bytes);
63 HB_OCL_BUF_CREATE(hb_ocl, dxva2->cl_mem_yuv, CL_MEM_READ_WRITE|CL_MEM_ALLOC_HOST_PTR, in_bytes);
64 return 0;
67 /**
68 * It creates are opencl kernel. kernel name is nv12toyuv
70 static int hb_nv12toyuv_create_cl_kernel( KernelEnv *kenv, hb_va_dxva2_t *dxva2 )
72 if (hb_ocl == NULL)
74 hb_error("hb_nv12toyuv_create_cl_kernel: OpenCL support not available");
75 return 1;
78 int ret;
79 dxva2->nv12toyuv = hb_ocl->clCreateKernel(kenv->program, "nv12toyuv", &ret);
80 return ret;
83 /**
84 * It set opencl arg, input data,output data, input width, output height
86 static int hb_nv12toyuv_setkernelarg( KernelEnv *kenv, int w, int h, hb_va_dxva2_t *dxva2 )
88 int arg = 0, status;
89 kenv->kernel = dxva2->nv12toyuv;
91 if (hb_ocl == NULL)
93 hb_error("hb_nv12toyuv_setkernelarg: OpenCL support not available");
94 return 1;
97 HB_OCL_CHECK(hb_ocl->clSetKernelArg, kenv->kernel, arg++, sizeof(cl_mem), &dxva2->cl_mem_nv12);
98 HB_OCL_CHECK(hb_ocl->clSetKernelArg, kenv->kernel, arg++, sizeof(cl_mem), &dxva2->cl_mem_yuv);
99 HB_OCL_CHECK(hb_ocl->clSetKernelArg, kenv->kernel, arg++, sizeof(int), &w);
100 HB_OCL_CHECK(hb_ocl->clSetKernelArg, kenv->kernel, arg++, sizeof(int), &h);
101 return 0;
105 * It initialize nv12 to yuv kernel.
107 static int hb_init_nv12toyuv_ocl( KernelEnv *kenv, int w, int h, hb_va_dxva2_t *dxva2 )
109 if( !dxva2->nv12toyuv )
111 if( hb_nv12toyuv_create_cl_buf( kenv, w, h, dxva2 ) )
113 hb_log( "OpenCL: nv12toyuv_create_cl_buf fail" );
114 return -1;
116 if (!dxva2->nv12toyuv_tmp_in)
118 dxva2->nv12toyuv_tmp_in = malloc (w*h*3/2);
121 if (!dxva2->nv12toyuv_tmp_out)
123 dxva2->nv12toyuv_tmp_out = malloc (w*h*3/2);
126 hb_nv12toyuv_create_cl_kernel( kenv, dxva2 );
128 return 0;
132 * copy_plane
133 * @param dst -
134 * @param src -
135 * @param dstride -
136 * @param sstride -
137 * @param h -
139 static uint8_t *copy_plane( uint8_t *dst, uint8_t* src, int dstride, int sstride,
140 int h )
142 if ( dstride == sstride )
144 memcpy( dst, src, dstride * h );
145 return dst + dstride * h;
148 int lbytes = dstride <= sstride? dstride : sstride;
149 while ( --h >= 0 )
151 memcpy( dst, src, lbytes );
152 src += sstride;
153 dst += dstride;
156 return dst;
160 * Run nv12 to yuv kernel.
162 static int hb_nv12toyuv( void **userdata, KernelEnv *kenv )
164 int status;
165 int w = (int)userdata[0];
166 int h = (int)userdata[1];
167 uint8_t *bufi1 = userdata[2];
168 int *crop = userdata[3];
169 hb_va_dxva2_t *dxva2 = userdata[4];
171 uint8_t *bufi2 = userdata[5];
172 int p = (int)userdata[6];
173 int decomb = (int)userdata[7];
174 int detelecine = (int)userdata[8];
175 int i;
176 if( hb_init_nv12toyuv_ocl( kenv, w, h, dxva2 ) )
178 return -1;
181 if( hb_nv12toyuv_setkernelarg( kenv, w, h, dxva2 ) )
183 return -1;
186 if (hb_ocl == NULL)
188 hb_error("hb_nv12toyuv: OpenCL support not available");
189 return -1;
192 int in_bytes = w*h*3/2;
193 if( kenv->isAMD )
195 void *data = hb_ocl->clEnqueueMapBuffer(kenv->command_queue,
196 dxva2->cl_mem_nv12,
197 CL_MAP_WRITE_INVALIDATE_REGION,
198 CL_TRUE, 0, in_bytes, 0, NULL, NULL, NULL);
200 for ( i = 0; i < dxva2->height; i++ )
202 memcpy( data + i * dxva2->width, bufi1 + i * p, dxva2->width );
203 if ( i < dxva2->height >> 1 )
205 memcpy( data + ( dxva2->width * dxva2->height ) + i * dxva2->width, bufi2 + i * p, dxva2->width );
208 hb_ocl->clEnqueueUnmapMemObject(kenv->command_queue, dxva2->cl_mem_nv12,
209 data, 0, NULL, NULL);
211 else
213 uint8_t *tmp = (uint8_t*)malloc( dxva2->width * dxva2->height * 3 / 2 );
214 for( i = 0; i < dxva2->height; i++ )
216 memcpy( tmp + i * dxva2->width, bufi1 + i * p, dxva2->width );
217 if( i < dxva2->height >> 1 )
219 memcpy( tmp + (dxva2->width * dxva2->height) + i * dxva2->width, bufi2 + i * p, dxva2->width );
222 HB_OCL_CHECK(hb_ocl->clEnqueueWriteBuffer, kenv->command_queue,
223 dxva2->cl_mem_nv12, CL_TRUE, 0, in_bytes, tmp, 0, NULL, NULL);
224 free( tmp );
227 size_t gdim[2] = {w>>1, h>>1};
228 HB_OCL_CHECK(hb_ocl->clEnqueueNDRangeKernel, kenv->command_queue,
229 kenv->kernel, 2, NULL, gdim, NULL, 0, NULL, NULL );
231 if( (crop[0] || crop[1] || crop[2] || crop[3]) && (decomb == 0) && (detelecine == 0) )
233 AVPicture pic_in;
234 AVPicture pic_crop;
235 hb_ocl->clEnqueueReadBuffer(kenv->command_queue, dxva2->cl_mem_yuv,
236 CL_TRUE, 0, in_bytes, dxva2->nv12toyuv_tmp_out,
237 0, NULL, NULL);
238 hb_buffer_t *in = hb_video_buffer_init( w, h );
240 int wmp = in->plane[0].stride;
241 int hmp = in->plane[0].height;
242 copy_plane( in->plane[0].data, dxva2->nv12toyuv_tmp_out, wmp, w, hmp );
243 wmp = in->plane[1].stride;
244 hmp = in->plane[1].height;
245 copy_plane( in->plane[1].data, dxva2->nv12toyuv_tmp_out + w * h, wmp, w>>1, hmp );
246 wmp = in->plane[2].stride;
247 hmp = in->plane[2].height;
248 copy_plane( in->plane[2].data, dxva2->nv12toyuv_tmp_out + w * h +( ( w * h )>>2 ), wmp, w>>1, hmp );
250 hb_avpicture_fill( &pic_in, in );
251 av_picture_crop( &pic_crop, &pic_in, in->f.fmt, crop[0], crop[2] );
252 int i, ww = w - ( crop[2] + crop[3] ), hh = h - ( crop[0] + crop[1] );
253 for( i = 0; i< hh >> 1; i++ )
255 memcpy( dxva2->nv12toyuv_tmp_in + ( ( i << 1 ) + 0 ) * ww, pic_crop.data[0]+ ( ( i << 1 ) + 0 ) * pic_crop.linesize[0], ww );
256 memcpy( dxva2->nv12toyuv_tmp_in + ( ( i << 1 ) + 1 ) * ww, pic_crop.data[0]+ ( ( i << 1 ) + 1 ) * pic_crop.linesize[0], ww );
257 memcpy( dxva2->nv12toyuv_tmp_in + ( ww * hh ) + i * ( ww >> 1 ), pic_crop.data[1] + i * pic_crop.linesize[1], ww >> 1 );
258 memcpy( dxva2->nv12toyuv_tmp_in + ( ww * hh ) + ( ( ww * hh )>>2 ) + i * ( ww >> 1 ), pic_crop.data[2] + i * pic_crop.linesize[2], ww >> 1 );
261 if( kenv->isAMD )
263 void *data = hb_ocl->clEnqueueMapBuffer(kenv->command_queue,
264 dxva2->cl_mem_yuv,
265 CL_MAP_WRITE_INVALIDATE_REGION,
266 CL_TRUE, 0, ww * hh * 3 / 2, 0,
267 NULL, NULL, NULL);
268 memcpy( data, dxva2->nv12toyuv_tmp_in, ww * hh * 3 / 2 );
269 hb_ocl->clEnqueueUnmapMemObject(kenv->command_queue,
270 dxva2->cl_mem_yuv, data, 0, NULL, NULL);
272 else
274 HB_OCL_CHECK(hb_ocl->clEnqueueWriteBuffer, kenv->command_queue,
275 dxva2->cl_mem_yuv, CL_TRUE, 0, in_bytes,
276 dxva2->nv12toyuv_tmp_in, 0, NULL, NULL);
279 hb_buffer_close( &in );
281 return 0;
284 * register nv12 to yuv kernel.
286 static int hb_nv12toyuv_reg_kernel( void )
288 int st = hb_register_kernel_wrapper( "nv12toyuv", hb_nv12toyuv );
289 if( !st )
291 hb_log( "OpenCL: register kernel[%s] failed", "nv12toyuv" );
292 return -1;
294 return 0;
297 * nv12 to yuv interface
298 * bufi is input frame of nv12, w is input frame width, h is input frame height
300 int hb_ocl_nv12toyuv( uint8_t *bufi[], int p, int w, int h, int *crop, hb_va_dxva2_t *dxva2, int decomb, int detelecine )
302 void *userdata[9];
303 userdata[0] = (void*)w;
304 userdata[1] = (void*)h;
305 userdata[2] = bufi[0];
306 userdata[3] = crop;
307 userdata[4] = dxva2;
308 userdata[5] = bufi[1];
309 userdata[6] = (void*)p;
310 userdata[7] = decomb;
311 userdata[8] = detelecine;
313 if( hb_nv12toyuv_reg_kernel() )
315 return -1;
318 if( hb_run_kernel( "nv12toyuv", userdata ) )
320 hb_log( "OpenCL: run kernel[nv12toyuv] failed" );
321 return -1;
323 return 0;
326 #endif // USE_HWD