WinGui: Fix another instance of the Caliburn vs Json.net sillyness where objects...
[HandBrake.git] / libhb / openclwrapper.c
blob9621d3a97191ebaf16c21e8adfa192250aa01f7f
/* openclwrapper.c

   Copyright (c) 2003-2015 HandBrake Team
   This file is part of the HandBrake source code
   Homepage: <http://handbrake.fr/>.
   It may be used under the terms of the GNU General Public License v2.
   For full terms see the file COPYING file or visit http://www.gnu.org/licenses/gpl-2.0.html

   Authors: Peng Gao <peng@multicorewareinc.com> <http://www.multicorewareinc.com/>
            Li Cao <li@multicorewareinc.com> <http://www.multicorewareinc.com/>
 */
13 #include <stdio.h>
14 #include <stdlib.h>
15 #include <string.h>
16 #include "extras/cl.h"
17 #include "opencl.h"
18 #include "openclwrapper.h"
19 #include "openclkernels.h"
/* When defined, hb_compile_kernel_file() reads the kernel source from the
 * file passed to it instead of using the strings from openclkernels.h. */
//#define USE_EXTERNAL_KERNEL

#ifdef SYS_MINGW
#include <windows.h>
#endif

#if defined(_MSC_VER)
#define strcasecmp strcmpi
#endif

/* Capacity limits of the static tables in this file. */
#define MAX_KERNEL_STRING_LEN 64
#define MAX_CLFILE_NUM        50
#define MAX_CLKERNEL_NUM      200
#define MAX_CLFILE_PATH       255
#define MAX_KERNEL_NUM        50
#define MAX_KERNEL_NAME_LEN   64

/* Fallback for platforms whose headers do not provide it. */
#ifndef INVALID_HANDLE_VALUE
#define INVALID_HANDLE_VALUE NULL
#endif

//#define THREAD_PRIORITY_TIME_CRITICAL 15
/* Vendor class recorded for the selected OpenCL platform. */
enum VENDOR
{
    AMD    = 0,
    Intel  = 1,
    NVIDIA = 2,
    others = 3
};
50 typedef struct _GPUEnv
52 //share vb in all modules in hb library
53 cl_platform_id platform;
54 cl_device_type dType;
55 cl_context context;
56 cl_device_id * devices;
57 cl_device_id dev;
58 cl_command_queue command_queue;
59 cl_kernel kernels[MAX_CLFILE_NUM];
60 cl_program programs[MAX_CLFILE_NUM]; //one program object maps one kernel source file
61 char kernelSrcFile[MAX_CLFILE_NUM][256]; //the max len of kernel file name is 256
62 int file_count; // only one kernel file
64 char kernel_names[MAX_CLKERNEL_NUM][MAX_KERNEL_STRING_LEN+1];
65 cl_kernel_function kernel_functions[MAX_CLKERNEL_NUM];
66 int kernel_count;
67 int isUserCreated; // 1: created , 0:no create and needed to create by opencl wrapper
68 enum VENDOR vendor;
69 }GPUEnv;
71 typedef struct
73 char kernelName[MAX_KERNEL_NAME_LEN+1];
74 char * kernelStr;
75 }hb_kernel_node;
77 static GPUEnv gpu_env;
78 static int isInited = 0;
79 static int useBuffers = 0;
80 static hb_kernel_node gKernels[MAX_KERNEL_NUM];
/*
 * Registers kernel name `s` (with optional source string `p`) in slot `idx`
 * of both gKernels[] and gpu_env.kernel_names[], then increments
 * gpu_env.kernel_count.
 *
 * The body is wrapped in do { } while (0) so the macro expands to a single
 * statement and remains valid inside an unbraced if/else. The name copies
 * are bounded by the size of the destination arrays (over-long names are
 * truncated instead of overflowing). `idx` and `s` are evaluated more than
 * once, so pass side-effect-free arguments.
 */
#define HB_OCL_ADD_KERNEL_CFG(idx, s, p)                                \
    do                                                                  \
    {                                                                   \
        snprintf(gKernels[(idx)].kernelName,                            \
                 sizeof(gKernels[(idx)].kernelName), "%s", (s));        \
        gKernels[(idx)].kernelStr = (p);                                \
        snprintf(gpu_env.kernel_names[(idx)],                           \
                 sizeof(gpu_env.kernel_names[(idx)]), "%s", (s));       \
        gpu_env.kernel_count++;                                         \
    } while (0)
90 /**
91 * hb_regist_opencl_kernel
93 int hb_regist_opencl_kernel()
95 //if( !gpu_env.isUserCreated )
96 // memset( &gpu_env, 0, sizeof(gpu_env) );
97 //Comment for posterity: When in doubt just zero out a structure full of pointers to allocated resources.
99 gpu_env.file_count = 0; //argc;
100 gpu_env.kernel_count = 0UL;
102 HB_OCL_ADD_KERNEL_CFG(0, "frame_scale", NULL);
103 HB_OCL_ADD_KERNEL_CFG(1, "yadif_filter", NULL);
105 return 0;
109 * hb_regist_opencl_kernel
110 * @param filename -
111 * @param source -
112 * @param gpu_info -
113 * @param int idx -
115 int hb_convert_to_string( const char *filename, char **source, GPUEnv *gpu_info, int idx )
117 int file_size;
118 size_t result;
119 FILE * file = NULL;
120 file_size = 0;
121 result = 0;
122 file = fopen( filename, "rb+" );
124 if( file!=NULL )
126 fseek( file, 0, SEEK_END );
128 file_size = ftell( file );
129 rewind( file );
130 *source = (char*)malloc( sizeof(char) * file_size + 1 );
131 if( *source == (char*)NULL )
133 return(0);
135 result = fread( *source, 1, file_size, file );
136 if( result != file_size )
138 free( *source );
139 return(0);
141 (*source)[file_size] = '\0';
142 fclose( file );
144 return(1);
146 return(0);
150 * hb_binary_generated
151 * @param context -
152 * @param cl_file_name -
153 * @param fhandle -
155 int hb_binary_generated( cl_context context, const char * cl_file_name, FILE ** fhandle )
157 int i = 0;
158 cl_int status;
159 cl_uint numDevices;
160 cl_device_id *devices;
161 char * str = NULL;
162 FILE * fd = NULL;
164 if (hb_ocl == NULL)
166 hb_error("hb_binary_generated: OpenCL support not available");
167 return 0;
170 status = hb_ocl->clGetContextInfo(context, CL_CONTEXT_NUM_DEVICES,
171 sizeof(numDevices), &numDevices, NULL);
172 if( status != CL_SUCCESS )
174 hb_log( "OpenCL: Get context info failed" );
175 return 0;
178 devices = (cl_device_id*)malloc( sizeof(cl_device_id) * numDevices );
179 if( devices == NULL )
181 hb_log( "OpenCL: No device found" );
182 return 0;
185 /* grab the handles to all of the devices in the context. */
186 status = hb_ocl->clGetContextInfo(context, CL_CONTEXT_DEVICES,
187 sizeof(cl_device_id) * numDevices,
188 devices, NULL);
190 status = 0;
191 /* dump out each binary into its own separate file. */
192 for (i = 0; i < numDevices; i++)
194 char fileName[256] = { 0 };
195 char cl_name[128] = { 0 };
196 if (devices[i])
198 char deviceName[1024];
199 status = hb_ocl->clGetDeviceInfo(devices[i], CL_DEVICE_NAME,
200 sizeof(deviceName), deviceName, NULL);
202 str = (char*)strstr(cl_file_name, ".cl");
203 memcpy(cl_name, cl_file_name, str - cl_file_name);
204 cl_name[str - cl_file_name] = '\0';
205 sprintf(fileName, "./%s - %s.bin", cl_name, deviceName);
206 fd = fopen(fileName, "rb");
207 status = fd != NULL;
211 if( devices != NULL )
213 free( devices );
214 devices = NULL;
217 if( fd != NULL )
218 *fhandle = fd;
220 return status;
/**
 * hb_write_binary_to_file
 *
 * Writes `numBytes` bytes from `birary` to `fileName`, replacing any
 * existing file.
 *
 * @param fileName - path of the file to create
 * @param birary   - data to write
 * @param numBytes - number of bytes to write
 * @return 1 if every byte was written and the file closed cleanly,
 *         0 otherwise
 */
int hb_write_binary_to_file( const char* fileName, const char* birary, size_t numBytes )
{
    FILE  *output;
    size_t written;

    output = fopen( fileName, "wb" );
    if( output == NULL )
    {
        return 0;
    }

    written = fwrite( birary, sizeof(char), numBytes, output );

    /* fclose() flushes buffered data, so its result matters as well. */
    if( fclose( output ) != 0 )
    {
        return 0;
    }

    return written == numBytes;
}
243 * hb_generat_bin_from_kernel_source
244 * @param program -
245 * @param cl_file_name -
247 int hb_generat_bin_from_kernel_source( cl_program program, const char * cl_file_name )
249 int i = 0;
250 cl_int status;
251 cl_uint numDevices;
252 size_t *binarySizes;
253 cl_device_id *devices;
254 char **binaries;
255 char *str = NULL;
257 if (hb_ocl == NULL)
259 hb_error("hb_generat_bin_from_kernel_source: OpenCL support not available");
260 return 0;
263 status = hb_ocl->clGetProgramInfo(program, CL_PROGRAM_NUM_DEVICES,
264 sizeof(numDevices), &numDevices, NULL);
265 if( status != CL_SUCCESS )
267 hb_log("OpenCL: hb_generat_bin_from_kernel_source: clGetProgramInfo for CL_PROGRAM_NUM_DEVICES failed");
268 return 0;
271 devices = (cl_device_id*)malloc( sizeof(cl_device_id) * numDevices );
272 if( devices == NULL )
274 hb_log("OpenCL: hb_generat_bin_from_kernel_source: no device found");
275 return 0;
278 /* grab the handles to all of the devices in the program. */
279 status = hb_ocl->clGetProgramInfo(program, CL_PROGRAM_DEVICES,
280 sizeof(cl_device_id) * numDevices,
281 devices, NULL);
282 if( status != CL_SUCCESS )
284 hb_log("OpenCL: hb_generat_bin_from_kernel_source: clGetProgramInfo for CL_PROGRAM_DEVICES failed");
285 return 0;
288 /* figure out the sizes of each of the binaries. */
289 binarySizes = (size_t*)malloc( sizeof(size_t) * numDevices );
291 status = hb_ocl->clGetProgramInfo(program, CL_PROGRAM_BINARY_SIZES,
292 sizeof(size_t) * numDevices,
293 binarySizes, NULL);
294 if( status != CL_SUCCESS )
296 hb_log("OpenCL: hb_generat_bin_from_kernel_source: clGetProgramInfo for CL_PROGRAM_BINARY_SIZES failed");
297 return 0;
300 /* copy over all of the generated binaries. */
301 binaries = (char**)malloc( sizeof(char *) * numDevices );
302 if( binaries == NULL )
304 hb_log("OpenCL: hb_generat_bin_from_kernel_source: malloc for binaries failed");
305 return 0;
308 for( i = 0; i < numDevices; i++ )
310 if( binarySizes[i] != 0 )
312 binaries[i] = (char*)malloc( sizeof(char) * binarySizes[i] );
313 if( binaries[i] == NULL )
315 hb_log("OpenCL: hb_generat_bin_from_kernel_source: malloc for binaries[%d] failed", i);
316 return 0;
319 else
321 binaries[i] = NULL;
325 status = hb_ocl->clGetProgramInfo(program, CL_PROGRAM_BINARIES,
326 sizeof(char *) * numDevices,
327 binaries, NULL);
328 if( status != CL_SUCCESS )
330 hb_log("OpenCL: hb_generat_bin_from_kernel_source: clGetProgramInfo for CL_PROGRAM_BINARIES failed");
331 return 0;
334 /* dump out each binary into its own separate file. */
335 for (i = 0; i < numDevices; i++)
337 char fileName[256] = {0};
338 char cl_name[128] = {0};
339 if (binarySizes[i])
341 char deviceName[1024];
342 status = hb_ocl->clGetDeviceInfo(devices[i], CL_DEVICE_NAME,
343 sizeof(deviceName), deviceName,
344 NULL);
346 str = (char*)strstr( cl_file_name, (char*)".cl" );
347 memcpy(cl_name, cl_file_name, str - cl_file_name);
348 cl_name[str - cl_file_name] = '\0';
349 sprintf(fileName, "./%s - %s.bin", cl_name, deviceName);
351 if (!hb_write_binary_to_file(fileName, binaries[i], binarySizes[i]))
353 hb_log("OpenCL: hb_generat_bin_from_kernel_source: unable to write kernel, writing to temporary directory instead.");
354 return 0;
359 // Release all resouces and memory
360 for( i = 0; i < numDevices; i++ )
362 if( binaries[i] != NULL )
364 free( binaries[i] );
365 binaries[i] = NULL;
369 if( binaries != NULL )
371 free( binaries );
372 binaries = NULL;
375 if( binarySizes != NULL )
377 free( binarySizes );
378 binarySizes = NULL;
381 if( devices != NULL )
383 free( devices );
384 devices = NULL;
386 return 1;
391 * hb_init_opencl_attr
392 * @param env -
394 int hb_init_opencl_attr( OpenCLEnv * env )
396 if( gpu_env.isUserCreated )
397 return 1;
399 gpu_env.context = env->context;
400 gpu_env.platform = env->platform;
401 gpu_env.dev = env->devices;
402 gpu_env.command_queue = env->command_queue;
404 gpu_env.isUserCreated = 1;
406 return 0;
410 * hb_create_kernel
411 * @param kernelname -
412 * @param env -
414 int hb_create_kernel( char * kernelname, KernelEnv * env )
416 int status;
418 if (hb_ocl == NULL)
420 hb_error("hb_create_kernel: OpenCL support not available");
421 return 0;
424 env->kernel = hb_ocl->clCreateKernel(gpu_env.programs[0], kernelname, &status);
425 env->context = gpu_env.context;
426 env->command_queue = gpu_env.command_queue;
427 return status != CL_SUCCESS ? 1 : 0;
431 * hb_release_kernel
432 * @param env -
434 int hb_release_kernel( KernelEnv * env )
436 if (hb_ocl == NULL)
438 hb_error("hb_release_kernel: OpenCL support not available");
439 return 0;
442 int status = hb_ocl->clReleaseKernel(env->kernel);
443 return status != CL_SUCCESS ? 1 : 0;
447 * hb_init_opencl_env
448 * @param gpu_info -
451 static int init_once = 0;
452 int hb_init_opencl_env( GPUEnv *gpu_info )
454 size_t length;
455 cl_int status;
456 cl_uint numPlatforms, numDevices;
457 cl_platform_id *platforms;
458 cl_context_properties cps[3];
459 char platformName[100];
460 unsigned int i;
461 void *handle = INVALID_HANDLE_VALUE;
463 if (init_once != 0)
464 return 0;
465 else
466 init_once = 1;
468 if (hb_ocl == NULL)
470 hb_error("hb_init_opencl_env: OpenCL support not available");
471 return 1;
475 * Have a look at the available platforms.
477 if( !gpu_info->isUserCreated )
479 status = hb_ocl->clGetPlatformIDs(0, NULL, &numPlatforms);
480 if( status != CL_SUCCESS )
482 hb_log( "OpenCL: OpenCL device platform not found." );
483 return(1);
486 gpu_info->platform = NULL;
487 if( 0 < numPlatforms )
489 platforms = (cl_platform_id*)malloc(
490 numPlatforms * sizeof(cl_platform_id));
491 if( platforms == (cl_platform_id*)NULL )
493 return(1);
495 status = hb_ocl->clGetPlatformIDs(numPlatforms, platforms, NULL);
497 if( status != CL_SUCCESS )
499 hb_log( "OpenCL: Specific opencl platform not found." );
500 return(1);
503 for( i = 0; i < numPlatforms; i++ )
505 status = hb_ocl->clGetPlatformInfo(platforms[i], CL_PLATFORM_VENDOR,
506 sizeof(platformName), platformName, NULL);
508 if( status != CL_SUCCESS )
510 continue;
512 gpu_info->platform = platforms[i];
514 if (!strcmp(platformName, "Advanced Micro Devices, Inc.") ||
515 !strcmp(platformName, "AMD"))
516 gpu_info->vendor = AMD;
517 else
518 gpu_info->vendor = others;
520 gpu_info->platform = platforms[i];
522 status = hb_ocl->clGetDeviceIDs(gpu_info->platform /* platform */,
523 CL_DEVICE_TYPE_GPU /* device_type */,
524 0 /* num_entries */,
525 NULL /* devices */, &numDevices);
527 if( status != CL_SUCCESS )
529 continue;
532 if( numDevices )
533 break;
536 free( platforms );
539 if( NULL == gpu_info->platform )
541 hb_log( "OpenCL: No OpenCL-compatible GPU found." );
542 return(1);
545 if( status != CL_SUCCESS )
547 hb_log( "OpenCL: No OpenCL-compatible GPU found." );
548 return(1);
552 * Use available platform.
554 cps[0] = CL_CONTEXT_PLATFORM;
555 cps[1] = (cl_context_properties)gpu_info->platform;
556 cps[2] = 0;
557 /* Check for GPU. */
558 gpu_info->dType = CL_DEVICE_TYPE_GPU;
559 gpu_info->context = hb_ocl->clCreateContextFromType(cps, gpu_info->dType,
560 NULL, NULL, &status);
562 if( (gpu_info->context == (cl_context)NULL) || (status != CL_SUCCESS) )
564 gpu_info->dType = CL_DEVICE_TYPE_CPU;
565 gpu_info->context = hb_ocl->clCreateContextFromType(cps, gpu_info->dType,
566 NULL, NULL, &status);
569 if( (gpu_info->context == (cl_context)NULL) || (status != CL_SUCCESS) )
571 gpu_info->dType = CL_DEVICE_TYPE_DEFAULT;
572 gpu_info->context = hb_ocl->clCreateContextFromType(cps, gpu_info->dType,
573 NULL, NULL, &status);
576 if( (gpu_info->context == (cl_context)NULL) || (status != CL_SUCCESS) )
578 hb_log( "OpenCL: Unable to create opencl context." );
579 return(1);
582 /* Detect OpenCL devices. */
583 /* First, get the size of device list data */
584 status = hb_ocl->clGetContextInfo(gpu_info->context, CL_CONTEXT_DEVICES,
585 0, NULL, &length);
586 if((status != CL_SUCCESS) || (length == 0))
588 hb_log( "OpenCL: Unable to get the list of devices in context." );
589 return(1);
592 /* Now allocate memory for device list based on the size we got earlier */
593 gpu_info->devices = (cl_device_id*)malloc( length );
594 if( gpu_info->devices == (cl_device_id*)NULL )
596 return(1);
599 /* Now, get the device list data */
600 status = hb_ocl->clGetContextInfo(gpu_info->context, CL_CONTEXT_DEVICES,
601 length, gpu_info->devices, NULL);
602 if( status != CL_SUCCESS )
604 hb_log( "OpenCL: Unable to get the device list data in context." );
605 return(1);
608 /* Create OpenCL command queue. */
609 gpu_info->command_queue = hb_ocl->clCreateCommandQueue(gpu_info->context,
610 gpu_info->devices[0],
611 0, &status);
612 if( status != CL_SUCCESS )
614 hb_log( "OpenCL: Unable to create opencl command queue." );
615 return(1);
619 if ((CL_SUCCESS == hb_ocl->clGetCommandQueueInfo(gpu_info->command_queue,
620 CL_QUEUE_THREAD_HANDLE_AMD,
621 sizeof(handle), &handle, NULL)) &&
622 (INVALID_HANDLE_VALUE != handle))
624 #ifdef SYS_MINGW
625 SetThreadPriority( handle, THREAD_PRIORITY_TIME_CRITICAL );
626 #endif
629 return 0;
634 * hb_release_opencl_env
635 * @param gpu_info -
637 int hb_release_opencl_env( GPUEnv *gpu_info )
639 if( !isInited )
640 return 1;
641 int i;
643 if (hb_ocl == NULL)
645 hb_error("hb_release_opencl_env: OpenCL support not available");
646 return 0;
649 for( i = 0; i<gpu_env.file_count; i++ )
651 if( gpu_env.programs[i] )
653 hb_ocl->clReleaseProgram(gpu_env.programs[i]);
654 gpu_env.programs[i] = NULL;
658 if( gpu_env.command_queue )
660 hb_ocl->clReleaseCommandQueue(gpu_env.command_queue);
661 gpu_env.command_queue = NULL;
664 if( gpu_env.context )
666 hb_ocl->clReleaseContext(gpu_env.context);
667 gpu_env.context = NULL;
670 isInited = 0;
671 useBuffers = 0;
672 gpu_info->isUserCreated = 0;
674 return 1;
679 * hb_register_kernel_wrapper
680 * @param kernel_name -
681 * @param function -
683 int hb_register_kernel_wrapper( const char *kernel_name, cl_kernel_function function )
685 int i;
686 for( i = 0; i < gpu_env.kernel_count; i++ )
688 if( strcasecmp( kernel_name, gpu_env.kernel_names[i] ) == 0 )
690 gpu_env.kernel_functions[i] = function;
691 return(1);
694 return(0);
698 * hb_cached_of_kerner_prg
699 * @param gpu_env -
700 * @param cl_file_name -
702 int hb_cached_of_kerner_prg( const GPUEnv *gpu_env, const char * cl_file_name )
704 int i;
705 for( i = 0; i < gpu_env->file_count; i++ )
707 if( strcasecmp( gpu_env->kernelSrcFile[i], cl_file_name ) == 0 )
709 if( gpu_env->programs[i] != NULL )
710 return(1);
714 return(0);
718 * hb_compile_kernel_file
719 * @param filename -
720 * @param gpu_info -
721 * @param indx -
722 * @param build_option -
724 int hb_compile_kernel_file( const char *filename, GPUEnv *gpu_info,
725 int indx, const char *build_option )
727 cl_int status;
728 size_t length;
729 char *source_str;
730 const char *source;
731 size_t source_size[1];
732 char *buildLog = NULL;
733 int b_error, binary_status, binaryExisted;
734 char * binary;
735 cl_uint numDevices;
736 cl_device_id *devices;
737 FILE * fd;
738 FILE * fd1;
739 int idx;
741 if( hb_cached_of_kerner_prg( gpu_info, filename ) == 1 )
742 return (1);
744 idx = gpu_info->file_count;
746 #ifdef USE_EXTERNAL_KERNEL
747 status = hb_convert_to_string( filename, &source_str, gpu_info, idx );
748 if( status == 0 )
749 return(0);
750 #else
751 int kernel_src_size = strlen(kernel_src_scale) + strlen(kernel_src_yadif_filter);
753 // char *scale_src;
754 // status = hb_convert_to_string("./scale_kernels.cl", &scale_src, gpu_info, idx);
755 // if (status != 0)
756 // kernel_src_size += strlen(scale_src);
758 source_str = (char*)malloc( kernel_src_size + 2 );
759 strcpy( source_str, kernel_src_scale );
760 // strcat( source_str, scale_src ); //
761 strcat( source_str, kernel_src_yadif_filter );
762 #endif
764 source = source_str;
765 source_size[0] = strlen( source );
767 if (hb_ocl == NULL)
769 hb_error("hb_compile_kernel_file: OpenCL support not available");
770 return 0;
773 if ((binaryExisted = hb_binary_generated(gpu_info->context, filename, &fd)) == 1)
775 status = hb_ocl->clGetContextInfo(gpu_info->context, CL_CONTEXT_NUM_DEVICES,
776 sizeof(numDevices), &numDevices, NULL);
777 if (status != CL_SUCCESS)
779 hb_log("OpenCL: Unable to get the number of devices in context.");
780 return 0;
783 devices = (cl_device_id*)malloc(sizeof(cl_device_id) * numDevices);
784 if (devices == NULL)
785 return 0;
787 length = 0;
788 b_error = 0;
789 b_error |= fseek(fd, 0, SEEK_END) < 0;
790 b_error |= (length = ftell(fd)) <= 0;
791 b_error |= fseek(fd, 0, SEEK_SET) < 0;
792 if (b_error)
793 return 0;
795 binary = (char*)calloc(length + 2, sizeof(char));
796 if (binary == NULL)
797 return 0;
799 b_error |= fread(binary, 1, length, fd) != length;
800 #if 0 // this doesn't work under OS X and/or with some non-AMD GPUs
801 if (binary[length-1] != '\n')
802 binary[length++] = '\n';
803 #endif
805 if (b_error)
806 return 0;
808 /* grab the handles to all of the devices in the context. */
809 status = hb_ocl->clGetContextInfo(gpu_info->context, CL_CONTEXT_DEVICES,
810 sizeof(cl_device_id) * numDevices,
811 devices, NULL);
813 gpu_info->programs[idx] = hb_ocl->clCreateProgramWithBinary(gpu_info->context,
814 numDevices,
815 devices,
816 &length,
817 (const unsigned char**)&binary,
818 &binary_status,
819 &status);
821 fclose(fd);
822 free(devices);
823 fd = NULL;
824 devices = NULL;
826 else
828 /* create a CL program using the kernel source */
829 gpu_info->programs[idx] = hb_ocl->clCreateProgramWithSource(gpu_info->context, 1,
830 &source, source_size,
831 &status);
834 if((gpu_info->programs[idx] == (cl_program)NULL) || (status != CL_SUCCESS)){
835 hb_log( "OpenCL: Unable to get list of devices in context." );
836 return(0);
839 /* create a cl program executable for all the devices specified */
840 if( !gpu_info->isUserCreated )
842 status = hb_ocl->clBuildProgram(gpu_info->programs[idx], 1, gpu_info->devices,
843 build_option, NULL, NULL);
845 else
847 status = hb_ocl->clBuildProgram(gpu_info->programs[idx], 1, &(gpu_info->dev),
848 build_option, NULL, NULL);
851 if( status != CL_SUCCESS )
853 if( !gpu_info->isUserCreated )
855 status = hb_ocl->clGetProgramBuildInfo(gpu_info->programs[idx],
856 gpu_info->devices[0],
857 CL_PROGRAM_BUILD_LOG,
858 0, NULL, &length);
860 else
862 status = hb_ocl->clGetProgramBuildInfo(gpu_info->programs[idx],
863 gpu_info->dev,
864 CL_PROGRAM_BUILD_LOG,
865 0, NULL, &length);
868 if( status != CL_SUCCESS )
870 hb_log( "OpenCL: Unable to get GPU build information." );
871 return(0);
874 buildLog = (char*)malloc( length );
875 if( buildLog == (char*)NULL )
877 return(0);
880 if( !gpu_info->isUserCreated )
882 status = hb_ocl->clGetProgramBuildInfo(gpu_info->programs[idx],
883 gpu_info->devices[0],
884 CL_PROGRAM_BUILD_LOG,
885 length, buildLog, &length);
887 else
889 status = hb_ocl->clGetProgramBuildInfo(gpu_info->programs[idx],
890 gpu_info->dev,
891 CL_PROGRAM_BUILD_LOG,
892 length, buildLog, &length);
895 fd1 = fopen( "kernel-build.log", "w+" );
896 if( fd1 != NULL ) {
897 fwrite( buildLog, sizeof(char), length, fd1 );
898 fclose( fd1 );
901 free( buildLog );
902 return(0);
905 strcpy( gpu_env.kernelSrcFile[idx], filename );
907 if (binaryExisted != 1)
909 //hb_generat_bin_from_kernel_source(gpu_env.programs[idx], filename);
912 gpu_info->file_count += 1;
914 return(1);
919 * hb_get_kernel_env_and_func
920 * @param kernel_name -
921 * @param env -
922 * @param function -
924 int hb_get_kernel_env_and_func( const char *kernel_name,
925 KernelEnv *env,
926 cl_kernel_function *function )
928 int i;
929 for( i = 0; i < gpu_env.kernel_count; i++ )
931 if( strcasecmp( kernel_name, gpu_env.kernel_names[i] ) == 0 )
933 env->context = gpu_env.context;
934 env->command_queue = gpu_env.command_queue;
935 env->program = gpu_env.programs[0];
936 env->kernel = gpu_env.kernels[i];
937 env->isAMD = ( gpu_env.vendor == AMD ) ? 1 : 0;
938 *function = gpu_env.kernel_functions[i];
939 return(1);
942 return(0);
946 * hb_get_kernel_env_and_func
947 * @param kernel_name -
948 * @param userdata -
950 int hb_run_kernel( const char *kernel_name, void **userdata )
952 KernelEnv env;
953 cl_kernel_function function;
954 int status;
955 memset( &env, 0, sizeof(KernelEnv));
956 status = hb_get_kernel_env_and_func( kernel_name, &env, &function );
957 strcpy( env.kernel_name, kernel_name );
958 if( status == 1 )
960 return(function( userdata, &env ));
963 return(0);
967 * hb_init_opencl_run_env
968 * @param argc -
969 * @param argv -
970 * @param build_option -
972 int hb_init_opencl_run_env( int argc, char **argv, const char *build_option )
974 int status = 0;
975 if( MAX_CLKERNEL_NUM <= 0 )
977 return 1;
980 if((argc > MAX_CLFILE_NUM) || (argc<0))
982 return 1;
985 if( !isInited )
987 hb_regist_opencl_kernel();
989 /*initialize devices, context, comand_queue*/
990 status = hb_init_opencl_env( &gpu_env );
991 if( status )
992 return(1);
994 /*initialize program, kernel_name, kernel_count*/
995 status = hb_compile_kernel_file("hb-opencl-kernels.cl",
996 &gpu_env, 0, build_option);
998 if( status == 0 || gpu_env.kernel_count == 0 )
1000 return(1);
1004 useBuffers = 1;
1005 isInited = 1;
1008 return(0);
1012 * hb_release_opencl_run_env
1014 int hb_release_opencl_run_env()
1016 return hb_release_opencl_env( &gpu_env );
1020 * hb_opencl_stats
1022 int hb_opencl_stats()
1024 return isInited;
1028 * hb_get_opencl_env
1030 int hb_get_opencl_env()
1032 /* initialize devices, context, command_queue */
1033 return hb_init_opencl_env(&gpu_env);
1037 * hb_create_buffer
1038 * @param cl_inBuf -
1039 * @param flags -
1040 * @param size -
1042 int hb_create_buffer( cl_mem *cl_Buf, int flags, int size )
1044 int status;
1046 if (hb_ocl == NULL)
1048 hb_error("hb_create_buffer: OpenCL support not available");
1049 return 0;
1052 *cl_Buf = hb_ocl->clCreateBuffer(gpu_env.context, flags, size, NULL, &status);
1054 if( status != CL_SUCCESS )
1056 hb_log( "OpenCL: clCreateBuffer error '%d'", status );
1057 return 0;
1060 return 1;
1065 * hb_read_opencl_buffer
1066 * @param cl_inBuf -
1067 * @param outbuf -
1068 * @param size -
1070 int hb_read_opencl_buffer( cl_mem cl_inBuf, unsigned char *outbuf, int size )
1072 int status;
1074 if (hb_ocl == NULL)
1076 hb_error("hb_read_opencl_suffer: OpenCL support not available");
1077 return 0;
1080 status = hb_ocl->clEnqueueReadBuffer(gpu_env.command_queue, cl_inBuf,
1081 CL_TRUE, 0, size, outbuf, 0, 0, 0);
1082 if( status != CL_SUCCESS )
1084 hb_log( "OpenCL: av_read_opencl_buffer error '%d'", status );
1085 return 0;
1088 return 1;
1091 int hb_cl_create_mapped_buffer(cl_mem *mem, unsigned char **addr, int size)
1093 int status;
1094 int flags = CL_MEM_ALLOC_HOST_PTR;
1096 if (hb_ocl == NULL)
1098 hb_error("hb_cl_create_mapped_buffer: OpenCL support not available");
1099 return 0;
1102 //cl_event event;
1103 *mem = hb_ocl->clCreateBuffer(gpu_env.context, flags, size, NULL, &status);
1104 *addr = hb_ocl->clEnqueueMapBuffer(gpu_env.command_queue, *mem, CL_TRUE,
1105 CL_MAP_READ|CL_MAP_WRITE, 0, size, 0,
1106 NULL, NULL/*&event*/, &status);
1108 //hb_log("\t **** context: %.8x cmdqueue: %.8x cl_mem: %.8x mapaddr: %.8x size: %d status: %d", gpu_env.context, gpu_env.command_queue, mem, addr, size, status);
1110 return (status == CL_SUCCESS) ? 1 : 0;
1113 int hb_cl_free_mapped_buffer(cl_mem mem, unsigned char *addr)
1115 cl_event event;
1117 if (hb_ocl == NULL)
1119 hb_error("hb_cl_free_mapped_buffer: OpenCL support not available");
1120 return 0;
1123 int status = hb_ocl->clEnqueueUnmapMemObject(gpu_env.command_queue, mem,
1124 addr, 0, NULL, &event);
1125 if (status == CL_SUCCESS)
1126 hb_ocl->clWaitForEvents(1, &event);
1127 else
1128 hb_log("hb_free_mapped_buffer: error %d", status);
1129 return (status == CL_SUCCESS) ? 1 : 0;
/* Initialises the shared OpenCL environment; the result of
 * hb_get_opencl_env() is intentionally discarded. */
void hb_opencl_init()
{
    (void)hb_get_opencl_env();
}
1137 int hb_use_buffers()
1139 return useBuffers;
1142 int hb_copy_buffer(cl_mem src_buffer,cl_mem dst_buffer,size_t src_offset,size_t dst_offset,size_t cb)
1144 if (hb_ocl == NULL)
1146 hb_error("hb_copy_buffer: OpenCL support not available");
1147 return 0;
1150 int status = hb_ocl->clEnqueueCopyBuffer(gpu_env.command_queue,
1151 src_buffer, dst_buffer,
1152 src_offset, dst_offset,
1153 cb, 0, 0, 0);
1154 if( status != CL_SUCCESS )
1156 av_log(NULL,AV_LOG_ERROR, "hb_read_opencl_buffer error '%d'\n", status );
1157 return 0;
1159 return 1;
1162 int hb_read_opencl_frame_buffer(cl_mem cl_inBuf,unsigned char *Ybuf,unsigned char *Ubuf,unsigned char *Vbuf,int linesize0,int linesize1,int linesize2,int height)
1165 int chrH = -(-height >> 1);
1166 unsigned char *temp = (unsigned char *)av_malloc(sizeof(uint8_t) * (linesize0 * height + linesize1 * chrH * 2));
1167 if(hb_read_opencl_buffer(cl_inBuf,temp,sizeof(uint8_t)*(linesize0 + linesize1)*height))
1169 memcpy(Ybuf,temp,linesize0 * height);
1170 memcpy(Ubuf,temp + linesize0 * height,linesize1 *chrH);
1171 memcpy(Vbuf,temp + linesize0 * height + linesize1 * chrH,linesize2 * chrH);
1174 av_free(temp);
1176 return 1;
1179 int hb_write_opencl_frame_buffer(cl_mem cl_inBuf,unsigned char *Ybuf,unsigned char *Ubuf,unsigned char *Vbuf,int linesize0,int linesize1,int linesize2,int height,int offset)
1181 if (hb_ocl == NULL)
1183 hb_error("hb_write_opencl_frame_buffer: OpenCL support not available");
1184 return 0;
1187 void *mapped = hb_ocl->clEnqueueMapBuffer(gpu_env.command_queue, cl_inBuf,
1188 CL_TRUE,CL_MAP_WRITE, 0,
1189 sizeof(uint8_t) * (linesize0 + linesize1) * height + offset,
1190 0, NULL, NULL, NULL);
1191 uint8_t *temp = (uint8_t *)mapped;
1192 temp += offset;
1193 memcpy(temp,Ybuf,sizeof(uint8_t) * linesize0 * height);
1194 memcpy(temp + sizeof(uint8_t) * linesize0 * height,Ubuf,sizeof(uint8_t) * linesize1 * height/2);
1195 memcpy(temp + sizeof(uint8_t) * (linesize0 * height + linesize1 * height/2),Vbuf,sizeof(uint8_t) * linesize2 * height/2);
1196 hb_ocl->clEnqueueUnmapMemObject(gpu_env.command_queue, cl_inBuf, mapped, 0, NULL, NULL);
1197 return 1;
1200 cl_command_queue hb_get_command_queue()
1202 return gpu_env.command_queue;
1205 cl_context hb_get_context()
1207 return gpu_env.context;