3 Copyright (c) 2003-2015 HandBrake Team
4 This file is part of the HandBrake source code
5 Homepage: <http://handbrake.fr/>.
6 It may be used under the terms of the GNU General Public License v2.
7 For full terms see the file COPYING file or visit http://www.gnu.org/licenses/gpl-2.0.html
9 Authors: Peng Gao <peng@multicorewareinc.com> <http://www.multicorewareinc.com/>
10 Li Cao <li@multicorewareinc.com> <http://www.multicorewareinc.com/>
16 #include "extras/cl.h"
18 #include "openclwrapper.h"
19 #include "openclkernels.h"
21 //#define USE_EXTERNAL_KERNEL
27 #define strcasecmp strcmpi
30 #define MAX_KERNEL_STRING_LEN 64
31 #define MAX_CLFILE_NUM 50
32 #define MAX_CLKERNEL_NUM 200
33 #define MAX_CLFILE_PATH 255
34 #define MAX_KERNEL_NUM 50
35 #define MAX_KERNEL_NAME_LEN 64
37 #ifndef INVALID_HANDLE_VALUE
38 #define INVALID_HANDLE_VALUE NULL
41 //#define THREAD_PRIORITY_TIME_CRITICAL 15
50 typedef struct _GPUEnv
52 //share vb in all modules in hb library
53 cl_platform_id platform
;
56 cl_device_id
* devices
;
58 cl_command_queue command_queue
;
59 cl_kernel kernels
[MAX_CLFILE_NUM
];
60 cl_program programs
[MAX_CLFILE_NUM
]; //one program object maps one kernel source file
61 char kernelSrcFile
[MAX_CLFILE_NUM
][256]; //the max len of kernel file name is 256
62 int file_count
; // only one kernel file
64 char kernel_names
[MAX_CLKERNEL_NUM
][MAX_KERNEL_STRING_LEN
+1];
65 cl_kernel_function kernel_functions
[MAX_CLKERNEL_NUM
];
67 int isUserCreated
; // 1: created , 0:no create and needed to create by opencl wrapper
73 char kernelName
[MAX_KERNEL_NAME_LEN
+1];
77 static GPUEnv gpu_env
;
78 static int isInited
= 0;
79 static int useBuffers
= 0;
80 static hb_kernel_node gKernels
[MAX_KERNEL_NUM
];
/*
 * Register the kernel called `s` in slot `idx`.
 *
 * Copies the name into both gKernels[idx].kernelName and
 * gpu_env.kernel_names[idx], stores `p` as the slot's kernel string, and
 * increments gpu_env.kernel_count.
 *
 * The copies are bounded by the destination array sizes, so an over-long
 * name is truncated (and still NUL-terminated) instead of overflowing.
 * The body is wrapped in do { } while (0) so the macro behaves as a single
 * statement, e.g. in an unbraced if/else.
 *
 * NOTE: `idx` and `s` are evaluated more than once; pass expressions
 * without side effects.
 */
#define HB_OCL_ADD_KERNEL_CFG(idx, s, p)                                  \
    do {                                                                  \
        snprintf(gKernels[(idx)].kernelName,                              \
                 sizeof(gKernels[(idx)].kernelName), "%s", (s));          \
        gKernels[(idx)].kernelStr = (p);                                  \
        snprintf(gpu_env.kernel_names[(idx)],                             \
                 sizeof(gpu_env.kernel_names[(idx)]), "%s", (s));         \
        gpu_env.kernel_count++;                                           \
    } while (0)
91 * hb_regist_opencl_kernel
93 int hb_regist_opencl_kernel()
95 //if( !gpu_env.isUserCreated )
96 // memset( &gpu_env, 0, sizeof(gpu_env) );
97 //Comment for posterity: When in doubt just zero out a structure full of pointers to allocated resources.
99 gpu_env
.file_count
= 0; //argc;
100 gpu_env
.kernel_count
= 0UL;
102 HB_OCL_ADD_KERNEL_CFG(0, "frame_scale", NULL
);
103 HB_OCL_ADD_KERNEL_CFG(1, "yadif_filter", NULL
);
109 * hb_convert_to_string
115 int hb_convert_to_string( const char *filename
, char **source
, GPUEnv
*gpu_info
, int idx
)
122 file
= fopen( filename
, "rb+" );
126 fseek( file
, 0, SEEK_END
);
128 file_size
= ftell( file
);
130 *source
= (char*)malloc( sizeof(char) * file_size
+ 1 );
131 if( *source
== (char*)NULL
)
135 result
= fread( *source
, 1, file_size
, file
);
136 if( result
!= file_size
)
141 (*source
)[file_size
] = '\0';
150 * hb_binary_generated
152 * @param cl_file_name -
155 int hb_binary_generated( cl_context context
, const char * cl_file_name
, FILE ** fhandle
)
160 cl_device_id
*devices
;
166 hb_error("hb_binary_generated: OpenCL support not available");
170 status
= hb_ocl
->clGetContextInfo(context
, CL_CONTEXT_NUM_DEVICES
,
171 sizeof(numDevices
), &numDevices
, NULL
);
172 if( status
!= CL_SUCCESS
)
174 hb_log( "OpenCL: Get context info failed" );
178 devices
= (cl_device_id
*)malloc( sizeof(cl_device_id
) * numDevices
);
179 if( devices
== NULL
)
181 hb_log( "OpenCL: No device found" );
185 /* grab the handles to all of the devices in the context. */
186 status
= hb_ocl
->clGetContextInfo(context
, CL_CONTEXT_DEVICES
,
187 sizeof(cl_device_id
) * numDevices
,
191 /* dump out each binary into its own separate file. */
192 for (i
= 0; i
< numDevices
; i
++)
194 char fileName
[256] = { 0 };
195 char cl_name
[128] = { 0 };
198 char deviceName
[1024];
199 status
= hb_ocl
->clGetDeviceInfo(devices
[i
], CL_DEVICE_NAME
,
200 sizeof(deviceName
), deviceName
, NULL
);
202 str
= (char*)strstr(cl_file_name
, ".cl");
203 memcpy(cl_name
, cl_file_name
, str
- cl_file_name
);
204 cl_name
[str
- cl_file_name
] = '\0';
205 sprintf(fileName
, "./%s - %s.bin", cl_name
, deviceName
);
206 fd
= fopen(fileName
, "rb");
211 if( devices
!= NULL
)
224 * hb_write_binary_to_file
/**
 * Write a block of bytes to a file, creating it or truncating any
 * existing content.
 *
 * @param fileName  path of the file to write
 * @param birary    bytes to write; may be NULL only when numBytes is 0
 * @param numBytes  number of bytes to write
 * @return 1 when every byte was written and the file closed cleanly,
 *         0 on invalid arguments, open failure, short write or close failure
 */
int hb_write_binary_to_file( const char* fileName, const char* birary, size_t numBytes )
{
    FILE *output;
    size_t written = 0;
    int ok;

    if( fileName == NULL || ( birary == NULL && numBytes != 0 ) )
    {
        return 0;
    }

    output = fopen( fileName, "wb" );
    if( output == NULL )
    {
        return 0;
    }

    if( numBytes != 0 )
    {
        written = fwrite( birary, sizeof(char), numBytes, output );
    }
    /* A short write means the binary on disk is unusable: report failure. */
    ok = ( written == numBytes );

    /* fclose() flushes buffered data, so its failure is a failed write too. */
    if( fclose( output ) != 0 )
    {
        ok = 0;
    }

    return ok;
}
243 * hb_generat_bin_from_kernel_source
245 * @param cl_file_name -
247 int hb_generat_bin_from_kernel_source( cl_program program
, const char * cl_file_name
)
253 cl_device_id
*devices
;
259 hb_error("hb_generat_bin_from_kernel_source: OpenCL support not available");
263 status
= hb_ocl
->clGetProgramInfo(program
, CL_PROGRAM_NUM_DEVICES
,
264 sizeof(numDevices
), &numDevices
, NULL
);
265 if( status
!= CL_SUCCESS
)
267 hb_log("OpenCL: hb_generat_bin_from_kernel_source: clGetProgramInfo for CL_PROGRAM_NUM_DEVICES failed");
271 devices
= (cl_device_id
*)malloc( sizeof(cl_device_id
) * numDevices
);
272 if( devices
== NULL
)
274 hb_log("OpenCL: hb_generat_bin_from_kernel_source: no device found");
278 /* grab the handles to all of the devices in the program. */
279 status
= hb_ocl
->clGetProgramInfo(program
, CL_PROGRAM_DEVICES
,
280 sizeof(cl_device_id
) * numDevices
,
282 if( status
!= CL_SUCCESS
)
284 hb_log("OpenCL: hb_generat_bin_from_kernel_source: clGetProgramInfo for CL_PROGRAM_DEVICES failed");
288 /* figure out the sizes of each of the binaries. */
289 binarySizes
= (size_t*)malloc( sizeof(size_t) * numDevices
);
291 status
= hb_ocl
->clGetProgramInfo(program
, CL_PROGRAM_BINARY_SIZES
,
292 sizeof(size_t) * numDevices
,
294 if( status
!= CL_SUCCESS
)
296 hb_log("OpenCL: hb_generat_bin_from_kernel_source: clGetProgramInfo for CL_PROGRAM_BINARY_SIZES failed");
300 /* copy over all of the generated binaries. */
301 binaries
= (char**)malloc( sizeof(char *) * numDevices
);
302 if( binaries
== NULL
)
304 hb_log("OpenCL: hb_generat_bin_from_kernel_source: malloc for binaries failed");
308 for( i
= 0; i
< numDevices
; i
++ )
310 if( binarySizes
[i
] != 0 )
312 binaries
[i
] = (char*)malloc( sizeof(char) * binarySizes
[i
] );
313 if( binaries
[i
] == NULL
)
315 hb_log("OpenCL: hb_generat_bin_from_kernel_source: malloc for binaries[%d] failed", i
);
325 status
= hb_ocl
->clGetProgramInfo(program
, CL_PROGRAM_BINARIES
,
326 sizeof(char *) * numDevices
,
328 if( status
!= CL_SUCCESS
)
330 hb_log("OpenCL: hb_generat_bin_from_kernel_source: clGetProgramInfo for CL_PROGRAM_BINARIES failed");
334 /* dump out each binary into its own separate file. */
335 for (i
= 0; i
< numDevices
; i
++)
337 char fileName
[256] = {0};
338 char cl_name
[128] = {0};
341 char deviceName
[1024];
342 status
= hb_ocl
->clGetDeviceInfo(devices
[i
], CL_DEVICE_NAME
,
343 sizeof(deviceName
), deviceName
,
346 str
= (char*)strstr( cl_file_name
, (char*)".cl" );
347 memcpy(cl_name
, cl_file_name
, str
- cl_file_name
);
348 cl_name
[str
- cl_file_name
] = '\0';
349 sprintf(fileName
, "./%s - %s.bin", cl_name
, deviceName
);
351 if (!hb_write_binary_to_file(fileName
, binaries
[i
], binarySizes
[i
]))
353 hb_log("OpenCL: hb_generat_bin_from_kernel_source: unable to write kernel, writing to temporary directory instead.");
359 // Release all resources and memory
360 for( i
= 0; i
< numDevices
; i
++ )
362 if( binaries
[i
] != NULL
)
369 if( binaries
!= NULL
)
375 if( binarySizes
!= NULL
)
381 if( devices
!= NULL
)
391 * hb_init_opencl_attr
394 int hb_init_opencl_attr( OpenCLEnv
* env
)
396 if( gpu_env
.isUserCreated
)
399 gpu_env
.context
= env
->context
;
400 gpu_env
.platform
= env
->platform
;
401 gpu_env
.dev
= env
->devices
;
402 gpu_env
.command_queue
= env
->command_queue
;
404 gpu_env
.isUserCreated
= 1;
411 * @param kernelname -
414 int hb_create_kernel( char * kernelname
, KernelEnv
* env
)
420 hb_error("hb_create_kernel: OpenCL support not available");
424 env
->kernel
= hb_ocl
->clCreateKernel(gpu_env
.programs
[0], kernelname
, &status
);
425 env
->context
= gpu_env
.context
;
426 env
->command_queue
= gpu_env
.command_queue
;
427 return status
!= CL_SUCCESS
? 1 : 0;
434 int hb_release_kernel( KernelEnv
* env
)
438 hb_error("hb_release_kernel: OpenCL support not available");
442 int status
= hb_ocl
->clReleaseKernel(env
->kernel
);
443 return status
!= CL_SUCCESS
? 1 : 0;
451 static int init_once
= 0;
452 int hb_init_opencl_env( GPUEnv
*gpu_info
)
456 cl_uint numPlatforms
, numDevices
;
457 cl_platform_id
*platforms
;
458 cl_context_properties cps
[3];
459 char platformName
[100];
461 void *handle
= INVALID_HANDLE_VALUE
;
470 hb_error("hb_init_opencl_env: OpenCL support not available");
475 * Have a look at the available platforms.
477 if( !gpu_info
->isUserCreated
)
479 status
= hb_ocl
->clGetPlatformIDs(0, NULL
, &numPlatforms
);
480 if( status
!= CL_SUCCESS
)
482 hb_log( "OpenCL: OpenCL device platform not found." );
486 gpu_info
->platform
= NULL
;
487 if( 0 < numPlatforms
)
489 platforms
= (cl_platform_id
*)malloc(
490 numPlatforms
* sizeof(cl_platform_id
));
491 if( platforms
== (cl_platform_id
*)NULL
)
495 status
= hb_ocl
->clGetPlatformIDs(numPlatforms
, platforms
, NULL
);
497 if( status
!= CL_SUCCESS
)
499 hb_log( "OpenCL: Specific opencl platform not found." );
503 for( i
= 0; i
< numPlatforms
; i
++ )
505 status
= hb_ocl
->clGetPlatformInfo(platforms
[i
], CL_PLATFORM_VENDOR
,
506 sizeof(platformName
), platformName
, NULL
);
508 if( status
!= CL_SUCCESS
)
512 gpu_info
->platform
= platforms
[i
];
514 if (!strcmp(platformName
, "Advanced Micro Devices, Inc.") ||
515 !strcmp(platformName
, "AMD"))
516 gpu_info
->vendor
= AMD
;
518 gpu_info
->vendor
= others
;
520 gpu_info
->platform
= platforms
[i
];
522 status
= hb_ocl
->clGetDeviceIDs(gpu_info
->platform
/* platform */,
523 CL_DEVICE_TYPE_GPU
/* device_type */,
525 NULL
/* devices */, &numDevices
);
527 if( status
!= CL_SUCCESS
)
539 if( NULL
== gpu_info
->platform
)
541 hb_log( "OpenCL: No OpenCL-compatible GPU found." );
545 if( status
!= CL_SUCCESS
)
547 hb_log( "OpenCL: No OpenCL-compatible GPU found." );
552 * Use available platform.
554 cps
[0] = CL_CONTEXT_PLATFORM
;
555 cps
[1] = (cl_context_properties
)gpu_info
->platform
;
558 gpu_info
->dType
= CL_DEVICE_TYPE_GPU
;
559 gpu_info
->context
= hb_ocl
->clCreateContextFromType(cps
, gpu_info
->dType
,
560 NULL
, NULL
, &status
);
562 if( (gpu_info
->context
== (cl_context
)NULL
) || (status
!= CL_SUCCESS
) )
564 gpu_info
->dType
= CL_DEVICE_TYPE_CPU
;
565 gpu_info
->context
= hb_ocl
->clCreateContextFromType(cps
, gpu_info
->dType
,
566 NULL
, NULL
, &status
);
569 if( (gpu_info
->context
== (cl_context
)NULL
) || (status
!= CL_SUCCESS
) )
571 gpu_info
->dType
= CL_DEVICE_TYPE_DEFAULT
;
572 gpu_info
->context
= hb_ocl
->clCreateContextFromType(cps
, gpu_info
->dType
,
573 NULL
, NULL
, &status
);
576 if( (gpu_info
->context
== (cl_context
)NULL
) || (status
!= CL_SUCCESS
) )
578 hb_log( "OpenCL: Unable to create opencl context." );
582 /* Detect OpenCL devices. */
583 /* First, get the size of device list data */
584 status
= hb_ocl
->clGetContextInfo(gpu_info
->context
, CL_CONTEXT_DEVICES
,
586 if((status
!= CL_SUCCESS
) || (length
== 0))
588 hb_log( "OpenCL: Unable to get the list of devices in context." );
592 /* Now allocate memory for device list based on the size we got earlier */
593 gpu_info
->devices
= (cl_device_id
*)malloc( length
);
594 if( gpu_info
->devices
== (cl_device_id
*)NULL
)
599 /* Now, get the device list data */
600 status
= hb_ocl
->clGetContextInfo(gpu_info
->context
, CL_CONTEXT_DEVICES
,
601 length
, gpu_info
->devices
, NULL
);
602 if( status
!= CL_SUCCESS
)
604 hb_log( "OpenCL: Unable to get the device list data in context." );
608 /* Create OpenCL command queue. */
609 gpu_info
->command_queue
= hb_ocl
->clCreateCommandQueue(gpu_info
->context
,
610 gpu_info
->devices
[0],
612 if( status
!= CL_SUCCESS
)
614 hb_log( "OpenCL: Unable to create opencl command queue." );
619 if ((CL_SUCCESS
== hb_ocl
->clGetCommandQueueInfo(gpu_info
->command_queue
,
620 CL_QUEUE_THREAD_HANDLE_AMD
,
621 sizeof(handle
), &handle
, NULL
)) &&
622 (INVALID_HANDLE_VALUE
!= handle
))
625 SetThreadPriority( handle
, THREAD_PRIORITY_TIME_CRITICAL
);
634 * hb_release_opencl_env
637 int hb_release_opencl_env( GPUEnv
*gpu_info
)
645 hb_error("hb_release_opencl_env: OpenCL support not available");
649 for( i
= 0; i
<gpu_env
.file_count
; i
++ )
651 if( gpu_env
.programs
[i
] )
653 hb_ocl
->clReleaseProgram(gpu_env
.programs
[i
]);
654 gpu_env
.programs
[i
] = NULL
;
658 if( gpu_env
.command_queue
)
660 hb_ocl
->clReleaseCommandQueue(gpu_env
.command_queue
);
661 gpu_env
.command_queue
= NULL
;
664 if( gpu_env
.context
)
666 hb_ocl
->clReleaseContext(gpu_env
.context
);
667 gpu_env
.context
= NULL
;
672 gpu_info
->isUserCreated
= 0;
679 * hb_register_kernel_wrapper
680 * @param kernel_name -
683 int hb_register_kernel_wrapper( const char *kernel_name
, cl_kernel_function function
)
686 for( i
= 0; i
< gpu_env
.kernel_count
; i
++ )
688 if( strcasecmp( kernel_name
, gpu_env
.kernel_names
[i
] ) == 0 )
690 gpu_env
.kernel_functions
[i
] = function
;
698 * hb_cached_of_kerner_prg
700 * @param cl_file_name -
702 int hb_cached_of_kerner_prg( const GPUEnv
*gpu_env
, const char * cl_file_name
)
705 for( i
= 0; i
< gpu_env
->file_count
; i
++ )
707 if( strcasecmp( gpu_env
->kernelSrcFile
[i
], cl_file_name
) == 0 )
709 if( gpu_env
->programs
[i
] != NULL
)
718 * hb_compile_kernel_file
722 * @param build_option -
724 int hb_compile_kernel_file( const char *filename
, GPUEnv
*gpu_info
,
725 int indx
, const char *build_option
)
731 size_t source_size
[1];
732 char *buildLog
= NULL
;
733 int b_error
, binary_status
, binaryExisted
;
736 cl_device_id
*devices
;
741 if( hb_cached_of_kerner_prg( gpu_info
, filename
) == 1 )
744 idx
= gpu_info
->file_count
;
746 #ifdef USE_EXTERNAL_KERNEL
747 status
= hb_convert_to_string( filename
, &source_str
, gpu_info
, idx
);
751 int kernel_src_size
= strlen(kernel_src_scale
) + strlen(kernel_src_yadif_filter
);
754 // status = hb_convert_to_string("./scale_kernels.cl", &scale_src, gpu_info, idx);
756 // kernel_src_size += strlen(scale_src);
758 source_str
= (char*)malloc( kernel_src_size
+ 2 );
759 strcpy( source_str
, kernel_src_scale
);
760 // strcat( source_str, scale_src ); //
761 strcat( source_str
, kernel_src_yadif_filter
);
765 source_size
[0] = strlen( source
);
769 hb_error("hb_compile_kernel_file: OpenCL support not available");
773 if ((binaryExisted
= hb_binary_generated(gpu_info
->context
, filename
, &fd
)) == 1)
775 status
= hb_ocl
->clGetContextInfo(gpu_info
->context
, CL_CONTEXT_NUM_DEVICES
,
776 sizeof(numDevices
), &numDevices
, NULL
);
777 if (status
!= CL_SUCCESS
)
779 hb_log("OpenCL: Unable to get the number of devices in context.");
783 devices
= (cl_device_id
*)malloc(sizeof(cl_device_id
) * numDevices
);
789 b_error
|= fseek(fd
, 0, SEEK_END
) < 0;
790 b_error
|= (length
= ftell(fd
)) <= 0;
791 b_error
|= fseek(fd
, 0, SEEK_SET
) < 0;
795 binary
= (char*)calloc(length
+ 2, sizeof(char));
799 b_error
|= fread(binary
, 1, length
, fd
) != length
;
800 #if 0 // this doesn't work under OS X and/or with some non-AMD GPUs
801 if (binary
[length
-1] != '\n')
802 binary
[length
++] = '\n';
808 /* grab the handles to all of the devices in the context. */
809 status
= hb_ocl
->clGetContextInfo(gpu_info
->context
, CL_CONTEXT_DEVICES
,
810 sizeof(cl_device_id
) * numDevices
,
813 gpu_info
->programs
[idx
] = hb_ocl
->clCreateProgramWithBinary(gpu_info
->context
,
817 (const unsigned char**)&binary
,
828 /* create a CL program using the kernel source */
829 gpu_info
->programs
[idx
] = hb_ocl
->clCreateProgramWithSource(gpu_info
->context
, 1,
830 &source
, source_size
,
834 if((gpu_info
->programs
[idx
] == (cl_program
)NULL
) || (status
!= CL_SUCCESS
)){
835 hb_log( "OpenCL: Unable to get list of devices in context." );
839 /* create a cl program executable for all the devices specified */
840 if( !gpu_info
->isUserCreated
)
842 status
= hb_ocl
->clBuildProgram(gpu_info
->programs
[idx
], 1, gpu_info
->devices
,
843 build_option
, NULL
, NULL
);
847 status
= hb_ocl
->clBuildProgram(gpu_info
->programs
[idx
], 1, &(gpu_info
->dev
),
848 build_option
, NULL
, NULL
);
851 if( status
!= CL_SUCCESS
)
853 if( !gpu_info
->isUserCreated
)
855 status
= hb_ocl
->clGetProgramBuildInfo(gpu_info
->programs
[idx
],
856 gpu_info
->devices
[0],
857 CL_PROGRAM_BUILD_LOG
,
862 status
= hb_ocl
->clGetProgramBuildInfo(gpu_info
->programs
[idx
],
864 CL_PROGRAM_BUILD_LOG
,
868 if( status
!= CL_SUCCESS
)
870 hb_log( "OpenCL: Unable to get GPU build information." );
874 buildLog
= (char*)malloc( length
);
875 if( buildLog
== (char*)NULL
)
880 if( !gpu_info
->isUserCreated
)
882 status
= hb_ocl
->clGetProgramBuildInfo(gpu_info
->programs
[idx
],
883 gpu_info
->devices
[0],
884 CL_PROGRAM_BUILD_LOG
,
885 length
, buildLog
, &length
);
889 status
= hb_ocl
->clGetProgramBuildInfo(gpu_info
->programs
[idx
],
891 CL_PROGRAM_BUILD_LOG
,
892 length
, buildLog
, &length
);
895 fd1
= fopen( "kernel-build.log", "w+" );
897 fwrite( buildLog
, sizeof(char), length
, fd1
);
905 strcpy( gpu_env
.kernelSrcFile
[idx
], filename
);
907 if (binaryExisted
!= 1)
909 //hb_generat_bin_from_kernel_source(gpu_env.programs[idx], filename);
912 gpu_info
->file_count
+= 1;
919 * hb_get_kernel_env_and_func
920 * @param kernel_name -
924 int hb_get_kernel_env_and_func( const char *kernel_name
,
926 cl_kernel_function
*function
)
929 for( i
= 0; i
< gpu_env
.kernel_count
; i
++ )
931 if( strcasecmp( kernel_name
, gpu_env
.kernel_names
[i
] ) == 0 )
933 env
->context
= gpu_env
.context
;
934 env
->command_queue
= gpu_env
.command_queue
;
935 env
->program
= gpu_env
.programs
[0];
936 env
->kernel
= gpu_env
.kernels
[i
];
937 env
->isAMD
= ( gpu_env
.vendor
== AMD
) ? 1 : 0;
938 *function
= gpu_env
.kernel_functions
[i
];
946 * hb_run_kernel
947 * @param kernel_name -
950 int hb_run_kernel( const char *kernel_name
, void **userdata
)
953 cl_kernel_function function
;
955 memset( &env
, 0, sizeof(KernelEnv
));
956 status
= hb_get_kernel_env_and_func( kernel_name
, &env
, &function
);
957 strcpy( env
.kernel_name
, kernel_name
);
960 return(function( userdata
, &env
));
967 * hb_init_opencl_run_env
970 * @param build_option -
972 int hb_init_opencl_run_env( int argc
, char **argv
, const char *build_option
)
975 if( MAX_CLKERNEL_NUM
<= 0 )
980 if((argc
> MAX_CLFILE_NUM
) || (argc
<0))
987 hb_regist_opencl_kernel();
989 /* initialize devices, context, command_queue */
990 status
= hb_init_opencl_env( &gpu_env
);
994 /*initialize program, kernel_name, kernel_count*/
995 status
= hb_compile_kernel_file("hb-opencl-kernels.cl",
996 &gpu_env
, 0, build_option
);
998 if( status
== 0 || gpu_env
.kernel_count
== 0 )
1012 * hb_release_opencl_run_env
1014 int hb_release_opencl_run_env()
1016 return hb_release_opencl_env( &gpu_env
);
1022 int hb_opencl_stats()
1030 int hb_get_opencl_env()
1032 /* initialize devices, context, command_queue */
1033 return hb_init_opencl_env(&gpu_env
);
1042 int hb_create_buffer( cl_mem
*cl_Buf
, int flags
, int size
)
1048 hb_error("hb_create_buffer: OpenCL support not available");
1052 *cl_Buf
= hb_ocl
->clCreateBuffer(gpu_env
.context
, flags
, size
, NULL
, &status
);
1054 if( status
!= CL_SUCCESS
)
1056 hb_log( "OpenCL: clCreateBuffer error '%d'", status
);
1065 * hb_read_opencl_buffer
1070 int hb_read_opencl_buffer( cl_mem cl_inBuf
, unsigned char *outbuf
, int size
)
1076 hb_error("hb_read_opencl_suffer: OpenCL support not available");
1080 status
= hb_ocl
->clEnqueueReadBuffer(gpu_env
.command_queue
, cl_inBuf
,
1081 CL_TRUE
, 0, size
, outbuf
, 0, 0, 0);
1082 if( status
!= CL_SUCCESS
)
1084 hb_log( "OpenCL: av_read_opencl_buffer error '%d'", status
);
1091 int hb_cl_create_mapped_buffer(cl_mem
*mem
, unsigned char **addr
, int size
)
1094 int flags
= CL_MEM_ALLOC_HOST_PTR
;
1098 hb_error("hb_cl_create_mapped_buffer: OpenCL support not available");
1103 *mem
= hb_ocl
->clCreateBuffer(gpu_env
.context
, flags
, size
, NULL
, &status
);
1104 *addr
= hb_ocl
->clEnqueueMapBuffer(gpu_env
.command_queue
, *mem
, CL_TRUE
,
1105 CL_MAP_READ
|CL_MAP_WRITE
, 0, size
, 0,
1106 NULL
, NULL
/*&event*/, &status
);
1108 //hb_log("\t **** context: %.8x cmdqueue: %.8x cl_mem: %.8x mapaddr: %.8x size: %d status: %d", gpu_env.context, gpu_env.command_queue, mem, addr, size, status);
1110 return (status
== CL_SUCCESS
) ? 1 : 0;
1113 int hb_cl_free_mapped_buffer(cl_mem mem
, unsigned char *addr
)
1119 hb_error("hb_cl_free_mapped_buffer: OpenCL support not available");
1123 int status
= hb_ocl
->clEnqueueUnmapMemObject(gpu_env
.command_queue
, mem
,
1124 addr
, 0, NULL
, &event
);
1125 if (status
== CL_SUCCESS
)
1126 hb_ocl
->clWaitForEvents(1, &event
);
1128 hb_log("hb_free_mapped_buffer: error %d", status
);
1129 return (status
== CL_SUCCESS
) ? 1 : 0;
/**
 * Convenience entry point that sets up the OpenCL environment by calling
 * hb_get_opencl_env().
 *
 * Declared with (void) so the definition is a real prototype.
 */
void hb_opencl_init(void)
{
    /* This function returns void, so the status is deliberately discarded. */
    (void)hb_get_opencl_env();
}
1137 int hb_use_buffers()
1142 int hb_copy_buffer(cl_mem src_buffer
,cl_mem dst_buffer
,size_t src_offset
,size_t dst_offset
,size_t cb
)
1146 hb_error("hb_copy_buffer: OpenCL support not available");
1150 int status
= hb_ocl
->clEnqueueCopyBuffer(gpu_env
.command_queue
,
1151 src_buffer
, dst_buffer
,
1152 src_offset
, dst_offset
,
1154 if( status
!= CL_SUCCESS
)
1156 av_log(NULL
,AV_LOG_ERROR
, "hb_read_opencl_buffer error '%d'\n", status
);
1162 int hb_read_opencl_frame_buffer(cl_mem cl_inBuf
,unsigned char *Ybuf
,unsigned char *Ubuf
,unsigned char *Vbuf
,int linesize0
,int linesize1
,int linesize2
,int height
)
1165 int chrH
= -(-height
>> 1);
1166 unsigned char *temp
= (unsigned char *)av_malloc(sizeof(uint8_t) * (linesize0
* height
+ linesize1
* chrH
* 2));
1167 if(hb_read_opencl_buffer(cl_inBuf
,temp
,sizeof(uint8_t)*(linesize0
+ linesize1
)*height
))
1169 memcpy(Ybuf
,temp
,linesize0
* height
);
1170 memcpy(Ubuf
,temp
+ linesize0
* height
,linesize1
*chrH
);
1171 memcpy(Vbuf
,temp
+ linesize0
* height
+ linesize1
* chrH
,linesize2
* chrH
);
1179 int hb_write_opencl_frame_buffer(cl_mem cl_inBuf
,unsigned char *Ybuf
,unsigned char *Ubuf
,unsigned char *Vbuf
,int linesize0
,int linesize1
,int linesize2
,int height
,int offset
)
1183 hb_error("hb_write_opencl_frame_buffer: OpenCL support not available");
1187 void *mapped
= hb_ocl
->clEnqueueMapBuffer(gpu_env
.command_queue
, cl_inBuf
,
1188 CL_TRUE
,CL_MAP_WRITE
, 0,
1189 sizeof(uint8_t) * (linesize0
+ linesize1
) * height
+ offset
,
1190 0, NULL
, NULL
, NULL
);
1191 uint8_t *temp
= (uint8_t *)mapped
;
1193 memcpy(temp
,Ybuf
,sizeof(uint8_t) * linesize0
* height
);
1194 memcpy(temp
+ sizeof(uint8_t) * linesize0
* height
,Ubuf
,sizeof(uint8_t) * linesize1
* height
/2);
1195 memcpy(temp
+ sizeof(uint8_t) * (linesize0
* height
+ linesize1
* height
/2),Vbuf
,sizeof(uint8_t) * linesize2
* height
/2);
1196 hb_ocl
->clEnqueueUnmapMemObject(gpu_env
.command_queue
, cl_inBuf
, mapped
, 0, NULL
, NULL
);
1200 cl_command_queue
hb_get_command_queue()
1202 return gpu_env
.command_queue
;
1205 cl_context
hb_get_context()
1207 return gpu_env
.context
;