Move gpu_utils etc. out of gmxlib
[gromacs.git] / src / gromacs / gpu_utils / ocl_compiler.cpp
blob4e43fd0e1c634829241bb7bb1edc454953260c3b
1 /*
2 * This file is part of the GROMACS molecular simulation package.
4 * Copyright (c) 2012,2013,2014,2015, by the GROMACS development team, led by
5 * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
6 * and including many others, as listed in the AUTHORS file in the
7 * top-level source directory and at http://www.gromacs.org.
9 * GROMACS is free software; you can redistribute it and/or
10 * modify it under the terms of the GNU Lesser General Public License
11 * as published by the Free Software Foundation; either version 2.1
12 * of the License, or (at your option) any later version.
14 * GROMACS is distributed in the hope that it will be useful,
15 * but WITHOUT ANY WARRANTY; without even the implied warranty of
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17 * Lesser General Public License for more details.
19 * You should have received a copy of the GNU Lesser General Public
20 * License along with GROMACS; if not, see
21 * http://www.gnu.org/licenses, or write to the Free Software Foundation,
22 * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
24 * If you want to redistribute modifications to GROMACS, please
25 * consider that scientific software is very special. Version
26 * control is crucial - bugs must be traceable. We will be happy to
27 * consider code for inclusion in the official distribution, but
28 * derived work must not be called official GROMACS. Details are found
29 * in the README & COPYING files - if they are missing, get the
30 * official version at http://www.gromacs.org.
32 * To help us fund GROMACS development, we humbly ask that you cite
33 * the research papers on the package. Check out http://www.gromacs.org.
35 /*! \internal \file
36 * \brief Define infrastructure for OpenCL JIT compilation for Gromacs
38 * \author Dimitrios Karkoulis <dimitris.karkoulis@gmail.com>
39 * \author Anca Hamuraru <anca@streamcomputing.eu>
40 * \author Teemu Virolainen <teemu@streamcomputing.eu>
42 * TODO Currently this file handles compilation of NBNXN kernels,
43 * but e.g. organizing the defines for various physics models
44 * is leaking in here a bit.
47 #include "gmxpre.h"
49 #include "ocl_compiler.h"
51 #include "config.h"
53 #include <assert.h>
54 #include <stdio.h>
55 #include <stdlib.h>
56 #include <string.h>
58 #include <string>
60 #include "gromacs/utility/path.h"
61 #include "gromacs/utility/programcontext.h"
62 #include "gromacs/utility/stringutil.h"
64 /*! \brief Path separator
66 #define SEPARATOR '/'
68 /*! \brief Compiler options index
70 typedef enum {
71 b_invalid_option = 0,
72 b_amd_cpp,
73 b_nvidia_verbose,
74 b_generic_cl11,
75 b_generic_cl12,
76 b_generic_fast_relaxed_math,
77 b_generic_noopt_compilation,
78 b_generic_debug_symbols,
79 b_amd_dump_temp_files,
80 b_include_install_opencl_dir,
81 b_include_source_opencl_dirs,
82 b_num_build_options
83 } build_options_index_t;
85 /*! \brief List of available OpenCL compiler options
87 static const char* build_options_list[] = {
88 "",
89 "-x clc++", /**< AMD C++ extension */
90 "-cl-nv-verbose", /**< Nvidia verbose build log */
91 "-cl-std=CL1.1", /**< Force CL 1.1 */
92 "-cl-std=CL1.2", /**< Force CL 1.2 */
93 "-cl-fast-relaxed-math", /**< Fast math */
94 "-cl-opt-disable", /**< Disable optimisations */
95 "-g", /**< Debug symbols */
96 "-save-temps" /**< AMD option to dump intermediate temporary
97 files such as IL or ISA code */
100 /*! \brief Available sources
102 static const char * kernel_filenames[] = {"nbnxn_ocl_kernels.cl"};
104 /*! \brief Defines to enable specific kernels based on vendor
106 static const char * kernel_vendor_spec_definitions[] = {
107 "-D_WARPLESS_SOURCE_", /**< nbnxn_ocl_kernel_nowarp.clh */
108 "-D_NVIDIA_SOURCE_", /**< nbnxn_ocl_kernel_nvidia.clh */
109 "-D_AMD_SOURCE_" /**< nbnxn_ocl_kernel_amd.clh */
113 /*! \brief Get the string of a build option of the specific id
114 * \param build_option_id The option id as defines in the header
115 * \return String containing the actual build option string for the compiler
117 static const char* get_ocl_build_option(build_options_index_t build_option_id)
119 if (build_option_id < b_num_build_options)
121 return build_options_list[build_option_id];
123 else
125 return build_options_list[b_invalid_option];
129 /*! \brief Get the size of the string (without null termination) required
130 * for the build option of the specific id
131 * \param build_option_id The option id as defines in the header
132 * \return size_t containing the size in bytes of the build option string
134 static size_t get_ocl_build_option_length(build_options_index_t build_option_id)
137 if (build_option_id < b_num_build_options)
139 return strlen(build_options_list[build_option_id]);
141 else
143 return strlen(build_options_list[b_invalid_option]);
147 /*! \brief Get the size of final composed build options literal
149 * \param build_device_vendor_id Device vendor id. Used to
150 * automatically enable some vendor specific options
151 * \param custom_build_options_prepend Prepend options string
152 * \param custom_build_options_append Append options string
153 * \return size_t containing the size in bytes of the composed
154 * build options string including null termination
156 static size_t
157 create_ocl_build_options_length(
158 ocl_vendor_id_t build_device_vendor_id,
159 const char * custom_build_options_prepend,
160 const char * custom_build_options_append)
162 size_t build_options_length = 0;
163 size_t whitespace = 1;
165 assert(build_device_vendor_id <= OCL_VENDOR_UNKNOWN);
167 if (custom_build_options_prepend)
169 build_options_length +=
170 strlen(custom_build_options_prepend)+whitespace;
173 if ( (build_device_vendor_id == OCL_VENDOR_AMD) && getenv("GMX_OCL_DEBUG") && getenv("GMX_OCL_FORCE_CPU") )
175 build_options_length += get_ocl_build_option_length(b_generic_debug_symbols)+whitespace;
178 if (getenv("GMX_OCL_NOOPT"))
180 build_options_length +=
181 get_ocl_build_option_length(b_generic_noopt_compilation)+whitespace;
184 if (getenv("GMX_OCL_FASTMATH"))
186 build_options_length +=
187 get_ocl_build_option_length(b_generic_fast_relaxed_math)+whitespace;
190 if ((build_device_vendor_id == OCL_VENDOR_NVIDIA) && getenv("GMX_OCL_VERBOSE"))
192 build_options_length +=
193 get_ocl_build_option_length(b_nvidia_verbose) + whitespace;
196 if ((build_device_vendor_id == OCL_VENDOR_AMD) && getenv("GMX_OCL_DUMP_INTERM_FILES"))
198 /* To dump OpenCL build intermediate files, caching must be off */
199 if (NULL != getenv("GMX_OCL_NOGENCACHE"))
201 build_options_length +=
202 get_ocl_build_option_length(b_amd_dump_temp_files) + whitespace;
206 if (custom_build_options_append)
208 build_options_length +=
209 strlen(custom_build_options_append)+whitespace;
212 return build_options_length+1;
215 /*! \brief Get the size of final composed build options literal
217 * \param build_options_string The string where to save the
218 * resulting build options in
219 * \param build_options_length The size of the build options
220 * \param build_device_vendor_id Device vendor id. Used to
221 * automatically enable some vendor specific options
222 * \param custom_build_options_prepend Prepend options string
223 * \param custom_build_options_append Append options string
224 * \return The string build_options_string with the build options
226 static char *
227 create_ocl_build_options(
228 char * build_options_string,
229 size_t gmx_unused build_options_length,
230 ocl_vendor_id_t build_device_vendor_id,
231 const char * custom_build_options_prepend,
232 const char * custom_build_options_append)
234 size_t char_added = 0;
236 if (custom_build_options_prepend)
238 strncpy( build_options_string+char_added,
239 custom_build_options_prepend,
240 strlen(custom_build_options_prepend));
242 char_added += strlen(custom_build_options_prepend);
243 build_options_string[char_added++] = ' ';
246 if (getenv("GMX_OCL_NOOPT") )
248 strncpy( build_options_string+char_added,
249 get_ocl_build_option(b_generic_noopt_compilation),
250 get_ocl_build_option_length(b_generic_noopt_compilation) );
252 char_added += get_ocl_build_option_length(b_generic_noopt_compilation);
253 build_options_string[char_added++] = ' ';
257 if (getenv("GMX_OCL_FASTMATH") )
259 strncpy( build_options_string+char_added,
260 get_ocl_build_option(b_generic_fast_relaxed_math),
261 get_ocl_build_option_length(b_generic_fast_relaxed_math) );
263 char_added += get_ocl_build_option_length(b_generic_fast_relaxed_math);
264 build_options_string[char_added++] = ' ';
267 if ((build_device_vendor_id == OCL_VENDOR_NVIDIA) && getenv("GMX_OCL_VERBOSE"))
269 strncpy(build_options_string + char_added,
270 get_ocl_build_option(b_nvidia_verbose),
271 get_ocl_build_option_length(b_nvidia_verbose));
273 char_added += get_ocl_build_option_length(b_nvidia_verbose);
274 build_options_string[char_added++] = ' ';
277 if ((build_device_vendor_id == OCL_VENDOR_AMD) && getenv("GMX_OCL_DUMP_INTERM_FILES"))
279 /* To dump OpenCL build intermediate files, caching must be off */
280 if (NULL != getenv("GMX_OCL_NOGENCACHE"))
282 strncpy(build_options_string + char_added,
283 get_ocl_build_option(b_amd_dump_temp_files),
284 get_ocl_build_option_length(b_amd_dump_temp_files));
286 char_added += get_ocl_build_option_length(b_amd_dump_temp_files);
287 build_options_string[char_added++] = ' ';
291 if ( ( build_device_vendor_id == OCL_VENDOR_AMD ) && getenv("GMX_OCL_DEBUG") && getenv("GMX_OCL_FORCE_CPU"))
293 strncpy( build_options_string+char_added,
294 get_ocl_build_option(b_generic_debug_symbols),
295 get_ocl_build_option_length(b_generic_debug_symbols) );
297 char_added += get_ocl_build_option_length(b_generic_debug_symbols);
298 build_options_string[char_added++] = ' ';
301 if (custom_build_options_append)
303 strncpy( build_options_string+char_added,
304 custom_build_options_append,
305 strlen(custom_build_options_append) );
307 char_added += strlen(custom_build_options_append);
308 build_options_string[char_added++] = ' ';
311 build_options_string[char_added++] = '\0';
313 assert(char_added == build_options_length);
315 return build_options_string;
318 /*! \brief Get the path to the main folder storing OpenCL kernels.
320 * By default, this function constructs the full path to the OpenCL from
321 * the known location of the binary that is running, so that we handle
322 * both in-source and installed builds. The user can override this
323 * behavior by defining GMX_OCL_FILE_PATH environment variable.
325 * \return OS-normalized path string to the main folder storing OpenCL kernels
327 * \throws std::bad_alloc if out of memory.
329 static std::string
330 get_ocl_root_path()
332 const char *gmx_ocl_file_path;
333 std::string ocl_root_path;
335 /* Use GMX_OCL_FILE_PATH if the user has defined it */
336 gmx_ocl_file_path = getenv("GMX_OCL_FILE_PATH");
338 if (!gmx_ocl_file_path)
340 /* Normal way of getting ocl_root_dir. First get the right
341 root path from the path to the binary that is running. */
342 gmx::InstallationPrefixInfo info = gmx::getProgramContext().installationPrefix();
343 std::string dataPathSuffix = (info.bSourceLayout ?
344 "src/gromacs/mdlib/nbnxn_ocl" :
345 OCL_INSTALL_DIR);
346 ocl_root_path = gmx::Path::join(info.path, dataPathSuffix);
348 else
350 ocl_root_path = gmx_ocl_file_path;
353 // Make sure we return an OS-correct path format
354 return gmx::Path::normalize(ocl_root_path);
357 /*! \brief Get the size of the full kernel source file path and name
359 * The following full path size is computed:
360 * strlen(ocl_root_path) + strlen(kernel_id.cl) + separator + null term
362 * \param kernel_src_id Id of the kernel source (auto,nvidia,amd,nowarp)
363 * \return Size in bytes of the full kernel source file path and name including
364 * separators and null termination
366 * \throws std::bad_alloc if out of memory */
367 static size_t
368 get_ocl_kernel_source_file_info(kernel_source_index_t kernel_src_id)
370 std::string ocl_root_path = get_ocl_root_path();
372 if (ocl_root_path.empty())
374 return 0;
377 return (ocl_root_path.length() + /* Path to the main OpenCL folder*/
378 1 + /* Separator */
379 strlen(kernel_filenames[kernel_src_id]) + /* Kernel source file name */
380 1 /* null char */
384 /*! \brief Compose and the full path and name of the kernel src to be used
386 * \param ocl_kernel_filename String where the full path and name will be saved
387 * \param kernel_src_id Id of the kernel source (default)
388 * \param kernel_filename_len Size of the full path and name string, as computed by get_ocl_kernel_source_file_info()
389 * \return The ocl_kernel_filename complete with the full path and name; NULL if error.
391 * \throws std::bad_alloc if out of memory */
392 static char *
393 get_ocl_kernel_source_path(
394 char * ocl_kernel_filename,
395 kernel_source_index_t kernel_src_id,
396 size_t gmx_unused kernel_filename_len)
398 std::string ocl_root_path;
400 assert(kernel_filename_len != 0);
401 assert(ocl_kernel_filename != NULL);
403 ocl_root_path = get_ocl_root_path();
404 if (ocl_root_path.empty())
406 return NULL;
409 size_t chars_copied = 0;
410 strncpy(ocl_kernel_filename, ocl_root_path.c_str(), ocl_root_path.length());
411 chars_copied += ocl_root_path.length();
413 ocl_kernel_filename[chars_copied++] = SEPARATOR;
415 strncpy(&ocl_kernel_filename[chars_copied],
416 kernel_filenames[kernel_src_id],
417 strlen(kernel_filenames[kernel_src_id]) );
418 chars_copied += strlen(kernel_filenames[kernel_src_id]);
420 ocl_kernel_filename[chars_copied++] = '\0';
422 assert(chars_copied == kernel_filename_len);
424 return ocl_kernel_filename;
427 /* Undefine the separators */
428 #undef SEPARATOR
430 /*! \brief Loads the src inside the file filename onto a string in memory
432 * \param filename The name of the file to be read
433 * \param p_source_length Pointer to the size of the source in bytes
434 * (without null termination)
435 * \return A string with the contents of the file with name filename,
436 * or NULL if there was a problem opening/reading the file
438 static char*
439 load_ocl_source(const char* filename, size_t* p_source_length)
441 FILE * filestream = NULL;
442 char * ocl_source;
443 size_t source_length;
445 source_length = 0;
447 if (!filename)
449 return NULL;
452 filestream = fopen(filename, "rb");
453 if (!filestream)
455 return NULL;
458 fseek(filestream, 0, SEEK_END);
459 source_length = ftell(filestream);
460 fseek(filestream, 0, SEEK_SET);
462 ocl_source = (char*)malloc(source_length + 1);
463 if (fread(ocl_source, source_length, 1, filestream) != 1)
465 fclose(filestream);
466 free(ocl_source);
467 return 0;
470 fclose(filestream);
471 ocl_source[source_length] = '\0';
473 *p_source_length = source_length;
474 return ocl_source;
477 /*! \brief Handles the dumping of the OpenCL JIT compilation log
479 * In a debug build:
480 * -Success: Save to file kernel_id.SUCCEEDED in the run folder.
481 * -Fail : Save to file kernel_id.FAILED in the run folder.
482 * Dump to stderr
483 * In a release build:
484 * -Success: Nothing is logged.
485 * -Fail : Save to a file kernel_id.FAILED in the run folder.
486 * If GMX_OCL_DUMP_LOG is set, log is always dumped to file
487 * If OCL_JIT_DUMP_STDERR is set, log is always dumped to stderr
489 * \param build_log String containing the OpenCL JIT compilation log
490 * \param build_options_string String containing the options used for the build
491 * \param build_status The OpenCL type status of the build (CL_SUCCESS etc)
492 * \param kernel_src_id The id of the kernel src used for the build (default)
494 * \throws std::bad_alloc if out of memory */
495 static void
496 handle_ocl_build_log(
497 const char * build_log,
498 const char * build_options_string,
499 cl_int build_status,
500 kernel_source_index_t kernel_src_id)
502 bool dumpStdErr = false;
503 bool dumpFile;
504 #ifdef NDEBUG
505 dumpFile = (build_status != CL_SUCCESS);
506 #else
507 dumpFile = true;
508 if (build_status != CL_SUCCESS)
510 dumpStdErr = true;
512 #endif
514 /* Override default handling */
515 if (getenv("GMX_OCL_DUMP_LOG") != NULL)
517 dumpFile = true;
519 if (getenv("OCL_JIT_DUMP_STDERR") != NULL)
521 dumpStdErr = true;
524 if (dumpFile || dumpStdErr)
526 FILE *build_log_file = NULL;
527 const char *fail_header = "Compilation of source file failed! \n";
528 const char *success_header = "Compilation of source file was successful! \n";
529 const char *log_header = "--------------LOG START---------------\n";
530 const char *log_footer = "---------------LOG END----------------\n";
531 char *build_info;
532 std::string log_fname;
534 build_info = (char*)malloc(32 + strlen(build_options_string) );
535 sprintf(build_info, "-- Used build options: %s\n", build_options_string);
537 if (dumpFile)
539 log_fname = gmx::formatString("%s.%s", kernel_filenames[kernel_src_id],
540 (build_status == CL_SUCCESS) ? "SUCCEEDED" : "FAILED");
541 build_log_file = fopen(log_fname.c_str(), "w");
544 size_t complete_message_size = 0;
545 char * complete_message;
548 complete_message_size = (build_status == CL_SUCCESS) ? strlen(success_header) : strlen(fail_header);
549 complete_message_size += strlen(build_info) + strlen(log_header) + strlen(log_footer);
550 complete_message_size += strlen(build_log);
551 complete_message_size += 1; //null termination
552 complete_message = (char*)malloc(complete_message_size);
554 sprintf(complete_message, "%s%s%s%s%s",
555 (build_status == CL_SUCCESS) ? success_header : fail_header,
556 build_info,
557 log_header,
558 build_log,
559 log_footer);
561 if (dumpFile)
563 if (build_log_file)
565 fprintf(build_log_file, "%s", complete_message);
568 printf("The OpenCL compilation log has been saved in \"%s\"\n", log_fname.c_str());
570 if (dumpStdErr)
572 if (build_status != CL_SUCCESS)
574 fprintf(stderr, "%s", complete_message);
577 if (build_log_file)
579 fclose(build_log_file);
582 free(complete_message);
583 free(build_info);
587 /*! \brief Get the warp size reported by device
589 * This is platform implementation dependant and seems to only work on the Nvidia and Amd platforms!
590 * Nvidia reports 32, Amd for GPU 64. Ignore the rest
592 * \param context Current OpenCL context
593 * \param device_id OpenCL device with the context
594 * \return cl_int value of the warp size
596 static cl_int
597 ocl_get_warp_size(cl_context context, cl_device_id device_id)
599 cl_int cl_error = CL_SUCCESS;
600 size_t warp_size = 0;
601 const char *dummy_kernel = "__kernel void test(__global int* test){test[get_local_id(0)] = 0;}";
603 cl_program program =
604 clCreateProgramWithSource(context, 1, (const char**)&dummy_kernel, NULL, &cl_error);
606 cl_error =
607 clBuildProgram(program, 0, NULL, NULL, NULL, NULL);
609 cl_kernel kernel = clCreateKernel(program, "test", &cl_error);
611 cl_error = clGetKernelWorkGroupInfo(kernel, device_id, CL_KERNEL_PREFERRED_WORK_GROUP_SIZE_MULTIPLE,
612 sizeof(size_t), &warp_size, NULL);
614 clReleaseKernel(kernel);
615 clReleaseProgram(program);
617 assert(warp_size != 0);
618 assert(cl_error == CL_SUCCESS);
619 return warp_size;
623 /*! \brief Automatically select vendor-specific kernel from vendor id
625 * \param vendor_id Vendor id enumerator (amd,nvidia,intel,unknown)
626 * \return Vendor-specific kernel version
628 static kernel_vendor_spec_t
629 ocl_autoselect_kernel_from_vendor(ocl_vendor_id_t vendor_id)
631 kernel_vendor_spec_t kernel_vendor;
632 #ifndef NDEBUG
633 printf("Selecting kernel source automatically\n");
634 #endif
635 switch (vendor_id)
637 case OCL_VENDOR_AMD:
638 kernel_vendor = amd_vendor_kernels;
639 printf("Selecting kernel for AMD\n");
640 break;
641 case OCL_VENDOR_NVIDIA:
642 kernel_vendor = nvidia_vendor_kernels;
643 printf("Selecting kernel for NVIDIA\n");
644 break;
645 default:
646 kernel_vendor = generic_vendor_kernels;
647 printf("Selecting generic kernel\n");
648 break;
650 return kernel_vendor;
653 /*! \brief Returns the compiler define string needed to activate vendor-specific kernels
655 * \param kernel_spec Kernel vendor specification
656 * \return String with the define for the spec
658 static const char *
659 ocl_get_vendor_specific_define(kernel_vendor_spec_t kernel_spec)
661 assert(kernel_spec < auto_vendor_kernels );
662 #ifndef NDEBUG
663 printf("Setting up kernel vendor spec definitions: %s \n", kernel_vendor_spec_definitions[kernel_spec]);
664 #endif
665 return kernel_vendor_spec_definitions[kernel_spec];
668 /*! \brief Check if there's a valid cache available, and return it if so
670 * \param[in] ocl_binary_filename Name of file containing the binary cache
671 * \param[in] build_options_string Compiler command-line options to use (currently unused)
672 * \param[in] ocl_source NULL-terminated string of OpenCL source code (currently unused)
673 * \param[out] ocl_binary_size Size of the binary file once loaded in memory
674 * \param[out] ocl_binary Pointer to the binary file bytes (valid only if return is true)
675 * \return Whether the file reading was successful
677 * \todo Compare current build options and code against the build
678 * options and the code corresponding to the cache. If any change is
679 * detected this function must return false.
681 bool
682 check_ocl_cache(char *ocl_binary_filename,
683 char gmx_unused *build_options_string,
684 char gmx_unused *ocl_source,
685 size_t *ocl_binary_size,
686 unsigned char **ocl_binary)
688 FILE *f;
689 size_t read_count;
691 f = fopen(ocl_binary_filename, "rb");
692 if (!f)
694 return false;
697 fseek(f, 0, SEEK_END);
698 *ocl_binary_size = ftell(f);
699 *ocl_binary = (unsigned char*)malloc(*ocl_binary_size);
700 fseek(f, 0, SEEK_SET);
701 read_count = fread(*ocl_binary, 1, *ocl_binary_size, f);
702 fclose(f);
704 if (read_count != (*ocl_binary_size))
706 return false;
709 return true;
712 /*! \brief Builds a string with build options for the OpenCL kernels
714 * \throws std::bad_alloc if out of memory */
715 char*
716 ocl_get_build_options_string(cl_context context,
717 cl_device_id device_id,
718 kernel_vendor_spec_t kernel_vendor_spec,
719 ocl_vendor_id_t ocl_device_vendor,
720 const char * defines_for_kernel_types,
721 const char * runtime_consts)
723 char * build_options_string = NULL;
724 char custom_build_options_prepend[1024] = { 0 };
725 char *custom_build_options_append = NULL;
726 cl_int warp_size = 0;
728 /* Get the reported warp size. Compile a small dummy kernel to do so */
729 warp_size = ocl_get_warp_size(context, device_id);
731 /* Select vendor specific kernels automatically */
732 if (kernel_vendor_spec == auto_vendor_kernels)
734 kernel_vendor_spec = ocl_autoselect_kernel_from_vendor(ocl_device_vendor);
737 /* Create include paths for kernel sources.
738 All OpenCL kernel files are expected to be stored in one single folder. */
740 /* Apple does not seem to accept the quoted include paths other
741 * OpenCL implementations are happy with. Since the standard still says
742 * it should be quoted, we handle Apple as a special case.
744 #ifdef __APPLE__
745 std::string unescaped_ocl_root_path = get_ocl_root_path();
746 std::string ocl_root_path;
748 char incl_opt_start[] = "-I";
749 char incl_opt_end[] = "";
751 for (std::string::size_type i = 0; i < unescaped_ocl_root_path.length(); i++)
753 if (unescaped_ocl_root_path[i] == ' ')
755 ocl_root_path.push_back('\\');
757 ocl_root_path.push_back(unescaped_ocl_root_path[i]);
759 // Here the Apple ocl_root_path has all spaces prepended with a backslash
760 #else
761 std::string ocl_root_path = get_ocl_root_path();
763 char incl_opt_start[] = "-I\"";
764 char incl_opt_end[] = "\"";
766 #endif
767 size_t chars = 0;
769 custom_build_options_append =
770 (char*)calloc((ocl_root_path.length() /* Path to the OpenCL folder */
771 + strlen(incl_opt_start) /* -I" */
772 + strlen(incl_opt_end) /* " */
773 + 1 /* null char */
774 ), 1);
776 strncpy(&custom_build_options_append[chars], incl_opt_start, strlen(incl_opt_start));
777 chars += strlen(incl_opt_start);
779 strncpy(&custom_build_options_append[chars], ocl_root_path.c_str(), ocl_root_path.length());
780 chars += ocl_root_path.length();
782 strncpy(&custom_build_options_append[chars], incl_opt_end, strlen(incl_opt_end));
785 /* Get vendor specific define (amd,nvidia,nowarp) */
786 const char * kernel_vendor_spec_define =
787 ocl_get_vendor_specific_define(kernel_vendor_spec);
789 /* Compose the build options to be prepended. */
790 sprintf(custom_build_options_prepend,
791 "-DWARP_SIZE_TEST=%d %s %s %s",
792 warp_size,
793 kernel_vendor_spec_define,
794 defines_for_kernel_types,
795 runtime_consts ? runtime_consts : ""
798 /* Get the size of the complete build options string */
799 size_t build_options_length =
800 create_ocl_build_options_length(
801 ocl_device_vendor,
802 custom_build_options_prepend,
803 custom_build_options_append
806 build_options_string = (char *)malloc(build_options_length);
808 /* Compose the complete build options */
809 create_ocl_build_options(
810 build_options_string,
811 build_options_length,
812 ocl_device_vendor,
813 custom_build_options_prepend,
814 custom_build_options_append
817 if (custom_build_options_append)
819 free(custom_build_options_append);
822 return build_options_string;
825 /*! \brief Implement caching of OpenCL binaries
827 * \param[in] program Index of program to cache
828 * \param[in] file_name Name of file to use for the cache
830 void
831 print_ocl_binaries_to_file(cl_program program, char* file_name)
833 size_t ocl_binary_size = 0;
834 unsigned char *ocl_binary = NULL;
836 clGetProgramInfo(program, CL_PROGRAM_BINARY_SIZES, sizeof(size_t), &ocl_binary_size, NULL);
838 ocl_binary = (unsigned char*)malloc(ocl_binary_size);
840 clGetProgramInfo(program, CL_PROGRAM_BINARIES, sizeof(unsigned char *), &ocl_binary, NULL);
842 FILE *f = fopen(file_name, "wb");
843 fwrite(ocl_binary, 1, ocl_binary_size, f);
844 fclose(f);
846 free(ocl_binary);
849 /*! \brief Compile the kernels as described by kernel src id and vendor spec
851 * \param[in] kernel_source_file Index of the kernel src to be used (default)
852 * \param[in] kernel_vendor_spec Vendor-specific compilation (auto,nvidia,amd,nowarp)
853 * \param[in] defines_for_kernel_types Preprocessor defines that trigger the compilation of the kernels
854 * \param[out] result_str Gromacs error string
855 * \param[in] context Current context on the device to compile for
856 * \param[in] device_id OpenCL device id of the device to compile for
857 * \param[in] ocl_device_vendor Enumerator of the device vendor to compile for
858 * \param[out] p_program Pointer to the cl_program where the compiled
859 * cl_program will be stored
860 * \param[in] runtime_consts Optional string with runtime constants.
861 * Each constant is given according to the following
862 * format: "-Dname=value".
863 * Multiple defines are separated by blanks.
865 * \return cl_int with the build status AND any other OpenCL error appended to it
867 * \todo Consider whether we can parallelize the compilation of all
868 * the kernels by compiling them in separate programs - but since the
869 * resulting programs can't refer to each other, that might lead to
870 * bloat of util code?
872 * \throws std::bad_alloc if out of memory
874 cl_int
875 ocl_compile_program(
876 kernel_source_index_t kernel_source_file,
877 kernel_vendor_spec_t kernel_vendor_spec,
878 const char * defines_for_kernel_types,
879 char * result_str,
880 cl_context context,
881 cl_device_id device_id,
882 ocl_vendor_id_t ocl_device_vendor,
883 cl_program * p_program,
884 const char * runtime_consts
887 char * build_options_string = NULL;
888 cl_int cl_error = CL_SUCCESS;
890 char * ocl_source = NULL;
891 size_t ocl_source_length = 0;
892 size_t kernel_filename_len = 0;
894 bool bCacheOclBuild = false;
895 bool bOclCacheValid = false;
897 char ocl_binary_filename[256] = { 0 };
898 size_t ocl_binary_size = 0;
899 unsigned char *ocl_binary = NULL;
901 /* Load OpenCL source files */
903 char* kernel_filename = NULL;
905 /* Get the size of the kernel source filename */
906 kernel_filename_len = get_ocl_kernel_source_file_info(kernel_source_file);
907 if (kernel_filename_len)
909 kernel_filename = (char*)malloc(kernel_filename_len);
912 /* Get the actual full path and name of the source file with the kernels */
913 get_ocl_kernel_source_path(kernel_filename, kernel_source_file, kernel_filename_len);
915 /* Load the above source file and store its contents in ocl_source */
916 ocl_source = load_ocl_source(kernel_filename, &ocl_source_length);
918 if (!ocl_source)
920 sprintf(result_str, "Error loading OpenCL code %s", kernel_filename);
921 return CL_BUILD_PROGRAM_FAILURE;
924 /* The sources are loaded so the filename is not needed anymore */
925 free(kernel_filename);
928 /* Allocate and initialize the string with build options */
929 build_options_string =
930 ocl_get_build_options_string(context, device_id, kernel_vendor_spec,
931 ocl_device_vendor,
932 defines_for_kernel_types,
933 runtime_consts);
935 /* Check if OpenCL caching is ON - currently caching is disabled
936 until we resolve concurrency issues. */
937 /* bCacheOclBuild = (NULL == getenv("GMX_OCL_NOGENCACHE"));*/
938 if (bCacheOclBuild)
940 clGetDeviceInfo(device_id, CL_DEVICE_NAME, sizeof(ocl_binary_filename), ocl_binary_filename, NULL);
941 strcat(ocl_binary_filename, ".bin");
943 /* Check if there's a valid cache available */
944 bOclCacheValid = check_ocl_cache(ocl_binary_filename,
945 build_options_string,
946 ocl_source,
947 &ocl_binary_size, &ocl_binary);
950 /* Create OpenCL program */
951 if (bCacheOclBuild && bOclCacheValid)
953 /* Create program from pre-built binaries */
954 *p_program =
955 clCreateProgramWithBinary(
956 context,
958 &device_id,
959 &ocl_binary_size,
960 (const unsigned char**)&ocl_binary,
961 NULL,
962 &cl_error);
964 else
966 /* Create program from source code */
967 *p_program =
968 clCreateProgramWithSource(
969 context,
971 (const char**)(&ocl_source),
972 &ocl_source_length,
973 &cl_error
977 /* Build program */
978 cl_int build_status = CL_SUCCESS;
980 /* Now we are ready to launch the build */
981 build_status =
982 clBuildProgram(*p_program, 0, NULL, build_options_string, NULL, NULL);
984 if (build_status == CL_SUCCESS)
986 if (bCacheOclBuild)
988 /* If OpenCL caching is ON, but the current cache is not
989 valid => update it */
990 if (!bOclCacheValid)
992 print_ocl_binaries_to_file(*p_program, ocl_binary_filename);
995 else
996 if ((OCL_VENDOR_NVIDIA == ocl_device_vendor) && getenv("GMX_OCL_DUMP_INTERM_FILES"))
998 /* If dumping intermediate files has been requested and this is an NVIDIA card
999 => write PTX to file */
1000 char ptx_filename[256];
1002 clGetDeviceInfo(device_id, CL_DEVICE_NAME, sizeof(ptx_filename), ptx_filename, NULL);
1003 strcat(ptx_filename, ".ptx");
1005 print_ocl_binaries_to_file(*p_program, ptx_filename);
1009 // Get log string size
1010 size_t build_log_size = 0;
1011 cl_error =
1012 clGetProgramBuildInfo(
1013 *p_program,
1014 device_id,
1015 CL_PROGRAM_BUILD_LOG,
1017 NULL,
1018 &build_log_size
1021 /* Regardless of success or failure, if there is something in the log
1022 * we might need to display it */
1023 if (build_log_size && (cl_error == CL_SUCCESS) )
1025 char *build_log = NULL;
1027 /* Allocate memory to fit the build log,
1028 it can be very large in case of errors */
1029 build_log = (char*)malloc(build_log_size);
1031 if (build_log)
1033 /* Get the actual compilation log */
1034 cl_error =
1035 clGetProgramBuildInfo(
1036 *p_program,
1037 device_id,
1038 CL_PROGRAM_BUILD_LOG,
1039 build_log_size,
1040 build_log,
1041 NULL
1044 /* Save or display the log */
1045 if (!cl_error)
1047 handle_ocl_build_log(
1048 build_log,
1049 build_options_string,
1050 build_status,
1051 kernel_source_file
1055 /* Build_log not needed anymore */
1056 free(build_log);
1061 /* Final clean up */
1062 if (ocl_binary)
1064 free(ocl_binary);
1067 if (build_options_string)
1069 free(build_options_string);
1072 if (ocl_source)
1074 free(ocl_source);
1077 /* Append any other error to the build_status */
1078 return build_status | cl_error;