[AArch64] Improve SVE constant moves
[official-gcc.git] / libgomp / plugin / plugin-hsa.c
blob80f23f9beb628135b857417ccf3df802b5f4c3a8
1 /* Plugin for HSAIL execution.
3 Copyright (C) 2013-2019 Free Software Foundation, Inc.
5 Contributed by Martin Jambor <mjambor@suse.cz> and
6 Martin Liska <mliska@suse.cz>.
8 This file is part of the GNU Offloading and Multi Processing Library
9 (libgomp).
11 Libgomp is free software; you can redistribute it and/or modify it
12 under the terms of the GNU General Public License as published by
13 the Free Software Foundation; either version 3, or (at your option)
14 any later version.
16 Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY
17 WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
18 FOR A PARTICULAR PURPOSE. See the GNU General Public License for
19 more details.
21 Under Section 7 of GPL version 3, you are granted additional
22 permissions described in the GCC Runtime Library Exception, version
23 3.1, as published by the Free Software Foundation.
25 You should have received a copy of the GNU General Public License and
26 a copy of the GCC Runtime Library Exception along with this program;
27 see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
28 <http://www.gnu.org/licenses/>. */
30 #include "config.h"
31 #include "gstdint.h"
32 #include <stdio.h>
33 #include <stdlib.h>
34 #include <string.h>
35 #include <pthread.h>
36 #ifdef HAVE_INTTYPES_H
37 #include <inttypes.h>
38 #endif
39 #include <stdbool.h>
40 #include <hsa.h>
41 #include <plugin/hsa_ext_finalize.h>
42 #include <dlfcn.h>
43 #include "libgomp-plugin.h"
44 #include "gomp-constants.h"
45 #include "secure_getenv.h"
47 #ifdef HAVE_INTTYPES_H
48 typedef uint64_t print_uint64_t;
49 #else
50 #define PRIu64 "lu"
51 typedef unsigned long print_uint64_t;
52 #endif
54 /* As an HSA runtime is dlopened, following structure defines function
55 pointers utilized by the HSA plug-in. */
57 struct hsa_runtime_fn_info
59 /* HSA runtime. */
60 hsa_status_t (*hsa_status_string_fn) (hsa_status_t status,
61 const char **status_string);
62 hsa_status_t (*hsa_agent_get_info_fn) (hsa_agent_t agent,
63 hsa_agent_info_t attribute,
64 void *value);
65 hsa_status_t (*hsa_init_fn) (void);
66 hsa_status_t (*hsa_iterate_agents_fn)
67 (hsa_status_t (*callback)(hsa_agent_t agent, void *data), void *data);
68 hsa_status_t (*hsa_region_get_info_fn) (hsa_region_t region,
69 hsa_region_info_t attribute,
70 void *value);
71 hsa_status_t (*hsa_queue_create_fn)
72 (hsa_agent_t agent, uint32_t size, hsa_queue_type_t type,
73 void (*callback)(hsa_status_t status, hsa_queue_t *source, void *data),
74 void *data, uint32_t private_segment_size,
75 uint32_t group_segment_size, hsa_queue_t **queue);
76 hsa_status_t (*hsa_agent_iterate_regions_fn)
77 (hsa_agent_t agent,
78 hsa_status_t (*callback)(hsa_region_t region, void *data), void *data);
79 hsa_status_t (*hsa_executable_destroy_fn) (hsa_executable_t executable);
80 hsa_status_t (*hsa_executable_create_fn)
81 (hsa_profile_t profile, hsa_executable_state_t executable_state,
82 const char *options, hsa_executable_t *executable);
83 hsa_status_t (*hsa_executable_global_variable_define_fn)
84 (hsa_executable_t executable, const char *variable_name, void *address);
85 hsa_status_t (*hsa_executable_load_code_object_fn)
86 (hsa_executable_t executable, hsa_agent_t agent,
87 hsa_code_object_t code_object, const char *options);
88 hsa_status_t (*hsa_executable_freeze_fn)(hsa_executable_t executable,
89 const char *options);
90 hsa_status_t (*hsa_signal_create_fn) (hsa_signal_value_t initial_value,
91 uint32_t num_consumers,
92 const hsa_agent_t *consumers,
93 hsa_signal_t *signal);
94 hsa_status_t (*hsa_memory_allocate_fn) (hsa_region_t region, size_t size,
95 void **ptr);
96 hsa_status_t (*hsa_memory_free_fn) (void *ptr);
97 hsa_status_t (*hsa_signal_destroy_fn) (hsa_signal_t signal);
98 hsa_status_t (*hsa_executable_get_symbol_fn)
99 (hsa_executable_t executable, const char *module_name,
100 const char *symbol_name, hsa_agent_t agent, int32_t call_convention,
101 hsa_executable_symbol_t *symbol);
102 hsa_status_t (*hsa_executable_symbol_get_info_fn)
103 (hsa_executable_symbol_t executable_symbol,
104 hsa_executable_symbol_info_t attribute, void *value);
105 uint64_t (*hsa_queue_add_write_index_release_fn) (const hsa_queue_t *queue,
106 uint64_t value);
107 uint64_t (*hsa_queue_load_read_index_acquire_fn) (const hsa_queue_t *queue);
108 void (*hsa_signal_store_relaxed_fn) (hsa_signal_t signal,
109 hsa_signal_value_t value);
110 void (*hsa_signal_store_release_fn) (hsa_signal_t signal,
111 hsa_signal_value_t value);
112 hsa_signal_value_t (*hsa_signal_wait_acquire_fn)
113 (hsa_signal_t signal, hsa_signal_condition_t condition,
114 hsa_signal_value_t compare_value, uint64_t timeout_hint,
115 hsa_wait_state_t wait_state_hint);
116 hsa_signal_value_t (*hsa_signal_load_acquire_fn) (hsa_signal_t signal);
117 hsa_status_t (*hsa_queue_destroy_fn) (hsa_queue_t *queue);
119 /* HSA finalizer. */
120 hsa_status_t (*hsa_ext_program_add_module_fn) (hsa_ext_program_t program,
121 hsa_ext_module_t module);
122 hsa_status_t (*hsa_ext_program_create_fn)
123 (hsa_machine_model_t machine_model, hsa_profile_t profile,
124 hsa_default_float_rounding_mode_t default_float_rounding_mode,
125 const char *options, hsa_ext_program_t *program);
126 hsa_status_t (*hsa_ext_program_destroy_fn) (hsa_ext_program_t program);
127 hsa_status_t (*hsa_ext_program_finalize_fn)
128 (hsa_ext_program_t program,hsa_isa_t isa,
129 int32_t call_convention, hsa_ext_control_directives_t control_directives,
130 const char *options, hsa_code_object_type_t code_object_type,
131 hsa_code_object_t *code_object);
134 /* HSA runtime functions that are initialized in init_hsa_context. */
136 static struct hsa_runtime_fn_info hsa_fns;
138 /* Keep the following GOMP prefixed structures in sync with respective parts of
139 the compiler. */
141 /* Structure describing the run-time and grid properties of an HSA kernel
142 lauch. */
144 struct GOMP_kernel_launch_attributes
146 /* Number of dimensions the workload has. Maximum number is 3. */
147 uint32_t ndim;
148 /* Size of the grid in the three respective dimensions. */
149 uint32_t gdims[3];
150 /* Size of work-groups in the respective dimensions. */
151 uint32_t wdims[3];
154 /* Collection of information needed for a dispatch of a kernel from a
155 kernel. */
157 struct GOMP_hsa_kernel_dispatch
159 /* Pointer to a command queue associated with a kernel dispatch agent. */
160 void *queue;
161 /* Pointer to reserved memory for OMP data struct copying. */
162 void *omp_data_memory;
163 /* Pointer to a memory space used for kernel arguments passing. */
164 void *kernarg_address;
165 /* Kernel object. */
166 uint64_t object;
167 /* Synchronization signal used for dispatch synchronization. */
168 uint64_t signal;
169 /* Private segment size. */
170 uint32_t private_segment_size;
171 /* Group segment size. */
172 uint32_t group_segment_size;
173 /* Number of children kernel dispatches. */
174 uint64_t kernel_dispatch_count;
175 /* Debug purpose argument. */
176 uint64_t debug;
177 /* Levels-var ICV. */
178 uint64_t omp_level;
179 /* Kernel dispatch structures created for children kernel dispatches. */
180 struct GOMP_hsa_kernel_dispatch **children_dispatches;
181 /* Number of threads. */
182 uint32_t omp_num_threads;
185 /* Part of the libgomp plugin interface. Return the name of the accelerator,
186 which is "hsa". */
188 const char *
189 GOMP_OFFLOAD_get_name (void)
191 return "hsa";
194 /* Part of the libgomp plugin interface. Return the specific capabilities the
195 HSA accelerator have. */
197 unsigned int
198 GOMP_OFFLOAD_get_caps (void)
200 return GOMP_OFFLOAD_CAP_SHARED_MEM | GOMP_OFFLOAD_CAP_OPENMP_400;
203 /* Part of the libgomp plugin interface. Identify as HSA accelerator. */
206 GOMP_OFFLOAD_get_type (void)
208 return OFFLOAD_TARGET_TYPE_HSA;
211 /* Return the libgomp version number we're compatible with. There is
212 no requirement for cross-version compatibility. */
214 unsigned
215 GOMP_OFFLOAD_version (void)
217 return GOMP_VERSION;
220 /* Flag to decide whether print to stderr information about what is going on.
221 Set in init_debug depending on environment variables. */
223 static bool debug;
225 /* Flag to decide if the runtime should suppress a possible fallback to host
226 execution. */
228 static bool suppress_host_fallback;
230 /* Flag to locate HSA runtime shared library that is dlopened
231 by this plug-in. */
233 static const char *hsa_runtime_lib;
235 /* Flag to decide if the runtime should support also CPU devices (can be
236 a simulator). */
238 static bool support_cpu_devices;
240 /* Initialize debug and suppress_host_fallback according to the environment. */
242 static void
243 init_enviroment_variables (void)
245 if (secure_getenv ("HSA_DEBUG"))
246 debug = true;
247 else
248 debug = false;
250 if (secure_getenv ("HSA_SUPPRESS_HOST_FALLBACK"))
251 suppress_host_fallback = true;
252 else
253 suppress_host_fallback = false;
255 hsa_runtime_lib = secure_getenv ("HSA_RUNTIME_LIB");
256 if (hsa_runtime_lib == NULL)
257 hsa_runtime_lib = HSA_RUNTIME_LIB "libhsa-runtime64.so";
259 support_cpu_devices = secure_getenv ("HSA_SUPPORT_CPU_DEVICES");
262 /* Print a logging message with PREFIX to stderr if HSA_DEBUG value
263 is set to true. */
265 #define HSA_LOG(prefix, ...) \
266 do \
268 if (debug) \
270 fprintf (stderr, prefix); \
271 fprintf (stderr, __VA_ARGS__); \
274 while (false)
276 /* Print a debugging message to stderr. */
278 #define HSA_DEBUG(...) HSA_LOG ("HSA debug: ", __VA_ARGS__)
280 /* Print a warning message to stderr. */
282 #define HSA_WARNING(...) HSA_LOG ("HSA warning: ", __VA_ARGS__)
284 /* Print HSA warning STR with an HSA STATUS code. */
286 static void
287 hsa_warn (const char *str, hsa_status_t status)
289 if (!debug)
290 return;
292 const char *hsa_error_msg;
293 hsa_fns.hsa_status_string_fn (status, &hsa_error_msg);
295 fprintf (stderr, "HSA warning: %s\nRuntime message: %s", str, hsa_error_msg);
298 /* Report a fatal error STR together with the HSA error corresponding to STATUS
299 and terminate execution of the current process. */
301 static void
302 hsa_fatal (const char *str, hsa_status_t status)
304 const char *hsa_error_msg;
305 hsa_fns.hsa_status_string_fn (status, &hsa_error_msg);
306 GOMP_PLUGIN_fatal ("HSA fatal error: %s\nRuntime message: %s", str,
307 hsa_error_msg);
310 /* Like hsa_fatal, except only report error message, and return FALSE
311 for propagating error processing to outside of plugin. */
313 static bool
314 hsa_error (const char *str, hsa_status_t status)
316 const char *hsa_error_msg;
317 hsa_fns.hsa_status_string_fn (status, &hsa_error_msg);
318 GOMP_PLUGIN_error ("HSA fatal error: %s\nRuntime message: %s", str,
319 hsa_error_msg);
320 return false;
323 struct hsa_kernel_description
325 const char *name;
326 unsigned omp_data_size;
327 bool gridified_kernel_p;
328 unsigned kernel_dependencies_count;
329 const char **kernel_dependencies;
332 struct global_var_info
334 const char *name;
335 void *address;
338 /* Data passed by the static initializer of a compilation unit containing BRIG
339 to GOMP_offload_register. */
341 struct brig_image_desc
343 hsa_ext_module_t brig_module;
344 const unsigned kernel_count;
345 struct hsa_kernel_description *kernel_infos;
346 const unsigned global_variable_count;
347 struct global_var_info *global_variables;
350 struct agent_info;
352 /* Information required to identify, finalize and run any given kernel. */
354 struct kernel_info
356 /* Name of the kernel, required to locate it within the brig module. */
357 const char *name;
358 /* Size of memory space for OMP data. */
359 unsigned omp_data_size;
360 /* The specific agent the kernel has been or will be finalized for and run
361 on. */
362 struct agent_info *agent;
363 /* The specific module where the kernel takes place. */
364 struct module_info *module;
365 /* Mutex enforcing that at most once thread ever initializes a kernel for
366 use. A thread should have locked agent->modules_rwlock for reading before
367 acquiring it. */
368 pthread_mutex_t init_mutex;
369 /* Flag indicating whether the kernel has been initialized and all fields
370 below it contain valid data. */
371 bool initialized;
372 /* Flag indicating that the kernel has a problem that blocks an execution. */
373 bool initialization_failed;
374 /* The object to be put into the dispatch queue. */
375 uint64_t object;
376 /* Required size of kernel arguments. */
377 uint32_t kernarg_segment_size;
378 /* Required size of group segment. */
379 uint32_t group_segment_size;
380 /* Required size of private segment. */
381 uint32_t private_segment_size;
382 /* List of all kernel dependencies. */
383 const char **dependencies;
384 /* Number of dependencies. */
385 unsigned dependencies_count;
386 /* Maximum OMP data size necessary for kernel from kernel dispatches. */
387 unsigned max_omp_data_size;
388 /* True if the kernel is gridified. */
389 bool gridified_kernel_p;
392 /* Information about a particular brig module, its image and kernels. */
394 struct module_info
396 /* The next and previous module in the linked list of modules of an agent. */
397 struct module_info *next, *prev;
398 /* The description with which the program has registered the image. */
399 struct brig_image_desc *image_desc;
401 /* Number of kernels in this module. */
402 int kernel_count;
403 /* An array of kernel_info structures describing each kernel in this
404 module. */
405 struct kernel_info kernels[];
408 /* Information about shared brig library. */
410 struct brig_library_info
412 char *file_name;
413 hsa_ext_module_t image;
416 /* Description of an HSA GPU agent and the program associated with it. */
418 struct agent_info
420 /* The HSA ID of the agent. Assigned when hsa_context is initialized. */
421 hsa_agent_t id;
422 /* Whether the agent has been initialized. The fields below are usable only
423 if it has been. */
424 bool initialized;
425 /* The HSA ISA of this agent. */
426 hsa_isa_t isa;
427 /* Command queue of the agent. */
428 hsa_queue_t *command_q;
429 /* Kernel from kernel dispatch command queue. */
430 hsa_queue_t *kernel_dispatch_command_q;
431 /* The HSA memory region from which to allocate kernel arguments. */
432 hsa_region_t kernarg_region;
434 /* Read-write lock that protects kernels which are running or about to be run
435 from interference with loading and unloading of images. Needs to be
436 locked for reading while a kernel is being run, and for writing if the
437 list of modules is manipulated (and thus the HSA program invalidated). */
438 pthread_rwlock_t modules_rwlock;
439 /* The first module in a linked list of modules associated with this
440 kernel. */
441 struct module_info *first_module;
443 /* Mutex enforcing that only one thread will finalize the HSA program. A
444 thread should have locked agent->modules_rwlock for reading before
445 acquiring it. */
446 pthread_mutex_t prog_mutex;
447 /* Flag whether the HSA program that consists of all the modules has been
448 finalized. */
449 bool prog_finalized;
450 /* Flag whether the program was finalized but with a failure. */
451 bool prog_finalized_error;
452 /* HSA executable - the finalized program that is used to locate kernels. */
453 hsa_executable_t executable;
454 /* List of BRIG libraries. */
455 struct brig_library_info **brig_libraries;
456 /* Number of loaded shared BRIG libraries. */
457 unsigned brig_libraries_count;
460 /* Information about the whole HSA environment and all of its agents. */
462 struct hsa_context_info
464 /* Whether the structure has been initialized. */
465 bool initialized;
466 /* Number of usable GPU HSA agents in the system. */
467 int agent_count;
468 /* Array of agent_info structures describing the individual HSA agents. */
469 struct agent_info *agents;
472 /* Information about the whole HSA environment and all of its agents. */
474 static struct hsa_context_info hsa_context;
476 #define DLSYM_FN(function) \
477 hsa_fns.function##_fn = dlsym (handle, #function); \
478 if (hsa_fns.function##_fn == NULL) \
479 goto dl_fail;
481 static bool
482 init_hsa_runtime_functions (void)
484 void *handle = dlopen (hsa_runtime_lib, RTLD_LAZY);
485 if (handle == NULL)
486 goto dl_fail;
488 DLSYM_FN (hsa_status_string)
489 DLSYM_FN (hsa_agent_get_info)
490 DLSYM_FN (hsa_init)
491 DLSYM_FN (hsa_iterate_agents)
492 DLSYM_FN (hsa_region_get_info)
493 DLSYM_FN (hsa_queue_create)
494 DLSYM_FN (hsa_agent_iterate_regions)
495 DLSYM_FN (hsa_executable_destroy)
496 DLSYM_FN (hsa_executable_create)
497 DLSYM_FN (hsa_executable_global_variable_define)
498 DLSYM_FN (hsa_executable_load_code_object)
499 DLSYM_FN (hsa_executable_freeze)
500 DLSYM_FN (hsa_signal_create)
501 DLSYM_FN (hsa_memory_allocate)
502 DLSYM_FN (hsa_memory_free)
503 DLSYM_FN (hsa_signal_destroy)
504 DLSYM_FN (hsa_executable_get_symbol)
505 DLSYM_FN (hsa_executable_symbol_get_info)
506 DLSYM_FN (hsa_queue_add_write_index_release)
507 DLSYM_FN (hsa_queue_load_read_index_acquire)
508 DLSYM_FN (hsa_signal_wait_acquire)
509 DLSYM_FN (hsa_signal_store_relaxed)
510 DLSYM_FN (hsa_signal_store_release)
511 DLSYM_FN (hsa_signal_load_acquire)
512 DLSYM_FN (hsa_queue_destroy)
513 DLSYM_FN (hsa_ext_program_add_module)
514 DLSYM_FN (hsa_ext_program_create)
515 DLSYM_FN (hsa_ext_program_destroy)
516 DLSYM_FN (hsa_ext_program_finalize)
517 return true;
519 dl_fail:
520 HSA_DEBUG ("while loading %s: %s\n", hsa_runtime_lib, dlerror ());
521 return false;
524 /* Find kernel for an AGENT by name provided in KERNEL_NAME. */
526 static struct kernel_info *
527 get_kernel_for_agent (struct agent_info *agent, const char *kernel_name)
529 struct module_info *module = agent->first_module;
531 while (module)
533 for (unsigned i = 0; i < module->kernel_count; i++)
534 if (strcmp (module->kernels[i].name, kernel_name) == 0)
535 return &module->kernels[i];
537 module = module->next;
540 return NULL;
543 /* Return true if the agent is a GPU and acceptable of concurrent submissions
544 from different threads. */
546 static bool
547 suitable_hsa_agent_p (hsa_agent_t agent)
549 hsa_device_type_t device_type;
550 hsa_status_t status
551 = hsa_fns.hsa_agent_get_info_fn (agent, HSA_AGENT_INFO_DEVICE,
552 &device_type);
553 if (status != HSA_STATUS_SUCCESS)
554 return false;
556 switch (device_type)
558 case HSA_DEVICE_TYPE_GPU:
559 break;
560 case HSA_DEVICE_TYPE_CPU:
561 if (!support_cpu_devices)
562 return false;
563 break;
564 default:
565 return false;
568 uint32_t features = 0;
569 status = hsa_fns.hsa_agent_get_info_fn (agent, HSA_AGENT_INFO_FEATURE,
570 &features);
571 if (status != HSA_STATUS_SUCCESS
572 || !(features & HSA_AGENT_FEATURE_KERNEL_DISPATCH))
573 return false;
574 hsa_queue_type_t queue_type;
575 status = hsa_fns.hsa_agent_get_info_fn (agent, HSA_AGENT_INFO_QUEUE_TYPE,
576 &queue_type);
577 if (status != HSA_STATUS_SUCCESS
578 || (queue_type != HSA_QUEUE_TYPE_MULTI))
579 return false;
581 return true;
584 /* Callback of hsa_iterate_agents, if AGENT is a GPU device, increment
585 agent_count in hsa_context. */
587 static hsa_status_t
588 count_gpu_agents (hsa_agent_t agent, void *data __attribute__ ((unused)))
590 if (suitable_hsa_agent_p (agent))
591 hsa_context.agent_count++;
592 return HSA_STATUS_SUCCESS;
595 /* Callback of hsa_iterate_agents, if AGENT is a GPU device, assign the agent
596 id to the describing structure in the hsa context. The index of the
597 structure is pointed to by DATA, increment it afterwards. */
599 static hsa_status_t
600 assign_agent_ids (hsa_agent_t agent, void *data)
602 if (suitable_hsa_agent_p (agent))
604 int *agent_index = (int *) data;
605 hsa_context.agents[*agent_index].id = agent;
606 ++*agent_index;
608 return HSA_STATUS_SUCCESS;
611 /* Initialize hsa_context if it has not already been done.
612 Return TRUE on success. */
614 static bool
615 init_hsa_context (void)
617 hsa_status_t status;
618 int agent_index = 0;
620 if (hsa_context.initialized)
621 return true;
622 init_enviroment_variables ();
623 if (!init_hsa_runtime_functions ())
625 HSA_DEBUG ("Run-time could not be dynamically opened\n");
626 return false;
628 status = hsa_fns.hsa_init_fn ();
629 if (status != HSA_STATUS_SUCCESS)
630 return hsa_error ("Run-time could not be initialized", status);
631 HSA_DEBUG ("HSA run-time initialized\n");
632 status = hsa_fns.hsa_iterate_agents_fn (count_gpu_agents, NULL);
633 if (status != HSA_STATUS_SUCCESS)
634 return hsa_error ("HSA GPU devices could not be enumerated", status);
635 HSA_DEBUG ("There are %i HSA GPU devices.\n", hsa_context.agent_count);
637 hsa_context.agents
638 = GOMP_PLUGIN_malloc_cleared (hsa_context.agent_count
639 * sizeof (struct agent_info));
640 status = hsa_fns.hsa_iterate_agents_fn (assign_agent_ids, &agent_index);
641 if (agent_index != hsa_context.agent_count)
643 GOMP_PLUGIN_error ("Failed to assign IDs to all HSA agents");
644 return false;
646 hsa_context.initialized = true;
647 return true;
650 /* Callback of dispatch queues to report errors. */
652 static void
653 queue_callback (hsa_status_t status,
654 hsa_queue_t *queue __attribute__ ((unused)),
655 void *data __attribute__ ((unused)))
657 hsa_fatal ("Asynchronous queue error", status);
660 /* Callback of hsa_agent_iterate_regions. Determine if a memory REGION can be
661 used for kernarg allocations and if so write it to the memory pointed to by
662 DATA and break the query. */
664 static hsa_status_t
665 get_kernarg_memory_region (hsa_region_t region, void *data)
667 hsa_status_t status;
668 hsa_region_segment_t segment;
670 status = hsa_fns.hsa_region_get_info_fn (region, HSA_REGION_INFO_SEGMENT,
671 &segment);
672 if (status != HSA_STATUS_SUCCESS)
673 return status;
674 if (segment != HSA_REGION_SEGMENT_GLOBAL)
675 return HSA_STATUS_SUCCESS;
677 uint32_t flags;
678 status = hsa_fns.hsa_region_get_info_fn (region, HSA_REGION_INFO_GLOBAL_FLAGS,
679 &flags);
680 if (status != HSA_STATUS_SUCCESS)
681 return status;
682 if (flags & HSA_REGION_GLOBAL_FLAG_KERNARG)
684 hsa_region_t *ret = (hsa_region_t *) data;
685 *ret = region;
686 return HSA_STATUS_INFO_BREAK;
688 return HSA_STATUS_SUCCESS;
691 /* Part of the libgomp plugin interface. Return the number of HSA devices on
692 the system. */
695 GOMP_OFFLOAD_get_num_devices (void)
697 if (!init_hsa_context ())
698 return 0;
699 return hsa_context.agent_count;
702 /* Part of the libgomp plugin interface. Initialize agent number N so that it
703 can be used for computation. Return TRUE on success. */
705 bool
706 GOMP_OFFLOAD_init_device (int n)
708 if (!init_hsa_context ())
709 return false;
710 if (n >= hsa_context.agent_count)
712 GOMP_PLUGIN_error ("Request to initialize non-existing HSA device %i", n);
713 return false;
715 struct agent_info *agent = &hsa_context.agents[n];
717 if (agent->initialized)
718 return true;
720 if (pthread_rwlock_init (&agent->modules_rwlock, NULL))
722 GOMP_PLUGIN_error ("Failed to initialize an HSA agent rwlock");
723 return false;
725 if (pthread_mutex_init (&agent->prog_mutex, NULL))
727 GOMP_PLUGIN_error ("Failed to initialize an HSA agent program mutex");
728 return false;
731 uint32_t queue_size;
732 hsa_status_t status;
733 status = hsa_fns.hsa_agent_get_info_fn (agent->id,
734 HSA_AGENT_INFO_QUEUE_MAX_SIZE,
735 &queue_size);
736 if (status != HSA_STATUS_SUCCESS)
737 return hsa_error ("Error requesting maximum queue size of the HSA agent",
738 status);
739 status = hsa_fns.hsa_agent_get_info_fn (agent->id, HSA_AGENT_INFO_ISA,
740 &agent->isa);
741 if (status != HSA_STATUS_SUCCESS)
742 return hsa_error ("Error querying the ISA of the agent", status);
743 status = hsa_fns.hsa_queue_create_fn (agent->id, queue_size,
744 HSA_QUEUE_TYPE_MULTI,
745 queue_callback, NULL, UINT32_MAX,
746 UINT32_MAX,
747 &agent->command_q);
748 if (status != HSA_STATUS_SUCCESS)
749 return hsa_error ("Error creating command queue", status);
751 status = hsa_fns.hsa_queue_create_fn (agent->id, queue_size,
752 HSA_QUEUE_TYPE_MULTI,
753 queue_callback, NULL, UINT32_MAX,
754 UINT32_MAX,
755 &agent->kernel_dispatch_command_q);
756 if (status != HSA_STATUS_SUCCESS)
757 return hsa_error ("Error creating kernel dispatch command queue", status);
759 agent->kernarg_region.handle = (uint64_t) -1;
760 status = hsa_fns.hsa_agent_iterate_regions_fn (agent->id,
761 get_kernarg_memory_region,
762 &agent->kernarg_region);
763 if (agent->kernarg_region.handle == (uint64_t) -1)
765 GOMP_PLUGIN_error ("Could not find suitable memory region for kernel "
766 "arguments");
767 return false;
769 HSA_DEBUG ("HSA agent initialized, queue has id %llu\n",
770 (long long unsigned) agent->command_q->id);
771 HSA_DEBUG ("HSA agent initialized, kernel dispatch queue has id %llu\n",
772 (long long unsigned) agent->kernel_dispatch_command_q->id);
773 agent->initialized = true;
774 return true;
777 /* Verify that hsa_context has already been initialized and return the
778 agent_info structure describing device number N. Return NULL on error. */
780 static struct agent_info *
781 get_agent_info (int n)
783 if (!hsa_context.initialized)
785 GOMP_PLUGIN_error ("Attempt to use uninitialized HSA context.");
786 return NULL;
788 if (n >= hsa_context.agent_count)
790 GOMP_PLUGIN_error ("Request to operate on anon-existing HSA device %i", n);
791 return NULL;
793 if (!hsa_context.agents[n].initialized)
795 GOMP_PLUGIN_error ("Attempt to use an uninitialized HSA agent.");
796 return NULL;
798 return &hsa_context.agents[n];
801 /* Insert MODULE to the linked list of modules of AGENT. */
803 static void
804 add_module_to_agent (struct agent_info *agent, struct module_info *module)
806 if (agent->first_module)
807 agent->first_module->prev = module;
808 module->next = agent->first_module;
809 module->prev = NULL;
810 agent->first_module = module;
813 /* Remove MODULE from the linked list of modules of AGENT. */
815 static void
816 remove_module_from_agent (struct agent_info *agent, struct module_info *module)
818 if (agent->first_module == module)
819 agent->first_module = module->next;
820 if (module->prev)
821 module->prev->next = module->next;
822 if (module->next)
823 module->next->prev = module->prev;
826 /* Free the HSA program in agent and everything associated with it and set
827 agent->prog_finalized and the initialized flags of all kernels to false.
828 Return TRUE on success. */
830 static bool
831 destroy_hsa_program (struct agent_info *agent)
833 if (!agent->prog_finalized || agent->prog_finalized_error)
834 return true;
836 hsa_status_t status;
838 HSA_DEBUG ("Destroying the current HSA program.\n");
840 status = hsa_fns.hsa_executable_destroy_fn (agent->executable);
841 if (status != HSA_STATUS_SUCCESS)
842 return hsa_error ("Could not destroy HSA executable", status);
844 struct module_info *module;
845 for (module = agent->first_module; module; module = module->next)
847 int i;
848 for (i = 0; i < module->kernel_count; i++)
849 module->kernels[i].initialized = false;
851 agent->prog_finalized = false;
852 return true;
855 /* Initialize KERNEL from D and other parameters. Return true on success. */
857 static bool
858 init_basic_kernel_info (struct kernel_info *kernel,
859 struct hsa_kernel_description *d,
860 struct agent_info *agent,
861 struct module_info *module)
863 kernel->agent = agent;
864 kernel->module = module;
865 kernel->name = d->name;
866 kernel->omp_data_size = d->omp_data_size;
867 kernel->gridified_kernel_p = d->gridified_kernel_p;
868 kernel->dependencies_count = d->kernel_dependencies_count;
869 kernel->dependencies = d->kernel_dependencies;
870 if (pthread_mutex_init (&kernel->init_mutex, NULL))
872 GOMP_PLUGIN_error ("Failed to initialize an HSA kernel mutex");
873 return false;
875 return true;
878 /* Part of the libgomp plugin interface. Load BRIG module described by struct
879 brig_image_desc in TARGET_DATA and return references to kernel descriptors
880 in TARGET_TABLE. */
883 GOMP_OFFLOAD_load_image (int ord, unsigned version, const void *target_data,
884 struct addr_pair **target_table)
886 if (GOMP_VERSION_DEV (version) > GOMP_VERSION_HSA)
888 GOMP_PLUGIN_error ("Offload data incompatible with HSA plugin"
889 " (expected %u, received %u)",
890 GOMP_VERSION_HSA, GOMP_VERSION_DEV (version));
891 return -1;
894 struct brig_image_desc *image_desc = (struct brig_image_desc *) target_data;
895 struct agent_info *agent;
896 struct addr_pair *pair;
897 struct module_info *module;
898 struct kernel_info *kernel;
899 int kernel_count = image_desc->kernel_count;
901 agent = get_agent_info (ord);
902 if (!agent)
903 return -1;
905 if (pthread_rwlock_wrlock (&agent->modules_rwlock))
907 GOMP_PLUGIN_error ("Unable to write-lock an HSA agent rwlock");
908 return -1;
910 if (agent->prog_finalized
911 && !destroy_hsa_program (agent))
912 return -1;
914 HSA_DEBUG ("Encountered %d kernels in an image\n", kernel_count);
915 pair = GOMP_PLUGIN_malloc (kernel_count * sizeof (struct addr_pair));
916 *target_table = pair;
917 module = (struct module_info *)
918 GOMP_PLUGIN_malloc_cleared (sizeof (struct module_info)
919 + kernel_count * sizeof (struct kernel_info));
920 module->image_desc = image_desc;
921 module->kernel_count = kernel_count;
923 kernel = &module->kernels[0];
925 /* Allocate memory for kernel dependencies. */
926 for (unsigned i = 0; i < kernel_count; i++)
928 pair->start = (uintptr_t) kernel;
929 pair->end = (uintptr_t) (kernel + 1);
931 struct hsa_kernel_description *d = &image_desc->kernel_infos[i];
932 if (!init_basic_kernel_info (kernel, d, agent, module))
933 return -1;
934 kernel++;
935 pair++;
938 add_module_to_agent (agent, module);
939 if (pthread_rwlock_unlock (&agent->modules_rwlock))
941 GOMP_PLUGIN_error ("Unable to unlock an HSA agent rwlock");
942 return -1;
944 return kernel_count;
947 /* Add a shared BRIG library from a FILE_NAME to an AGENT. */
949 static struct brig_library_info *
950 add_shared_library (const char *file_name, struct agent_info *agent)
952 struct brig_library_info *library = NULL;
954 void *f = dlopen (file_name, RTLD_NOW);
955 void *start = dlsym (f, "__brig_start");
956 void *end = dlsym (f, "__brig_end");
958 if (start == NULL || end == NULL)
959 return NULL;
961 unsigned size = end - start;
962 char *buf = (char *) GOMP_PLUGIN_malloc (size);
963 memcpy (buf, start, size);
965 library = GOMP_PLUGIN_malloc (sizeof (struct agent_info));
966 library->file_name = (char *) GOMP_PLUGIN_malloc
967 ((strlen (file_name) + 1));
968 strcpy (library->file_name, file_name);
969 library->image = (hsa_ext_module_t) buf;
971 return library;
974 /* Release memory used for BRIG shared libraries that correspond
975 to an AGENT. */
977 static void
978 release_agent_shared_libraries (struct agent_info *agent)
980 for (unsigned i = 0; i < agent->brig_libraries_count; i++)
981 if (agent->brig_libraries[i])
983 free (agent->brig_libraries[i]->file_name);
984 free (agent->brig_libraries[i]->image);
985 free (agent->brig_libraries[i]);
988 free (agent->brig_libraries);
991 /* Create and finalize the program consisting of all loaded modules. */
993 static void
994 create_and_finalize_hsa_program (struct agent_info *agent)
996 hsa_status_t status;
997 hsa_ext_program_t prog_handle;
998 int mi = 0;
1000 if (pthread_mutex_lock (&agent->prog_mutex))
1001 GOMP_PLUGIN_fatal ("Could not lock an HSA agent program mutex");
1002 if (agent->prog_finalized)
1003 goto final;
1005 status = hsa_fns.hsa_ext_program_create_fn
1006 (HSA_MACHINE_MODEL_LARGE, HSA_PROFILE_FULL,
1007 HSA_DEFAULT_FLOAT_ROUNDING_MODE_DEFAULT,
1008 NULL, &prog_handle);
1009 if (status != HSA_STATUS_SUCCESS)
1010 hsa_fatal ("Could not create an HSA program", status);
1012 HSA_DEBUG ("Created a finalized program\n");
1014 struct module_info *module = agent->first_module;
1015 while (module)
1017 status = hsa_fns.hsa_ext_program_add_module_fn
1018 (prog_handle, module->image_desc->brig_module);
1019 if (status != HSA_STATUS_SUCCESS)
1020 hsa_fatal ("Could not add a module to the HSA program", status);
1021 module = module->next;
1022 mi++;
1025 /* Load all shared libraries. */
1026 const char *libraries[] = { "libhsamath.so", "libhsastd.so" };
1027 const unsigned libraries_count = sizeof (libraries) / sizeof (const char *);
1029 agent->brig_libraries_count = libraries_count;
1030 agent->brig_libraries = GOMP_PLUGIN_malloc_cleared
1031 (sizeof (struct brig_library_info) * libraries_count);
1033 for (unsigned i = 0; i < libraries_count; i++)
1035 struct brig_library_info *library = add_shared_library (libraries[i],
1036 agent);
1037 if (library == NULL)
1039 HSA_WARNING ("Could not open a shared BRIG library: %s\n",
1040 libraries[i]);
1041 continue;
1044 status = hsa_fns.hsa_ext_program_add_module_fn (prog_handle,
1045 library->image);
1046 if (status != HSA_STATUS_SUCCESS)
1047 hsa_warn ("Could not add a shared BRIG library the HSA program",
1048 status);
1049 else
1050 HSA_DEBUG ("a shared BRIG library has been added to a program: %s\n",
1051 libraries[i]);
1054 hsa_ext_control_directives_t control_directives;
1055 memset (&control_directives, 0, sizeof (control_directives));
1056 hsa_code_object_t code_object;
1057 status = hsa_fns.hsa_ext_program_finalize_fn
1058 (prog_handle, agent->isa,HSA_EXT_FINALIZER_CALL_CONVENTION_AUTO,
1059 control_directives, "", HSA_CODE_OBJECT_TYPE_PROGRAM, &code_object);
1060 if (status != HSA_STATUS_SUCCESS)
1062 hsa_warn ("Finalization of the HSA program failed", status);
1063 goto failure;
1066 HSA_DEBUG ("Finalization done\n");
1067 hsa_fns.hsa_ext_program_destroy_fn (prog_handle);
1069 status
1070 = hsa_fns.hsa_executable_create_fn (HSA_PROFILE_FULL,
1071 HSA_EXECUTABLE_STATE_UNFROZEN,
1072 "", &agent->executable);
1073 if (status != HSA_STATUS_SUCCESS)
1074 hsa_fatal ("Could not create HSA executable", status);
1076 module = agent->first_module;
1077 while (module)
1079 /* Initialize all global variables declared in the module. */
1080 for (unsigned i = 0; i < module->image_desc->global_variable_count; i++)
1082 struct global_var_info *var;
1083 var = &module->image_desc->global_variables[i];
1084 status = hsa_fns.hsa_executable_global_variable_define_fn
1085 (agent->executable, var->name, var->address);
1087 HSA_DEBUG ("Defining global variable: %s, address: %p\n", var->name,
1088 var->address);
1090 if (status != HSA_STATUS_SUCCESS)
1091 hsa_fatal ("Could not define a global variable in the HSA program",
1092 status);
1095 module = module->next;
1098 status = hsa_fns.hsa_executable_load_code_object_fn (agent->executable,
1099 agent->id,
1100 code_object, "");
1101 if (status != HSA_STATUS_SUCCESS)
1102 hsa_fatal ("Could not add a code object to the HSA executable", status);
1103 status = hsa_fns.hsa_executable_freeze_fn (agent->executable, "");
1104 if (status != HSA_STATUS_SUCCESS)
1105 hsa_fatal ("Could not freeze the HSA executable", status);
1107 HSA_DEBUG ("Froze HSA executable with the finalized code object\n");
1109 /* If all goes good, jump to final. */
1110 goto final;
1112 failure:
1113 agent->prog_finalized_error = true;
1115 final:
1116 agent->prog_finalized = true;
1118 if (pthread_mutex_unlock (&agent->prog_mutex))
1119 GOMP_PLUGIN_fatal ("Could not unlock an HSA agent program mutex");
1122 /* Create kernel dispatch data structure for given KERNEL. */
1124 static struct GOMP_hsa_kernel_dispatch *
1125 create_single_kernel_dispatch (struct kernel_info *kernel,
1126 unsigned omp_data_size)
1128 struct agent_info *agent = kernel->agent;
1129 struct GOMP_hsa_kernel_dispatch *shadow
1130 = GOMP_PLUGIN_malloc_cleared (sizeof (struct GOMP_hsa_kernel_dispatch));
1132 shadow->queue = agent->command_q;
1133 shadow->omp_data_memory
1134 = omp_data_size > 0 ? GOMP_PLUGIN_malloc (omp_data_size) : NULL;
1135 unsigned dispatch_count = kernel->dependencies_count;
1136 shadow->kernel_dispatch_count = dispatch_count;
1138 shadow->children_dispatches
1139 = GOMP_PLUGIN_malloc (dispatch_count * sizeof (shadow));
1141 shadow->object = kernel->object;
1143 hsa_signal_t sync_signal;
1144 hsa_status_t status = hsa_fns.hsa_signal_create_fn (1, 0, NULL, &sync_signal);
1145 if (status != HSA_STATUS_SUCCESS)
1146 hsa_fatal ("Error creating the HSA sync signal", status);
1148 shadow->signal = sync_signal.handle;
1149 shadow->private_segment_size = kernel->private_segment_size;
1150 shadow->group_segment_size = kernel->group_segment_size;
1152 status
1153 = hsa_fns.hsa_memory_allocate_fn (agent->kernarg_region,
1154 kernel->kernarg_segment_size,
1155 &shadow->kernarg_address);
1156 if (status != HSA_STATUS_SUCCESS)
1157 hsa_fatal ("Could not allocate memory for HSA kernel arguments", status);
1159 return shadow;
1162 /* Release data structure created for a kernel dispatch in SHADOW argument. */
1164 static void
1165 release_kernel_dispatch (struct GOMP_hsa_kernel_dispatch *shadow)
1167 HSA_DEBUG ("Released kernel dispatch: %p has value: %" PRIu64 " (%p)\n",
1168 shadow, (print_uint64_t) shadow->debug,
1169 (void *) (uintptr_t) shadow->debug);
1171 hsa_fns.hsa_memory_free_fn (shadow->kernarg_address);
1173 hsa_signal_t s;
1174 s.handle = shadow->signal;
1175 hsa_fns.hsa_signal_destroy_fn (s);
1177 free (shadow->omp_data_memory);
1179 for (unsigned i = 0; i < shadow->kernel_dispatch_count; i++)
1180 release_kernel_dispatch (shadow->children_dispatches[i]);
1182 free (shadow->children_dispatches);
1183 free (shadow);
1186 /* Initialize a KERNEL without its dependencies. MAX_OMP_DATA_SIZE is used
1187 to calculate maximum necessary memory for OMP data allocation. */
1189 static void
1190 init_single_kernel (struct kernel_info *kernel, unsigned *max_omp_data_size)
1192 hsa_status_t status;
1193 struct agent_info *agent = kernel->agent;
1194 hsa_executable_symbol_t kernel_symbol;
1195 status = hsa_fns.hsa_executable_get_symbol_fn (agent->executable, NULL,
1196 kernel->name, agent->id,
1197 0, &kernel_symbol);
1198 if (status != HSA_STATUS_SUCCESS)
1200 hsa_warn ("Could not find symbol for kernel in the code object", status);
1201 goto failure;
1203 HSA_DEBUG ("Located kernel %s\n", kernel->name);
1204 status = hsa_fns.hsa_executable_symbol_get_info_fn
1205 (kernel_symbol, HSA_EXECUTABLE_SYMBOL_INFO_KERNEL_OBJECT, &kernel->object);
1206 if (status != HSA_STATUS_SUCCESS)
1207 hsa_fatal ("Could not extract a kernel object from its symbol", status);
1208 status = hsa_fns.hsa_executable_symbol_get_info_fn
1209 (kernel_symbol, HSA_EXECUTABLE_SYMBOL_INFO_KERNEL_KERNARG_SEGMENT_SIZE,
1210 &kernel->kernarg_segment_size);
1211 if (status != HSA_STATUS_SUCCESS)
1212 hsa_fatal ("Could not get info about kernel argument size", status);
1213 status = hsa_fns.hsa_executable_symbol_get_info_fn
1214 (kernel_symbol, HSA_EXECUTABLE_SYMBOL_INFO_KERNEL_GROUP_SEGMENT_SIZE,
1215 &kernel->group_segment_size);
1216 if (status != HSA_STATUS_SUCCESS)
1217 hsa_fatal ("Could not get info about kernel group segment size", status);
1218 status = hsa_fns.hsa_executable_symbol_get_info_fn
1219 (kernel_symbol, HSA_EXECUTABLE_SYMBOL_INFO_KERNEL_PRIVATE_SEGMENT_SIZE,
1220 &kernel->private_segment_size);
1221 if (status != HSA_STATUS_SUCCESS)
1222 hsa_fatal ("Could not get info about kernel private segment size",
1223 status);
1225 HSA_DEBUG ("Kernel structure for %s fully initialized with "
1226 "following segment sizes: \n", kernel->name);
1227 HSA_DEBUG (" group_segment_size: %u\n",
1228 (unsigned) kernel->group_segment_size);
1229 HSA_DEBUG (" private_segment_size: %u\n",
1230 (unsigned) kernel->private_segment_size);
1231 HSA_DEBUG (" kernarg_segment_size: %u\n",
1232 (unsigned) kernel->kernarg_segment_size);
1233 HSA_DEBUG (" omp_data_size: %u\n", kernel->omp_data_size);
1234 HSA_DEBUG (" gridified_kernel_p: %u\n", kernel->gridified_kernel_p);
1236 if (kernel->omp_data_size > *max_omp_data_size)
1237 *max_omp_data_size = kernel->omp_data_size;
1239 for (unsigned i = 0; i < kernel->dependencies_count; i++)
1241 struct kernel_info *dependency
1242 = get_kernel_for_agent (agent, kernel->dependencies[i]);
1244 if (dependency == NULL)
1246 HSA_DEBUG ("Could not find a dependency for a kernel: %s, "
1247 "dependency name: %s\n", kernel->name,
1248 kernel->dependencies[i]);
1249 goto failure;
1252 if (dependency->dependencies_count > 0)
1254 HSA_DEBUG ("HSA does not allow kernel dispatching code with "
1255 "a depth bigger than one\n");
1256 goto failure;
1259 init_single_kernel (dependency, max_omp_data_size);
1262 return;
1264 failure:
1265 kernel->initialization_failed = true;
1268 /* Indent stream F by INDENT spaces. */
1270 static void
1271 indent_stream (FILE *f, unsigned indent)
1273 fprintf (f, "%*s", indent, "");
1276 /* Dump kernel DISPATCH data structure and indent it by INDENT spaces. */
1278 static void
1279 print_kernel_dispatch (struct GOMP_hsa_kernel_dispatch *dispatch, unsigned indent)
1281 indent_stream (stderr, indent);
1282 fprintf (stderr, "this: %p\n", dispatch);
1283 indent_stream (stderr, indent);
1284 fprintf (stderr, "queue: %p\n", dispatch->queue);
1285 indent_stream (stderr, indent);
1286 fprintf (stderr, "omp_data_memory: %p\n", dispatch->omp_data_memory);
1287 indent_stream (stderr, indent);
1288 fprintf (stderr, "kernarg_address: %p\n", dispatch->kernarg_address);
1289 indent_stream (stderr, indent);
1290 fprintf (stderr, "object: %" PRIu64 "\n", (print_uint64_t) dispatch->object);
1291 indent_stream (stderr, indent);
1292 fprintf (stderr, "signal: %" PRIu64 "\n", (print_uint64_t) dispatch->signal);
1293 indent_stream (stderr, indent);
1294 fprintf (stderr, "private_segment_size: %u\n",
1295 dispatch->private_segment_size);
1296 indent_stream (stderr, indent);
1297 fprintf (stderr, "group_segment_size: %u\n",
1298 dispatch->group_segment_size);
1299 indent_stream (stderr, indent);
1300 fprintf (stderr, "children dispatches: %" PRIu64 "\n",
1301 (print_uint64_t) dispatch->kernel_dispatch_count);
1302 indent_stream (stderr, indent);
1303 fprintf (stderr, "omp_num_threads: %u\n",
1304 dispatch->omp_num_threads);
1305 fprintf (stderr, "\n");
1307 for (unsigned i = 0; i < dispatch->kernel_dispatch_count; i++)
1308 print_kernel_dispatch (dispatch->children_dispatches[i], indent + 2);
1311 /* Create kernel dispatch data structure for a KERNEL and all its
1312 dependencies. */
1314 static struct GOMP_hsa_kernel_dispatch *
1315 create_kernel_dispatch (struct kernel_info *kernel, unsigned omp_data_size)
1317 struct GOMP_hsa_kernel_dispatch *shadow
1318 = create_single_kernel_dispatch (kernel, omp_data_size);
1319 shadow->omp_num_threads = 64;
1320 shadow->debug = 0;
1321 shadow->omp_level = kernel->gridified_kernel_p ? 1 : 0;
1323 /* Create kernel dispatch data structures. We do not allow to have
1324 a kernel dispatch with depth bigger than one. */
1325 for (unsigned i = 0; i < kernel->dependencies_count; i++)
1327 struct kernel_info *dependency
1328 = get_kernel_for_agent (kernel->agent, kernel->dependencies[i]);
1329 shadow->children_dispatches[i]
1330 = create_single_kernel_dispatch (dependency, omp_data_size);
1331 shadow->children_dispatches[i]->queue
1332 = kernel->agent->kernel_dispatch_command_q;
1333 shadow->children_dispatches[i]->omp_level = 1;
1336 return shadow;
1339 /* Do all the work that is necessary before running KERNEL for the first time.
1340 The function assumes the program has been created, finalized and frozen by
1341 create_and_finalize_hsa_program. */
1343 static void
1344 init_kernel (struct kernel_info *kernel)
1346 if (pthread_mutex_lock (&kernel->init_mutex))
1347 GOMP_PLUGIN_fatal ("Could not lock an HSA kernel initialization mutex");
1348 if (kernel->initialized)
1350 if (pthread_mutex_unlock (&kernel->init_mutex))
1351 GOMP_PLUGIN_fatal ("Could not unlock an HSA kernel initialization "
1352 "mutex");
1354 return;
1357 /* Precomputed maximum size of OMP data necessary for a kernel from kernel
1358 dispatch operation. */
1359 init_single_kernel (kernel, &kernel->max_omp_data_size);
1361 if (!kernel->initialization_failed)
1362 HSA_DEBUG ("\n");
1364 kernel->initialized = true;
1365 if (pthread_mutex_unlock (&kernel->init_mutex))
1366 GOMP_PLUGIN_fatal ("Could not unlock an HSA kernel initialization "
1367 "mutex");
1370 /* Parse the target attributes INPUT provided by the compiler and return true
1371 if we should run anything all. If INPUT is NULL, fill DEF with default
1372 values, then store INPUT or DEF into *RESULT. */
1374 static bool
1375 parse_target_attributes (void **input,
1376 struct GOMP_kernel_launch_attributes *def,
1377 struct GOMP_kernel_launch_attributes **result)
1379 if (!input)
1380 GOMP_PLUGIN_fatal ("No target arguments provided");
1382 bool attrs_found = false;
1383 while (*input)
1385 uintptr_t id = (uintptr_t) *input;
1386 if ((id & GOMP_TARGET_ARG_DEVICE_MASK) == GOMP_DEVICE_HSA
1387 && ((id & GOMP_TARGET_ARG_ID_MASK)
1388 == GOMP_TARGET_ARG_HSA_KERNEL_ATTRIBUTES))
1390 input++;
1391 attrs_found = true;
1392 break;
1395 if (id & GOMP_TARGET_ARG_SUBSEQUENT_PARAM)
1396 input++;
1397 input++;
1400 if (!attrs_found)
1402 def->ndim = 1;
1403 def->gdims[0] = 1;
1404 def->gdims[1] = 1;
1405 def->gdims[2] = 1;
1406 def->wdims[0] = 1;
1407 def->wdims[1] = 1;
1408 def->wdims[2] = 1;
1409 *result = def;
1410 HSA_DEBUG ("GOMP_OFFLOAD_run called with no launch attributes\n");
1411 return true;
1414 struct GOMP_kernel_launch_attributes *kla;
1415 kla = (struct GOMP_kernel_launch_attributes *) *input;
1416 *result = kla;
1417 if (kla->ndim == 0 || kla->ndim > 3)
1418 GOMP_PLUGIN_fatal ("Invalid number of dimensions (%u)", kla->ndim);
1420 HSA_DEBUG ("GOMP_OFFLOAD_run called with %u dimensions:\n", kla->ndim);
1421 unsigned i;
1422 for (i = 0; i < kla->ndim; i++)
1424 HSA_DEBUG (" Dimension %u: grid size %u and group size %u\n", i,
1425 kla->gdims[i], kla->wdims[i]);
1426 if (kla->gdims[i] == 0)
1427 return false;
1429 return true;
1432 /* Return the group size given the requested GROUP size, GRID size and number
1433 of grid dimensions NDIM. */
1435 static uint32_t
1436 get_group_size (uint32_t ndim, uint32_t grid, uint32_t group)
1438 if (group == 0)
1440 /* TODO: Provide a default via environment or device characteristics. */
1441 if (ndim == 1)
1442 group = 64;
1443 else if (ndim == 2)
1444 group = 8;
1445 else
1446 group = 4;
1449 if (group > grid)
1450 group = grid;
1451 return group;
1454 /* Return true if the HSA runtime can run function FN_PTR. */
1456 bool
1457 GOMP_OFFLOAD_can_run (void *fn_ptr)
1459 struct kernel_info *kernel = (struct kernel_info *) fn_ptr;
1460 struct agent_info *agent = kernel->agent;
1461 create_and_finalize_hsa_program (agent);
1463 if (agent->prog_finalized_error)
1464 goto failure;
1466 init_kernel (kernel);
1467 if (kernel->initialization_failed)
1468 goto failure;
1470 return true;
1472 failure:
1473 if (suppress_host_fallback)
1474 GOMP_PLUGIN_fatal ("HSA host fallback has been suppressed");
1475 HSA_DEBUG ("HSA target cannot be launched, doing a host fallback\n");
1476 return false;
1479 /* Atomically store pair of uint16_t values (HEADER and REST) to a PACKET. */
1481 void
1482 packet_store_release (uint32_t* packet, uint16_t header, uint16_t rest)
1484 __atomic_store_n (packet, header | (rest << 16), __ATOMIC_RELEASE);
1487 /* Run KERNEL on its agent, pass VARS to it as arguments and take
1488 launchattributes from KLA. */
1490 void
1491 run_kernel (struct kernel_info *kernel, void *vars,
1492 struct GOMP_kernel_launch_attributes *kla)
1494 struct agent_info *agent = kernel->agent;
1495 if (pthread_rwlock_rdlock (&agent->modules_rwlock))
1496 GOMP_PLUGIN_fatal ("Unable to read-lock an HSA agent rwlock");
1498 if (!agent->initialized)
1499 GOMP_PLUGIN_fatal ("Agent must be initialized");
1501 if (!kernel->initialized)
1502 GOMP_PLUGIN_fatal ("Called kernel must be initialized");
1504 struct GOMP_hsa_kernel_dispatch *shadow
1505 = create_kernel_dispatch (kernel, kernel->max_omp_data_size);
1507 if (debug)
1509 fprintf (stderr, "\nKernel has following dependencies:\n");
1510 print_kernel_dispatch (shadow, 2);
1513 uint64_t index
1514 = hsa_fns.hsa_queue_add_write_index_release_fn (agent->command_q, 1);
1515 HSA_DEBUG ("Got AQL index %llu\n", (long long int) index);
1517 /* Wait until the queue is not full before writing the packet. */
1518 while (index - hsa_fns.hsa_queue_load_read_index_acquire_fn (agent->command_q)
1519 >= agent->command_q->size)
1522 hsa_kernel_dispatch_packet_t *packet;
1523 packet = ((hsa_kernel_dispatch_packet_t *) agent->command_q->base_address)
1524 + index % agent->command_q->size;
1526 memset (((uint8_t *) packet) + 4, 0, sizeof (*packet) - 4);
1527 packet->grid_size_x = kla->gdims[0];
1528 packet->workgroup_size_x = get_group_size (kla->ndim, kla->gdims[0],
1529 kla->wdims[0]);
1531 if (kla->ndim >= 2)
1533 packet->grid_size_y = kla->gdims[1];
1534 packet->workgroup_size_y = get_group_size (kla->ndim, kla->gdims[1],
1535 kla->wdims[1]);
1537 else
1539 packet->grid_size_y = 1;
1540 packet->workgroup_size_y = 1;
1543 if (kla->ndim == 3)
1545 packet->grid_size_z = kla->gdims[2];
1546 packet->workgroup_size_z = get_group_size (kla->ndim, kla->gdims[2],
1547 kla->wdims[2]);
1549 else
1551 packet->grid_size_z = 1;
1552 packet->workgroup_size_z = 1;
1555 packet->private_segment_size = kernel->private_segment_size;
1556 packet->group_segment_size = kernel->group_segment_size;
1557 packet->kernel_object = kernel->object;
1558 packet->kernarg_address = shadow->kernarg_address;
1559 hsa_signal_t s;
1560 s.handle = shadow->signal;
1561 packet->completion_signal = s;
1562 hsa_fns.hsa_signal_store_relaxed_fn (s, 1);
1563 memcpy (shadow->kernarg_address, &vars, sizeof (vars));
1565 /* PR hsa/70337. */
1566 size_t vars_size = sizeof (vars);
1567 if (kernel->kernarg_segment_size > vars_size)
1569 if (kernel->kernarg_segment_size != vars_size
1570 + sizeof (struct hsa_kernel_runtime *))
1571 GOMP_PLUGIN_fatal ("Kernel segment size has an unexpected value");
1572 memcpy (packet->kernarg_address + vars_size, &shadow,
1573 sizeof (struct hsa_kernel_runtime *));
1576 HSA_DEBUG ("Copying kernel runtime pointer to kernarg_address\n");
1578 uint16_t header;
1579 header = HSA_PACKET_TYPE_KERNEL_DISPATCH << HSA_PACKET_HEADER_TYPE;
1580 header |= HSA_FENCE_SCOPE_SYSTEM << HSA_PACKET_HEADER_ACQUIRE_FENCE_SCOPE;
1581 header |= HSA_FENCE_SCOPE_SYSTEM << HSA_PACKET_HEADER_RELEASE_FENCE_SCOPE;
1583 HSA_DEBUG ("Going to dispatch kernel %s\n", kernel->name);
1585 packet_store_release ((uint32_t *) packet, header,
1586 (uint16_t) kla->ndim << HSA_KERNEL_DISPATCH_PACKET_SETUP_DIMENSIONS);
1588 hsa_fns.hsa_signal_store_release_fn (agent->command_q->doorbell_signal,
1589 index);
1591 /* TODO: GPU agents in Carrizo APUs cannot properly update L2 cache for
1592 signal wait and signal load operations on their own and we need to
1593 periodically call the hsa_signal_load_acquire on completion signals of
1594 children kernels in the CPU to make that happen. As soon the
1595 limitation will be resolved, this workaround can be removed. */
1597 HSA_DEBUG ("Kernel dispatched, waiting for completion\n");
1599 /* Root signal waits with 1ms timeout. */
1600 while (hsa_fns.hsa_signal_wait_acquire_fn (s, HSA_SIGNAL_CONDITION_LT, 1,
1601 1000 * 1000,
1602 HSA_WAIT_STATE_BLOCKED) != 0)
1603 for (unsigned i = 0; i < shadow->kernel_dispatch_count; i++)
1605 hsa_signal_t child_s;
1606 child_s.handle = shadow->children_dispatches[i]->signal;
1608 HSA_DEBUG ("Waiting for children completion signal: %" PRIu64 "\n",
1609 (print_uint64_t) shadow->children_dispatches[i]->signal);
1610 hsa_fns.hsa_signal_load_acquire_fn (child_s);
1613 release_kernel_dispatch (shadow);
1615 if (pthread_rwlock_unlock (&agent->modules_rwlock))
1616 GOMP_PLUGIN_fatal ("Unable to unlock an HSA agent rwlock");
1619 /* Part of the libgomp plugin interface. Run a kernel on device N (the number
1620 is actually ignored, we assume the FN_PTR has been mapped using the correct
1621 device) and pass it an array of pointers in VARS as a parameter. The kernel
1622 is identified by FN_PTR which must point to a kernel_info structure. */
1624 void
1625 GOMP_OFFLOAD_run (int n __attribute__((unused)),
1626 void *fn_ptr, void *vars, void **args)
1628 struct kernel_info *kernel = (struct kernel_info *) fn_ptr;
1629 struct GOMP_kernel_launch_attributes def;
1630 struct GOMP_kernel_launch_attributes *kla;
1631 if (!parse_target_attributes (args, &def, &kla))
1633 HSA_DEBUG ("Will not run HSA kernel because the grid size is zero\n");
1634 return;
1636 run_kernel (kernel, vars, kla);
1639 /* Information to be passed to a thread running a kernel asycnronously. */
1641 struct async_run_info
1643 int device;
1644 void *tgt_fn;
1645 void *tgt_vars;
1646 void **args;
1647 void *async_data;
1650 /* Thread routine to run a kernel asynchronously. */
1652 static void *
1653 run_kernel_asynchronously (void *thread_arg)
1655 struct async_run_info *info = (struct async_run_info *) thread_arg;
1656 int device = info->device;
1657 void *tgt_fn = info->tgt_fn;
1658 void *tgt_vars = info->tgt_vars;
1659 void **args = info->args;
1660 void *async_data = info->async_data;
1662 free (info);
1663 GOMP_OFFLOAD_run (device, tgt_fn, tgt_vars, args);
1664 GOMP_PLUGIN_target_task_completion (async_data);
1665 return NULL;
1668 /* Part of the libgomp plugin interface. Run a kernel like GOMP_OFFLOAD_run
1669 does, but asynchronously and call GOMP_PLUGIN_target_task_completion when it
1670 has finished. */
1672 void
1673 GOMP_OFFLOAD_async_run (int device, void *tgt_fn, void *tgt_vars,
1674 void **args, void *async_data)
1676 pthread_t pt;
1677 struct async_run_info *info;
1678 HSA_DEBUG ("GOMP_OFFLOAD_async_run invoked\n");
1679 info = GOMP_PLUGIN_malloc (sizeof (struct async_run_info));
1681 info->device = device;
1682 info->tgt_fn = tgt_fn;
1683 info->tgt_vars = tgt_vars;
1684 info->args = args;
1685 info->async_data = async_data;
1687 int err = pthread_create (&pt, NULL, &run_kernel_asynchronously, info);
1688 if (err != 0)
1689 GOMP_PLUGIN_fatal ("HSA asynchronous thread creation failed: %s",
1690 strerror (err));
1691 err = pthread_detach (pt);
1692 if (err != 0)
1693 GOMP_PLUGIN_fatal ("Failed to detach a thread to run HSA kernel "
1694 "asynchronously: %s", strerror (err));
1697 /* Deinitialize all information associated with MODULE and kernels within
1698 it. Return TRUE on success. */
1700 static bool
1701 destroy_module (struct module_info *module)
1703 int i;
1704 for (i = 0; i < module->kernel_count; i++)
1705 if (pthread_mutex_destroy (&module->kernels[i].init_mutex))
1707 GOMP_PLUGIN_error ("Failed to destroy an HSA kernel initialization "
1708 "mutex");
1709 return false;
1711 return true;
1714 /* Part of the libgomp plugin interface. Unload BRIG module described by
1715 struct brig_image_desc in TARGET_DATA from agent number N. Return
1716 TRUE on success. */
1718 bool
1719 GOMP_OFFLOAD_unload_image (int n, unsigned version, const void *target_data)
1721 if (GOMP_VERSION_DEV (version) > GOMP_VERSION_HSA)
1723 GOMP_PLUGIN_error ("Offload data incompatible with HSA plugin"
1724 " (expected %u, received %u)",
1725 GOMP_VERSION_HSA, GOMP_VERSION_DEV (version));
1726 return false;
1729 struct agent_info *agent;
1730 agent = get_agent_info (n);
1731 if (!agent)
1732 return false;
1734 if (pthread_rwlock_wrlock (&agent->modules_rwlock))
1736 GOMP_PLUGIN_error ("Unable to write-lock an HSA agent rwlock");
1737 return false;
1739 struct module_info *module = agent->first_module;
1740 while (module)
1742 if (module->image_desc == target_data)
1743 break;
1744 module = module->next;
1746 if (!module)
1748 GOMP_PLUGIN_error ("Attempt to unload an image that has never been "
1749 "loaded before");
1750 return false;
1753 remove_module_from_agent (agent, module);
1754 if (!destroy_module (module))
1755 return false;
1756 free (module);
1757 if (!destroy_hsa_program (agent))
1758 return false;
1759 if (pthread_rwlock_unlock (&agent->modules_rwlock))
1761 GOMP_PLUGIN_error ("Unable to unlock an HSA agent rwlock");
1762 return false;
1764 return true;
1767 /* Part of the libgomp plugin interface. Deinitialize all information and
1768 status associated with agent number N. We do not attempt any
1769 synchronization, assuming the user and libgomp will not attempt
1770 deinitialization of a device that is in any way being used at the same
1771 time. Return TRUE on success. */
1773 bool
1774 GOMP_OFFLOAD_fini_device (int n)
1776 struct agent_info *agent = get_agent_info (n);
1777 if (!agent)
1778 return false;
1780 if (!agent->initialized)
1781 return true;
1783 struct module_info *next_module = agent->first_module;
1784 while (next_module)
1786 struct module_info *module = next_module;
1787 next_module = module->next;
1788 if (!destroy_module (module))
1789 return false;
1790 free (module);
1792 agent->first_module = NULL;
1793 if (!destroy_hsa_program (agent))
1794 return false;
1796 release_agent_shared_libraries (agent);
1798 hsa_status_t status = hsa_fns.hsa_queue_destroy_fn (agent->command_q);
1799 if (status != HSA_STATUS_SUCCESS)
1800 return hsa_error ("Error destroying command queue", status);
1801 status = hsa_fns.hsa_queue_destroy_fn (agent->kernel_dispatch_command_q);
1802 if (status != HSA_STATUS_SUCCESS)
1803 return hsa_error ("Error destroying kernel dispatch command queue", status);
1804 if (pthread_mutex_destroy (&agent->prog_mutex))
1806 GOMP_PLUGIN_error ("Failed to destroy an HSA agent program mutex");
1807 return false;
1809 if (pthread_rwlock_destroy (&agent->modules_rwlock))
1811 GOMP_PLUGIN_error ("Failed to destroy an HSA agent rwlock");
1812 return false;
1814 agent->initialized = false;
1815 return true;
1818 /* Part of the libgomp plugin interface. Not implemented as it is not required
1819 for HSA. */
1821 void *
1822 GOMP_OFFLOAD_alloc (int ord, size_t size)
1824 GOMP_PLUGIN_error ("HSA GOMP_OFFLOAD_alloc is not implemented because "
1825 "it should never be called");
1826 return NULL;
1829 /* Part of the libgomp plugin interface. Not implemented as it is not required
1830 for HSA. */
1832 bool
1833 GOMP_OFFLOAD_free (int ord, void *ptr)
1835 GOMP_PLUGIN_error ("HSA GOMP_OFFLOAD_free is not implemented because "
1836 "it should never be called");
1837 return false;
1840 /* Part of the libgomp plugin interface. Not implemented as it is not required
1841 for HSA. */
1843 bool
1844 GOMP_OFFLOAD_dev2host (int ord, void *dst, const void *src, size_t n)
1846 GOMP_PLUGIN_error ("HSA GOMP_OFFLOAD_dev2host is not implemented because "
1847 "it should never be called");
1848 return false;
1851 /* Part of the libgomp plugin interface. Not implemented as it is not required
1852 for HSA. */
1854 bool
1855 GOMP_OFFLOAD_host2dev (int ord, void *dst, const void *src, size_t n)
1857 GOMP_PLUGIN_error ("HSA GOMP_OFFLOAD_host2dev is not implemented because "
1858 "it should never be called");
1859 return false;
1862 /* Part of the libgomp plugin interface. Not implemented as it is not required
1863 for HSA. */
1865 bool
1866 GOMP_OFFLOAD_dev2dev (int ord, void *dst, const void *src, size_t n)
1868 GOMP_PLUGIN_error ("HSA GOMP_OFFLOAD_dev2dev is not implemented because "
1869 "it should never be called");
1870 return false;