1 /* Plugin for HSAIL execution.
3 Copyright (C) 2013-2019 Free Software Foundation, Inc.
5 Contributed by Martin Jambor <mjambor@suse.cz> and
6 Martin Liska <mliska@suse.cz>.
8 This file is part of the GNU Offloading and Multi Processing Library
11 Libgomp is free software; you can redistribute it and/or modify it
12 under the terms of the GNU General Public License as published by
13 the Free Software Foundation; either version 3, or (at your option)
16 Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY
17 WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
18 FOR A PARTICULAR PURPOSE. See the GNU General Public License for
21 Under Section 7 of GPL version 3, you are granted additional
22 permissions described in the GCC Runtime Library Exception, version
23 3.1, as published by the Free Software Foundation.
25 You should have received a copy of the GNU General Public License and
26 a copy of the GCC Runtime Library Exception along with this program;
27 see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
28 <http://www.gnu.org/licenses/>. */
36 #ifdef HAVE_INTTYPES_H
41 #include <plugin/hsa_ext_finalize.h>
43 #include "libgomp-plugin.h"
44 #include "gomp-constants.h"
45 #include "secure_getenv.h"
47 #ifdef HAVE_INTTYPES_H
48 typedef uint64_t print_uint64_t
;
51 typedef unsigned long print_uint64_t
;
54 /* As an HSA runtime is dlopened, following structure defines function
55 pointers utilized by the HSA plug-in. */
57 struct hsa_runtime_fn_info
60 hsa_status_t (*hsa_status_string_fn
) (hsa_status_t status
,
61 const char **status_string
);
62 hsa_status_t (*hsa_agent_get_info_fn
) (hsa_agent_t agent
,
63 hsa_agent_info_t attribute
,
65 hsa_status_t (*hsa_init_fn
) (void);
66 hsa_status_t (*hsa_iterate_agents_fn
)
67 (hsa_status_t (*callback
)(hsa_agent_t agent
, void *data
), void *data
);
68 hsa_status_t (*hsa_region_get_info_fn
) (hsa_region_t region
,
69 hsa_region_info_t attribute
,
71 hsa_status_t (*hsa_queue_create_fn
)
72 (hsa_agent_t agent
, uint32_t size
, hsa_queue_type_t type
,
73 void (*callback
)(hsa_status_t status
, hsa_queue_t
*source
, void *data
),
74 void *data
, uint32_t private_segment_size
,
75 uint32_t group_segment_size
, hsa_queue_t
**queue
);
76 hsa_status_t (*hsa_agent_iterate_regions_fn
)
78 hsa_status_t (*callback
)(hsa_region_t region
, void *data
), void *data
);
79 hsa_status_t (*hsa_executable_destroy_fn
) (hsa_executable_t executable
);
80 hsa_status_t (*hsa_executable_create_fn
)
81 (hsa_profile_t profile
, hsa_executable_state_t executable_state
,
82 const char *options
, hsa_executable_t
*executable
);
83 hsa_status_t (*hsa_executable_global_variable_define_fn
)
84 (hsa_executable_t executable
, const char *variable_name
, void *address
);
85 hsa_status_t (*hsa_executable_load_code_object_fn
)
86 (hsa_executable_t executable
, hsa_agent_t agent
,
87 hsa_code_object_t code_object
, const char *options
);
88 hsa_status_t (*hsa_executable_freeze_fn
)(hsa_executable_t executable
,
90 hsa_status_t (*hsa_signal_create_fn
) (hsa_signal_value_t initial_value
,
91 uint32_t num_consumers
,
92 const hsa_agent_t
*consumers
,
93 hsa_signal_t
*signal
);
94 hsa_status_t (*hsa_memory_allocate_fn
) (hsa_region_t region
, size_t size
,
96 hsa_status_t (*hsa_memory_free_fn
) (void *ptr
);
97 hsa_status_t (*hsa_signal_destroy_fn
) (hsa_signal_t signal
);
98 hsa_status_t (*hsa_executable_get_symbol_fn
)
99 (hsa_executable_t executable
, const char *module_name
,
100 const char *symbol_name
, hsa_agent_t agent
, int32_t call_convention
,
101 hsa_executable_symbol_t
*symbol
);
102 hsa_status_t (*hsa_executable_symbol_get_info_fn
)
103 (hsa_executable_symbol_t executable_symbol
,
104 hsa_executable_symbol_info_t attribute
, void *value
);
105 uint64_t (*hsa_queue_add_write_index_release_fn
) (const hsa_queue_t
*queue
,
107 uint64_t (*hsa_queue_load_read_index_acquire_fn
) (const hsa_queue_t
*queue
);
108 void (*hsa_signal_store_relaxed_fn
) (hsa_signal_t signal
,
109 hsa_signal_value_t value
);
110 void (*hsa_signal_store_release_fn
) (hsa_signal_t signal
,
111 hsa_signal_value_t value
);
112 hsa_signal_value_t (*hsa_signal_wait_acquire_fn
)
113 (hsa_signal_t signal
, hsa_signal_condition_t condition
,
114 hsa_signal_value_t compare_value
, uint64_t timeout_hint
,
115 hsa_wait_state_t wait_state_hint
);
116 hsa_signal_value_t (*hsa_signal_load_acquire_fn
) (hsa_signal_t signal
);
117 hsa_status_t (*hsa_queue_destroy_fn
) (hsa_queue_t
*queue
);
120 hsa_status_t (*hsa_ext_program_add_module_fn
) (hsa_ext_program_t program
,
121 hsa_ext_module_t module
);
122 hsa_status_t (*hsa_ext_program_create_fn
)
123 (hsa_machine_model_t machine_model
, hsa_profile_t profile
,
124 hsa_default_float_rounding_mode_t default_float_rounding_mode
,
125 const char *options
, hsa_ext_program_t
*program
);
126 hsa_status_t (*hsa_ext_program_destroy_fn
) (hsa_ext_program_t program
);
127 hsa_status_t (*hsa_ext_program_finalize_fn
)
128 (hsa_ext_program_t program
,hsa_isa_t isa
,
129 int32_t call_convention
, hsa_ext_control_directives_t control_directives
,
130 const char *options
, hsa_code_object_type_t code_object_type
,
131 hsa_code_object_t
*code_object
);
134 /* HSA runtime functions that are initialized in init_hsa_context. */
136 static struct hsa_runtime_fn_info hsa_fns
;
138 /* Keep the following GOMP prefixed structures in sync with respective parts of
141 /* Structure describing the run-time and grid properties of an HSA kernel
144 struct GOMP_kernel_launch_attributes
146 /* Number of dimensions the workload has. Maximum number is 3. */
148 /* Size of the grid in the three respective dimensions. */
150 /* Size of work-groups in the respective dimensions. */
154 /* Collection of information needed for a dispatch of a kernel from a
157 struct GOMP_hsa_kernel_dispatch
159 /* Pointer to a command queue associated with a kernel dispatch agent. */
161 /* Pointer to reserved memory for OMP data struct copying. */
162 void *omp_data_memory
;
163 /* Pointer to a memory space used for kernel arguments passing. */
164 void *kernarg_address
;
167 /* Synchronization signal used for dispatch synchronization. */
169 /* Private segment size. */
170 uint32_t private_segment_size
;
171 /* Group segment size. */
172 uint32_t group_segment_size
;
173 /* Number of children kernel dispatches. */
174 uint64_t kernel_dispatch_count
;
175 /* Debug purpose argument. */
177 /* Levels-var ICV. */
179 /* Kernel dispatch structures created for children kernel dispatches. */
180 struct GOMP_hsa_kernel_dispatch
**children_dispatches
;
181 /* Number of threads. */
182 uint32_t omp_num_threads
;
185 /* Part of the libgomp plugin interface. Return the name of the accelerator,
189 GOMP_OFFLOAD_get_name (void)
194 /* Part of the libgomp plugin interface. Return the specific capabilities the
195 HSA accelerator have. */
198 GOMP_OFFLOAD_get_caps (void)
200 return GOMP_OFFLOAD_CAP_SHARED_MEM
| GOMP_OFFLOAD_CAP_OPENMP_400
;
203 /* Part of the libgomp plugin interface. Identify as HSA accelerator. */
206 GOMP_OFFLOAD_get_type (void)
208 return OFFLOAD_TARGET_TYPE_HSA
;
211 /* Return the libgomp version number we're compatible with. There is
212 no requirement for cross-version compatibility. */
215 GOMP_OFFLOAD_version (void)
220 /* Flag to decide whether print to stderr information about what is going on.
221 Set in init_debug depending on environment variables. */
225 /* Flag to decide if the runtime should suppress a possible fallback to host
228 static bool suppress_host_fallback
;
230 /* Flag to locate HSA runtime shared library that is dlopened
233 static const char *hsa_runtime_lib
;
235 /* Flag to decide if the runtime should support also CPU devices (can be
238 static bool support_cpu_devices
;
240 /* Initialize debug and suppress_host_fallback according to the environment. */
243 init_enviroment_variables (void)
245 if (secure_getenv ("HSA_DEBUG"))
250 if (secure_getenv ("HSA_SUPPRESS_HOST_FALLBACK"))
251 suppress_host_fallback
= true;
253 suppress_host_fallback
= false;
255 hsa_runtime_lib
= secure_getenv ("HSA_RUNTIME_LIB");
256 if (hsa_runtime_lib
== NULL
)
257 hsa_runtime_lib
= HSA_RUNTIME_LIB
"libhsa-runtime64.so";
259 support_cpu_devices
= secure_getenv ("HSA_SUPPORT_CPU_DEVICES");
262 /* Print a logging message with PREFIX to stderr if HSA_DEBUG value
265 #define HSA_LOG(prefix, ...) \
270 fprintf (stderr, prefix); \
271 fprintf (stderr, __VA_ARGS__); \
276 /* Print a debugging message to stderr. */
278 #define HSA_DEBUG(...) HSA_LOG ("HSA debug: ", __VA_ARGS__)
280 /* Print a warning message to stderr. */
282 #define HSA_WARNING(...) HSA_LOG ("HSA warning: ", __VA_ARGS__)
284 /* Print HSA warning STR with an HSA STATUS code. */
287 hsa_warn (const char *str
, hsa_status_t status
)
292 const char *hsa_error_msg
;
293 hsa_fns
.hsa_status_string_fn (status
, &hsa_error_msg
);
295 fprintf (stderr
, "HSA warning: %s\nRuntime message: %s", str
, hsa_error_msg
);
298 /* Report a fatal error STR together with the HSA error corresponding to STATUS
299 and terminate execution of the current process. */
302 hsa_fatal (const char *str
, hsa_status_t status
)
304 const char *hsa_error_msg
;
305 hsa_fns
.hsa_status_string_fn (status
, &hsa_error_msg
);
306 GOMP_PLUGIN_fatal ("HSA fatal error: %s\nRuntime message: %s", str
,
310 /* Like hsa_fatal, except only report error message, and return FALSE
311 for propagating error processing to outside of plugin. */
314 hsa_error (const char *str
, hsa_status_t status
)
316 const char *hsa_error_msg
;
317 hsa_fns
.hsa_status_string_fn (status
, &hsa_error_msg
);
318 GOMP_PLUGIN_error ("HSA fatal error: %s\nRuntime message: %s", str
,
323 struct hsa_kernel_description
326 unsigned omp_data_size
;
327 bool gridified_kernel_p
;
328 unsigned kernel_dependencies_count
;
329 const char **kernel_dependencies
;
332 struct global_var_info
338 /* Data passed by the static initializer of a compilation unit containing BRIG
339 to GOMP_offload_register. */
341 struct brig_image_desc
343 hsa_ext_module_t brig_module
;
344 const unsigned kernel_count
;
345 struct hsa_kernel_description
*kernel_infos
;
346 const unsigned global_variable_count
;
347 struct global_var_info
*global_variables
;
352 /* Information required to identify, finalize and run any given kernel. */
356 /* Name of the kernel, required to locate it within the brig module. */
358 /* Size of memory space for OMP data. */
359 unsigned omp_data_size
;
360 /* The specific agent the kernel has been or will be finalized for and run
362 struct agent_info
*agent
;
363 /* The specific module where the kernel takes place. */
364 struct module_info
*module
;
365 /* Mutex enforcing that at most once thread ever initializes a kernel for
366 use. A thread should have locked agent->modules_rwlock for reading before
368 pthread_mutex_t init_mutex
;
369 /* Flag indicating whether the kernel has been initialized and all fields
370 below it contain valid data. */
372 /* Flag indicating that the kernel has a problem that blocks an execution. */
373 bool initialization_failed
;
374 /* The object to be put into the dispatch queue. */
376 /* Required size of kernel arguments. */
377 uint32_t kernarg_segment_size
;
378 /* Required size of group segment. */
379 uint32_t group_segment_size
;
380 /* Required size of private segment. */
381 uint32_t private_segment_size
;
382 /* List of all kernel dependencies. */
383 const char **dependencies
;
384 /* Number of dependencies. */
385 unsigned dependencies_count
;
386 /* Maximum OMP data size necessary for kernel from kernel dispatches. */
387 unsigned max_omp_data_size
;
388 /* True if the kernel is gridified. */
389 bool gridified_kernel_p
;
392 /* Information about a particular brig module, its image and kernels. */
396 /* The next and previous module in the linked list of modules of an agent. */
397 struct module_info
*next
, *prev
;
398 /* The description with which the program has registered the image. */
399 struct brig_image_desc
*image_desc
;
401 /* Number of kernels in this module. */
403 /* An array of kernel_info structures describing each kernel in this
405 struct kernel_info kernels
[];
408 /* Information about shared brig library. */
410 struct brig_library_info
413 hsa_ext_module_t image
;
416 /* Description of an HSA GPU agent and the program associated with it. */
420 /* The HSA ID of the agent. Assigned when hsa_context is initialized. */
422 /* Whether the agent has been initialized. The fields below are usable only
425 /* The HSA ISA of this agent. */
427 /* Command queue of the agent. */
428 hsa_queue_t
*command_q
;
429 /* Kernel from kernel dispatch command queue. */
430 hsa_queue_t
*kernel_dispatch_command_q
;
431 /* The HSA memory region from which to allocate kernel arguments. */
432 hsa_region_t kernarg_region
;
434 /* Read-write lock that protects kernels which are running or about to be run
435 from interference with loading and unloading of images. Needs to be
436 locked for reading while a kernel is being run, and for writing if the
437 list of modules is manipulated (and thus the HSA program invalidated). */
438 pthread_rwlock_t modules_rwlock
;
439 /* The first module in a linked list of modules associated with this
441 struct module_info
*first_module
;
443 /* Mutex enforcing that only one thread will finalize the HSA program. A
444 thread should have locked agent->modules_rwlock for reading before
446 pthread_mutex_t prog_mutex
;
447 /* Flag whether the HSA program that consists of all the modules has been
450 /* Flag whether the program was finalized but with a failure. */
451 bool prog_finalized_error
;
452 /* HSA executable - the finalized program that is used to locate kernels. */
453 hsa_executable_t executable
;
454 /* List of BRIG libraries. */
455 struct brig_library_info
**brig_libraries
;
456 /* Number of loaded shared BRIG libraries. */
457 unsigned brig_libraries_count
;
460 /* Information about the whole HSA environment and all of its agents. */
462 struct hsa_context_info
464 /* Whether the structure has been initialized. */
466 /* Number of usable GPU HSA agents in the system. */
468 /* Array of agent_info structures describing the individual HSA agents. */
469 struct agent_info
*agents
;
472 /* Information about the whole HSA environment and all of its agents. */
474 static struct hsa_context_info hsa_context
;
476 #define DLSYM_FN(function) \
477 hsa_fns.function##_fn = dlsym (handle, #function); \
478 if (hsa_fns.function##_fn == NULL) \
482 init_hsa_runtime_functions (void)
484 void *handle
= dlopen (hsa_runtime_lib
, RTLD_LAZY
);
488 DLSYM_FN (hsa_status_string
)
489 DLSYM_FN (hsa_agent_get_info
)
491 DLSYM_FN (hsa_iterate_agents
)
492 DLSYM_FN (hsa_region_get_info
)
493 DLSYM_FN (hsa_queue_create
)
494 DLSYM_FN (hsa_agent_iterate_regions
)
495 DLSYM_FN (hsa_executable_destroy
)
496 DLSYM_FN (hsa_executable_create
)
497 DLSYM_FN (hsa_executable_global_variable_define
)
498 DLSYM_FN (hsa_executable_load_code_object
)
499 DLSYM_FN (hsa_executable_freeze
)
500 DLSYM_FN (hsa_signal_create
)
501 DLSYM_FN (hsa_memory_allocate
)
502 DLSYM_FN (hsa_memory_free
)
503 DLSYM_FN (hsa_signal_destroy
)
504 DLSYM_FN (hsa_executable_get_symbol
)
505 DLSYM_FN (hsa_executable_symbol_get_info
)
506 DLSYM_FN (hsa_queue_add_write_index_release
)
507 DLSYM_FN (hsa_queue_load_read_index_acquire
)
508 DLSYM_FN (hsa_signal_wait_acquire
)
509 DLSYM_FN (hsa_signal_store_relaxed
)
510 DLSYM_FN (hsa_signal_store_release
)
511 DLSYM_FN (hsa_signal_load_acquire
)
512 DLSYM_FN (hsa_queue_destroy
)
513 DLSYM_FN (hsa_ext_program_add_module
)
514 DLSYM_FN (hsa_ext_program_create
)
515 DLSYM_FN (hsa_ext_program_destroy
)
516 DLSYM_FN (hsa_ext_program_finalize
)
520 HSA_DEBUG ("while loading %s: %s\n", hsa_runtime_lib
, dlerror ());
524 /* Find kernel for an AGENT by name provided in KERNEL_NAME. */
526 static struct kernel_info
*
527 get_kernel_for_agent (struct agent_info
*agent
, const char *kernel_name
)
529 struct module_info
*module
= agent
->first_module
;
533 for (unsigned i
= 0; i
< module
->kernel_count
; i
++)
534 if (strcmp (module
->kernels
[i
].name
, kernel_name
) == 0)
535 return &module
->kernels
[i
];
537 module
= module
->next
;
543 /* Return true if the agent is a GPU and acceptable of concurrent submissions
544 from different threads. */
547 suitable_hsa_agent_p (hsa_agent_t agent
)
549 hsa_device_type_t device_type
;
551 = hsa_fns
.hsa_agent_get_info_fn (agent
, HSA_AGENT_INFO_DEVICE
,
553 if (status
!= HSA_STATUS_SUCCESS
)
558 case HSA_DEVICE_TYPE_GPU
:
560 case HSA_DEVICE_TYPE_CPU
:
561 if (!support_cpu_devices
)
568 uint32_t features
= 0;
569 status
= hsa_fns
.hsa_agent_get_info_fn (agent
, HSA_AGENT_INFO_FEATURE
,
571 if (status
!= HSA_STATUS_SUCCESS
572 || !(features
& HSA_AGENT_FEATURE_KERNEL_DISPATCH
))
574 hsa_queue_type_t queue_type
;
575 status
= hsa_fns
.hsa_agent_get_info_fn (agent
, HSA_AGENT_INFO_QUEUE_TYPE
,
577 if (status
!= HSA_STATUS_SUCCESS
578 || (queue_type
!= HSA_QUEUE_TYPE_MULTI
))
584 /* Callback of hsa_iterate_agents, if AGENT is a GPU device, increment
585 agent_count in hsa_context. */
588 count_gpu_agents (hsa_agent_t agent
, void *data
__attribute__ ((unused
)))
590 if (suitable_hsa_agent_p (agent
))
591 hsa_context
.agent_count
++;
592 return HSA_STATUS_SUCCESS
;
595 /* Callback of hsa_iterate_agents, if AGENT is a GPU device, assign the agent
596 id to the describing structure in the hsa context. The index of the
597 structure is pointed to by DATA, increment it afterwards. */
600 assign_agent_ids (hsa_agent_t agent
, void *data
)
602 if (suitable_hsa_agent_p (agent
))
604 int *agent_index
= (int *) data
;
605 hsa_context
.agents
[*agent_index
].id
= agent
;
608 return HSA_STATUS_SUCCESS
;
611 /* Initialize hsa_context if it has not already been done.
612 Return TRUE on success. */
615 init_hsa_context (void)
620 if (hsa_context
.initialized
)
622 init_enviroment_variables ();
623 if (!init_hsa_runtime_functions ())
625 HSA_DEBUG ("Run-time could not be dynamically opened\n");
628 status
= hsa_fns
.hsa_init_fn ();
629 if (status
!= HSA_STATUS_SUCCESS
)
630 return hsa_error ("Run-time could not be initialized", status
);
631 HSA_DEBUG ("HSA run-time initialized\n");
632 status
= hsa_fns
.hsa_iterate_agents_fn (count_gpu_agents
, NULL
);
633 if (status
!= HSA_STATUS_SUCCESS
)
634 return hsa_error ("HSA GPU devices could not be enumerated", status
);
635 HSA_DEBUG ("There are %i HSA GPU devices.\n", hsa_context
.agent_count
);
638 = GOMP_PLUGIN_malloc_cleared (hsa_context
.agent_count
639 * sizeof (struct agent_info
));
640 status
= hsa_fns
.hsa_iterate_agents_fn (assign_agent_ids
, &agent_index
);
641 if (agent_index
!= hsa_context
.agent_count
)
643 GOMP_PLUGIN_error ("Failed to assign IDs to all HSA agents");
646 hsa_context
.initialized
= true;
650 /* Callback of dispatch queues to report errors. */
653 queue_callback (hsa_status_t status
,
654 hsa_queue_t
*queue
__attribute__ ((unused
)),
655 void *data
__attribute__ ((unused
)))
657 hsa_fatal ("Asynchronous queue error", status
);
660 /* Callback of hsa_agent_iterate_regions. Determine if a memory REGION can be
661 used for kernarg allocations and if so write it to the memory pointed to by
662 DATA and break the query. */
665 get_kernarg_memory_region (hsa_region_t region
, void *data
)
668 hsa_region_segment_t segment
;
670 status
= hsa_fns
.hsa_region_get_info_fn (region
, HSA_REGION_INFO_SEGMENT
,
672 if (status
!= HSA_STATUS_SUCCESS
)
674 if (segment
!= HSA_REGION_SEGMENT_GLOBAL
)
675 return HSA_STATUS_SUCCESS
;
678 status
= hsa_fns
.hsa_region_get_info_fn (region
, HSA_REGION_INFO_GLOBAL_FLAGS
,
680 if (status
!= HSA_STATUS_SUCCESS
)
682 if (flags
& HSA_REGION_GLOBAL_FLAG_KERNARG
)
684 hsa_region_t
*ret
= (hsa_region_t
*) data
;
686 return HSA_STATUS_INFO_BREAK
;
688 return HSA_STATUS_SUCCESS
;
691 /* Part of the libgomp plugin interface. Return the number of HSA devices on
695 GOMP_OFFLOAD_get_num_devices (void)
697 if (!init_hsa_context ())
699 return hsa_context
.agent_count
;
702 /* Part of the libgomp plugin interface. Initialize agent number N so that it
703 can be used for computation. Return TRUE on success. */
706 GOMP_OFFLOAD_init_device (int n
)
708 if (!init_hsa_context ())
710 if (n
>= hsa_context
.agent_count
)
712 GOMP_PLUGIN_error ("Request to initialize non-existing HSA device %i", n
);
715 struct agent_info
*agent
= &hsa_context
.agents
[n
];
717 if (agent
->initialized
)
720 if (pthread_rwlock_init (&agent
->modules_rwlock
, NULL
))
722 GOMP_PLUGIN_error ("Failed to initialize an HSA agent rwlock");
725 if (pthread_mutex_init (&agent
->prog_mutex
, NULL
))
727 GOMP_PLUGIN_error ("Failed to initialize an HSA agent program mutex");
733 status
= hsa_fns
.hsa_agent_get_info_fn (agent
->id
,
734 HSA_AGENT_INFO_QUEUE_MAX_SIZE
,
736 if (status
!= HSA_STATUS_SUCCESS
)
737 return hsa_error ("Error requesting maximum queue size of the HSA agent",
739 status
= hsa_fns
.hsa_agent_get_info_fn (agent
->id
, HSA_AGENT_INFO_ISA
,
741 if (status
!= HSA_STATUS_SUCCESS
)
742 return hsa_error ("Error querying the ISA of the agent", status
);
743 status
= hsa_fns
.hsa_queue_create_fn (agent
->id
, queue_size
,
744 HSA_QUEUE_TYPE_MULTI
,
745 queue_callback
, NULL
, UINT32_MAX
,
748 if (status
!= HSA_STATUS_SUCCESS
)
749 return hsa_error ("Error creating command queue", status
);
751 status
= hsa_fns
.hsa_queue_create_fn (agent
->id
, queue_size
,
752 HSA_QUEUE_TYPE_MULTI
,
753 queue_callback
, NULL
, UINT32_MAX
,
755 &agent
->kernel_dispatch_command_q
);
756 if (status
!= HSA_STATUS_SUCCESS
)
757 return hsa_error ("Error creating kernel dispatch command queue", status
);
759 agent
->kernarg_region
.handle
= (uint64_t) -1;
760 status
= hsa_fns
.hsa_agent_iterate_regions_fn (agent
->id
,
761 get_kernarg_memory_region
,
762 &agent
->kernarg_region
);
763 if (agent
->kernarg_region
.handle
== (uint64_t) -1)
765 GOMP_PLUGIN_error ("Could not find suitable memory region for kernel "
769 HSA_DEBUG ("HSA agent initialized, queue has id %llu\n",
770 (long long unsigned) agent
->command_q
->id
);
771 HSA_DEBUG ("HSA agent initialized, kernel dispatch queue has id %llu\n",
772 (long long unsigned) agent
->kernel_dispatch_command_q
->id
);
773 agent
->initialized
= true;
777 /* Verify that hsa_context has already been initialized and return the
778 agent_info structure describing device number N. Return NULL on error. */
780 static struct agent_info
*
781 get_agent_info (int n
)
783 if (!hsa_context
.initialized
)
785 GOMP_PLUGIN_error ("Attempt to use uninitialized HSA context.");
788 if (n
>= hsa_context
.agent_count
)
790 GOMP_PLUGIN_error ("Request to operate on anon-existing HSA device %i", n
);
793 if (!hsa_context
.agents
[n
].initialized
)
795 GOMP_PLUGIN_error ("Attempt to use an uninitialized HSA agent.");
798 return &hsa_context
.agents
[n
];
801 /* Insert MODULE to the linked list of modules of AGENT. */
804 add_module_to_agent (struct agent_info
*agent
, struct module_info
*module
)
806 if (agent
->first_module
)
807 agent
->first_module
->prev
= module
;
808 module
->next
= agent
->first_module
;
810 agent
->first_module
= module
;
813 /* Remove MODULE from the linked list of modules of AGENT. */
816 remove_module_from_agent (struct agent_info
*agent
, struct module_info
*module
)
818 if (agent
->first_module
== module
)
819 agent
->first_module
= module
->next
;
821 module
->prev
->next
= module
->next
;
823 module
->next
->prev
= module
->prev
;
826 /* Free the HSA program in agent and everything associated with it and set
827 agent->prog_finalized and the initialized flags of all kernels to false.
828 Return TRUE on success. */
831 destroy_hsa_program (struct agent_info
*agent
)
833 if (!agent
->prog_finalized
|| agent
->prog_finalized_error
)
838 HSA_DEBUG ("Destroying the current HSA program.\n");
840 status
= hsa_fns
.hsa_executable_destroy_fn (agent
->executable
);
841 if (status
!= HSA_STATUS_SUCCESS
)
842 return hsa_error ("Could not destroy HSA executable", status
);
844 struct module_info
*module
;
845 for (module
= agent
->first_module
; module
; module
= module
->next
)
848 for (i
= 0; i
< module
->kernel_count
; i
++)
849 module
->kernels
[i
].initialized
= false;
851 agent
->prog_finalized
= false;
855 /* Initialize KERNEL from D and other parameters. Return true on success. */
858 init_basic_kernel_info (struct kernel_info
*kernel
,
859 struct hsa_kernel_description
*d
,
860 struct agent_info
*agent
,
861 struct module_info
*module
)
863 kernel
->agent
= agent
;
864 kernel
->module
= module
;
865 kernel
->name
= d
->name
;
866 kernel
->omp_data_size
= d
->omp_data_size
;
867 kernel
->gridified_kernel_p
= d
->gridified_kernel_p
;
868 kernel
->dependencies_count
= d
->kernel_dependencies_count
;
869 kernel
->dependencies
= d
->kernel_dependencies
;
870 if (pthread_mutex_init (&kernel
->init_mutex
, NULL
))
872 GOMP_PLUGIN_error ("Failed to initialize an HSA kernel mutex");
878 /* Part of the libgomp plugin interface. Load BRIG module described by struct
879 brig_image_desc in TARGET_DATA and return references to kernel descriptors
883 GOMP_OFFLOAD_load_image (int ord
, unsigned version
, const void *target_data
,
884 struct addr_pair
**target_table
)
886 if (GOMP_VERSION_DEV (version
) > GOMP_VERSION_HSA
)
888 GOMP_PLUGIN_error ("Offload data incompatible with HSA plugin"
889 " (expected %u, received %u)",
890 GOMP_VERSION_HSA
, GOMP_VERSION_DEV (version
));
894 struct brig_image_desc
*image_desc
= (struct brig_image_desc
*) target_data
;
895 struct agent_info
*agent
;
896 struct addr_pair
*pair
;
897 struct module_info
*module
;
898 struct kernel_info
*kernel
;
899 int kernel_count
= image_desc
->kernel_count
;
901 agent
= get_agent_info (ord
);
905 if (pthread_rwlock_wrlock (&agent
->modules_rwlock
))
907 GOMP_PLUGIN_error ("Unable to write-lock an HSA agent rwlock");
910 if (agent
->prog_finalized
911 && !destroy_hsa_program (agent
))
914 HSA_DEBUG ("Encountered %d kernels in an image\n", kernel_count
);
915 pair
= GOMP_PLUGIN_malloc (kernel_count
* sizeof (struct addr_pair
));
916 *target_table
= pair
;
917 module
= (struct module_info
*)
918 GOMP_PLUGIN_malloc_cleared (sizeof (struct module_info
)
919 + kernel_count
* sizeof (struct kernel_info
));
920 module
->image_desc
= image_desc
;
921 module
->kernel_count
= kernel_count
;
923 kernel
= &module
->kernels
[0];
925 /* Allocate memory for kernel dependencies. */
926 for (unsigned i
= 0; i
< kernel_count
; i
++)
928 pair
->start
= (uintptr_t) kernel
;
929 pair
->end
= (uintptr_t) (kernel
+ 1);
931 struct hsa_kernel_description
*d
= &image_desc
->kernel_infos
[i
];
932 if (!init_basic_kernel_info (kernel
, d
, agent
, module
))
938 add_module_to_agent (agent
, module
);
939 if (pthread_rwlock_unlock (&agent
->modules_rwlock
))
941 GOMP_PLUGIN_error ("Unable to unlock an HSA agent rwlock");
947 /* Add a shared BRIG library from a FILE_NAME to an AGENT. */
949 static struct brig_library_info
*
950 add_shared_library (const char *file_name
, struct agent_info
*agent
)
952 struct brig_library_info
*library
= NULL
;
954 void *f
= dlopen (file_name
, RTLD_NOW
);
955 void *start
= dlsym (f
, "__brig_start");
956 void *end
= dlsym (f
, "__brig_end");
958 if (start
== NULL
|| end
== NULL
)
961 unsigned size
= end
- start
;
962 char *buf
= (char *) GOMP_PLUGIN_malloc (size
);
963 memcpy (buf
, start
, size
);
965 library
= GOMP_PLUGIN_malloc (sizeof (struct agent_info
));
966 library
->file_name
= (char *) GOMP_PLUGIN_malloc
967 ((strlen (file_name
) + 1));
968 strcpy (library
->file_name
, file_name
);
969 library
->image
= (hsa_ext_module_t
) buf
;
974 /* Release memory used for BRIG shared libraries that correspond
978 release_agent_shared_libraries (struct agent_info
*agent
)
980 for (unsigned i
= 0; i
< agent
->brig_libraries_count
; i
++)
981 if (agent
->brig_libraries
[i
])
983 free (agent
->brig_libraries
[i
]->file_name
);
984 free (agent
->brig_libraries
[i
]->image
);
985 free (agent
->brig_libraries
[i
]);
988 free (agent
->brig_libraries
);
991 /* Create and finalize the program consisting of all loaded modules. */
994 create_and_finalize_hsa_program (struct agent_info
*agent
)
997 hsa_ext_program_t prog_handle
;
1000 if (pthread_mutex_lock (&agent
->prog_mutex
))
1001 GOMP_PLUGIN_fatal ("Could not lock an HSA agent program mutex");
1002 if (agent
->prog_finalized
)
1005 status
= hsa_fns
.hsa_ext_program_create_fn
1006 (HSA_MACHINE_MODEL_LARGE
, HSA_PROFILE_FULL
,
1007 HSA_DEFAULT_FLOAT_ROUNDING_MODE_DEFAULT
,
1008 NULL
, &prog_handle
);
1009 if (status
!= HSA_STATUS_SUCCESS
)
1010 hsa_fatal ("Could not create an HSA program", status
);
1012 HSA_DEBUG ("Created a finalized program\n");
1014 struct module_info
*module
= agent
->first_module
;
1017 status
= hsa_fns
.hsa_ext_program_add_module_fn
1018 (prog_handle
, module
->image_desc
->brig_module
);
1019 if (status
!= HSA_STATUS_SUCCESS
)
1020 hsa_fatal ("Could not add a module to the HSA program", status
);
1021 module
= module
->next
;
1025 /* Load all shared libraries. */
1026 const char *libraries
[] = { "libhsamath.so", "libhsastd.so" };
1027 const unsigned libraries_count
= sizeof (libraries
) / sizeof (const char *);
1029 agent
->brig_libraries_count
= libraries_count
;
1030 agent
->brig_libraries
= GOMP_PLUGIN_malloc_cleared
1031 (sizeof (struct brig_library_info
) * libraries_count
);
1033 for (unsigned i
= 0; i
< libraries_count
; i
++)
1035 struct brig_library_info
*library
= add_shared_library (libraries
[i
],
1037 if (library
== NULL
)
1039 HSA_WARNING ("Could not open a shared BRIG library: %s\n",
1044 status
= hsa_fns
.hsa_ext_program_add_module_fn (prog_handle
,
1046 if (status
!= HSA_STATUS_SUCCESS
)
1047 hsa_warn ("Could not add a shared BRIG library the HSA program",
1050 HSA_DEBUG ("a shared BRIG library has been added to a program: %s\n",
1054 hsa_ext_control_directives_t control_directives
;
1055 memset (&control_directives
, 0, sizeof (control_directives
));
1056 hsa_code_object_t code_object
;
1057 status
= hsa_fns
.hsa_ext_program_finalize_fn
1058 (prog_handle
, agent
->isa
,HSA_EXT_FINALIZER_CALL_CONVENTION_AUTO
,
1059 control_directives
, "", HSA_CODE_OBJECT_TYPE_PROGRAM
, &code_object
);
1060 if (status
!= HSA_STATUS_SUCCESS
)
1062 hsa_warn ("Finalization of the HSA program failed", status
);
1066 HSA_DEBUG ("Finalization done\n");
1067 hsa_fns
.hsa_ext_program_destroy_fn (prog_handle
);
1070 = hsa_fns
.hsa_executable_create_fn (HSA_PROFILE_FULL
,
1071 HSA_EXECUTABLE_STATE_UNFROZEN
,
1072 "", &agent
->executable
);
1073 if (status
!= HSA_STATUS_SUCCESS
)
1074 hsa_fatal ("Could not create HSA executable", status
);
1076 module
= agent
->first_module
;
1079 /* Initialize all global variables declared in the module. */
1080 for (unsigned i
= 0; i
< module
->image_desc
->global_variable_count
; i
++)
1082 struct global_var_info
*var
;
1083 var
= &module
->image_desc
->global_variables
[i
];
1084 status
= hsa_fns
.hsa_executable_global_variable_define_fn
1085 (agent
->executable
, var
->name
, var
->address
);
1087 HSA_DEBUG ("Defining global variable: %s, address: %p\n", var
->name
,
1090 if (status
!= HSA_STATUS_SUCCESS
)
1091 hsa_fatal ("Could not define a global variable in the HSA program",
1095 module
= module
->next
;
1098 status
= hsa_fns
.hsa_executable_load_code_object_fn (agent
->executable
,
1101 if (status
!= HSA_STATUS_SUCCESS
)
1102 hsa_fatal ("Could not add a code object to the HSA executable", status
);
1103 status
= hsa_fns
.hsa_executable_freeze_fn (agent
->executable
, "");
1104 if (status
!= HSA_STATUS_SUCCESS
)
1105 hsa_fatal ("Could not freeze the HSA executable", status
);
1107 HSA_DEBUG ("Froze HSA executable with the finalized code object\n");
1109 /* If all goes good, jump to final. */
1113 agent
->prog_finalized_error
= true;
1116 agent
->prog_finalized
= true;
1118 if (pthread_mutex_unlock (&agent
->prog_mutex
))
1119 GOMP_PLUGIN_fatal ("Could not unlock an HSA agent program mutex");
1122 /* Create kernel dispatch data structure for given KERNEL. */
1124 static struct GOMP_hsa_kernel_dispatch
*
1125 create_single_kernel_dispatch (struct kernel_info
*kernel
,
1126 unsigned omp_data_size
)
1128 struct agent_info
*agent
= kernel
->agent
;
1129 struct GOMP_hsa_kernel_dispatch
*shadow
1130 = GOMP_PLUGIN_malloc_cleared (sizeof (struct GOMP_hsa_kernel_dispatch
));
1132 shadow
->queue
= agent
->command_q
;
1133 shadow
->omp_data_memory
1134 = omp_data_size
> 0 ? GOMP_PLUGIN_malloc (omp_data_size
) : NULL
;
1135 unsigned dispatch_count
= kernel
->dependencies_count
;
1136 shadow
->kernel_dispatch_count
= dispatch_count
;
1138 shadow
->children_dispatches
1139 = GOMP_PLUGIN_malloc (dispatch_count
* sizeof (shadow
));
1141 shadow
->object
= kernel
->object
;
1143 hsa_signal_t sync_signal
;
1144 hsa_status_t status
= hsa_fns
.hsa_signal_create_fn (1, 0, NULL
, &sync_signal
);
1145 if (status
!= HSA_STATUS_SUCCESS
)
1146 hsa_fatal ("Error creating the HSA sync signal", status
);
1148 shadow
->signal
= sync_signal
.handle
;
1149 shadow
->private_segment_size
= kernel
->private_segment_size
;
1150 shadow
->group_segment_size
= kernel
->group_segment_size
;
1153 = hsa_fns
.hsa_memory_allocate_fn (agent
->kernarg_region
,
1154 kernel
->kernarg_segment_size
,
1155 &shadow
->kernarg_address
);
1156 if (status
!= HSA_STATUS_SUCCESS
)
1157 hsa_fatal ("Could not allocate memory for HSA kernel arguments", status
);
1162 /* Release data structure created for a kernel dispatch in SHADOW argument. */
1165 release_kernel_dispatch (struct GOMP_hsa_kernel_dispatch
*shadow
)
1167 HSA_DEBUG ("Released kernel dispatch: %p has value: %" PRIu64
" (%p)\n",
1168 shadow
, (print_uint64_t
) shadow
->debug
,
1169 (void *) (uintptr_t) shadow
->debug
);
1171 hsa_fns
.hsa_memory_free_fn (shadow
->kernarg_address
);
1174 s
.handle
= shadow
->signal
;
1175 hsa_fns
.hsa_signal_destroy_fn (s
);
1177 free (shadow
->omp_data_memory
);
1179 for (unsigned i
= 0; i
< shadow
->kernel_dispatch_count
; i
++)
1180 release_kernel_dispatch (shadow
->children_dispatches
[i
]);
1182 free (shadow
->children_dispatches
);
1186 /* Initialize a KERNEL without its dependencies. MAX_OMP_DATA_SIZE is used
1187 to calculate maximum necessary memory for OMP data allocation. */
1190 init_single_kernel (struct kernel_info
*kernel
, unsigned *max_omp_data_size
)
1192 hsa_status_t status
;
1193 struct agent_info
*agent
= kernel
->agent
;
1194 hsa_executable_symbol_t kernel_symbol
;
1195 status
= hsa_fns
.hsa_executable_get_symbol_fn (agent
->executable
, NULL
,
1196 kernel
->name
, agent
->id
,
1198 if (status
!= HSA_STATUS_SUCCESS
)
1200 hsa_warn ("Could not find symbol for kernel in the code object", status
);
1203 HSA_DEBUG ("Located kernel %s\n", kernel
->name
);
1204 status
= hsa_fns
.hsa_executable_symbol_get_info_fn
1205 (kernel_symbol
, HSA_EXECUTABLE_SYMBOL_INFO_KERNEL_OBJECT
, &kernel
->object
);
1206 if (status
!= HSA_STATUS_SUCCESS
)
1207 hsa_fatal ("Could not extract a kernel object from its symbol", status
);
1208 status
= hsa_fns
.hsa_executable_symbol_get_info_fn
1209 (kernel_symbol
, HSA_EXECUTABLE_SYMBOL_INFO_KERNEL_KERNARG_SEGMENT_SIZE
,
1210 &kernel
->kernarg_segment_size
);
1211 if (status
!= HSA_STATUS_SUCCESS
)
1212 hsa_fatal ("Could not get info about kernel argument size", status
);
1213 status
= hsa_fns
.hsa_executable_symbol_get_info_fn
1214 (kernel_symbol
, HSA_EXECUTABLE_SYMBOL_INFO_KERNEL_GROUP_SEGMENT_SIZE
,
1215 &kernel
->group_segment_size
);
1216 if (status
!= HSA_STATUS_SUCCESS
)
1217 hsa_fatal ("Could not get info about kernel group segment size", status
);
1218 status
= hsa_fns
.hsa_executable_symbol_get_info_fn
1219 (kernel_symbol
, HSA_EXECUTABLE_SYMBOL_INFO_KERNEL_PRIVATE_SEGMENT_SIZE
,
1220 &kernel
->private_segment_size
);
1221 if (status
!= HSA_STATUS_SUCCESS
)
1222 hsa_fatal ("Could not get info about kernel private segment size",
1225 HSA_DEBUG ("Kernel structure for %s fully initialized with "
1226 "following segment sizes: \n", kernel
->name
);
1227 HSA_DEBUG (" group_segment_size: %u\n",
1228 (unsigned) kernel
->group_segment_size
);
1229 HSA_DEBUG (" private_segment_size: %u\n",
1230 (unsigned) kernel
->private_segment_size
);
1231 HSA_DEBUG (" kernarg_segment_size: %u\n",
1232 (unsigned) kernel
->kernarg_segment_size
);
1233 HSA_DEBUG (" omp_data_size: %u\n", kernel
->omp_data_size
);
1234 HSA_DEBUG (" gridified_kernel_p: %u\n", kernel
->gridified_kernel_p
);
1236 if (kernel
->omp_data_size
> *max_omp_data_size
)
1237 *max_omp_data_size
= kernel
->omp_data_size
;
1239 for (unsigned i
= 0; i
< kernel
->dependencies_count
; i
++)
1241 struct kernel_info
*dependency
1242 = get_kernel_for_agent (agent
, kernel
->dependencies
[i
]);
1244 if (dependency
== NULL
)
1246 HSA_DEBUG ("Could not find a dependency for a kernel: %s, "
1247 "dependency name: %s\n", kernel
->name
,
1248 kernel
->dependencies
[i
]);
1252 if (dependency
->dependencies_count
> 0)
1254 HSA_DEBUG ("HSA does not allow kernel dispatching code with "
1255 "a depth bigger than one\n");
1259 init_single_kernel (dependency
, max_omp_data_size
);
1265 kernel
->initialization_failed
= true;
1268 /* Indent stream F by INDENT spaces. */
1271 indent_stream (FILE *f
, unsigned indent
)
1273 fprintf (f
, "%*s", indent
, "");
1276 /* Dump kernel DISPATCH data structure and indent it by INDENT spaces. */
1279 print_kernel_dispatch (struct GOMP_hsa_kernel_dispatch
*dispatch
, unsigned indent
)
1281 indent_stream (stderr
, indent
);
1282 fprintf (stderr
, "this: %p\n", dispatch
);
1283 indent_stream (stderr
, indent
);
1284 fprintf (stderr
, "queue: %p\n", dispatch
->queue
);
1285 indent_stream (stderr
, indent
);
1286 fprintf (stderr
, "omp_data_memory: %p\n", dispatch
->omp_data_memory
);
1287 indent_stream (stderr
, indent
);
1288 fprintf (stderr
, "kernarg_address: %p\n", dispatch
->kernarg_address
);
1289 indent_stream (stderr
, indent
);
1290 fprintf (stderr
, "object: %" PRIu64
"\n", (print_uint64_t
) dispatch
->object
);
1291 indent_stream (stderr
, indent
);
1292 fprintf (stderr
, "signal: %" PRIu64
"\n", (print_uint64_t
) dispatch
->signal
);
1293 indent_stream (stderr
, indent
);
1294 fprintf (stderr
, "private_segment_size: %u\n",
1295 dispatch
->private_segment_size
);
1296 indent_stream (stderr
, indent
);
1297 fprintf (stderr
, "group_segment_size: %u\n",
1298 dispatch
->group_segment_size
);
1299 indent_stream (stderr
, indent
);
1300 fprintf (stderr
, "children dispatches: %" PRIu64
"\n",
1301 (print_uint64_t
) dispatch
->kernel_dispatch_count
);
1302 indent_stream (stderr
, indent
);
1303 fprintf (stderr
, "omp_num_threads: %u\n",
1304 dispatch
->omp_num_threads
);
1305 fprintf (stderr
, "\n");
1307 for (unsigned i
= 0; i
< dispatch
->kernel_dispatch_count
; i
++)
1308 print_kernel_dispatch (dispatch
->children_dispatches
[i
], indent
+ 2);
1311 /* Create kernel dispatch data structure for a KERNEL and all its
1314 static struct GOMP_hsa_kernel_dispatch
*
1315 create_kernel_dispatch (struct kernel_info
*kernel
, unsigned omp_data_size
)
1317 struct GOMP_hsa_kernel_dispatch
*shadow
1318 = create_single_kernel_dispatch (kernel
, omp_data_size
);
1319 shadow
->omp_num_threads
= 64;
1321 shadow
->omp_level
= kernel
->gridified_kernel_p
? 1 : 0;
1323 /* Create kernel dispatch data structures. We do not allow to have
1324 a kernel dispatch with depth bigger than one. */
1325 for (unsigned i
= 0; i
< kernel
->dependencies_count
; i
++)
1327 struct kernel_info
*dependency
1328 = get_kernel_for_agent (kernel
->agent
, kernel
->dependencies
[i
]);
1329 shadow
->children_dispatches
[i
]
1330 = create_single_kernel_dispatch (dependency
, omp_data_size
);
1331 shadow
->children_dispatches
[i
]->queue
1332 = kernel
->agent
->kernel_dispatch_command_q
;
1333 shadow
->children_dispatches
[i
]->omp_level
= 1;
1339 /* Do all the work that is necessary before running KERNEL for the first time.
1340 The function assumes the program has been created, finalized and frozen by
1341 create_and_finalize_hsa_program. */
1344 init_kernel (struct kernel_info
*kernel
)
1346 if (pthread_mutex_lock (&kernel
->init_mutex
))
1347 GOMP_PLUGIN_fatal ("Could not lock an HSA kernel initialization mutex");
1348 if (kernel
->initialized
)
1350 if (pthread_mutex_unlock (&kernel
->init_mutex
))
1351 GOMP_PLUGIN_fatal ("Could not unlock an HSA kernel initialization "
1357 /* Precomputed maximum size of OMP data necessary for a kernel from kernel
1358 dispatch operation. */
1359 init_single_kernel (kernel
, &kernel
->max_omp_data_size
);
1361 if (!kernel
->initialization_failed
)
1364 kernel
->initialized
= true;
1365 if (pthread_mutex_unlock (&kernel
->init_mutex
))
1366 GOMP_PLUGIN_fatal ("Could not unlock an HSA kernel initialization "
1370 /* Parse the target attributes INPUT provided by the compiler and return true
1371 if we should run anything all. If INPUT is NULL, fill DEF with default
1372 values, then store INPUT or DEF into *RESULT. */
1375 parse_target_attributes (void **input
,
1376 struct GOMP_kernel_launch_attributes
*def
,
1377 struct GOMP_kernel_launch_attributes
**result
)
1380 GOMP_PLUGIN_fatal ("No target arguments provided");
1382 bool attrs_found
= false;
1385 uintptr_t id
= (uintptr_t) *input
;
1386 if ((id
& GOMP_TARGET_ARG_DEVICE_MASK
) == GOMP_DEVICE_HSA
1387 && ((id
& GOMP_TARGET_ARG_ID_MASK
)
1388 == GOMP_TARGET_ARG_HSA_KERNEL_ATTRIBUTES
))
1395 if (id
& GOMP_TARGET_ARG_SUBSEQUENT_PARAM
)
1410 HSA_DEBUG ("GOMP_OFFLOAD_run called with no launch attributes\n");
1414 struct GOMP_kernel_launch_attributes
*kla
;
1415 kla
= (struct GOMP_kernel_launch_attributes
*) *input
;
1417 if (kla
->ndim
== 0 || kla
->ndim
> 3)
1418 GOMP_PLUGIN_fatal ("Invalid number of dimensions (%u)", kla
->ndim
);
1420 HSA_DEBUG ("GOMP_OFFLOAD_run called with %u dimensions:\n", kla
->ndim
);
1422 for (i
= 0; i
< kla
->ndim
; i
++)
1424 HSA_DEBUG (" Dimension %u: grid size %u and group size %u\n", i
,
1425 kla
->gdims
[i
], kla
->wdims
[i
]);
1426 if (kla
->gdims
[i
] == 0)
1432 /* Return the group size given the requested GROUP size, GRID size and number
1433 of grid dimensions NDIM. */
1436 get_group_size (uint32_t ndim
, uint32_t grid
, uint32_t group
)
1440 /* TODO: Provide a default via environment or device characteristics. */
1454 /* Return true if the HSA runtime can run function FN_PTR. */
1457 GOMP_OFFLOAD_can_run (void *fn_ptr
)
1459 struct kernel_info
*kernel
= (struct kernel_info
*) fn_ptr
;
1460 struct agent_info
*agent
= kernel
->agent
;
1461 create_and_finalize_hsa_program (agent
);
1463 if (agent
->prog_finalized_error
)
1466 init_kernel (kernel
);
1467 if (kernel
->initialization_failed
)
1473 if (suppress_host_fallback
)
1474 GOMP_PLUGIN_fatal ("HSA host fallback has been suppressed");
1475 HSA_DEBUG ("HSA target cannot be launched, doing a host fallback\n");
1479 /* Atomically store pair of uint16_t values (HEADER and REST) to a PACKET. */
1482 packet_store_release (uint32_t* packet
, uint16_t header
, uint16_t rest
)
1484 __atomic_store_n (packet
, header
| (rest
<< 16), __ATOMIC_RELEASE
);
1487 /* Run KERNEL on its agent, pass VARS to it as arguments and take
1488 launchattributes from KLA. */
1491 run_kernel (struct kernel_info
*kernel
, void *vars
,
1492 struct GOMP_kernel_launch_attributes
*kla
)
1494 struct agent_info
*agent
= kernel
->agent
;
1495 if (pthread_rwlock_rdlock (&agent
->modules_rwlock
))
1496 GOMP_PLUGIN_fatal ("Unable to read-lock an HSA agent rwlock");
1498 if (!agent
->initialized
)
1499 GOMP_PLUGIN_fatal ("Agent must be initialized");
1501 if (!kernel
->initialized
)
1502 GOMP_PLUGIN_fatal ("Called kernel must be initialized");
1504 struct GOMP_hsa_kernel_dispatch
*shadow
1505 = create_kernel_dispatch (kernel
, kernel
->max_omp_data_size
);
1509 fprintf (stderr
, "\nKernel has following dependencies:\n");
1510 print_kernel_dispatch (shadow
, 2);
1514 = hsa_fns
.hsa_queue_add_write_index_release_fn (agent
->command_q
, 1);
1515 HSA_DEBUG ("Got AQL index %llu\n", (long long int) index
);
1517 /* Wait until the queue is not full before writing the packet. */
1518 while (index
- hsa_fns
.hsa_queue_load_read_index_acquire_fn (agent
->command_q
)
1519 >= agent
->command_q
->size
)
1522 hsa_kernel_dispatch_packet_t
*packet
;
1523 packet
= ((hsa_kernel_dispatch_packet_t
*) agent
->command_q
->base_address
)
1524 + index
% agent
->command_q
->size
;
1526 memset (((uint8_t *) packet
) + 4, 0, sizeof (*packet
) - 4);
1527 packet
->grid_size_x
= kla
->gdims
[0];
1528 packet
->workgroup_size_x
= get_group_size (kla
->ndim
, kla
->gdims
[0],
1533 packet
->grid_size_y
= kla
->gdims
[1];
1534 packet
->workgroup_size_y
= get_group_size (kla
->ndim
, kla
->gdims
[1],
1539 packet
->grid_size_y
= 1;
1540 packet
->workgroup_size_y
= 1;
1545 packet
->grid_size_z
= kla
->gdims
[2];
1546 packet
->workgroup_size_z
= get_group_size (kla
->ndim
, kla
->gdims
[2],
1551 packet
->grid_size_z
= 1;
1552 packet
->workgroup_size_z
= 1;
1555 packet
->private_segment_size
= kernel
->private_segment_size
;
1556 packet
->group_segment_size
= kernel
->group_segment_size
;
1557 packet
->kernel_object
= kernel
->object
;
1558 packet
->kernarg_address
= shadow
->kernarg_address
;
1560 s
.handle
= shadow
->signal
;
1561 packet
->completion_signal
= s
;
1562 hsa_fns
.hsa_signal_store_relaxed_fn (s
, 1);
1563 memcpy (shadow
->kernarg_address
, &vars
, sizeof (vars
));
1566 size_t vars_size
= sizeof (vars
);
1567 if (kernel
->kernarg_segment_size
> vars_size
)
1569 if (kernel
->kernarg_segment_size
!= vars_size
1570 + sizeof (struct hsa_kernel_runtime
*))
1571 GOMP_PLUGIN_fatal ("Kernel segment size has an unexpected value");
1572 memcpy (packet
->kernarg_address
+ vars_size
, &shadow
,
1573 sizeof (struct hsa_kernel_runtime
*));
1576 HSA_DEBUG ("Copying kernel runtime pointer to kernarg_address\n");
1579 header
= HSA_PACKET_TYPE_KERNEL_DISPATCH
<< HSA_PACKET_HEADER_TYPE
;
1580 header
|= HSA_FENCE_SCOPE_SYSTEM
<< HSA_PACKET_HEADER_ACQUIRE_FENCE_SCOPE
;
1581 header
|= HSA_FENCE_SCOPE_SYSTEM
<< HSA_PACKET_HEADER_RELEASE_FENCE_SCOPE
;
1583 HSA_DEBUG ("Going to dispatch kernel %s\n", kernel
->name
);
1585 packet_store_release ((uint32_t *) packet
, header
,
1586 (uint16_t) kla
->ndim
<< HSA_KERNEL_DISPATCH_PACKET_SETUP_DIMENSIONS
);
1588 hsa_fns
.hsa_signal_store_release_fn (agent
->command_q
->doorbell_signal
,
1591 /* TODO: GPU agents in Carrizo APUs cannot properly update L2 cache for
1592 signal wait and signal load operations on their own and we need to
1593 periodically call the hsa_signal_load_acquire on completion signals of
1594 children kernels in the CPU to make that happen. As soon the
1595 limitation will be resolved, this workaround can be removed. */
1597 HSA_DEBUG ("Kernel dispatched, waiting for completion\n");
1599 /* Root signal waits with 1ms timeout. */
1600 while (hsa_fns
.hsa_signal_wait_acquire_fn (s
, HSA_SIGNAL_CONDITION_LT
, 1,
1602 HSA_WAIT_STATE_BLOCKED
) != 0)
1603 for (unsigned i
= 0; i
< shadow
->kernel_dispatch_count
; i
++)
1605 hsa_signal_t child_s
;
1606 child_s
.handle
= shadow
->children_dispatches
[i
]->signal
;
1608 HSA_DEBUG ("Waiting for children completion signal: %" PRIu64
"\n",
1609 (print_uint64_t
) shadow
->children_dispatches
[i
]->signal
);
1610 hsa_fns
.hsa_signal_load_acquire_fn (child_s
);
1613 release_kernel_dispatch (shadow
);
1615 if (pthread_rwlock_unlock (&agent
->modules_rwlock
))
1616 GOMP_PLUGIN_fatal ("Unable to unlock an HSA agent rwlock");
1619 /* Part of the libgomp plugin interface. Run a kernel on device N (the number
1620 is actually ignored, we assume the FN_PTR has been mapped using the correct
1621 device) and pass it an array of pointers in VARS as a parameter. The kernel
1622 is identified by FN_PTR which must point to a kernel_info structure. */
1625 GOMP_OFFLOAD_run (int n
__attribute__((unused
)),
1626 void *fn_ptr
, void *vars
, void **args
)
1628 struct kernel_info
*kernel
= (struct kernel_info
*) fn_ptr
;
1629 struct GOMP_kernel_launch_attributes def
;
1630 struct GOMP_kernel_launch_attributes
*kla
;
1631 if (!parse_target_attributes (args
, &def
, &kla
))
1633 HSA_DEBUG ("Will not run HSA kernel because the grid size is zero\n");
1636 run_kernel (kernel
, vars
, kla
);
1639 /* Information to be passed to a thread running a kernel asycnronously. */
1641 struct async_run_info
1650 /* Thread routine to run a kernel asynchronously. */
1653 run_kernel_asynchronously (void *thread_arg
)
1655 struct async_run_info
*info
= (struct async_run_info
*) thread_arg
;
1656 int device
= info
->device
;
1657 void *tgt_fn
= info
->tgt_fn
;
1658 void *tgt_vars
= info
->tgt_vars
;
1659 void **args
= info
->args
;
1660 void *async_data
= info
->async_data
;
1663 GOMP_OFFLOAD_run (device
, tgt_fn
, tgt_vars
, args
);
1664 GOMP_PLUGIN_target_task_completion (async_data
);
1668 /* Part of the libgomp plugin interface. Run a kernel like GOMP_OFFLOAD_run
1669 does, but asynchronously and call GOMP_PLUGIN_target_task_completion when it
1673 GOMP_OFFLOAD_async_run (int device
, void *tgt_fn
, void *tgt_vars
,
1674 void **args
, void *async_data
)
1677 struct async_run_info
*info
;
1678 HSA_DEBUG ("GOMP_OFFLOAD_async_run invoked\n");
1679 info
= GOMP_PLUGIN_malloc (sizeof (struct async_run_info
));
1681 info
->device
= device
;
1682 info
->tgt_fn
= tgt_fn
;
1683 info
->tgt_vars
= tgt_vars
;
1685 info
->async_data
= async_data
;
1687 int err
= pthread_create (&pt
, NULL
, &run_kernel_asynchronously
, info
);
1689 GOMP_PLUGIN_fatal ("HSA asynchronous thread creation failed: %s",
1691 err
= pthread_detach (pt
);
1693 GOMP_PLUGIN_fatal ("Failed to detach a thread to run HSA kernel "
1694 "asynchronously: %s", strerror (err
));
1697 /* Deinitialize all information associated with MODULE and kernels within
1698 it. Return TRUE on success. */
1701 destroy_module (struct module_info
*module
)
1704 for (i
= 0; i
< module
->kernel_count
; i
++)
1705 if (pthread_mutex_destroy (&module
->kernels
[i
].init_mutex
))
1707 GOMP_PLUGIN_error ("Failed to destroy an HSA kernel initialization "
1714 /* Part of the libgomp plugin interface. Unload BRIG module described by
1715 struct brig_image_desc in TARGET_DATA from agent number N. Return
1719 GOMP_OFFLOAD_unload_image (int n
, unsigned version
, const void *target_data
)
1721 if (GOMP_VERSION_DEV (version
) > GOMP_VERSION_HSA
)
1723 GOMP_PLUGIN_error ("Offload data incompatible with HSA plugin"
1724 " (expected %u, received %u)",
1725 GOMP_VERSION_HSA
, GOMP_VERSION_DEV (version
));
1729 struct agent_info
*agent
;
1730 agent
= get_agent_info (n
);
1734 if (pthread_rwlock_wrlock (&agent
->modules_rwlock
))
1736 GOMP_PLUGIN_error ("Unable to write-lock an HSA agent rwlock");
1739 struct module_info
*module
= agent
->first_module
;
1742 if (module
->image_desc
== target_data
)
1744 module
= module
->next
;
1748 GOMP_PLUGIN_error ("Attempt to unload an image that has never been "
1753 remove_module_from_agent (agent
, module
);
1754 if (!destroy_module (module
))
1757 if (!destroy_hsa_program (agent
))
1759 if (pthread_rwlock_unlock (&agent
->modules_rwlock
))
1761 GOMP_PLUGIN_error ("Unable to unlock an HSA agent rwlock");
1767 /* Part of the libgomp plugin interface. Deinitialize all information and
1768 status associated with agent number N. We do not attempt any
1769 synchronization, assuming the user and libgomp will not attempt
1770 deinitialization of a device that is in any way being used at the same
1771 time. Return TRUE on success. */
1774 GOMP_OFFLOAD_fini_device (int n
)
1776 struct agent_info
*agent
= get_agent_info (n
);
1780 if (!agent
->initialized
)
1783 struct module_info
*next_module
= agent
->first_module
;
1786 struct module_info
*module
= next_module
;
1787 next_module
= module
->next
;
1788 if (!destroy_module (module
))
1792 agent
->first_module
= NULL
;
1793 if (!destroy_hsa_program (agent
))
1796 release_agent_shared_libraries (agent
);
1798 hsa_status_t status
= hsa_fns
.hsa_queue_destroy_fn (agent
->command_q
);
1799 if (status
!= HSA_STATUS_SUCCESS
)
1800 return hsa_error ("Error destroying command queue", status
);
1801 status
= hsa_fns
.hsa_queue_destroy_fn (agent
->kernel_dispatch_command_q
);
1802 if (status
!= HSA_STATUS_SUCCESS
)
1803 return hsa_error ("Error destroying kernel dispatch command queue", status
);
1804 if (pthread_mutex_destroy (&agent
->prog_mutex
))
1806 GOMP_PLUGIN_error ("Failed to destroy an HSA agent program mutex");
1809 if (pthread_rwlock_destroy (&agent
->modules_rwlock
))
1811 GOMP_PLUGIN_error ("Failed to destroy an HSA agent rwlock");
1814 agent
->initialized
= false;
1818 /* Part of the libgomp plugin interface. Not implemented as it is not required
1822 GOMP_OFFLOAD_alloc (int ord
, size_t size
)
1824 GOMP_PLUGIN_error ("HSA GOMP_OFFLOAD_alloc is not implemented because "
1825 "it should never be called");
1829 /* Part of the libgomp plugin interface. Not implemented as it is not required
1833 GOMP_OFFLOAD_free (int ord
, void *ptr
)
1835 GOMP_PLUGIN_error ("HSA GOMP_OFFLOAD_free is not implemented because "
1836 "it should never be called");
1840 /* Part of the libgomp plugin interface. Not implemented as it is not required
1844 GOMP_OFFLOAD_dev2host (int ord
, void *dst
, const void *src
, size_t n
)
1846 GOMP_PLUGIN_error ("HSA GOMP_OFFLOAD_dev2host is not implemented because "
1847 "it should never be called");
1851 /* Part of the libgomp plugin interface. Not implemented as it is not required
1855 GOMP_OFFLOAD_host2dev (int ord
, void *dst
, const void *src
, size_t n
)
1857 GOMP_PLUGIN_error ("HSA GOMP_OFFLOAD_host2dev is not implemented because "
1858 "it should never be called");
1862 /* Part of the libgomp plugin interface. Not implemented as it is not required
1866 GOMP_OFFLOAD_dev2dev (int ord
, void *dst
, const void *src
, size_t n
)
1868 GOMP_PLUGIN_error ("HSA GOMP_OFFLOAD_dev2dev is not implemented because "
1869 "it should never be called");