1 /* Plugin for HSAIL execution.
3 Copyright (C) 2013-2018 Free Software Foundation, Inc.
5 Contributed by Martin Jambor <mjambor@suse.cz> and
6 Martin Liska <mliska@suse.cz>.
8 This file is part of the GNU Offloading and Multi Processing Library
11 Libgomp is free software; you can redistribute it and/or modify it
12 under the terms of the GNU General Public License as published by
13 the Free Software Foundation; either version 3, or (at your option)
16 Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY
17 WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
18 FOR A PARTICULAR PURPOSE. See the GNU General Public License for
21 Under Section 7 of GPL version 3, you are granted additional
22 permissions described in the GCC Runtime Library Exception, version
23 3.1, as published by the Free Software Foundation.
25 You should have received a copy of the GNU General Public License and
26 a copy of the GCC Runtime Library Exception along with this program;
27 see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
28 <http://www.gnu.org/licenses/>. */
38 #include <plugin/hsa_ext_finalize.h>
40 #include "libgomp-plugin.h"
41 #include "gomp-constants.h"
42 #include "secure_getenv.h"
44 /* As an HSA runtime is dlopened, following structure defines function
45 pointers utilized by the HSA plug-in. */
47 struct hsa_runtime_fn_info
50 hsa_status_t (*hsa_status_string_fn
) (hsa_status_t status
,
51 const char **status_string
);
52 hsa_status_t (*hsa_agent_get_info_fn
) (hsa_agent_t agent
,
53 hsa_agent_info_t attribute
,
55 hsa_status_t (*hsa_init_fn
) (void);
56 hsa_status_t (*hsa_iterate_agents_fn
)
57 (hsa_status_t (*callback
)(hsa_agent_t agent
, void *data
), void *data
);
58 hsa_status_t (*hsa_region_get_info_fn
) (hsa_region_t region
,
59 hsa_region_info_t attribute
,
61 hsa_status_t (*hsa_queue_create_fn
)
62 (hsa_agent_t agent
, uint32_t size
, hsa_queue_type_t type
,
63 void (*callback
)(hsa_status_t status
, hsa_queue_t
*source
, void *data
),
64 void *data
, uint32_t private_segment_size
,
65 uint32_t group_segment_size
, hsa_queue_t
**queue
);
66 hsa_status_t (*hsa_agent_iterate_regions_fn
)
68 hsa_status_t (*callback
)(hsa_region_t region
, void *data
), void *data
);
69 hsa_status_t (*hsa_executable_destroy_fn
) (hsa_executable_t executable
);
70 hsa_status_t (*hsa_executable_create_fn
)
71 (hsa_profile_t profile
, hsa_executable_state_t executable_state
,
72 const char *options
, hsa_executable_t
*executable
);
73 hsa_status_t (*hsa_executable_global_variable_define_fn
)
74 (hsa_executable_t executable
, const char *variable_name
, void *address
);
75 hsa_status_t (*hsa_executable_load_code_object_fn
)
76 (hsa_executable_t executable
, hsa_agent_t agent
,
77 hsa_code_object_t code_object
, const char *options
);
78 hsa_status_t (*hsa_executable_freeze_fn
)(hsa_executable_t executable
,
80 hsa_status_t (*hsa_signal_create_fn
) (hsa_signal_value_t initial_value
,
81 uint32_t num_consumers
,
82 const hsa_agent_t
*consumers
,
83 hsa_signal_t
*signal
);
84 hsa_status_t (*hsa_memory_allocate_fn
) (hsa_region_t region
, size_t size
,
86 hsa_status_t (*hsa_memory_free_fn
) (void *ptr
);
87 hsa_status_t (*hsa_signal_destroy_fn
) (hsa_signal_t signal
);
88 hsa_status_t (*hsa_executable_get_symbol_fn
)
89 (hsa_executable_t executable
, const char *module_name
,
90 const char *symbol_name
, hsa_agent_t agent
, int32_t call_convention
,
91 hsa_executable_symbol_t
*symbol
);
92 hsa_status_t (*hsa_executable_symbol_get_info_fn
)
93 (hsa_executable_symbol_t executable_symbol
,
94 hsa_executable_symbol_info_t attribute
, void *value
);
95 uint64_t (*hsa_queue_add_write_index_release_fn
) (const hsa_queue_t
*queue
,
97 uint64_t (*hsa_queue_load_read_index_acquire_fn
) (const hsa_queue_t
*queue
);
98 void (*hsa_signal_store_relaxed_fn
) (hsa_signal_t signal
,
99 hsa_signal_value_t value
);
100 void (*hsa_signal_store_release_fn
) (hsa_signal_t signal
,
101 hsa_signal_value_t value
);
102 hsa_signal_value_t (*hsa_signal_wait_acquire_fn
)
103 (hsa_signal_t signal
, hsa_signal_condition_t condition
,
104 hsa_signal_value_t compare_value
, uint64_t timeout_hint
,
105 hsa_wait_state_t wait_state_hint
);
106 hsa_signal_value_t (*hsa_signal_load_acquire_fn
) (hsa_signal_t signal
);
107 hsa_status_t (*hsa_queue_destroy_fn
) (hsa_queue_t
*queue
);
110 hsa_status_t (*hsa_ext_program_add_module_fn
) (hsa_ext_program_t program
,
111 hsa_ext_module_t module
);
112 hsa_status_t (*hsa_ext_program_create_fn
)
113 (hsa_machine_model_t machine_model
, hsa_profile_t profile
,
114 hsa_default_float_rounding_mode_t default_float_rounding_mode
,
115 const char *options
, hsa_ext_program_t
*program
);
116 hsa_status_t (*hsa_ext_program_destroy_fn
) (hsa_ext_program_t program
);
117 hsa_status_t (*hsa_ext_program_finalize_fn
)
118 (hsa_ext_program_t program
,hsa_isa_t isa
,
119 int32_t call_convention
, hsa_ext_control_directives_t control_directives
,
120 const char *options
, hsa_code_object_type_t code_object_type
,
121 hsa_code_object_t
*code_object
);
124 /* HSA runtime functions that are initialized in init_hsa_context. */
126 static struct hsa_runtime_fn_info hsa_fns
;
128 /* Keep the following GOMP prefixed structures in sync with respective parts of
131 /* Structure describing the run-time and grid properties of an HSA kernel
134 struct GOMP_kernel_launch_attributes
136 /* Number of dimensions the workload has. Maximum number is 3. */
138 /* Size of the grid in the three respective dimensions. */
140 /* Size of work-groups in the respective dimensions. */
144 /* Collection of information needed for a dispatch of a kernel from a
147 struct GOMP_hsa_kernel_dispatch
149 /* Pointer to a command queue associated with a kernel dispatch agent. */
151 /* Pointer to reserved memory for OMP data struct copying. */
152 void *omp_data_memory
;
153 /* Pointer to a memory space used for kernel arguments passing. */
154 void *kernarg_address
;
157 /* Synchronization signal used for dispatch synchronization. */
159 /* Private segment size. */
160 uint32_t private_segment_size
;
161 /* Group segment size. */
162 uint32_t group_segment_size
;
163 /* Number of children kernel dispatches. */
164 uint64_t kernel_dispatch_count
;
165 /* Debug purpose argument. */
167 /* Levels-var ICV. */
169 /* Kernel dispatch structures created for children kernel dispatches. */
170 struct GOMP_hsa_kernel_dispatch
**children_dispatches
;
171 /* Number of threads. */
172 uint32_t omp_num_threads
;
175 /* Part of the libgomp plugin interface. Return the name of the accelerator,
179 GOMP_OFFLOAD_get_name (void)
184 /* Part of the libgomp plugin interface. Return the specific capabilities the
185 HSA accelerator have. */
188 GOMP_OFFLOAD_get_caps (void)
190 return GOMP_OFFLOAD_CAP_SHARED_MEM
| GOMP_OFFLOAD_CAP_OPENMP_400
;
193 /* Part of the libgomp plugin interface. Identify as HSA accelerator. */
196 GOMP_OFFLOAD_get_type (void)
198 return OFFLOAD_TARGET_TYPE_HSA
;
201 /* Return the libgomp version number we're compatible with. There is
202 no requirement for cross-version compatibility. */
205 GOMP_OFFLOAD_version (void)
210 /* Flag to decide whether print to stderr information about what is going on.
211 Set in init_debug depending on environment variables. */
215 /* Flag to decide if the runtime should suppress a possible fallback to host
218 static bool suppress_host_fallback
;
220 /* Flag to locate HSA runtime shared library that is dlopened
223 static const char *hsa_runtime_lib
;
225 /* Flag to decide if the runtime should support also CPU devices (can be
228 static bool support_cpu_devices
;
230 /* Initialize debug and suppress_host_fallback according to the environment. */
233 init_enviroment_variables (void)
235 if (secure_getenv ("HSA_DEBUG"))
240 if (secure_getenv ("HSA_SUPPRESS_HOST_FALLBACK"))
241 suppress_host_fallback
= true;
243 suppress_host_fallback
= false;
245 hsa_runtime_lib
= secure_getenv ("HSA_RUNTIME_LIB");
246 if (hsa_runtime_lib
== NULL
)
247 hsa_runtime_lib
= HSA_RUNTIME_LIB
"libhsa-runtime64.so";
249 support_cpu_devices
= secure_getenv ("HSA_SUPPORT_CPU_DEVICES");
252 /* Print a logging message with PREFIX to stderr if HSA_DEBUG value
255 #define HSA_LOG(prefix, ...) \
260 fprintf (stderr, prefix); \
261 fprintf (stderr, __VA_ARGS__); \
266 /* Print a debugging message to stderr. */
268 #define HSA_DEBUG(...) HSA_LOG ("HSA debug: ", __VA_ARGS__)
270 /* Print a warning message to stderr. */
272 #define HSA_WARNING(...) HSA_LOG ("HSA warning: ", __VA_ARGS__)
274 /* Print HSA warning STR with an HSA STATUS code. */
277 hsa_warn (const char *str
, hsa_status_t status
)
282 const char *hsa_error_msg
;
283 hsa_fns
.hsa_status_string_fn (status
, &hsa_error_msg
);
285 fprintf (stderr
, "HSA warning: %s\nRuntime message: %s", str
, hsa_error_msg
);
288 /* Report a fatal error STR together with the HSA error corresponding to STATUS
289 and terminate execution of the current process. */
292 hsa_fatal (const char *str
, hsa_status_t status
)
294 const char *hsa_error_msg
;
295 hsa_fns
.hsa_status_string_fn (status
, &hsa_error_msg
);
296 GOMP_PLUGIN_fatal ("HSA fatal error: %s\nRuntime message: %s", str
,
300 /* Like hsa_fatal, except only report error message, and return FALSE
301 for propagating error processing to outside of plugin. */
304 hsa_error (const char *str
, hsa_status_t status
)
306 const char *hsa_error_msg
;
307 hsa_fns
.hsa_status_string_fn (status
, &hsa_error_msg
);
308 GOMP_PLUGIN_error ("HSA fatal error: %s\nRuntime message: %s", str
,
313 struct hsa_kernel_description
316 unsigned omp_data_size
;
317 bool gridified_kernel_p
;
318 unsigned kernel_dependencies_count
;
319 const char **kernel_dependencies
;
322 struct global_var_info
328 /* Data passed by the static initializer of a compilation unit containing BRIG
329 to GOMP_offload_register. */
331 struct brig_image_desc
333 hsa_ext_module_t brig_module
;
334 const unsigned kernel_count
;
335 struct hsa_kernel_description
*kernel_infos
;
336 const unsigned global_variable_count
;
337 struct global_var_info
*global_variables
;
342 /* Information required to identify, finalize and run any given kernel. */
346 /* Name of the kernel, required to locate it within the brig module. */
348 /* Size of memory space for OMP data. */
349 unsigned omp_data_size
;
350 /* The specific agent the kernel has been or will be finalized for and run
352 struct agent_info
*agent
;
353 /* The specific module where the kernel takes place. */
354 struct module_info
*module
;
355 /* Mutex enforcing that at most once thread ever initializes a kernel for
356 use. A thread should have locked agent->modules_rwlock for reading before
358 pthread_mutex_t init_mutex
;
359 /* Flag indicating whether the kernel has been initialized and all fields
360 below it contain valid data. */
362 /* Flag indicating that the kernel has a problem that blocks an execution. */
363 bool initialization_failed
;
364 /* The object to be put into the dispatch queue. */
366 /* Required size of kernel arguments. */
367 uint32_t kernarg_segment_size
;
368 /* Required size of group segment. */
369 uint32_t group_segment_size
;
370 /* Required size of private segment. */
371 uint32_t private_segment_size
;
372 /* List of all kernel dependencies. */
373 const char **dependencies
;
374 /* Number of dependencies. */
375 unsigned dependencies_count
;
376 /* Maximum OMP data size necessary for kernel from kernel dispatches. */
377 unsigned max_omp_data_size
;
378 /* True if the kernel is gridified. */
379 bool gridified_kernel_p
;
382 /* Information about a particular brig module, its image and kernels. */
386 /* The next and previous module in the linked list of modules of an agent. */
387 struct module_info
*next
, *prev
;
388 /* The description with which the program has registered the image. */
389 struct brig_image_desc
*image_desc
;
391 /* Number of kernels in this module. */
393 /* An array of kernel_info structures describing each kernel in this
395 struct kernel_info kernels
[];
398 /* Information about shared brig library. */
400 struct brig_library_info
403 hsa_ext_module_t image
;
406 /* Description of an HSA GPU agent and the program associated with it. */
410 /* The HSA ID of the agent. Assigned when hsa_context is initialized. */
412 /* Whether the agent has been initialized. The fields below are usable only
415 /* The HSA ISA of this agent. */
417 /* Command queue of the agent. */
418 hsa_queue_t
*command_q
;
419 /* Kernel from kernel dispatch command queue. */
420 hsa_queue_t
*kernel_dispatch_command_q
;
421 /* The HSA memory region from which to allocate kernel arguments. */
422 hsa_region_t kernarg_region
;
424 /* Read-write lock that protects kernels which are running or about to be run
425 from interference with loading and unloading of images. Needs to be
426 locked for reading while a kernel is being run, and for writing if the
427 list of modules is manipulated (and thus the HSA program invalidated). */
428 pthread_rwlock_t modules_rwlock
;
429 /* The first module in a linked list of modules associated with this
431 struct module_info
*first_module
;
433 /* Mutex enforcing that only one thread will finalize the HSA program. A
434 thread should have locked agent->modules_rwlock for reading before
436 pthread_mutex_t prog_mutex
;
437 /* Flag whether the HSA program that consists of all the modules has been
440 /* Flag whether the program was finalized but with a failure. */
441 bool prog_finalized_error
;
442 /* HSA executable - the finalized program that is used to locate kernels. */
443 hsa_executable_t executable
;
444 /* List of BRIG libraries. */
445 struct brig_library_info
**brig_libraries
;
446 /* Number of loaded shared BRIG libraries. */
447 unsigned brig_libraries_count
;
450 /* Information about the whole HSA environment and all of its agents. */
452 struct hsa_context_info
454 /* Whether the structure has been initialized. */
456 /* Number of usable GPU HSA agents in the system. */
458 /* Array of agent_info structures describing the individual HSA agents. */
459 struct agent_info
*agents
;
462 /* Information about the whole HSA environment and all of its agents. */
464 static struct hsa_context_info hsa_context
;
466 #define DLSYM_FN(function) \
467 hsa_fns.function##_fn = dlsym (handle, #function); \
468 if (hsa_fns.function##_fn == NULL) \
472 init_hsa_runtime_functions (void)
474 void *handle
= dlopen (hsa_runtime_lib
, RTLD_LAZY
);
478 DLSYM_FN (hsa_status_string
)
479 DLSYM_FN (hsa_agent_get_info
)
481 DLSYM_FN (hsa_iterate_agents
)
482 DLSYM_FN (hsa_region_get_info
)
483 DLSYM_FN (hsa_queue_create
)
484 DLSYM_FN (hsa_agent_iterate_regions
)
485 DLSYM_FN (hsa_executable_destroy
)
486 DLSYM_FN (hsa_executable_create
)
487 DLSYM_FN (hsa_executable_global_variable_define
)
488 DLSYM_FN (hsa_executable_load_code_object
)
489 DLSYM_FN (hsa_executable_freeze
)
490 DLSYM_FN (hsa_signal_create
)
491 DLSYM_FN (hsa_memory_allocate
)
492 DLSYM_FN (hsa_memory_free
)
493 DLSYM_FN (hsa_signal_destroy
)
494 DLSYM_FN (hsa_executable_get_symbol
)
495 DLSYM_FN (hsa_executable_symbol_get_info
)
496 DLSYM_FN (hsa_queue_add_write_index_release
)
497 DLSYM_FN (hsa_queue_load_read_index_acquire
)
498 DLSYM_FN (hsa_signal_wait_acquire
)
499 DLSYM_FN (hsa_signal_store_relaxed
)
500 DLSYM_FN (hsa_signal_store_release
)
501 DLSYM_FN (hsa_signal_load_acquire
)
502 DLSYM_FN (hsa_queue_destroy
)
503 DLSYM_FN (hsa_ext_program_add_module
)
504 DLSYM_FN (hsa_ext_program_create
)
505 DLSYM_FN (hsa_ext_program_destroy
)
506 DLSYM_FN (hsa_ext_program_finalize
)
510 HSA_DEBUG ("while loading %s: %s\n", hsa_runtime_lib
, dlerror ());
514 /* Find kernel for an AGENT by name provided in KERNEL_NAME. */
516 static struct kernel_info
*
517 get_kernel_for_agent (struct agent_info
*agent
, const char *kernel_name
)
519 struct module_info
*module
= agent
->first_module
;
523 for (unsigned i
= 0; i
< module
->kernel_count
; i
++)
524 if (strcmp (module
->kernels
[i
].name
, kernel_name
) == 0)
525 return &module
->kernels
[i
];
527 module
= module
->next
;
533 /* Return true if the agent is a GPU and acceptable of concurrent submissions
534 from different threads. */
537 suitable_hsa_agent_p (hsa_agent_t agent
)
539 hsa_device_type_t device_type
;
541 = hsa_fns
.hsa_agent_get_info_fn (agent
, HSA_AGENT_INFO_DEVICE
,
543 if (status
!= HSA_STATUS_SUCCESS
)
548 case HSA_DEVICE_TYPE_GPU
:
550 case HSA_DEVICE_TYPE_CPU
:
551 if (!support_cpu_devices
)
558 uint32_t features
= 0;
559 status
= hsa_fns
.hsa_agent_get_info_fn (agent
, HSA_AGENT_INFO_FEATURE
,
561 if (status
!= HSA_STATUS_SUCCESS
562 || !(features
& HSA_AGENT_FEATURE_KERNEL_DISPATCH
))
564 hsa_queue_type_t queue_type
;
565 status
= hsa_fns
.hsa_agent_get_info_fn (agent
, HSA_AGENT_INFO_QUEUE_TYPE
,
567 if (status
!= HSA_STATUS_SUCCESS
568 || (queue_type
!= HSA_QUEUE_TYPE_MULTI
))
574 /* Callback of hsa_iterate_agents, if AGENT is a GPU device, increment
575 agent_count in hsa_context. */
578 count_gpu_agents (hsa_agent_t agent
, void *data
__attribute__ ((unused
)))
580 if (suitable_hsa_agent_p (agent
))
581 hsa_context
.agent_count
++;
582 return HSA_STATUS_SUCCESS
;
585 /* Callback of hsa_iterate_agents, if AGENT is a GPU device, assign the agent
586 id to the describing structure in the hsa context. The index of the
587 structure is pointed to by DATA, increment it afterwards. */
590 assign_agent_ids (hsa_agent_t agent
, void *data
)
592 if (suitable_hsa_agent_p (agent
))
594 int *agent_index
= (int *) data
;
595 hsa_context
.agents
[*agent_index
].id
= agent
;
598 return HSA_STATUS_SUCCESS
;
601 /* Initialize hsa_context if it has not already been done.
602 Return TRUE on success. */
605 init_hsa_context (void)
610 if (hsa_context
.initialized
)
612 init_enviroment_variables ();
613 if (!init_hsa_runtime_functions ())
615 HSA_DEBUG ("Run-time could not be dynamically opened\n");
618 status
= hsa_fns
.hsa_init_fn ();
619 if (status
!= HSA_STATUS_SUCCESS
)
620 return hsa_error ("Run-time could not be initialized", status
);
621 HSA_DEBUG ("HSA run-time initialized\n");
622 status
= hsa_fns
.hsa_iterate_agents_fn (count_gpu_agents
, NULL
);
623 if (status
!= HSA_STATUS_SUCCESS
)
624 return hsa_error ("HSA GPU devices could not be enumerated", status
);
625 HSA_DEBUG ("There are %i HSA GPU devices.\n", hsa_context
.agent_count
);
628 = GOMP_PLUGIN_malloc_cleared (hsa_context
.agent_count
629 * sizeof (struct agent_info
));
630 status
= hsa_fns
.hsa_iterate_agents_fn (assign_agent_ids
, &agent_index
);
631 if (agent_index
!= hsa_context
.agent_count
)
633 GOMP_PLUGIN_error ("Failed to assign IDs to all HSA agents");
636 hsa_context
.initialized
= true;
640 /* Callback of dispatch queues to report errors. */
643 queue_callback (hsa_status_t status
,
644 hsa_queue_t
*queue
__attribute__ ((unused
)),
645 void *data
__attribute__ ((unused
)))
647 hsa_fatal ("Asynchronous queue error", status
);
650 /* Callback of hsa_agent_iterate_regions. Determine if a memory REGION can be
651 used for kernarg allocations and if so write it to the memory pointed to by
652 DATA and break the query. */
655 get_kernarg_memory_region (hsa_region_t region
, void *data
)
658 hsa_region_segment_t segment
;
660 status
= hsa_fns
.hsa_region_get_info_fn (region
, HSA_REGION_INFO_SEGMENT
,
662 if (status
!= HSA_STATUS_SUCCESS
)
664 if (segment
!= HSA_REGION_SEGMENT_GLOBAL
)
665 return HSA_STATUS_SUCCESS
;
668 status
= hsa_fns
.hsa_region_get_info_fn (region
, HSA_REGION_INFO_GLOBAL_FLAGS
,
670 if (status
!= HSA_STATUS_SUCCESS
)
672 if (flags
& HSA_REGION_GLOBAL_FLAG_KERNARG
)
674 hsa_region_t
*ret
= (hsa_region_t
*) data
;
676 return HSA_STATUS_INFO_BREAK
;
678 return HSA_STATUS_SUCCESS
;
681 /* Part of the libgomp plugin interface. Return the number of HSA devices on
685 GOMP_OFFLOAD_get_num_devices (void)
687 if (!init_hsa_context ())
689 return hsa_context
.agent_count
;
692 /* Part of the libgomp plugin interface. Initialize agent number N so that it
693 can be used for computation. Return TRUE on success. */
696 GOMP_OFFLOAD_init_device (int n
)
698 if (!init_hsa_context ())
700 if (n
>= hsa_context
.agent_count
)
702 GOMP_PLUGIN_error ("Request to initialize non-existing HSA device %i", n
);
705 struct agent_info
*agent
= &hsa_context
.agents
[n
];
707 if (agent
->initialized
)
710 if (pthread_rwlock_init (&agent
->modules_rwlock
, NULL
))
712 GOMP_PLUGIN_error ("Failed to initialize an HSA agent rwlock");
715 if (pthread_mutex_init (&agent
->prog_mutex
, NULL
))
717 GOMP_PLUGIN_error ("Failed to initialize an HSA agent program mutex");
723 status
= hsa_fns
.hsa_agent_get_info_fn (agent
->id
,
724 HSA_AGENT_INFO_QUEUE_MAX_SIZE
,
726 if (status
!= HSA_STATUS_SUCCESS
)
727 return hsa_error ("Error requesting maximum queue size of the HSA agent",
729 status
= hsa_fns
.hsa_agent_get_info_fn (agent
->id
, HSA_AGENT_INFO_ISA
,
731 if (status
!= HSA_STATUS_SUCCESS
)
732 return hsa_error ("Error querying the ISA of the agent", status
);
733 status
= hsa_fns
.hsa_queue_create_fn (agent
->id
, queue_size
,
734 HSA_QUEUE_TYPE_MULTI
,
735 queue_callback
, NULL
, UINT32_MAX
,
738 if (status
!= HSA_STATUS_SUCCESS
)
739 return hsa_error ("Error creating command queue", status
);
741 status
= hsa_fns
.hsa_queue_create_fn (agent
->id
, queue_size
,
742 HSA_QUEUE_TYPE_MULTI
,
743 queue_callback
, NULL
, UINT32_MAX
,
745 &agent
->kernel_dispatch_command_q
);
746 if (status
!= HSA_STATUS_SUCCESS
)
747 return hsa_error ("Error creating kernel dispatch command queue", status
);
749 agent
->kernarg_region
.handle
= (uint64_t) -1;
750 status
= hsa_fns
.hsa_agent_iterate_regions_fn (agent
->id
,
751 get_kernarg_memory_region
,
752 &agent
->kernarg_region
);
753 if (agent
->kernarg_region
.handle
== (uint64_t) -1)
755 GOMP_PLUGIN_error ("Could not find suitable memory region for kernel "
759 HSA_DEBUG ("HSA agent initialized, queue has id %llu\n",
760 (long long unsigned) agent
->command_q
->id
);
761 HSA_DEBUG ("HSA agent initialized, kernel dispatch queue has id %llu\n",
762 (long long unsigned) agent
->kernel_dispatch_command_q
->id
);
763 agent
->initialized
= true;
767 /* Verify that hsa_context has already been initialized and return the
768 agent_info structure describing device number N. Return NULL on error. */
770 static struct agent_info
*
771 get_agent_info (int n
)
773 if (!hsa_context
.initialized
)
775 GOMP_PLUGIN_error ("Attempt to use uninitialized HSA context.");
778 if (n
>= hsa_context
.agent_count
)
780 GOMP_PLUGIN_error ("Request to operate on anon-existing HSA device %i", n
);
783 if (!hsa_context
.agents
[n
].initialized
)
785 GOMP_PLUGIN_error ("Attempt to use an uninitialized HSA agent.");
788 return &hsa_context
.agents
[n
];
791 /* Insert MODULE to the linked list of modules of AGENT. */
794 add_module_to_agent (struct agent_info
*agent
, struct module_info
*module
)
796 if (agent
->first_module
)
797 agent
->first_module
->prev
= module
;
798 module
->next
= agent
->first_module
;
800 agent
->first_module
= module
;
803 /* Remove MODULE from the linked list of modules of AGENT. */
806 remove_module_from_agent (struct agent_info
*agent
, struct module_info
*module
)
808 if (agent
->first_module
== module
)
809 agent
->first_module
= module
->next
;
811 module
->prev
->next
= module
->next
;
813 module
->next
->prev
= module
->prev
;
816 /* Free the HSA program in agent and everything associated with it and set
817 agent->prog_finalized and the initialized flags of all kernels to false.
818 Return TRUE on success. */
821 destroy_hsa_program (struct agent_info
*agent
)
823 if (!agent
->prog_finalized
|| agent
->prog_finalized_error
)
828 HSA_DEBUG ("Destroying the current HSA program.\n");
830 status
= hsa_fns
.hsa_executable_destroy_fn (agent
->executable
);
831 if (status
!= HSA_STATUS_SUCCESS
)
832 return hsa_error ("Could not destroy HSA executable", status
);
834 struct module_info
*module
;
835 for (module
= agent
->first_module
; module
; module
= module
->next
)
838 for (i
= 0; i
< module
->kernel_count
; i
++)
839 module
->kernels
[i
].initialized
= false;
841 agent
->prog_finalized
= false;
845 /* Initialize KERNEL from D and other parameters. Return true on success. */
848 init_basic_kernel_info (struct kernel_info
*kernel
,
849 struct hsa_kernel_description
*d
,
850 struct agent_info
*agent
,
851 struct module_info
*module
)
853 kernel
->agent
= agent
;
854 kernel
->module
= module
;
855 kernel
->name
= d
->name
;
856 kernel
->omp_data_size
= d
->omp_data_size
;
857 kernel
->gridified_kernel_p
= d
->gridified_kernel_p
;
858 kernel
->dependencies_count
= d
->kernel_dependencies_count
;
859 kernel
->dependencies
= d
->kernel_dependencies
;
860 if (pthread_mutex_init (&kernel
->init_mutex
, NULL
))
862 GOMP_PLUGIN_error ("Failed to initialize an HSA kernel mutex");
868 /* Part of the libgomp plugin interface. Load BRIG module described by struct
869 brig_image_desc in TARGET_DATA and return references to kernel descriptors
873 GOMP_OFFLOAD_load_image (int ord
, unsigned version
, const void *target_data
,
874 struct addr_pair
**target_table
)
876 if (GOMP_VERSION_DEV (version
) > GOMP_VERSION_HSA
)
878 GOMP_PLUGIN_error ("Offload data incompatible with HSA plugin"
879 " (expected %u, received %u)",
880 GOMP_VERSION_HSA
, GOMP_VERSION_DEV (version
));
884 struct brig_image_desc
*image_desc
= (struct brig_image_desc
*) target_data
;
885 struct agent_info
*agent
;
886 struct addr_pair
*pair
;
887 struct module_info
*module
;
888 struct kernel_info
*kernel
;
889 int kernel_count
= image_desc
->kernel_count
;
891 agent
= get_agent_info (ord
);
895 if (pthread_rwlock_wrlock (&agent
->modules_rwlock
))
897 GOMP_PLUGIN_error ("Unable to write-lock an HSA agent rwlock");
900 if (agent
->prog_finalized
901 && !destroy_hsa_program (agent
))
904 HSA_DEBUG ("Encountered %d kernels in an image\n", kernel_count
);
905 pair
= GOMP_PLUGIN_malloc (kernel_count
* sizeof (struct addr_pair
));
906 *target_table
= pair
;
907 module
= (struct module_info
*)
908 GOMP_PLUGIN_malloc_cleared (sizeof (struct module_info
)
909 + kernel_count
* sizeof (struct kernel_info
));
910 module
->image_desc
= image_desc
;
911 module
->kernel_count
= kernel_count
;
913 kernel
= &module
->kernels
[0];
915 /* Allocate memory for kernel dependencies. */
916 for (unsigned i
= 0; i
< kernel_count
; i
++)
918 pair
->start
= (uintptr_t) kernel
;
919 pair
->end
= (uintptr_t) (kernel
+ 1);
921 struct hsa_kernel_description
*d
= &image_desc
->kernel_infos
[i
];
922 if (!init_basic_kernel_info (kernel
, d
, agent
, module
))
928 add_module_to_agent (agent
, module
);
929 if (pthread_rwlock_unlock (&agent
->modules_rwlock
))
931 GOMP_PLUGIN_error ("Unable to unlock an HSA agent rwlock");
937 /* Add a shared BRIG library from a FILE_NAME to an AGENT. */
939 static struct brig_library_info
*
940 add_shared_library (const char *file_name
, struct agent_info
*agent
)
942 struct brig_library_info
*library
= NULL
;
944 void *f
= dlopen (file_name
, RTLD_NOW
);
945 void *start
= dlsym (f
, "__brig_start");
946 void *end
= dlsym (f
, "__brig_end");
948 if (start
== NULL
|| end
== NULL
)
951 unsigned size
= end
- start
;
952 char *buf
= (char *) GOMP_PLUGIN_malloc (size
);
953 memcpy (buf
, start
, size
);
955 library
= GOMP_PLUGIN_malloc (sizeof (struct agent_info
));
956 library
->file_name
= (char *) GOMP_PLUGIN_malloc
957 ((strlen (file_name
) + 1));
958 strcpy (library
->file_name
, file_name
);
959 library
->image
= (hsa_ext_module_t
) buf
;
964 /* Release memory used for BRIG shared libraries that correspond
968 release_agent_shared_libraries (struct agent_info
*agent
)
970 for (unsigned i
= 0; i
< agent
->brig_libraries_count
; i
++)
971 if (agent
->brig_libraries
[i
])
973 free (agent
->brig_libraries
[i
]->file_name
);
974 free (agent
->brig_libraries
[i
]->image
);
975 free (agent
->brig_libraries
[i
]);
978 free (agent
->brig_libraries
);
981 /* Create and finalize the program consisting of all loaded modules. */
984 create_and_finalize_hsa_program (struct agent_info
*agent
)
987 hsa_ext_program_t prog_handle
;
990 if (pthread_mutex_lock (&agent
->prog_mutex
))
991 GOMP_PLUGIN_fatal ("Could not lock an HSA agent program mutex");
992 if (agent
->prog_finalized
)
995 status
= hsa_fns
.hsa_ext_program_create_fn
996 (HSA_MACHINE_MODEL_LARGE
, HSA_PROFILE_FULL
,
997 HSA_DEFAULT_FLOAT_ROUNDING_MODE_DEFAULT
,
999 if (status
!= HSA_STATUS_SUCCESS
)
1000 hsa_fatal ("Could not create an HSA program", status
);
1002 HSA_DEBUG ("Created a finalized program\n");
1004 struct module_info
*module
= agent
->first_module
;
1007 status
= hsa_fns
.hsa_ext_program_add_module_fn
1008 (prog_handle
, module
->image_desc
->brig_module
);
1009 if (status
!= HSA_STATUS_SUCCESS
)
1010 hsa_fatal ("Could not add a module to the HSA program", status
);
1011 module
= module
->next
;
1015 /* Load all shared libraries. */
1016 const char *libraries
[] = { "libhsamath.so", "libhsastd.so" };
1017 const unsigned libraries_count
= sizeof (libraries
) / sizeof (const char *);
1019 agent
->brig_libraries_count
= libraries_count
;
1020 agent
->brig_libraries
= GOMP_PLUGIN_malloc_cleared
1021 (sizeof (struct brig_library_info
) * libraries_count
);
1023 for (unsigned i
= 0; i
< libraries_count
; i
++)
1025 struct brig_library_info
*library
= add_shared_library (libraries
[i
],
1027 if (library
== NULL
)
1029 HSA_WARNING ("Could not open a shared BRIG library: %s\n",
1034 status
= hsa_fns
.hsa_ext_program_add_module_fn (prog_handle
,
1036 if (status
!= HSA_STATUS_SUCCESS
)
1037 hsa_warn ("Could not add a shared BRIG library the HSA program",
1040 HSA_DEBUG ("a shared BRIG library has been added to a program: %s\n",
1044 hsa_ext_control_directives_t control_directives
;
1045 memset (&control_directives
, 0, sizeof (control_directives
));
1046 hsa_code_object_t code_object
;
1047 status
= hsa_fns
.hsa_ext_program_finalize_fn
1048 (prog_handle
, agent
->isa
,HSA_EXT_FINALIZER_CALL_CONVENTION_AUTO
,
1049 control_directives
, "", HSA_CODE_OBJECT_TYPE_PROGRAM
, &code_object
);
1050 if (status
!= HSA_STATUS_SUCCESS
)
1052 hsa_warn ("Finalization of the HSA program failed", status
);
1056 HSA_DEBUG ("Finalization done\n");
1057 hsa_fns
.hsa_ext_program_destroy_fn (prog_handle
);
1060 = hsa_fns
.hsa_executable_create_fn (HSA_PROFILE_FULL
,
1061 HSA_EXECUTABLE_STATE_UNFROZEN
,
1062 "", &agent
->executable
);
1063 if (status
!= HSA_STATUS_SUCCESS
)
1064 hsa_fatal ("Could not create HSA executable", status
);
1066 module
= agent
->first_module
;
1069 /* Initialize all global variables declared in the module. */
1070 for (unsigned i
= 0; i
< module
->image_desc
->global_variable_count
; i
++)
1072 struct global_var_info
*var
;
1073 var
= &module
->image_desc
->global_variables
[i
];
1074 status
= hsa_fns
.hsa_executable_global_variable_define_fn
1075 (agent
->executable
, var
->name
, var
->address
);
1077 HSA_DEBUG ("Defining global variable: %s, address: %p\n", var
->name
,
1080 if (status
!= HSA_STATUS_SUCCESS
)
1081 hsa_fatal ("Could not define a global variable in the HSA program",
1085 module
= module
->next
;
1088 status
= hsa_fns
.hsa_executable_load_code_object_fn (agent
->executable
,
1091 if (status
!= HSA_STATUS_SUCCESS
)
1092 hsa_fatal ("Could not add a code object to the HSA executable", status
);
1093 status
= hsa_fns
.hsa_executable_freeze_fn (agent
->executable
, "");
1094 if (status
!= HSA_STATUS_SUCCESS
)
1095 hsa_fatal ("Could not freeze the HSA executable", status
);
1097 HSA_DEBUG ("Froze HSA executable with the finalized code object\n");
1099 /* If all goes good, jump to final. */
1103 agent
->prog_finalized_error
= true;
1106 agent
->prog_finalized
= true;
1108 if (pthread_mutex_unlock (&agent
->prog_mutex
))
1109 GOMP_PLUGIN_fatal ("Could not unlock an HSA agent program mutex");
1112 /* Create kernel dispatch data structure for given KERNEL. */
1114 static struct GOMP_hsa_kernel_dispatch
*
1115 create_single_kernel_dispatch (struct kernel_info
*kernel
,
1116 unsigned omp_data_size
)
1118 struct agent_info
*agent
= kernel
->agent
;
1119 struct GOMP_hsa_kernel_dispatch
*shadow
1120 = GOMP_PLUGIN_malloc_cleared (sizeof (struct GOMP_hsa_kernel_dispatch
));
1122 shadow
->queue
= agent
->command_q
;
1123 shadow
->omp_data_memory
1124 = omp_data_size
> 0 ? GOMP_PLUGIN_malloc (omp_data_size
) : NULL
;
1125 unsigned dispatch_count
= kernel
->dependencies_count
;
1126 shadow
->kernel_dispatch_count
= dispatch_count
;
1128 shadow
->children_dispatches
1129 = GOMP_PLUGIN_malloc (dispatch_count
* sizeof (shadow
));
1131 shadow
->object
= kernel
->object
;
1133 hsa_signal_t sync_signal
;
1134 hsa_status_t status
= hsa_fns
.hsa_signal_create_fn (1, 0, NULL
, &sync_signal
);
1135 if (status
!= HSA_STATUS_SUCCESS
)
1136 hsa_fatal ("Error creating the HSA sync signal", status
);
1138 shadow
->signal
= sync_signal
.handle
;
1139 shadow
->private_segment_size
= kernel
->private_segment_size
;
1140 shadow
->group_segment_size
= kernel
->group_segment_size
;
1143 = hsa_fns
.hsa_memory_allocate_fn (agent
->kernarg_region
,
1144 kernel
->kernarg_segment_size
,
1145 &shadow
->kernarg_address
);
1146 if (status
!= HSA_STATUS_SUCCESS
)
1147 hsa_fatal ("Could not allocate memory for HSA kernel arguments", status
);
1152 /* Release data structure created for a kernel dispatch in SHADOW argument. */
1155 release_kernel_dispatch (struct GOMP_hsa_kernel_dispatch
*shadow
)
1157 HSA_DEBUG ("Released kernel dispatch: %p has value: %lu (%p)\n", shadow
,
1158 shadow
->debug
, (void *) shadow
->debug
);
1160 hsa_fns
.hsa_memory_free_fn (shadow
->kernarg_address
);
1163 s
.handle
= shadow
->signal
;
1164 hsa_fns
.hsa_signal_destroy_fn (s
);
1166 free (shadow
->omp_data_memory
);
1168 for (unsigned i
= 0; i
< shadow
->kernel_dispatch_count
; i
++)
1169 release_kernel_dispatch (shadow
->children_dispatches
[i
]);
1171 free (shadow
->children_dispatches
);
1175 /* Initialize a KERNEL without its dependencies. MAX_OMP_DATA_SIZE is used
1176 to calculate maximum necessary memory for OMP data allocation. */
1179 init_single_kernel (struct kernel_info
*kernel
, unsigned *max_omp_data_size
)
1181 hsa_status_t status
;
1182 struct agent_info
*agent
= kernel
->agent
;
1183 hsa_executable_symbol_t kernel_symbol
;
1184 status
= hsa_fns
.hsa_executable_get_symbol_fn (agent
->executable
, NULL
,
1185 kernel
->name
, agent
->id
,
1187 if (status
!= HSA_STATUS_SUCCESS
)
1189 hsa_warn ("Could not find symbol for kernel in the code object", status
);
1192 HSA_DEBUG ("Located kernel %s\n", kernel
->name
);
1193 status
= hsa_fns
.hsa_executable_symbol_get_info_fn
1194 (kernel_symbol
, HSA_EXECUTABLE_SYMBOL_INFO_KERNEL_OBJECT
, &kernel
->object
);
1195 if (status
!= HSA_STATUS_SUCCESS
)
1196 hsa_fatal ("Could not extract a kernel object from its symbol", status
);
1197 status
= hsa_fns
.hsa_executable_symbol_get_info_fn
1198 (kernel_symbol
, HSA_EXECUTABLE_SYMBOL_INFO_KERNEL_KERNARG_SEGMENT_SIZE
,
1199 &kernel
->kernarg_segment_size
);
1200 if (status
!= HSA_STATUS_SUCCESS
)
1201 hsa_fatal ("Could not get info about kernel argument size", status
);
1202 status
= hsa_fns
.hsa_executable_symbol_get_info_fn
1203 (kernel_symbol
, HSA_EXECUTABLE_SYMBOL_INFO_KERNEL_GROUP_SEGMENT_SIZE
,
1204 &kernel
->group_segment_size
);
1205 if (status
!= HSA_STATUS_SUCCESS
)
1206 hsa_fatal ("Could not get info about kernel group segment size", status
);
1207 status
= hsa_fns
.hsa_executable_symbol_get_info_fn
1208 (kernel_symbol
, HSA_EXECUTABLE_SYMBOL_INFO_KERNEL_PRIVATE_SEGMENT_SIZE
,
1209 &kernel
->private_segment_size
);
1210 if (status
!= HSA_STATUS_SUCCESS
)
1211 hsa_fatal ("Could not get info about kernel private segment size",
1214 HSA_DEBUG ("Kernel structure for %s fully initialized with "
1215 "following segment sizes: \n", kernel
->name
);
1216 HSA_DEBUG (" group_segment_size: %u\n",
1217 (unsigned) kernel
->group_segment_size
);
1218 HSA_DEBUG (" private_segment_size: %u\n",
1219 (unsigned) kernel
->private_segment_size
);
1220 HSA_DEBUG (" kernarg_segment_size: %u\n",
1221 (unsigned) kernel
->kernarg_segment_size
);
1222 HSA_DEBUG (" omp_data_size: %u\n", kernel
->omp_data_size
);
1223 HSA_DEBUG (" gridified_kernel_p: %u\n", kernel
->gridified_kernel_p
);
1225 if (kernel
->omp_data_size
> *max_omp_data_size
)
1226 *max_omp_data_size
= kernel
->omp_data_size
;
1228 for (unsigned i
= 0; i
< kernel
->dependencies_count
; i
++)
1230 struct kernel_info
*dependency
1231 = get_kernel_for_agent (agent
, kernel
->dependencies
[i
]);
1233 if (dependency
== NULL
)
1235 HSA_DEBUG ("Could not find a dependency for a kernel: %s, "
1236 "dependency name: %s\n", kernel
->name
,
1237 kernel
->dependencies
[i
]);
1241 if (dependency
->dependencies_count
> 0)
1243 HSA_DEBUG ("HSA does not allow kernel dispatching code with "
1244 "a depth bigger than one\n");
1248 init_single_kernel (dependency
, max_omp_data_size
);
1254 kernel
->initialization_failed
= true;
1257 /* Indent stream F by INDENT spaces. */
1260 indent_stream (FILE *f
, unsigned indent
)
1262 fprintf (f
, "%*s", indent
, "");
1265 /* Dump kernel DISPATCH data structure and indent it by INDENT spaces. */
1268 print_kernel_dispatch (struct GOMP_hsa_kernel_dispatch
*dispatch
, unsigned indent
)
1270 indent_stream (stderr
, indent
);
1271 fprintf (stderr
, "this: %p\n", dispatch
);
1272 indent_stream (stderr
, indent
);
1273 fprintf (stderr
, "queue: %p\n", dispatch
->queue
);
1274 indent_stream (stderr
, indent
);
1275 fprintf (stderr
, "omp_data_memory: %p\n", dispatch
->omp_data_memory
);
1276 indent_stream (stderr
, indent
);
1277 fprintf (stderr
, "kernarg_address: %p\n", dispatch
->kernarg_address
);
1278 indent_stream (stderr
, indent
);
1279 fprintf (stderr
, "object: %lu\n", dispatch
->object
);
1280 indent_stream (stderr
, indent
);
1281 fprintf (stderr
, "signal: %lu\n", dispatch
->signal
);
1282 indent_stream (stderr
, indent
);
1283 fprintf (stderr
, "private_segment_size: %u\n",
1284 dispatch
->private_segment_size
);
1285 indent_stream (stderr
, indent
);
1286 fprintf (stderr
, "group_segment_size: %u\n",
1287 dispatch
->group_segment_size
);
1288 indent_stream (stderr
, indent
);
1289 fprintf (stderr
, "children dispatches: %lu\n",
1290 dispatch
->kernel_dispatch_count
);
1291 indent_stream (stderr
, indent
);
1292 fprintf (stderr
, "omp_num_threads: %u\n",
1293 dispatch
->omp_num_threads
);
1294 fprintf (stderr
, "\n");
1296 for (unsigned i
= 0; i
< dispatch
->kernel_dispatch_count
; i
++)
1297 print_kernel_dispatch (dispatch
->children_dispatches
[i
], indent
+ 2);
1300 /* Create kernel dispatch data structure for a KERNEL and all its
1303 static struct GOMP_hsa_kernel_dispatch
*
1304 create_kernel_dispatch (struct kernel_info
*kernel
, unsigned omp_data_size
)
1306 struct GOMP_hsa_kernel_dispatch
*shadow
1307 = create_single_kernel_dispatch (kernel
, omp_data_size
);
1308 shadow
->omp_num_threads
= 64;
1310 shadow
->omp_level
= kernel
->gridified_kernel_p
? 1 : 0;
1312 /* Create kernel dispatch data structures. We do not allow to have
1313 a kernel dispatch with depth bigger than one. */
1314 for (unsigned i
= 0; i
< kernel
->dependencies_count
; i
++)
1316 struct kernel_info
*dependency
1317 = get_kernel_for_agent (kernel
->agent
, kernel
->dependencies
[i
]);
1318 shadow
->children_dispatches
[i
]
1319 = create_single_kernel_dispatch (dependency
, omp_data_size
);
1320 shadow
->children_dispatches
[i
]->queue
1321 = kernel
->agent
->kernel_dispatch_command_q
;
1322 shadow
->children_dispatches
[i
]->omp_level
= 1;
1328 /* Do all the work that is necessary before running KERNEL for the first time.
1329 The function assumes the program has been created, finalized and frozen by
1330 create_and_finalize_hsa_program. */
1333 init_kernel (struct kernel_info
*kernel
)
1335 if (pthread_mutex_lock (&kernel
->init_mutex
))
1336 GOMP_PLUGIN_fatal ("Could not lock an HSA kernel initialization mutex");
1337 if (kernel
->initialized
)
1339 if (pthread_mutex_unlock (&kernel
->init_mutex
))
1340 GOMP_PLUGIN_fatal ("Could not unlock an HSA kernel initialization "
1346 /* Precomputed maximum size of OMP data necessary for a kernel from kernel
1347 dispatch operation. */
1348 init_single_kernel (kernel
, &kernel
->max_omp_data_size
);
1350 if (!kernel
->initialization_failed
)
1353 kernel
->initialized
= true;
1354 if (pthread_mutex_unlock (&kernel
->init_mutex
))
1355 GOMP_PLUGIN_fatal ("Could not unlock an HSA kernel initialization "
1359 /* Parse the target attributes INPUT provided by the compiler and return true
1360 if we should run anything all. If INPUT is NULL, fill DEF with default
1361 values, then store INPUT or DEF into *RESULT. */
1364 parse_target_attributes (void **input
,
1365 struct GOMP_kernel_launch_attributes
*def
,
1366 struct GOMP_kernel_launch_attributes
**result
)
1369 GOMP_PLUGIN_fatal ("No target arguments provided");
1371 bool attrs_found
= false;
1374 uintptr_t id
= (uintptr_t) *input
;
1375 if ((id
& GOMP_TARGET_ARG_DEVICE_MASK
) == GOMP_DEVICE_HSA
1376 && ((id
& GOMP_TARGET_ARG_ID_MASK
)
1377 == GOMP_TARGET_ARG_HSA_KERNEL_ATTRIBUTES
))
1384 if (id
& GOMP_TARGET_ARG_SUBSEQUENT_PARAM
)
1399 HSA_DEBUG ("GOMP_OFFLOAD_run called with no launch attributes\n");
1403 struct GOMP_kernel_launch_attributes
*kla
;
1404 kla
= (struct GOMP_kernel_launch_attributes
*) *input
;
1406 if (kla
->ndim
== 0 || kla
->ndim
> 3)
1407 GOMP_PLUGIN_fatal ("Invalid number of dimensions (%u)", kla
->ndim
);
1409 HSA_DEBUG ("GOMP_OFFLOAD_run called with %u dimensions:\n", kla
->ndim
);
1411 for (i
= 0; i
< kla
->ndim
; i
++)
1413 HSA_DEBUG (" Dimension %u: grid size %u and group size %u\n", i
,
1414 kla
->gdims
[i
], kla
->wdims
[i
]);
1415 if (kla
->gdims
[i
] == 0)
1421 /* Return the group size given the requested GROUP size, GRID size and number
1422 of grid dimensions NDIM. */
1425 get_group_size (uint32_t ndim
, uint32_t grid
, uint32_t group
)
1429 /* TODO: Provide a default via environment or device characteristics. */
1443 /* Return true if the HSA runtime can run function FN_PTR. */
1446 GOMP_OFFLOAD_can_run (void *fn_ptr
)
1448 struct kernel_info
*kernel
= (struct kernel_info
*) fn_ptr
;
1449 struct agent_info
*agent
= kernel
->agent
;
1450 create_and_finalize_hsa_program (agent
);
1452 if (agent
->prog_finalized_error
)
1455 init_kernel (kernel
);
1456 if (kernel
->initialization_failed
)
1462 if (suppress_host_fallback
)
1463 GOMP_PLUGIN_fatal ("HSA host fallback has been suppressed");
1464 HSA_DEBUG ("HSA target cannot be launched, doing a host fallback\n");
1468 /* Atomically store pair of uint16_t values (HEADER and REST) to a PACKET. */
1471 packet_store_release (uint32_t* packet
, uint16_t header
, uint16_t rest
)
1473 __atomic_store_n (packet
, header
| (rest
<< 16), __ATOMIC_RELEASE
);
1476 /* Run KERNEL on its agent, pass VARS to it as arguments and take
1477 launchattributes from KLA. */
1480 run_kernel (struct kernel_info
*kernel
, void *vars
,
1481 struct GOMP_kernel_launch_attributes
*kla
)
1483 struct agent_info
*agent
= kernel
->agent
;
1484 if (pthread_rwlock_rdlock (&agent
->modules_rwlock
))
1485 GOMP_PLUGIN_fatal ("Unable to read-lock an HSA agent rwlock");
1487 if (!agent
->initialized
)
1488 GOMP_PLUGIN_fatal ("Agent must be initialized");
1490 if (!kernel
->initialized
)
1491 GOMP_PLUGIN_fatal ("Called kernel must be initialized");
1493 struct GOMP_hsa_kernel_dispatch
*shadow
1494 = create_kernel_dispatch (kernel
, kernel
->max_omp_data_size
);
1498 fprintf (stderr
, "\nKernel has following dependencies:\n");
1499 print_kernel_dispatch (shadow
, 2);
1503 = hsa_fns
.hsa_queue_add_write_index_release_fn (agent
->command_q
, 1);
1504 HSA_DEBUG ("Got AQL index %llu\n", (long long int) index
);
1506 /* Wait until the queue is not full before writing the packet. */
1507 while (index
- hsa_fns
.hsa_queue_load_read_index_acquire_fn (agent
->command_q
)
1508 >= agent
->command_q
->size
)
1511 hsa_kernel_dispatch_packet_t
*packet
;
1512 packet
= ((hsa_kernel_dispatch_packet_t
*) agent
->command_q
->base_address
)
1513 + index
% agent
->command_q
->size
;
1515 memset (((uint8_t *) packet
) + 4, 0, sizeof (*packet
) - 4);
1516 packet
->grid_size_x
= kla
->gdims
[0];
1517 packet
->workgroup_size_x
= get_group_size (kla
->ndim
, kla
->gdims
[0],
1522 packet
->grid_size_y
= kla
->gdims
[1];
1523 packet
->workgroup_size_y
= get_group_size (kla
->ndim
, kla
->gdims
[1],
1528 packet
->grid_size_y
= 1;
1529 packet
->workgroup_size_y
= 1;
1534 packet
->grid_size_z
= kla
->gdims
[2];
1535 packet
->workgroup_size_z
= get_group_size (kla
->ndim
, kla
->gdims
[2],
1540 packet
->grid_size_z
= 1;
1541 packet
->workgroup_size_z
= 1;
1544 packet
->private_segment_size
= kernel
->private_segment_size
;
1545 packet
->group_segment_size
= kernel
->group_segment_size
;
1546 packet
->kernel_object
= kernel
->object
;
1547 packet
->kernarg_address
= shadow
->kernarg_address
;
1549 s
.handle
= shadow
->signal
;
1550 packet
->completion_signal
= s
;
1551 hsa_fns
.hsa_signal_store_relaxed_fn (s
, 1);
1552 memcpy (shadow
->kernarg_address
, &vars
, sizeof (vars
));
1555 size_t vars_size
= sizeof (vars
);
1556 if (kernel
->kernarg_segment_size
> vars_size
)
1558 if (kernel
->kernarg_segment_size
!= vars_size
1559 + sizeof (struct hsa_kernel_runtime
*))
1560 GOMP_PLUGIN_fatal ("Kernel segment size has an unexpected value");
1561 memcpy (packet
->kernarg_address
+ vars_size
, &shadow
,
1562 sizeof (struct hsa_kernel_runtime
*));
1565 HSA_DEBUG ("Copying kernel runtime pointer to kernarg_address\n");
1568 header
= HSA_PACKET_TYPE_KERNEL_DISPATCH
<< HSA_PACKET_HEADER_TYPE
;
1569 header
|= HSA_FENCE_SCOPE_SYSTEM
<< HSA_PACKET_HEADER_ACQUIRE_FENCE_SCOPE
;
1570 header
|= HSA_FENCE_SCOPE_SYSTEM
<< HSA_PACKET_HEADER_RELEASE_FENCE_SCOPE
;
1572 HSA_DEBUG ("Going to dispatch kernel %s\n", kernel
->name
);
1574 packet_store_release ((uint32_t *) packet
, header
,
1575 (uint16_t) kla
->ndim
<< HSA_KERNEL_DISPATCH_PACKET_SETUP_DIMENSIONS
);
1577 hsa_fns
.hsa_signal_store_release_fn (agent
->command_q
->doorbell_signal
,
1580 /* TODO: GPU agents in Carrizo APUs cannot properly update L2 cache for
1581 signal wait and signal load operations on their own and we need to
1582 periodically call the hsa_signal_load_acquire on completion signals of
1583 children kernels in the CPU to make that happen. As soon the
1584 limitation will be resolved, this workaround can be removed. */
1586 HSA_DEBUG ("Kernel dispatched, waiting for completion\n");
1588 /* Root signal waits with 1ms timeout. */
1589 while (hsa_fns
.hsa_signal_wait_acquire_fn (s
, HSA_SIGNAL_CONDITION_LT
, 1,
1591 HSA_WAIT_STATE_BLOCKED
) != 0)
1592 for (unsigned i
= 0; i
< shadow
->kernel_dispatch_count
; i
++)
1594 hsa_signal_t child_s
;
1595 child_s
.handle
= shadow
->children_dispatches
[i
]->signal
;
1597 HSA_DEBUG ("Waiting for children completion signal: %lu\n",
1598 shadow
->children_dispatches
[i
]->signal
);
1599 hsa_fns
.hsa_signal_load_acquire_fn (child_s
);
1602 release_kernel_dispatch (shadow
);
1604 if (pthread_rwlock_unlock (&agent
->modules_rwlock
))
1605 GOMP_PLUGIN_fatal ("Unable to unlock an HSA agent rwlock");
1608 /* Part of the libgomp plugin interface. Run a kernel on device N (the number
1609 is actually ignored, we assume the FN_PTR has been mapped using the correct
1610 device) and pass it an array of pointers in VARS as a parameter. The kernel
1611 is identified by FN_PTR which must point to a kernel_info structure. */
1614 GOMP_OFFLOAD_run (int n
__attribute__((unused
)),
1615 void *fn_ptr
, void *vars
, void **args
)
1617 struct kernel_info
*kernel
= (struct kernel_info
*) fn_ptr
;
1618 struct GOMP_kernel_launch_attributes def
;
1619 struct GOMP_kernel_launch_attributes
*kla
;
1620 if (!parse_target_attributes (args
, &def
, &kla
))
1622 HSA_DEBUG ("Will not run HSA kernel because the grid size is zero\n");
1625 run_kernel (kernel
, vars
, kla
);
1628 /* Information to be passed to a thread running a kernel asycnronously. */
1630 struct async_run_info
1639 /* Thread routine to run a kernel asynchronously. */
1642 run_kernel_asynchronously (void *thread_arg
)
1644 struct async_run_info
*info
= (struct async_run_info
*) thread_arg
;
1645 int device
= info
->device
;
1646 void *tgt_fn
= info
->tgt_fn
;
1647 void *tgt_vars
= info
->tgt_vars
;
1648 void **args
= info
->args
;
1649 void *async_data
= info
->async_data
;
1652 GOMP_OFFLOAD_run (device
, tgt_fn
, tgt_vars
, args
);
1653 GOMP_PLUGIN_target_task_completion (async_data
);
1657 /* Part of the libgomp plugin interface. Run a kernel like GOMP_OFFLOAD_run
1658 does, but asynchronously and call GOMP_PLUGIN_target_task_completion when it
1662 GOMP_OFFLOAD_async_run (int device
, void *tgt_fn
, void *tgt_vars
,
1663 void **args
, void *async_data
)
1666 struct async_run_info
*info
;
1667 HSA_DEBUG ("GOMP_OFFLOAD_async_run invoked\n");
1668 info
= GOMP_PLUGIN_malloc (sizeof (struct async_run_info
));
1670 info
->device
= device
;
1671 info
->tgt_fn
= tgt_fn
;
1672 info
->tgt_vars
= tgt_vars
;
1674 info
->async_data
= async_data
;
1676 int err
= pthread_create (&pt
, NULL
, &run_kernel_asynchronously
, info
);
1678 GOMP_PLUGIN_fatal ("HSA asynchronous thread creation failed: %s",
1680 err
= pthread_detach (pt
);
1682 GOMP_PLUGIN_fatal ("Failed to detach a thread to run HSA kernel "
1683 "asynchronously: %s", strerror (err
));
1686 /* Deinitialize all information associated with MODULE and kernels within
1687 it. Return TRUE on success. */
1690 destroy_module (struct module_info
*module
)
1693 for (i
= 0; i
< module
->kernel_count
; i
++)
1694 if (pthread_mutex_destroy (&module
->kernels
[i
].init_mutex
))
1696 GOMP_PLUGIN_error ("Failed to destroy an HSA kernel initialization "
1703 /* Part of the libgomp plugin interface. Unload BRIG module described by
1704 struct brig_image_desc in TARGET_DATA from agent number N. Return
1708 GOMP_OFFLOAD_unload_image (int n
, unsigned version
, const void *target_data
)
1710 if (GOMP_VERSION_DEV (version
) > GOMP_VERSION_HSA
)
1712 GOMP_PLUGIN_error ("Offload data incompatible with HSA plugin"
1713 " (expected %u, received %u)",
1714 GOMP_VERSION_HSA
, GOMP_VERSION_DEV (version
));
1718 struct agent_info
*agent
;
1719 agent
= get_agent_info (n
);
1723 if (pthread_rwlock_wrlock (&agent
->modules_rwlock
))
1725 GOMP_PLUGIN_error ("Unable to write-lock an HSA agent rwlock");
1728 struct module_info
*module
= agent
->first_module
;
1731 if (module
->image_desc
== target_data
)
1733 module
= module
->next
;
1737 GOMP_PLUGIN_error ("Attempt to unload an image that has never been "
1742 remove_module_from_agent (agent
, module
);
1743 if (!destroy_module (module
))
1746 if (!destroy_hsa_program (agent
))
1748 if (pthread_rwlock_unlock (&agent
->modules_rwlock
))
1750 GOMP_PLUGIN_error ("Unable to unlock an HSA agent rwlock");
1756 /* Part of the libgomp plugin interface. Deinitialize all information and
1757 status associated with agent number N. We do not attempt any
1758 synchronization, assuming the user and libgomp will not attempt
1759 deinitialization of a device that is in any way being used at the same
1760 time. Return TRUE on success. */
1763 GOMP_OFFLOAD_fini_device (int n
)
1765 struct agent_info
*agent
= get_agent_info (n
);
1769 if (!agent
->initialized
)
1772 struct module_info
*next_module
= agent
->first_module
;
1775 struct module_info
*module
= next_module
;
1776 next_module
= module
->next
;
1777 if (!destroy_module (module
))
1781 agent
->first_module
= NULL
;
1782 if (!destroy_hsa_program (agent
))
1785 release_agent_shared_libraries (agent
);
1787 hsa_status_t status
= hsa_fns
.hsa_queue_destroy_fn (agent
->command_q
);
1788 if (status
!= HSA_STATUS_SUCCESS
)
1789 return hsa_error ("Error destroying command queue", status
);
1790 status
= hsa_fns
.hsa_queue_destroy_fn (agent
->kernel_dispatch_command_q
);
1791 if (status
!= HSA_STATUS_SUCCESS
)
1792 return hsa_error ("Error destroying kernel dispatch command queue", status
);
1793 if (pthread_mutex_destroy (&agent
->prog_mutex
))
1795 GOMP_PLUGIN_error ("Failed to destroy an HSA agent program mutex");
1798 if (pthread_rwlock_destroy (&agent
->modules_rwlock
))
1800 GOMP_PLUGIN_error ("Failed to destroy an HSA agent rwlock");
1803 agent
->initialized
= false;
1807 /* Part of the libgomp plugin interface. Not implemented as it is not required
1811 GOMP_OFFLOAD_alloc (int ord
, size_t size
)
1813 GOMP_PLUGIN_error ("HSA GOMP_OFFLOAD_alloc is not implemented because "
1814 "it should never be called");
1818 /* Part of the libgomp plugin interface. Not implemented as it is not required
1822 GOMP_OFFLOAD_free (int ord
, void *ptr
)
1824 GOMP_PLUGIN_error ("HSA GOMP_OFFLOAD_free is not implemented because "
1825 "it should never be called");
1829 /* Part of the libgomp plugin interface. Not implemented as it is not required
1833 GOMP_OFFLOAD_dev2host (int ord
, void *dst
, const void *src
, size_t n
)
1835 GOMP_PLUGIN_error ("HSA GOMP_OFFLOAD_dev2host is not implemented because "
1836 "it should never be called");
1840 /* Part of the libgomp plugin interface. Not implemented as it is not required
1844 GOMP_OFFLOAD_host2dev (int ord
, void *dst
, const void *src
, size_t n
)
1846 GOMP_PLUGIN_error ("HSA GOMP_OFFLOAD_host2dev is not implemented because "
1847 "it should never be called");
1851 /* Part of the libgomp plugin interface. Not implemented as it is not required
1855 GOMP_OFFLOAD_dev2dev (int ord
, void *dst
, const void *src
, size_t n
)
1857 GOMP_PLUGIN_error ("HSA GOMP_OFFLOAD_dev2dev is not implemented because "
1858 "it should never be called");