/* OpenACC Profiling Interface

   Copyright (C) 2019-2023 Free Software Foundation, Inc.

   Contributed by Mentor, a Siemens Business.

   This file is part of the GNU Offloading and Multi Processing Library
   (libgomp).

   Libgomp is free software; you can redistribute it and/or modify it
   under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 3, or (at your option)
   any later version.

   Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY
   WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
   FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
   more details.

   Under Section 7 of GPL version 3, you are granted additional
   permissions described in the GCC Runtime Library Exception, version
   3.1, as published by the Free Software Foundation.

   You should have received a copy of the GNU General Public License and
   a copy of the GCC Runtime Library Exception along with this program;
   see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
   <http://www.gnu.org/licenses/>.  */
32 #include "secure_getenv.h"
42 #define STATIC_ASSERT(expr) _Static_assert (expr, "!(" #expr ")")
44 /* Statically assert that the layout of the common fields in the
45 'acc_event_info' variants matches. */
47 STATIC_ASSERT (offsetof (acc_event_info
, event_type
)
48 == offsetof (acc_event_info
, data_event
.event_type
));
49 STATIC_ASSERT (offsetof (acc_event_info
, data_event
.event_type
)
50 == offsetof (acc_event_info
, launch_event
.event_type
));
51 STATIC_ASSERT (offsetof (acc_event_info
, data_event
.event_type
)
52 == offsetof (acc_event_info
, other_event
.event_type
));
54 STATIC_ASSERT (offsetof (acc_event_info
, data_event
.valid_bytes
)
55 == offsetof (acc_event_info
, launch_event
.valid_bytes
));
56 STATIC_ASSERT (offsetof (acc_event_info
, data_event
.valid_bytes
)
57 == offsetof (acc_event_info
, other_event
.valid_bytes
));
58 /* 'parent_construct' */
59 STATIC_ASSERT (offsetof (acc_event_info
, data_event
.parent_construct
)
60 == offsetof (acc_event_info
, launch_event
.parent_construct
));
61 STATIC_ASSERT (offsetof (acc_event_info
, data_event
.parent_construct
)
62 == offsetof (acc_event_info
, other_event
.parent_construct
));
64 STATIC_ASSERT (offsetof (acc_event_info
, data_event
.implicit
)
65 == offsetof (acc_event_info
, launch_event
.implicit
));
66 STATIC_ASSERT (offsetof (acc_event_info
, data_event
.implicit
)
67 == offsetof (acc_event_info
, other_event
.implicit
));
69 STATIC_ASSERT (offsetof (acc_event_info
, data_event
.tool_info
)
70 == offsetof (acc_event_info
, launch_event
.tool_info
));
71 STATIC_ASSERT (offsetof (acc_event_info
, data_event
.tool_info
)
72 == offsetof (acc_event_info
, other_event
.tool_info
));
74 struct goacc_prof_callback_entry
79 struct goacc_prof_callback_entry
*next
;
/* Use a separate flag to minimize run-time performance impact for the (very
   common) case that profiling is not enabled.

   Once enabled, we're not going to disable this anymore, anywhere.  We
   probably could, by adding appropriate logic to 'acc_prof_register',
   'acc_prof_unregister'.  */
bool goacc_prof_enabled = false;
90 /* Global state for registered callbacks.
91 'goacc_prof_callbacks_enabled[acc_ev_none]' acts as a global toggle. */
92 static bool goacc_prof_callbacks_enabled
[acc_ev_last
];
93 static struct goacc_prof_callback_entry
*goacc_prof_callback_entries
[acc_ev_last
];
94 /* Lock used to protect access to 'goacc_prof_callbacks_enabled', and
95 'goacc_prof_callback_entries'. */
96 static gomp_mutex_t goacc_prof_lock
;
99 goacc_profiling_initialize (void)
101 gomp_mutex_init (&goacc_prof_lock
);
103 /* Initially, all callbacks for all events are enabled. */
104 for (int i
= 0; i
< acc_ev_last
; ++i
)
105 goacc_prof_callbacks_enabled
[i
] = true;
108 #ifdef PLUGIN_SUPPORT
109 char *acc_proflibs
= secure_getenv ("ACC_PROFLIB");
110 while (acc_proflibs
!= NULL
&& acc_proflibs
[0] != '\0')
112 char *acc_proflibs_sep
= strchr (acc_proflibs
, ';');
114 if (acc_proflibs_sep
== acc_proflibs
)
/* Stray ';' separator: make sure we don't 'dlopen' the main
   program.  */
122 if (acc_proflibs_sep
!= NULL
)
124 /* Single out the first library. */
125 acc_proflib
= gomp_malloc (acc_proflibs_sep
- acc_proflibs
+ 1);
126 memcpy (acc_proflib
, acc_proflibs
,
127 acc_proflibs_sep
- acc_proflibs
);
128 acc_proflib
[acc_proflibs_sep
- acc_proflibs
] = '\0';
132 /* No ';' separator, so only one library. */
133 acc_proflib
= acc_proflibs
;
136 gomp_debug (0, "%s: dlopen (\"%s\")\n", __FUNCTION__
, acc_proflib
);
137 void *dl_handle
= dlopen (acc_proflib
, RTLD_LAZY
);
138 if (dl_handle
!= NULL
)
140 typeof (&acc_register_library
) a_r_l
141 = dlsym (dl_handle
, "acc_register_library");
144 gomp_debug (0, " %s: calling %s:acc_register_library\n",
145 __FUNCTION__
, acc_proflib
);
146 a_r_l (acc_prof_register
, acc_prof_unregister
,
152 gomp_error ("while loading ACC_PROFLIB \"%s\": %s",
153 acc_proflib
, dlerror ());
154 if (dl_handle
!= NULL
)
156 int err
= dlclose (dl_handle
);
164 if (acc_proflib
!= acc_proflibs
)
168 acc_proflibs
= acc_proflibs_sep
+ 1;
173 #endif /* PLUGIN_SUPPORT */
177 acc_prof_register (acc_event_t ev
, acc_prof_callback cb
, acc_register_t reg
)
179 gomp_debug (0, "%s: ev=%d, cb=%p, reg=%d\n",
180 __FUNCTION__
, (int) ev
, (void *) cb
, (int) reg
);
/* For any events to be dispatched, the user first has to register a
   callback, which makes this here a good place for enabling the whole
   machinery.  */
186 if (!GOACC_PROF_ENABLED
)
187 __atomic_store_n (&goacc_prof_enabled
, true, MEMMODEL_RELEASE
);
194 /* As end events invoke callbacks in the reverse order, we register these
195 in the reverse order here. */
197 } event_kind
= EVENT_KIND_BOGUS
;
201 case acc_ev_device_init_start
:
202 case acc_ev_device_shutdown_start
:
203 case acc_ev_runtime_shutdown
:
208 case acc_ev_enter_data_start
:
209 case acc_ev_exit_data_start
:
210 case acc_ev_update_start
:
211 case acc_ev_compute_construct_start
:
212 case acc_ev_enqueue_launch_start
:
213 case acc_ev_enqueue_upload_start
:
214 case acc_ev_enqueue_download_start
:
215 case acc_ev_wait_start
:
216 event_kind
= EVENT_KIND_NORMAL
;
218 case acc_ev_device_init_end
:
219 case acc_ev_device_shutdown_end
:
220 case acc_ev_enter_data_end
:
221 case acc_ev_exit_data_end
:
222 case acc_ev_update_end
:
223 case acc_ev_compute_construct_end
:
224 case acc_ev_enqueue_launch_end
:
225 case acc_ev_enqueue_upload_end
:
226 case acc_ev_enqueue_download_end
:
227 case acc_ev_wait_end
:
228 event_kind
= EVENT_KIND_END
;
233 if (event_kind
== EVENT_KIND_BOGUS
)
235 /* Silently ignore. */
236 gomp_debug (0, " ignoring request for bogus 'acc_event_t'\n");
245 case acc_toggle_per_thread
:
251 /* Silently ignore. */
252 gomp_debug (0, " ignoring request with bogus 'acc_register_t'\n");
257 if (reg
== acc_toggle
)
261 gomp_debug (0, " globally enabling callbacks\n");
262 gomp_mutex_lock (&goacc_prof_lock
);
263 /* For 'acc_ev_none', this acts as a global toggle. */
264 goacc_prof_callbacks_enabled
[ev
] = true;
265 gomp_mutex_unlock (&goacc_prof_lock
);
268 else if (ev
== acc_ev_none
&& cb
!= NULL
)
270 gomp_debug (0, " ignoring request\n");
274 else if (reg
== acc_toggle_per_thread
)
276 if (ev
== acc_ev_none
&& cb
== NULL
)
278 gomp_debug (0, " thread: enabling callbacks\n");
279 goacc_lazy_initialize ();
280 struct goacc_thread
*thr
= goacc_thread ();
281 thr
->prof_callbacks_enabled
= true;
284 /* Silently ignore. */
285 gomp_debug (0, " ignoring bogus request\n");
289 gomp_mutex_lock (&goacc_prof_lock
);
291 struct goacc_prof_callback_entry
*it
, *it_p
;
292 it
= goacc_prof_callback_entries
[ev
];
/* If we already have this callback registered, just increment its
   reference count.  */
310 gomp_debug (0, " already registered;"
311 " incrementing reference count to: %d\n", it
->ref
);
315 struct goacc_prof_callback_entry
*e
316 = gomp_malloc (sizeof (struct goacc_prof_callback_entry
));
320 bool prepend
= (event_kind
== EVENT_KIND_END
);
321 /* If we don't have any callback registered yet, also use the
322 'prepend' code path. */
327 gomp_debug (0, " prepending\n");
328 e
->next
= goacc_prof_callback_entries
[ev
];
329 goacc_prof_callback_entries
[ev
] = e
;
333 gomp_debug (0, " appending\n");
343 gomp_debug (0, " ignoring request: is not registered\n");
348 gomp_debug (0, " enabling\n");
353 case acc_toggle_per_thread
:
354 __builtin_unreachable ();
357 gomp_mutex_unlock (&goacc_prof_lock
);
361 acc_prof_unregister (acc_event_t ev
, acc_prof_callback cb
, acc_register_t reg
)
363 gomp_debug (0, "%s: ev=%d, cb=%p, reg=%d\n",
364 __FUNCTION__
, (int) ev
, (void *) cb
, (int) reg
);
366 /* If profiling is not enabled, there cannot be anything to unregister. */
367 if (!GOACC_PROF_ENABLED
)
371 || ev
>= acc_ev_last
)
373 /* Silently ignore. */
374 gomp_debug (0, " ignoring request for bogus 'acc_event_t'\n");
383 case acc_toggle_per_thread
:
389 /* Silently ignore. */
390 gomp_debug (0, " ignoring request with bogus 'acc_register_t'\n");
395 if (reg
== acc_toggle
)
399 gomp_debug (0, " globally disabling callbacks\n");
400 gomp_mutex_lock (&goacc_prof_lock
);
401 /* For 'acc_ev_none', this acts as a global toggle. */
402 goacc_prof_callbacks_enabled
[ev
] = false;
403 gomp_mutex_unlock (&goacc_prof_lock
);
406 else if (ev
== acc_ev_none
&& cb
!= NULL
)
408 gomp_debug (0, " ignoring request\n");
412 else if (reg
== acc_toggle_per_thread
)
414 if (ev
== acc_ev_none
&& cb
== NULL
)
416 gomp_debug (0, " thread: disabling callbacks\n");
417 goacc_lazy_initialize ();
418 struct goacc_thread
*thr
= goacc_thread ();
419 thr
->prof_callbacks_enabled
= false;
422 /* Silently ignore. */
423 gomp_debug (0, " ignoring bogus request\n");
427 gomp_mutex_lock (&goacc_prof_lock
);
429 struct goacc_prof_callback_entry
*it
, *it_p
;
430 it
= goacc_prof_callback_entries
[ev
];
445 /* Silently ignore. */
446 gomp_debug (0, " ignoring bogus request: is not registered\n");
450 gomp_debug (0, " decrementing reference count to: %d\n", it
->ref
);
454 goacc_prof_callback_entries
[ev
] = it
->next
;
456 it_p
->next
= it
->next
;
464 gomp_debug (0, " ignoring request: is not registered\n");
469 gomp_debug (0, " disabling\n");
474 case acc_toggle_per_thread
:
475 __builtin_unreachable ();
478 gomp_mutex_unlock (&goacc_prof_lock
);
482 acc_prof_lookup (const char *name
)
484 gomp_debug (0, "%s (%s)\n",
485 __FUNCTION__
, name
?: "NULL");
491 acc_register_library (acc_prof_reg reg
, acc_prof_reg unreg
,
492 acc_prof_lookup_func lookup
)
497 /* Prepare to dispatch events? */
500 _goacc_profiling_dispatch_p (bool check_not_nested_p
)
502 gomp_debug (0, "%s\n", __FUNCTION__
);
506 struct goacc_thread
*thr
= goacc_thread ();
507 if (__builtin_expect (thr
== NULL
, false))
/* If we don't have any per-thread state yet, that means that per-thread
   callback dispatch has not been explicitly disabled (which only a call
   to 'acc_prof_unregister' with 'acc_toggle_per_thread' would do, and
   that would have allocated per-thread state via
   'goacc_lazy_initialize'); initially, all callbacks for all events are
   enabled.  */
515 gomp_debug (0, " %s: don't have any per-thread state yet\n", __FUNCTION__
);
519 if (check_not_nested_p
)
522 assert (thr
->prof_info
== NULL
);
523 assert (thr
->api_info
== NULL
);
526 if (__builtin_expect (!thr
->prof_callbacks_enabled
, true))
528 gomp_debug (0, " %s: disabled for this thread\n", __FUNCTION__
);
534 gomp_mutex_lock (&goacc_prof_lock
);
536 /* 'goacc_prof_callbacks_enabled[acc_ev_none]' acts as a global toggle. */
537 if (__builtin_expect (!goacc_prof_callbacks_enabled
[acc_ev_none
], true))
539 gomp_debug (0, " %s: disabled globally\n", __FUNCTION__
);
547 gomp_mutex_unlock (&goacc_prof_lock
);
553 /* Set up to dispatch events? */
556 _goacc_profiling_setup_p (struct goacc_thread
*thr
,
557 acc_prof_info
*prof_info
, acc_api_info
*api_info
)
559 gomp_debug (0, "%s (%p)\n", __FUNCTION__
, thr
);
/* If we don't have any per-thread state yet, we can't register 'prof_info'
   and 'api_info'.  */
563 if (__builtin_expect (thr
== NULL
, false))
565 gomp_debug (0, "Can't dispatch OpenACC Profiling Interface events for"
566 " the current call, construct, or directive\n");
570 if (thr
->prof_info
!= NULL
)
/* Profiling has already been set up for an outer construct.  In this
   case, we continue to use the existing information, and thus return
   here without setting up anything anew.

   This can happen, for example, for an 'enter data' directive, which
   sets up profiling, then calls into 'acc_copyin', which should not
   again set up profiling, should not overwrite the existing
   information.  */
583 thr
->prof_info
= prof_info
;
584 thr
->api_info
= api_info
;
586 /* Fill in some defaults. */
588 prof_info
->event_type
= -1; /* Must be set later. */
589 prof_info
->valid_bytes
= _ACC_PROF_INFO_VALID_BYTES
;
590 prof_info
->version
= _ACC_PROF_INFO_VERSION
;
593 prof_info
->device_type
= acc_device_type (thr
->dev
->type
);
594 prof_info
->device_number
= thr
->dev
->target_id
;
598 prof_info
->device_type
= -1;
599 prof_info
->device_number
= -1;
601 prof_info
->thread_id
= -1;
602 prof_info
->async
= acc_async_sync
;
603 prof_info
->async_queue
= prof_info
->async
;
604 prof_info
->src_file
= NULL
;
605 prof_info
->func_name
= NULL
;
606 prof_info
->line_no
= -1;
607 prof_info
->end_line_no
= -1;
608 prof_info
->func_line_no
= -1;
609 prof_info
->func_end_line_no
= -1;
611 api_info
->device_api
= acc_device_api_none
;
612 api_info
->valid_bytes
= _ACC_API_INFO_VALID_BYTES
;
613 api_info
->device_type
= prof_info
->device_type
;
614 api_info
->vendor
= -1;
615 api_info
->device_handle
= NULL
;
616 api_info
->context_handle
= NULL
;
617 api_info
->async_handle
= NULL
;
/* Dispatch events.

   This must only be called if 'GOACC_PROFILING_DISPATCH_P' or
   'GOACC_PROFILING_SETUP_P' returned a true result.  */
628 goacc_profiling_dispatch (acc_prof_info
*prof_info
, acc_event_info
*event_info
,
629 acc_api_info
*apt_info
)
631 acc_event_t event_type
= event_info
->event_type
;
632 gomp_debug (0, "%s: event_type=%d\n", __FUNCTION__
, (int) event_type
);
633 assert (event_type
> acc_ev_none
634 && event_type
< acc_ev_last
);
636 gomp_mutex_lock (&goacc_prof_lock
);
638 if (!goacc_prof_callbacks_enabled
[event_type
])
640 gomp_debug (0, " disabled for this event type\n");
645 for (struct goacc_prof_callback_entry
*e
646 = goacc_prof_callback_entries
[event_type
];
652 gomp_debug (0, " disabled for callback %p\n", e
->cb
);
656 gomp_debug (0, " calling callback %p\n", e
->cb
);
657 e
->cb (prof_info
, event_info
, apt_info
);
661 gomp_mutex_unlock (&goacc_prof_lock
);