Default to dwarf version 4 on hppa64-hpux
[official-gcc.git] / gcc / omp-offload.c
blobd881426ae65a6a9516aed3849c6e985ae4914cec
1 /* Bits of OpenMP and OpenACC handling that is specific to device offloading
2 and a lowering pass for OpenACC device directives.
4 Copyright (C) 2005-2021 Free Software Foundation, Inc.
6 This file is part of GCC.
8 GCC is free software; you can redistribute it and/or modify it under
9 the terms of the GNU General Public License as published by the Free
10 Software Foundation; either version 3, or (at your option) any later
11 version.
13 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
14 WARRANTY; without even the implied warranty of MERCHANTABILITY or
15 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
16 for more details.
18 You should have received a copy of the GNU General Public License
19 along with GCC; see the file COPYING3. If not see
20 <http://www.gnu.org/licenses/>. */
22 #include "config.h"
23 #include "system.h"
24 #include "coretypes.h"
25 #include "backend.h"
26 #include "target.h"
27 #include "tree.h"
28 #include "gimple.h"
29 #include "tree-pass.h"
30 #include "ssa.h"
31 #include "cgraph.h"
32 #include "pretty-print.h"
33 #include "diagnostic-core.h"
34 #include "fold-const.h"
35 #include "internal-fn.h"
36 #include "langhooks.h"
37 #include "gimplify.h"
38 #include "gimple-iterator.h"
39 #include "gimplify-me.h"
40 #include "gimple-walk.h"
41 #include "tree-cfg.h"
42 #include "tree-into-ssa.h"
43 #include "tree-nested.h"
44 #include "stor-layout.h"
45 #include "common/common-target.h"
46 #include "omp-general.h"
47 #include "omp-offload.h"
48 #include "lto-section-names.h"
49 #include "gomp-constants.h"
50 #include "gimple-pretty-print.h"
51 #include "intl.h"
52 #include "stringpool.h"
53 #include "attribs.h"
54 #include "cfgloop.h"
55 #include "context.h"
56 #include "convert.h"
58 /* Describe the OpenACC looping structure of a function. The entire
59 function is held in a 'NULL' loop. */
61 struct oacc_loop
63 oacc_loop *parent; /* Containing loop. */
65 oacc_loop *child; /* First inner loop. */
67 oacc_loop *sibling; /* Next loop within same parent. */
69 location_t loc; /* Location of the loop start. */
71 gcall *marker; /* Initial head marker. */
73 gcall *heads[GOMP_DIM_MAX]; /* Head marker functions. */
74 gcall *tails[GOMP_DIM_MAX]; /* Tail marker functions. */
76 tree routine; /* Pseudo-loop enclosing a routine. */
78 unsigned mask; /* Partitioning mask. */
79 unsigned e_mask; /* Partitioning of element loops (when tiling). */
80 unsigned inner; /* Partitioning of inner loops. */
81 unsigned flags; /* Partitioning flags. */
82 vec<gcall *> ifns; /* Contained loop abstraction functions. */
83 tree chunk_size; /* Chunk size. */
84 gcall *head_end; /* Final marker of head sequence. */
87 /* Holds offload tables with decls. */
88 vec<tree, va_gc> *offload_funcs, *offload_vars;
90 /* Return level at which oacc routine may spawn a partitioned loop, or
91 -1 if it is not a routine (i.e. is an offload fn). */
93 int
94 oacc_fn_attrib_level (tree attr)
96 tree pos = TREE_VALUE (attr);
98 if (!TREE_PURPOSE (pos))
99 return -1;
101 int ix = 0;
102 for (ix = 0; ix != GOMP_DIM_MAX;
103 ix++, pos = TREE_CHAIN (pos))
104 if (!integer_zerop (TREE_PURPOSE (pos)))
105 break;
107 return ix;
110 /* Helper function for omp_finish_file routine. Takes decls from V_DECLS and
111 adds their addresses and sizes to constructor-vector V_CTOR. */
113 static void
114 add_decls_addresses_to_decl_constructor (vec<tree, va_gc> *v_decls,
115 vec<constructor_elt, va_gc> *v_ctor)
117 unsigned len = vec_safe_length (v_decls);
118 for (unsigned i = 0; i < len; i++)
120 tree it = (*v_decls)[i];
121 bool is_var = VAR_P (it);
122 bool is_link_var
123 = is_var
124 #ifdef ACCEL_COMPILER
125 && DECL_HAS_VALUE_EXPR_P (it)
126 #endif
127 && lookup_attribute ("omp declare target link", DECL_ATTRIBUTES (it));
129 /* See also omp_finish_file and output_offload_tables in lto-cgraph.c. */
130 if (!in_lto_p && !symtab_node::get (it))
131 continue;
133 tree size = NULL_TREE;
134 if (is_var)
135 size = fold_convert (const_ptr_type_node, DECL_SIZE_UNIT (it));
137 tree addr;
138 if (!is_link_var)
139 addr = build_fold_addr_expr (it);
140 else
142 #ifdef ACCEL_COMPILER
143 /* For "omp declare target link" vars add address of the pointer to
144 the target table, instead of address of the var. */
145 tree value_expr = DECL_VALUE_EXPR (it);
146 tree link_ptr_decl = TREE_OPERAND (value_expr, 0);
147 varpool_node::finalize_decl (link_ptr_decl);
148 addr = build_fold_addr_expr (link_ptr_decl);
149 #else
150 addr = build_fold_addr_expr (it);
151 #endif
153 /* Most significant bit of the size marks "omp declare target link"
154 vars in host and target tables. */
155 unsigned HOST_WIDE_INT isize = tree_to_uhwi (size);
156 isize |= 1ULL << (int_size_in_bytes (const_ptr_type_node)
157 * BITS_PER_UNIT - 1);
158 size = wide_int_to_tree (const_ptr_type_node, isize);
161 CONSTRUCTOR_APPEND_ELT (v_ctor, NULL_TREE, addr);
162 if (is_var)
163 CONSTRUCTOR_APPEND_ELT (v_ctor, NULL_TREE, size);
167 /* Return true if DECL is a function for which its references should be
168 analyzed. */
170 static bool
171 omp_declare_target_fn_p (tree decl)
173 return (TREE_CODE (decl) == FUNCTION_DECL
174 && lookup_attribute ("omp declare target", DECL_ATTRIBUTES (decl))
175 && !lookup_attribute ("omp declare target host",
176 DECL_ATTRIBUTES (decl))
177 && (!flag_openacc
178 || oacc_get_fn_attrib (decl) == NULL_TREE));
181 /* Return true if DECL Is a variable for which its initializer references
182 should be analyzed. */
184 static bool
185 omp_declare_target_var_p (tree decl)
187 return (VAR_P (decl)
188 && lookup_attribute ("omp declare target", DECL_ATTRIBUTES (decl))
189 && !lookup_attribute ("omp declare target link",
190 DECL_ATTRIBUTES (decl)));
193 /* Helper function for omp_discover_implicit_declare_target, called through
194 walk_tree. Mark referenced FUNCTION_DECLs implicitly as
195 declare target to. */
197 static tree
198 omp_discover_declare_target_tgt_fn_r (tree *tp, int *walk_subtrees, void *data)
200 if (TREE_CODE (*tp) == CALL_EXPR
201 && CALL_EXPR_FN (*tp)
202 && TREE_CODE (CALL_EXPR_FN (*tp)) == ADDR_EXPR
203 && TREE_CODE (TREE_OPERAND (CALL_EXPR_FN (*tp), 0)) == FUNCTION_DECL
204 && lookup_attribute ("omp declare variant base",
205 DECL_ATTRIBUTES (TREE_OPERAND (CALL_EXPR_FN (*tp),
206 0))))
208 tree fn = TREE_OPERAND (CALL_EXPR_FN (*tp), 0);
209 for (tree attr = DECL_ATTRIBUTES (fn); attr; attr = TREE_CHAIN (attr))
211 attr = lookup_attribute ("omp declare variant base", attr);
212 if (attr == NULL_TREE)
213 break;
214 tree purpose = TREE_PURPOSE (TREE_VALUE (attr));
215 if (TREE_CODE (purpose) == FUNCTION_DECL)
216 omp_discover_declare_target_tgt_fn_r (&purpose, walk_subtrees, data);
219 else if (TREE_CODE (*tp) == FUNCTION_DECL)
221 tree decl = *tp;
222 tree id = get_identifier ("omp declare target");
223 symtab_node *node = symtab_node::get (*tp);
224 if (node != NULL)
226 while (node->alias_target
227 && TREE_CODE (node->alias_target) == FUNCTION_DECL)
229 if (!omp_declare_target_fn_p (node->decl)
230 && !lookup_attribute ("omp declare target host",
231 DECL_ATTRIBUTES (node->decl)))
233 node->offloadable = 1;
234 DECL_ATTRIBUTES (node->decl)
235 = tree_cons (id, NULL_TREE, DECL_ATTRIBUTES (node->decl));
237 node = symtab_node::get (node->alias_target);
239 symtab_node *new_node = node->ultimate_alias_target ();
240 decl = new_node->decl;
241 while (node != new_node)
243 if (!omp_declare_target_fn_p (node->decl)
244 && !lookup_attribute ("omp declare target host",
245 DECL_ATTRIBUTES (node->decl)))
247 node->offloadable = 1;
248 DECL_ATTRIBUTES (node->decl)
249 = tree_cons (id, NULL_TREE, DECL_ATTRIBUTES (node->decl));
251 gcc_assert (node->alias && node->analyzed);
252 node = node->get_alias_target ();
254 node->offloadable = 1;
255 if (ENABLE_OFFLOADING)
256 g->have_offload = true;
258 if (omp_declare_target_fn_p (decl)
259 || lookup_attribute ("omp declare target host",
260 DECL_ATTRIBUTES (decl)))
261 return NULL_TREE;
263 if (!DECL_EXTERNAL (decl) && DECL_SAVED_TREE (decl))
264 ((vec<tree> *) data)->safe_push (decl);
265 DECL_ATTRIBUTES (decl) = tree_cons (id, NULL_TREE,
266 DECL_ATTRIBUTES (decl));
268 else if (TYPE_P (*tp))
269 *walk_subtrees = 0;
270 /* else if (TREE_CODE (*tp) == OMP_TARGET)
272 if (tree dev = omp_find_clause (OMP_TARGET_CLAUSES (*tp)))
273 if (OMP_DEVICE_ANCESTOR (dev))
274 *walk_subtrees = 0;
275 } */
276 return NULL_TREE;
279 /* Similarly, but ignore references outside of OMP_TARGET regions. */
281 static tree
282 omp_discover_declare_target_fn_r (tree *tp, int *walk_subtrees, void *data)
284 if (TREE_CODE (*tp) == OMP_TARGET)
286 /* And not OMP_DEVICE_ANCESTOR. */
287 walk_tree_without_duplicates (&OMP_TARGET_BODY (*tp),
288 omp_discover_declare_target_tgt_fn_r,
289 data);
290 *walk_subtrees = 0;
292 else if (TYPE_P (*tp))
293 *walk_subtrees = 0;
294 return NULL_TREE;
297 /* Helper function for omp_discover_implicit_declare_target, called through
298 walk_tree. Mark referenced FUNCTION_DECLs implicitly as
299 declare target to. */
301 static tree
302 omp_discover_declare_target_var_r (tree *tp, int *walk_subtrees, void *data)
304 if (TREE_CODE (*tp) == FUNCTION_DECL)
305 return omp_discover_declare_target_tgt_fn_r (tp, walk_subtrees, data);
306 else if (VAR_P (*tp)
307 && is_global_var (*tp)
308 && !omp_declare_target_var_p (*tp))
310 tree id = get_identifier ("omp declare target");
311 if (lookup_attribute ("omp declare target link", DECL_ATTRIBUTES (*tp)))
313 error_at (DECL_SOURCE_LOCATION (*tp),
314 "%qD specified both in declare target %<link%> and "
315 "implicitly in %<to%> clauses", *tp);
316 DECL_ATTRIBUTES (*tp)
317 = remove_attribute ("omp declare target link", DECL_ATTRIBUTES (*tp));
319 if (TREE_STATIC (*tp) && lang_hooks.decls.omp_get_decl_init (*tp))
320 ((vec<tree> *) data)->safe_push (*tp);
321 DECL_ATTRIBUTES (*tp) = tree_cons (id, NULL_TREE, DECL_ATTRIBUTES (*tp));
322 symtab_node *node = symtab_node::get (*tp);
323 if (node != NULL && !node->offloadable)
325 node->offloadable = 1;
326 if (ENABLE_OFFLOADING)
328 g->have_offload = true;
329 if (is_a <varpool_node *> (node))
330 vec_safe_push (offload_vars, node->decl);
334 else if (TYPE_P (*tp))
335 *walk_subtrees = 0;
336 return NULL_TREE;
339 /* Perform the OpenMP implicit declare target to discovery. */
341 void
342 omp_discover_implicit_declare_target (void)
344 cgraph_node *node;
345 varpool_node *vnode;
346 auto_vec<tree> worklist;
348 FOR_EACH_DEFINED_FUNCTION (node)
349 if (DECL_SAVED_TREE (node->decl))
351 struct cgraph_node *cgn;
352 if (omp_declare_target_fn_p (node->decl))
353 worklist.safe_push (node->decl);
354 else if (DECL_STRUCT_FUNCTION (node->decl)
355 && DECL_STRUCT_FUNCTION (node->decl)->has_omp_target)
356 worklist.safe_push (node->decl);
357 for (cgn = first_nested_function (node);
358 cgn; cgn = next_nested_function (cgn))
359 if (omp_declare_target_fn_p (cgn->decl))
360 worklist.safe_push (cgn->decl);
361 else if (DECL_STRUCT_FUNCTION (cgn->decl)
362 && DECL_STRUCT_FUNCTION (cgn->decl)->has_omp_target)
363 worklist.safe_push (cgn->decl);
365 FOR_EACH_VARIABLE (vnode)
366 if (lang_hooks.decls.omp_get_decl_init (vnode->decl)
367 && omp_declare_target_var_p (vnode->decl))
368 worklist.safe_push (vnode->decl);
369 while (!worklist.is_empty ())
371 tree decl = worklist.pop ();
372 if (VAR_P (decl))
373 walk_tree_without_duplicates (lang_hooks.decls.omp_get_decl_init (decl),
374 omp_discover_declare_target_var_r,
375 &worklist);
376 else if (omp_declare_target_fn_p (decl))
377 walk_tree_without_duplicates (&DECL_SAVED_TREE (decl),
378 omp_discover_declare_target_tgt_fn_r,
379 &worklist);
380 else
381 walk_tree_without_duplicates (&DECL_SAVED_TREE (decl),
382 omp_discover_declare_target_fn_r,
383 &worklist);
386 lang_hooks.decls.omp_finish_decl_inits ();
390 /* Create new symbols containing (address, size) pairs for global variables,
391 marked with "omp declare target" attribute, as well as addresses for the
392 functions, which are outlined offloading regions. */
393 void
394 omp_finish_file (void)
396 unsigned num_funcs = vec_safe_length (offload_funcs);
397 unsigned num_vars = vec_safe_length (offload_vars);
399 if (num_funcs == 0 && num_vars == 0)
400 return;
402 if (targetm_common.have_named_sections)
404 vec<constructor_elt, va_gc> *v_f, *v_v;
405 vec_alloc (v_f, num_funcs);
406 vec_alloc (v_v, num_vars * 2);
408 add_decls_addresses_to_decl_constructor (offload_funcs, v_f);
409 add_decls_addresses_to_decl_constructor (offload_vars, v_v);
411 tree vars_decl_type = build_array_type_nelts (pointer_sized_int_node,
412 vec_safe_length (v_v));
413 tree funcs_decl_type = build_array_type_nelts (pointer_sized_int_node,
414 num_funcs);
415 SET_TYPE_ALIGN (vars_decl_type, TYPE_ALIGN (pointer_sized_int_node));
416 SET_TYPE_ALIGN (funcs_decl_type, TYPE_ALIGN (pointer_sized_int_node));
417 tree ctor_v = build_constructor (vars_decl_type, v_v);
418 tree ctor_f = build_constructor (funcs_decl_type, v_f);
419 TREE_CONSTANT (ctor_v) = TREE_CONSTANT (ctor_f) = 1;
420 TREE_STATIC (ctor_v) = TREE_STATIC (ctor_f) = 1;
421 tree funcs_decl = build_decl (UNKNOWN_LOCATION, VAR_DECL,
422 get_identifier (".offload_func_table"),
423 funcs_decl_type);
424 tree vars_decl = build_decl (UNKNOWN_LOCATION, VAR_DECL,
425 get_identifier (".offload_var_table"),
426 vars_decl_type);
427 TREE_STATIC (funcs_decl) = TREE_STATIC (vars_decl) = 1;
428 /* Do not align tables more than TYPE_ALIGN (pointer_sized_int_node),
429 otherwise a joint table in a binary will contain padding between
430 tables from multiple object files. */
431 DECL_USER_ALIGN (funcs_decl) = DECL_USER_ALIGN (vars_decl) = 1;
432 SET_DECL_ALIGN (funcs_decl, TYPE_ALIGN (funcs_decl_type));
433 SET_DECL_ALIGN (vars_decl, TYPE_ALIGN (vars_decl_type));
434 DECL_INITIAL (funcs_decl) = ctor_f;
435 DECL_INITIAL (vars_decl) = ctor_v;
436 set_decl_section_name (funcs_decl, OFFLOAD_FUNC_TABLE_SECTION_NAME);
437 set_decl_section_name (vars_decl, OFFLOAD_VAR_TABLE_SECTION_NAME);
439 varpool_node::finalize_decl (vars_decl);
440 varpool_node::finalize_decl (funcs_decl);
442 else
444 for (unsigned i = 0; i < num_funcs; i++)
446 tree it = (*offload_funcs)[i];
447 /* See also add_decls_addresses_to_decl_constructor
448 and output_offload_tables in lto-cgraph.c. */
449 if (!in_lto_p && !symtab_node::get (it))
450 continue;
451 targetm.record_offload_symbol (it);
453 for (unsigned i = 0; i < num_vars; i++)
455 tree it = (*offload_vars)[i];
456 if (!in_lto_p && !symtab_node::get (it))
457 continue;
458 #ifdef ACCEL_COMPILER
459 if (DECL_HAS_VALUE_EXPR_P (it)
460 && lookup_attribute ("omp declare target link",
461 DECL_ATTRIBUTES (it)))
463 tree value_expr = DECL_VALUE_EXPR (it);
464 tree link_ptr_decl = TREE_OPERAND (value_expr, 0);
465 targetm.record_offload_symbol (link_ptr_decl);
466 varpool_node::finalize_decl (link_ptr_decl);
468 else
469 #endif
470 targetm.record_offload_symbol (it);
475 /* Call dim_pos (POS == true) or dim_size (POS == false) builtins for
476 axis DIM. Return a tmp var holding the result. */
478 static tree
479 oacc_dim_call (bool pos, int dim, gimple_seq *seq)
481 tree arg = build_int_cst (unsigned_type_node, dim);
482 tree size = create_tmp_var (integer_type_node);
483 enum internal_fn fn = pos ? IFN_GOACC_DIM_POS : IFN_GOACC_DIM_SIZE;
484 gimple *call = gimple_build_call_internal (fn, 1, arg);
486 gimple_call_set_lhs (call, size);
487 gimple_seq_add_stmt (seq, call);
489 return size;
492 /* Find the number of threads (POS = false), or thread number (POS =
493 true) for an OpenACC region partitioned as MASK. Setup code
494 required for the calculation is added to SEQ. */
496 static tree
497 oacc_thread_numbers (bool pos, int mask, gimple_seq *seq)
499 tree res = pos ? NULL_TREE : build_int_cst (unsigned_type_node, 1);
500 unsigned ix;
502 /* Start at gang level, and examine relevant dimension indices. */
503 for (ix = GOMP_DIM_GANG; ix != GOMP_DIM_MAX; ix++)
504 if (GOMP_DIM_MASK (ix) & mask)
506 if (res)
508 /* We had an outer index, so scale that by the size of
509 this dimension. */
510 tree n = oacc_dim_call (false, ix, seq);
511 res = fold_build2 (MULT_EXPR, integer_type_node, res, n);
513 if (pos)
515 /* Determine index in this dimension. */
516 tree id = oacc_dim_call (true, ix, seq);
517 if (res)
518 res = fold_build2 (PLUS_EXPR, integer_type_node, res, id);
519 else
520 res = id;
524 if (res == NULL_TREE)
525 res = integer_zero_node;
527 return res;
530 /* Transform IFN_GOACC_LOOP calls to actual code. See
531 expand_oacc_for for where these are generated. At the vector
532 level, we stride loops, such that each member of a warp will
533 operate on adjacent iterations. At the worker and gang level,
534 each gang/warp executes a set of contiguous iterations. Chunking
535 can override this such that each iteration engine executes a
536 contiguous chunk, and then moves on to stride to the next chunk. */
538 static void
539 oacc_xform_loop (gcall *call)
541 gimple_stmt_iterator gsi = gsi_for_stmt (call);
542 enum ifn_goacc_loop_kind code
543 = (enum ifn_goacc_loop_kind) TREE_INT_CST_LOW (gimple_call_arg (call, 0));
544 tree dir = gimple_call_arg (call, 1);
545 tree range = gimple_call_arg (call, 2);
546 tree step = gimple_call_arg (call, 3);
547 tree chunk_size = NULL_TREE;
548 unsigned mask = (unsigned) TREE_INT_CST_LOW (gimple_call_arg (call, 5));
549 tree lhs = gimple_call_lhs (call);
550 tree type = NULL_TREE;
551 tree diff_type = TREE_TYPE (range);
552 tree r = NULL_TREE;
553 gimple_seq seq = NULL;
554 bool chunking = false, striding = true;
555 unsigned outer_mask = mask & (~mask + 1); // Outermost partitioning
556 unsigned inner_mask = mask & ~outer_mask; // Inner partitioning (if any)
558 /* Skip lowering if return value of IFN_GOACC_LOOP call is not used. */
559 if (!lhs)
561 gsi_replace_with_seq (&gsi, seq, true);
562 return;
565 type = TREE_TYPE (lhs);
567 #ifdef ACCEL_COMPILER
568 chunk_size = gimple_call_arg (call, 4);
569 if (integer_minus_onep (chunk_size) /* Force static allocation. */
570 || integer_zerop (chunk_size)) /* Default (also static). */
572 /* If we're at the gang level, we want each to execute a
573 contiguous run of iterations. Otherwise we want each element
574 to stride. */
575 striding = !(outer_mask & GOMP_DIM_MASK (GOMP_DIM_GANG));
576 chunking = false;
578 else
580 /* Chunk of size 1 is striding. */
581 striding = integer_onep (chunk_size);
582 chunking = !striding;
584 #endif
586 /* striding=true, chunking=true
587 -> invalid.
588 striding=true, chunking=false
589 -> chunks=1
590 striding=false,chunking=true
591 -> chunks=ceil (range/(chunksize*threads*step))
592 striding=false,chunking=false
593 -> chunk_size=ceil(range/(threads*step)),chunks=1 */
594 push_gimplify_context (true);
596 switch (code)
598 default: gcc_unreachable ();
600 case IFN_GOACC_LOOP_CHUNKS:
601 if (!chunking)
602 r = build_int_cst (type, 1);
603 else
605 /* chunk_max
606 = (range - dir) / (chunks * step * num_threads) + dir */
607 tree per = oacc_thread_numbers (false, mask, &seq);
608 per = fold_convert (type, per);
609 chunk_size = fold_convert (type, chunk_size);
610 per = fold_build2 (MULT_EXPR, type, per, chunk_size);
611 per = fold_build2 (MULT_EXPR, type, per, step);
612 r = build2 (MINUS_EXPR, type, range, dir);
613 r = build2 (PLUS_EXPR, type, r, per);
614 r = build2 (TRUNC_DIV_EXPR, type, r, per);
616 break;
618 case IFN_GOACC_LOOP_STEP:
620 /* If striding, step by the entire compute volume, otherwise
621 step by the inner volume. */
622 unsigned volume = striding ? mask : inner_mask;
624 r = oacc_thread_numbers (false, volume, &seq);
625 r = build2 (MULT_EXPR, type, fold_convert (type, r), step);
627 break;
629 case IFN_GOACC_LOOP_OFFSET:
630 /* Enable vectorization on non-SIMT targets. */
631 if (!targetm.simt.vf
632 && outer_mask == GOMP_DIM_MASK (GOMP_DIM_VECTOR)
633 /* If not -fno-tree-loop-vectorize, hint that we want to vectorize
634 the loop. */
635 && (flag_tree_loop_vectorize
636 || !global_options_set.x_flag_tree_loop_vectorize))
638 basic_block bb = gsi_bb (gsi);
639 class loop *parent = bb->loop_father;
640 class loop *body = parent->inner;
642 parent->force_vectorize = true;
643 parent->safelen = INT_MAX;
645 /* "Chunking loops" may have inner loops. */
646 if (parent->inner)
648 body->force_vectorize = true;
649 body->safelen = INT_MAX;
652 cfun->has_force_vectorize_loops = true;
654 if (striding)
656 r = oacc_thread_numbers (true, mask, &seq);
657 r = fold_convert (diff_type, r);
659 else
661 tree inner_size = oacc_thread_numbers (false, inner_mask, &seq);
662 tree outer_size = oacc_thread_numbers (false, outer_mask, &seq);
663 tree volume = fold_build2 (MULT_EXPR, TREE_TYPE (inner_size),
664 inner_size, outer_size);
666 volume = fold_convert (diff_type, volume);
667 if (chunking)
668 chunk_size = fold_convert (diff_type, chunk_size);
669 else
671 tree per = fold_build2 (MULT_EXPR, diff_type, volume, step);
673 chunk_size = build2 (MINUS_EXPR, diff_type, range, dir);
674 chunk_size = build2 (PLUS_EXPR, diff_type, chunk_size, per);
675 chunk_size = build2 (TRUNC_DIV_EXPR, diff_type, chunk_size, per);
678 tree span = build2 (MULT_EXPR, diff_type, chunk_size,
679 fold_convert (diff_type, inner_size));
680 r = oacc_thread_numbers (true, outer_mask, &seq);
681 r = fold_convert (diff_type, r);
682 r = build2 (MULT_EXPR, diff_type, r, span);
684 tree inner = oacc_thread_numbers (true, inner_mask, &seq);
685 inner = fold_convert (diff_type, inner);
686 r = fold_build2 (PLUS_EXPR, diff_type, r, inner);
688 if (chunking)
690 tree chunk = fold_convert (diff_type, gimple_call_arg (call, 6));
691 tree per
692 = fold_build2 (MULT_EXPR, diff_type, volume, chunk_size);
693 per = build2 (MULT_EXPR, diff_type, per, chunk);
695 r = build2 (PLUS_EXPR, diff_type, r, per);
698 r = fold_build2 (MULT_EXPR, diff_type, r, step);
699 if (type != diff_type)
700 r = fold_convert (type, r);
701 break;
703 case IFN_GOACC_LOOP_BOUND:
704 if (striding)
705 r = range;
706 else
708 tree inner_size = oacc_thread_numbers (false, inner_mask, &seq);
709 tree outer_size = oacc_thread_numbers (false, outer_mask, &seq);
710 tree volume = fold_build2 (MULT_EXPR, TREE_TYPE (inner_size),
711 inner_size, outer_size);
713 volume = fold_convert (diff_type, volume);
714 if (chunking)
715 chunk_size = fold_convert (diff_type, chunk_size);
716 else
718 tree per = fold_build2 (MULT_EXPR, diff_type, volume, step);
720 chunk_size = build2 (MINUS_EXPR, diff_type, range, dir);
721 chunk_size = build2 (PLUS_EXPR, diff_type, chunk_size, per);
722 chunk_size = build2 (TRUNC_DIV_EXPR, diff_type, chunk_size, per);
725 tree span = build2 (MULT_EXPR, diff_type, chunk_size,
726 fold_convert (diff_type, inner_size));
728 r = fold_build2 (MULT_EXPR, diff_type, span, step);
730 tree offset = gimple_call_arg (call, 6);
731 r = build2 (PLUS_EXPR, diff_type, r,
732 fold_convert (diff_type, offset));
733 r = build2 (integer_onep (dir) ? MIN_EXPR : MAX_EXPR,
734 diff_type, r, range);
736 if (diff_type != type)
737 r = fold_convert (type, r);
738 break;
741 gimplify_assign (lhs, r, &seq);
743 pop_gimplify_context (NULL);
745 gsi_replace_with_seq (&gsi, seq, true);
748 /* Transform a GOACC_TILE call. Determines the element loop span for
749 the specified loop of the nest. This is 1 if we're not tiling.
751 GOACC_TILE (collapse_count, loop_no, tile_arg, gwv_tile, gwv_element); */
753 static void
754 oacc_xform_tile (gcall *call)
756 gimple_stmt_iterator gsi = gsi_for_stmt (call);
757 unsigned collapse = tree_to_uhwi (gimple_call_arg (call, 0));
758 /* Inner loops have higher loop_nos. */
759 unsigned loop_no = tree_to_uhwi (gimple_call_arg (call, 1));
760 tree tile_size = gimple_call_arg (call, 2);
761 unsigned e_mask = tree_to_uhwi (gimple_call_arg (call, 4));
762 tree lhs = gimple_call_lhs (call);
763 tree type = TREE_TYPE (lhs);
764 gimple_seq seq = NULL;
765 tree span = build_int_cst (type, 1);
767 gcc_assert (!(e_mask
768 & ~(GOMP_DIM_MASK (GOMP_DIM_VECTOR)
769 | GOMP_DIM_MASK (GOMP_DIM_WORKER))));
770 push_gimplify_context (!seen_error ());
772 #ifndef ACCEL_COMPILER
773 /* Partitioning disabled on host compilers. */
774 e_mask = 0;
775 #endif
776 if (!e_mask)
777 /* Not paritioning. */
778 span = integer_one_node;
779 else if (!integer_zerop (tile_size))
780 /* User explicitly specified size. */
781 span = tile_size;
782 else
784 /* Pick a size based on the paritioning of the element loop and
785 the number of loop nests. */
786 tree first_size = NULL_TREE;
787 tree second_size = NULL_TREE;
789 if (e_mask & GOMP_DIM_MASK (GOMP_DIM_VECTOR))
790 first_size = oacc_dim_call (false, GOMP_DIM_VECTOR, &seq);
791 if (e_mask & GOMP_DIM_MASK (GOMP_DIM_WORKER))
792 second_size = oacc_dim_call (false, GOMP_DIM_WORKER, &seq);
794 if (!first_size)
796 first_size = second_size;
797 second_size = NULL_TREE;
800 if (loop_no + 1 == collapse)
802 span = first_size;
803 if (!loop_no && second_size)
804 span = fold_build2 (MULT_EXPR, TREE_TYPE (span),
805 span, second_size);
807 else if (loop_no + 2 == collapse)
808 span = second_size;
809 else
810 span = NULL_TREE;
812 if (!span)
813 /* There's no obvious element size for this loop. Options
814 are 1, first_size or some non-unity constant (32 is my
815 favourite). We should gather some statistics. */
816 span = first_size;
819 span = fold_convert (type, span);
820 gimplify_assign (lhs, span, &seq);
822 pop_gimplify_context (NULL);
824 gsi_replace_with_seq (&gsi, seq, true);
827 /* Default partitioned and minimum partitioned dimensions. */
829 static int oacc_default_dims[GOMP_DIM_MAX];
830 static int oacc_min_dims[GOMP_DIM_MAX];
833 oacc_get_default_dim (int dim)
835 gcc_assert (0 <= dim && dim < GOMP_DIM_MAX);
836 return oacc_default_dims[dim];
840 oacc_get_min_dim (int dim)
842 gcc_assert (0 <= dim && dim < GOMP_DIM_MAX);
843 return oacc_min_dims[dim];
846 /* Parse the default dimension parameter. This is a set of
847 :-separated optional compute dimensions. Each specified dimension
848 is a positive integer. When device type support is added, it is
849 planned to be a comma separated list of such compute dimensions,
850 with all but the first prefixed by the colon-terminated device
851 type. */
853 static void
854 oacc_parse_default_dims (const char *dims)
856 int ix;
858 for (ix = GOMP_DIM_MAX; ix--;)
860 oacc_default_dims[ix] = -1;
861 oacc_min_dims[ix] = 1;
864 #ifndef ACCEL_COMPILER
865 /* Cannot be overridden on the host. */
866 dims = NULL;
867 #endif
868 if (dims)
870 const char *pos = dims;
872 for (ix = 0; *pos && ix != GOMP_DIM_MAX; ix++)
874 if (ix)
876 if (*pos != ':')
877 goto malformed;
878 pos++;
881 if (*pos != ':')
883 long val;
884 const char *eptr;
886 errno = 0;
887 val = strtol (pos, CONST_CAST (char **, &eptr), 10);
888 if (errno || val <= 0 || (int) val != val)
889 goto malformed;
890 pos = eptr;
891 oacc_default_dims[ix] = (int) val;
894 if (*pos)
896 malformed:
897 error_at (UNKNOWN_LOCATION,
898 "%<-fopenacc-dim%> operand is malformed at %qs", pos);
902 /* Allow the backend to validate the dimensions. */
903 targetm.goacc.validate_dims (NULL_TREE, oacc_default_dims, -1, 0);
904 targetm.goacc.validate_dims (NULL_TREE, oacc_min_dims, -2, 0);
907 /* Validate and update the dimensions for offloaded FN. ATTRS is the
908 raw attribute. DIMS is an array of dimensions, which is filled in.
909 LEVEL is the partitioning level of a routine, or -1 for an offload
910 region itself. USED is the mask of partitioned execution in the
911 function. */
913 static void
914 oacc_validate_dims (tree fn, tree attrs, int *dims, int level, unsigned used)
916 tree purpose[GOMP_DIM_MAX];
917 unsigned ix;
918 tree pos = TREE_VALUE (attrs);
920 /* Make sure the attribute creator attached the dimension
921 information. */
922 gcc_assert (pos);
924 for (ix = 0; ix != GOMP_DIM_MAX; ix++)
926 purpose[ix] = TREE_PURPOSE (pos);
927 tree val = TREE_VALUE (pos);
928 dims[ix] = val ? TREE_INT_CST_LOW (val) : -1;
929 pos = TREE_CHAIN (pos);
932 bool check = true;
933 #ifdef ACCEL_COMPILER
934 check = false;
935 #endif
936 if (check
937 && warn_openacc_parallelism
938 && !lookup_attribute ("oacc kernels", DECL_ATTRIBUTES (fn)))
940 static char const *const axes[] =
941 /* Must be kept in sync with GOMP_DIM enumeration. */
942 { "gang", "worker", "vector" };
943 for (ix = level >= 0 ? level : 0; ix != GOMP_DIM_MAX; ix++)
944 if (dims[ix] < 0)
945 ; /* Defaulting axis. */
946 else if ((used & GOMP_DIM_MASK (ix)) && dims[ix] == 1)
947 /* There is partitioned execution, but the user requested a
948 dimension size of 1. They're probably confused. */
949 warning_at (DECL_SOURCE_LOCATION (fn), OPT_Wopenacc_parallelism,
950 "region contains %s partitioned code but"
951 " is not %s partitioned", axes[ix], axes[ix]);
952 else if (!(used & GOMP_DIM_MASK (ix)) && dims[ix] != 1)
953 /* The dimension is explicitly partitioned to non-unity, but
954 no use is made within the region. */
955 warning_at (DECL_SOURCE_LOCATION (fn), OPT_Wopenacc_parallelism,
956 "region is %s partitioned but"
957 " does not contain %s partitioned code",
958 axes[ix], axes[ix]);
961 bool changed = targetm.goacc.validate_dims (fn, dims, level, used);
963 /* Default anything left to 1 or a partitioned default. */
964 for (ix = 0; ix != GOMP_DIM_MAX; ix++)
965 if (dims[ix] < 0)
967 /* The OpenACC spec says 'If the [num_gangs] clause is not
968 specified, an implementation-defined default will be used;
969 the default may depend on the code within the construct.'
970 (2.5.6). Thus an implementation is free to choose
971 non-unity default for a parallel region that doesn't have
972 any gang-partitioned loops. However, it appears that there
973 is a sufficient body of user code that expects non-gang
974 partitioned regions to not execute in gang-redundant mode.
975 So we (a) don't warn about the non-portability and (b) pick
976 the minimum permissible dimension size when there is no
977 partitioned execution. Otherwise we pick the global
978 default for the dimension, which the user can control. The
979 same wording and logic applies to num_workers and
980 vector_length, however the worker- or vector- single
981 execution doesn't have the same impact as gang-redundant
982 execution. (If the minimum gang-level partioning is not 1,
983 the target is probably too confusing.) */
984 dims[ix] = (used & GOMP_DIM_MASK (ix)
985 ? oacc_default_dims[ix] : oacc_min_dims[ix]);
986 changed = true;
989 if (changed)
991 /* Replace the attribute with new values. */
992 pos = NULL_TREE;
993 for (ix = GOMP_DIM_MAX; ix--;)
994 pos = tree_cons (purpose[ix],
995 build_int_cst (integer_type_node, dims[ix]), pos);
996 oacc_replace_fn_attrib (fn, pos);
1000 /* Create an empty OpenACC loop structure at LOC. */
1002 static oacc_loop *
1003 new_oacc_loop_raw (oacc_loop *parent, location_t loc)
1005 oacc_loop *loop = XCNEW (oacc_loop);
1007 loop->parent = parent;
1009 if (parent)
1011 loop->sibling = parent->child;
1012 parent->child = loop;
1015 loop->loc = loc;
1016 return loop;
1019 /* Create an outermost, dummy OpenACC loop for offloaded function
1020 DECL. */
1022 static oacc_loop *
1023 new_oacc_loop_outer (tree decl)
1025 return new_oacc_loop_raw (NULL, DECL_SOURCE_LOCATION (decl));
1028 /* Start a new OpenACC loop structure beginning at head marker HEAD.
1029 Link into PARENT loop. Return the new loop. */
1031 static oacc_loop *
1032 new_oacc_loop (oacc_loop *parent, gcall *marker)
1034 oacc_loop *loop = new_oacc_loop_raw (parent, gimple_location (marker));
1036 loop->marker = marker;
1038 /* TODO: This is where device_type flattening would occur for the loop
1039 flags. */
1041 loop->flags = TREE_INT_CST_LOW (gimple_call_arg (marker, 3));
1043 tree chunk_size = integer_zero_node;
1044 if (loop->flags & OLF_GANG_STATIC)
1045 chunk_size = gimple_call_arg (marker, 4);
1046 loop->chunk_size = chunk_size;
1048 return loop;
/* Create a dummy loop encompassing a call to an OpenACC routine.
   Extract the routine's partitioning requirements: the routine's
   attribute level gives the outermost axis it may use itself, so the
   dummy loop claims every axis from that level inward.  */

static void
new_oacc_loop_routine (oacc_loop *parent, gcall *call, tree decl, tree attrs)
{
  oacc_loop *loop = new_oacc_loop_raw (parent, gimple_location (call));
  int level = oacc_fn_attrib_level (attrs);

  gcc_assert (level >= 0);

  loop->marker = call;
  loop->routine = decl;
  /* All dimensions from LEVEL inward, i.e. GOMP_DIM_MASK bits
     [level, GOMP_DIM_MAX).  */
  loop->mask = ((GOMP_DIM_MASK (GOMP_DIM_MAX) - 1)
                ^ (GOMP_DIM_MASK (level) - 1));
}
/* Finish off the current OpenACC loop LOOP (reached when its tail
   marker is seen during discovery).  Return the parent loop.  */

static oacc_loop *
finish_oacc_loop (oacc_loop *loop)
{
  /* If the loop has been collapsed, don't partition it.  (No recorded
     IFN_GOACC_LOOP/IFN_GOACC_TILE calls means the loop body was
     optimized away.)  */
  if (loop->ifns.is_empty ())
    loop->mask = loop->flags = 0;
  return loop->parent;
}
1080 /* Free all OpenACC loop structures within LOOP (inclusive). */
1082 static void
1083 free_oacc_loop (oacc_loop *loop)
1085 if (loop->sibling)
1086 free_oacc_loop (loop->sibling);
1087 if (loop->child)
1088 free_oacc_loop (loop->child);
1090 loop->ifns.release ();
1091 free (loop);
/* Dump out the OpenACC loop head or tail sequence beginning at FROM.
   TITLE/LEVEL label the output.  Statements are printed until another
   IFN_UNIQUE marker of the same kind as FROM is met, following
   single-successor basic blocks as needed.  */

static void
dump_oacc_loop_part (FILE *file, gcall *from, int depth,
                     const char *title, int level)
{
  enum ifn_unique_kind kind
    = (enum ifn_unique_kind) TREE_INT_CST_LOW (gimple_call_arg (from, 0));

  fprintf (file, "%*s%s-%d:\n", depth * 2, "", title, level);
  for (gimple_stmt_iterator gsi = gsi_for_stmt (from);;)
    {
      gimple *stmt = gsi_stmt (gsi);

      if (gimple_call_internal_p (stmt, IFN_UNIQUE))
        {
          enum ifn_unique_kind k
            = ((enum ifn_unique_kind) TREE_INT_CST_LOW
               (gimple_call_arg (stmt, 0)));

          /* Stop on the next marker of the same kind (but not FROM
             itself, which is the first statement visited).  */
          if (k == kind && stmt != from)
            break;
        }
      print_gimple_stmt (file, stmt, depth * 2 + 2);

      gsi_next (&gsi);
      /* The sequence may span several blocks; hop to the (single)
         successor when we fall off the end of a block.  */
      while (gsi_end_p (gsi))
        gsi = gsi_start_bb (single_succ (gsi_bb (gsi)));
    }
}
/* Dump OpenACC loop LOOP, its children, and its siblings to FILE,
   indented DEPTH levels.  */

static void
dump_oacc_loop (FILE *file, oacc_loop *loop, int depth)
{
  int ix;

  fprintf (file, "%*sLoop %x(%x) %s:%u\n", depth * 2, "",
           loop->flags, loop->mask,
           LOCATION_FILE (loop->loc), LOCATION_LINE (loop->loc));

  if (loop->marker)
    print_gimple_stmt (file, loop->marker, depth * 2);

  if (loop->routine)
    fprintf (file, "%*sRoutine %s:%u:%s\n",
             depth * 2, "", DECL_SOURCE_FILE (loop->routine),
             DECL_SOURCE_LINE (loop->routine),
             IDENTIFIER_POINTER (DECL_NAME (loop->routine)));

  /* Heads outermost-first, tails innermost-first, mirroring their
     nesting in the IL.  */
  for (ix = GOMP_DIM_GANG; ix != GOMP_DIM_MAX; ix++)
    if (loop->heads[ix])
      dump_oacc_loop_part (file, loop->heads[ix], depth, "Head", ix);
  for (ix = GOMP_DIM_MAX; ix--;)
    if (loop->tails[ix])
      dump_oacc_loop_part (file, loop->tails[ix], depth, "Tail", ix);

  if (loop->child)
    dump_oacc_loop (file, loop->child, depth + 1);
  if (loop->sibling)
    dump_oacc_loop (file, loop->sibling, depth);
}
/* Prototype for the debugger-callable helper below, so its definition
   has a visible declaration.  */
void debug_oacc_loop (oacc_loop *);

/* Dump loops to stderr.  Intended to be called from a debugger.  */

DEBUG_FUNCTION void
debug_oacc_loop (oacc_loop *loop)
{
  dump_oacc_loop (stderr, loop, 0);
}
/* Provide diagnostics on OpenACC loop LOOP, its children, and its
   siblings: emit an optimization note listing the gang/worker/vector
   axes assigned to each loop ("seq" when none).  */

static void
inform_oacc_loop (const oacc_loop *loop)
{
  const char *gang
    = loop->mask & GOMP_DIM_MASK (GOMP_DIM_GANG) ? " gang" : "";
  const char *worker
    = loop->mask & GOMP_DIM_MASK (GOMP_DIM_WORKER) ? " worker" : "";
  const char *vector
    = loop->mask & GOMP_DIM_MASK (GOMP_DIM_VECTOR) ? " vector" : "";
  const char *seq = loop->mask == 0 ? " seq" : "";
  const dump_user_location_t loc
    = dump_user_location_t::from_location_t (loop->loc);
  dump_printf_loc (MSG_OPTIMIZED_LOCATIONS, loc,
                   "assigned OpenACC%s%s%s%s loop parallelism\n", gang, worker,
                   vector, seq);

  if (loop->child)
    inform_oacc_loop (loop->child);
  if (loop->sibling)
    inform_oacc_loop (loop->sibling);
}
/* DFS walk of basic blocks BB onwards, creating OpenACC loop
   structures as we go.  By construction these loops are properly
   nested.

   MARKER counts the HEAD/TAIL marker calls seen in the current marker
   sequence; REMAINING counts those still expected.  While either is
   non-zero we are inside a marker sequence and must follow the single
   successor chain rather than branch.  */

static void
oacc_loop_discover_walk (oacc_loop *loop, basic_block bb)
{
  int marker = 0;
  int remaining = 0;

  if (bb->flags & BB_VISITED)
    return;

 follow:
  bb->flags |= BB_VISITED;

  /* Scan for loop markers.  */
  for (gimple_stmt_iterator gsi = gsi_start_bb (bb); !gsi_end_p (gsi);
       gsi_next (&gsi))
    {
      gimple *stmt = gsi_stmt (gsi);

      if (!is_gimple_call (stmt))
        continue;

      gcall *call = as_a <gcall *> (stmt);

      /* If this is a routine, make a dummy loop for it.  */
      if (tree decl = gimple_call_fndecl (call))
        if (tree attrs = oacc_get_fn_attrib (decl))
          {
            gcc_assert (!marker);
            new_oacc_loop_routine (loop, call, decl, attrs);
          }

      if (!gimple_call_internal_p (call))
        continue;

      switch (gimple_call_internal_fn (call))
        {
        default:
          break;

        case IFN_GOACC_LOOP:
        case IFN_GOACC_TILE:
          /* Record the abstraction function, so we can manipulate it
             later.  */
          loop->ifns.safe_push (call);
          break;

        case IFN_UNIQUE:
          enum ifn_unique_kind kind
            = (enum ifn_unique_kind) (TREE_INT_CST_LOW
                                      (gimple_call_arg (call, 0)));
          if (kind == IFN_UNIQUE_OACC_HEAD_MARK
              || kind == IFN_UNIQUE_OACC_TAIL_MARK)
            {
              /* A 2-argument marker terminates the sequence; longer
                 markers carry the expected sequence length in arg 2.  */
              if (gimple_call_num_args (call) == 2)
                {
                  gcc_assert (marker && !remaining);
                  marker = 0;
                  if (kind == IFN_UNIQUE_OACC_TAIL_MARK)
                    loop = finish_oacc_loop (loop);
                  else
                    loop->head_end = call;
                }
              else
                {
                  int count = TREE_INT_CST_LOW (gimple_call_arg (call, 2));

                  if (!marker)
                    {
                      /* First marker of the sequence: open a new loop
                         on a head marker.  */
                      if (kind == IFN_UNIQUE_OACC_HEAD_MARK)
                        loop = new_oacc_loop (loop, call);
                      remaining = count;
                    }
                  gcc_assert (count == remaining);
                  if (remaining)
                    {
                      remaining--;
                      /* Heads are recorded outermost-first, tails
                         innermost-first.  */
                      if (kind == IFN_UNIQUE_OACC_HEAD_MARK)
                        loop->heads[marker] = call;
                      else
                        loop->tails[remaining] = call;
                    }
                  marker++;
                }
            }
        }
    }
  if (remaining || marker)
    {
      /* Still inside a marker sequence: it must continue into the
         unique unvisited successor block.  */
      bb = single_succ (bb);
      gcc_assert (single_pred_p (bb) && !(bb->flags & BB_VISITED));
      goto follow;
    }

  /* Walk successor blocks.  */
  edge e;
  edge_iterator ei;

  FOR_EACH_EDGE (e, ei, bb->succs)
    oacc_loop_discover_walk (loop, e->dest);
}
/* LOOP is the first sibling.  Reverse the order in place and return
   the new first sibling.  Recurse to child loops.  (Discovery pushed
   siblings onto the front of the child list, so they are in reverse
   program order until this runs.)  */

static oacc_loop *
oacc_loop_sibling_nreverse (oacc_loop *loop)
{
  oacc_loop *last = NULL;
  do
    {
      if (loop->child)
        loop->child = oacc_loop_sibling_nreverse (loop->child);

      /* Classic in-place singly-linked-list reversal.  */
      oacc_loop *next = loop->sibling;
      loop->sibling = last;
      last = loop;
      loop = next;
    }
  while (loop);

  return last;
}
/* Discover the OpenACC loops marked up by HEAD and TAIL markers for
   the current function.  Return the outermost dummy loop; the caller
   owns the structure and frees it with free_oacc_loop.  */

static oacc_loop *
oacc_loop_discovery ()
{
  /* Clear basic block flags, in particular BB_VISITED which we're going
     to use in the following.  */
  clear_bb_flags ();

  oacc_loop *top = new_oacc_loop_outer (current_function_decl);
  oacc_loop_discover_walk (top, ENTRY_BLOCK_PTR_FOR_FN (cfun));

  /* The siblings were constructed in reverse order, reverse them so
     that diagnostics come out in an unsurprising order.  */
  top = oacc_loop_sibling_nreverse (top);

  return top;
}
/* Transform the abstract internal function markers starting at FROM
   to be for partitioning level LEVEL.  Stop when we meet another HEAD
   or TAIL marker of the same kind as FROM.  The level goes into
   argument 2 of FORK/JOIN/PRIVATE markers and argument 3 of
   GOACC_REDUCTION calls.  */

static void
oacc_loop_xform_head_tail (gcall *from, int level)
{
  enum ifn_unique_kind kind
    = (enum ifn_unique_kind) TREE_INT_CST_LOW (gimple_call_arg (from, 0));
  tree replacement = build_int_cst (unsigned_type_node, level);

  for (gimple_stmt_iterator gsi = gsi_for_stmt (from);;)
    {
      gimple *stmt = gsi_stmt (gsi);

      if (gimple_call_internal_p (stmt, IFN_UNIQUE))
        {
          enum ifn_unique_kind k
            = ((enum ifn_unique_kind)
               TREE_INT_CST_LOW (gimple_call_arg (stmt, 0)));

          if (k == IFN_UNIQUE_OACC_FORK
              || k == IFN_UNIQUE_OACC_JOIN
              || k == IFN_UNIQUE_OACC_PRIVATE)
            *gimple_call_arg_ptr (stmt, 2) = replacement;
          else if (k == kind && stmt != from)
            break;
        }
      else if (gimple_call_internal_p (stmt, IFN_GOACC_REDUCTION))
        *gimple_call_arg_ptr (stmt, 3) = replacement;
      update_stmt (stmt);

      gsi_next (&gsi);
      /* The marker sequence may span blocks; follow the single
         successor chain.  */
      while (gsi_end_p (gsi))
        gsi = gsi_start_bb (single_succ (gsi_bb (gsi)));
    }
}
/* Process the discovered OpenACC loops, setting the correct
   partitioning level etc.: patch the recorded GOACC_LOOP/GOACC_TILE
   calls with the final masks and chunk size, then rewrite each
   head/tail marker sequence with its assigned dimension.  Recurses
   over children and siblings.  */

static void
oacc_loop_process (oacc_loop *loop)
{
  if (loop->child)
    oacc_loop_process (loop->child);

  if (loop->mask && !loop->routine)
    {
      int ix;
      tree mask_arg = build_int_cst (unsigned_type_node, loop->mask);
      tree e_mask_arg = build_int_cst (unsigned_type_node, loop->e_mask);
      tree chunk_arg = loop->chunk_size;
      gcall *call;

      for (ix = 0; loop->ifns.iterate (ix, &call); ix++)
        {
          switch (gimple_call_internal_fn (call))
            {
            case IFN_GOACC_LOOP:
              {
                /* Arg 5 == -1 flags an element loop: it gets the
                   element mask and keeps its chunk argument.  */
                bool is_e = gimple_call_arg (call, 5) == integer_minus_one_node;
                gimple_call_set_arg (call, 5, is_e ? e_mask_arg : mask_arg);
                if (!is_e)
                  gimple_call_set_arg (call, 4, chunk_arg);
              }
              break;

            case IFN_GOACC_TILE:
              gimple_call_set_arg (call, 3, mask_arg);
              gimple_call_set_arg (call, 4, e_mask_arg);
              break;

            default:
              gcc_unreachable ();
            }
          update_stmt (call);
        }

      /* Assign the used dimensions, outermost first, to the recorded
         head/tail marker sequences in order.  */
      unsigned dim = GOMP_DIM_GANG;
      unsigned mask = loop->mask | loop->e_mask;
      for (ix = 0; ix != GOMP_DIM_MAX && mask; ix++)
        {
          while (!(GOMP_DIM_MASK (dim) & mask))
            dim++;

          oacc_loop_xform_head_tail (loop->heads[ix], dim);
          oacc_loop_xform_head_tail (loop->tails[ix], dim);

          mask ^= GOMP_DIM_MASK (dim);
        }
    }

  if (loop->sibling)
    oacc_loop_process (loop->sibling);
}
/* Walk the OpenACC loop hierarchy checking and assigning the
   programmer-specified partitionings.  OUTER_MASK is the partitioning
   this loop is contained within.  Return mask of partitioning
   encountered.  If any auto loops are discovered, set GOMP_DIM_MAX
   bit.  */

static unsigned
oacc_loop_fixed_partitions (oacc_loop *loop, unsigned outer_mask)
{
  unsigned this_mask = loop->mask;
  unsigned mask_all = 0;
  bool noisy = true;

#ifdef ACCEL_COMPILER
  /* When device_type is supported, we want the device compiler to be
     noisy, if the loop parameters are device_type-specific.  */
  noisy = false;
#endif

  if (!loop->routine)
    {
      bool auto_par = (loop->flags & OLF_AUTO) != 0;
      bool seq_par = (loop->flags & OLF_SEQ) != 0;
      bool tiling = (loop->flags & OLF_TILE) != 0;

      this_mask = ((loop->flags >> OLF_DIM_BASE)
                   & (GOMP_DIM_MASK (GOMP_DIM_MAX) - 1));

      /* Apply auto partitioning if this is a non-partitioned regular
         loop, or (no more than) single axis tiled loop.  */
      bool maybe_auto
        = !seq_par && this_mask == (tiling ? this_mask & -this_mask : 0);

      /* seq/auto/explicit axes are mutually exclusive.  */
      if ((this_mask != 0) + auto_par + seq_par > 1)
        {
          if (noisy)
            error_at (loop->loc,
                      seq_par
                      ? G_("%<seq%> overrides other OpenACC loop specifiers")
                      : G_("%<auto%> conflicts with other OpenACC loop "
                           "specifiers"));
          maybe_auto = false;
          loop->flags &= ~OLF_AUTO;
          if (seq_par)
            {
              loop->flags
                &= ~((GOMP_DIM_MASK (GOMP_DIM_MAX) - 1) << OLF_DIM_BASE);
              this_mask = 0;
            }
        }

      if (maybe_auto && (loop->flags & OLF_INDEPENDENT))
        {
          loop->flags |= OLF_AUTO;
          /* GOMP_DIM_MAX bit signals "auto partitioning needed".  */
          mask_all |= GOMP_DIM_MASK (GOMP_DIM_MAX);
        }
    }

  if (this_mask & outer_mask)
    {
      /* This loop re-uses an axis already claimed by an ancestor (or
         disallowed by the containing routine's level).  */
      const oacc_loop *outer;
      for (outer = loop->parent; outer; outer = outer->parent)
        if ((outer->mask | outer->e_mask) & this_mask)
          break;

      if (noisy)
        {
          if (outer)
            {
              error_at (loop->loc,
                        loop->routine
                        ? G_("routine call uses same OpenACC parallelism"
                             " as containing loop")
                        : G_("inner loop uses same OpenACC parallelism"
                             " as containing loop"));
              inform (outer->loc, "containing loop here");
            }
          else
            error_at (loop->loc,
                      loop->routine
                      ? G_("routine call uses OpenACC parallelism disallowed"
                           " by containing routine")
                      : G_("loop uses OpenACC parallelism disallowed"
                           " by containing routine"));

          if (loop->routine)
            inform (DECL_SOURCE_LOCATION (loop->routine),
                    "routine %qD declared here", loop->routine);
        }
      this_mask &= ~outer_mask;
    }
  else
    {
      /* Axes must nest: an inner loop may only use axes strictly
         inside those of its ancestors.  */
      unsigned outermost = least_bit_hwi (this_mask);

      if (outermost && outermost <= outer_mask)
        {
          if (noisy)
            {
              error_at (loop->loc,
                        "incorrectly nested OpenACC loop parallelism");

              const oacc_loop *outer;
              for (outer = loop->parent;
                   outer->flags && outer->flags < outermost;
                   outer = outer->parent)
                continue;
              inform (outer->loc, "containing loop here");
            }

          this_mask &= ~outermost;
        }
    }

  mask_all |= this_mask;

  if (loop->flags & OLF_TILE)
    {
      /* When tiling, vector goes to the element loop, and failing
         that we put worker there.  The std doesn't contemplate
         specifying all three.  We choose to put worker and vector on
         the element loops in that case.  */
      unsigned this_e_mask = this_mask & GOMP_DIM_MASK (GOMP_DIM_VECTOR);
      if (!this_e_mask || this_mask & GOMP_DIM_MASK (GOMP_DIM_GANG))
        this_e_mask |= this_mask & GOMP_DIM_MASK (GOMP_DIM_WORKER);

      loop->e_mask = this_e_mask;
      this_mask ^= this_e_mask;
    }

  loop->mask = this_mask;

  if (dump_file)
    fprintf (dump_file, "Loop %s:%d user specified %d & %d\n",
             LOCATION_FILE (loop->loc), LOCATION_LINE (loop->loc),
             loop->mask, loop->e_mask);

  if (loop->child)
    {
      unsigned tmp_mask = outer_mask | this_mask | loop->e_mask;
      loop->inner = oacc_loop_fixed_partitions (loop->child, tmp_mask);
      mask_all |= loop->inner;
    }

  if (loop->sibling)
    mask_all |= oacc_loop_fixed_partitions (loop->sibling, outer_mask);

  return mask_all;
}
/* Walk the OpenACC loop hierarchy to assign auto-partitioned loops.
   OUTER_MASK is the partitioning this loop is contained within.
   OUTER_ASSIGN is true if an outer loop is being auto-partitioned.
   Return the cumulative partitioning used by this loop, siblings and
   children.  */

static unsigned
oacc_loop_auto_partitions (oacc_loop *loop, unsigned outer_mask,
                           bool outer_assign)
{
  /* Only 'auto independent' loops are auto-partitioned.  */
  bool assign = (loop->flags & OLF_AUTO) && (loop->flags & OLF_INDEPENDENT);
  bool noisy = true;
  bool tiling = loop->flags & OLF_TILE;

#ifdef ACCEL_COMPILER
  /* When device_type is supported, we want the device compiler to be
     noisy, if the loop parameters are device_type-specific.  */
  noisy = false;
#endif

  if (assign && (!outer_assign || loop->inner))
    {
      /* Allocate outermost and non-innermost loops at the outermost
         non-innermost available level.  */
      unsigned this_mask = GOMP_DIM_MASK (GOMP_DIM_GANG);

      /* Find the first outermost available partition.  */
      while (this_mask <= outer_mask)
        this_mask <<= 1;

      /* Grab two axes if tiling, and we've not assigned anything.  */
      if (tiling && !(loop->mask | loop->e_mask))
        this_mask |= this_mask << 1;

      /* Prohibit the innermost partitioning at the moment.  */
      this_mask &= GOMP_DIM_MASK (GOMP_DIM_MAX - 1) - 1;

      /* Don't use any dimension explicitly claimed by an inner loop.  */
      this_mask &= ~loop->inner;

      if (tiling && !loop->e_mask)
        {
          /* If we got two axes, allocate the inner one to the element
             loop.  */
          loop->e_mask = this_mask & (this_mask << 1);
          this_mask ^= loop->e_mask;
        }

      loop->mask |= this_mask;
    }

  if (loop->child)
    {
      unsigned tmp_mask = outer_mask | loop->mask | loop->e_mask;
      loop->inner = oacc_loop_auto_partitions (loop->child, tmp_mask,
                                               outer_assign | assign);
    }

  if (assign && (!loop->mask || (tiling && !loop->e_mask) || !outer_assign))
    {
      /* Allocate the loop at the innermost available level.  Note
         that we do this even if we already assigned this loop the
         outermost available level above.  That way we'll partition
         this along 2 axes, if they are available.  */
      unsigned this_mask = 0;

      /* Determine the outermost partitioning used within this loop.  */
      this_mask = loop->inner | GOMP_DIM_MASK (GOMP_DIM_MAX);
      this_mask = least_bit_hwi (this_mask);

      /* Pick the partitioning just inside that one.  */
      this_mask >>= 1;

      /* And avoid picking one used by an outer loop.  */
      this_mask &= ~outer_mask;

      /* If tiling and we failed completely above, grab the next one
         too.  Making sure it doesn't hit an outer loop.  */
      if (tiling)
        {
          this_mask &= ~(loop->e_mask | loop->mask);
          unsigned tile_mask = ((this_mask >> 1)
                                & ~(outer_mask | loop->e_mask | loop->mask));

          if (tile_mask || loop->mask)
            {
              loop->e_mask |= this_mask;
              this_mask = tile_mask;
            }
          if (!loop->e_mask && noisy)
            warning_at (loop->loc, 0,
                        "insufficient partitioning available"
                        " to parallelize element loop");
        }

      loop->mask |= this_mask;
      if (!loop->mask && noisy)
        warning_at (loop->loc, 0,
                    tiling
                    ? G_("insufficient partitioning available"
                         " to parallelize tile loop")
                    : G_("insufficient partitioning available"
                         " to parallelize loop"));
    }

  if (assign && dump_file)
    fprintf (dump_file, "Auto loop %s:%d assigned %d & %d\n",
             LOCATION_FILE (loop->loc), LOCATION_LINE (loop->loc),
             loop->mask, loop->e_mask);

  unsigned inner_mask = 0;

  if (loop->sibling)
    inner_mask |= oacc_loop_auto_partitions (loop->sibling,
                                             outer_mask, outer_assign);

  inner_mask |= loop->inner | loop->mask | loop->e_mask;

  return inner_mask;
}
/* Walk the OpenACC loop hierarchy to check and assign partitioning
   axes: first the fixed (programmer-specified) partitions, then —
   if the GOMP_DIM_MAX bit signalled any 'auto' loops — the automatic
   ones.  Return mask of partitioning.  */

static unsigned
oacc_loop_partition (oacc_loop *loop, unsigned outer_mask)
{
  unsigned mask_all = oacc_loop_fixed_partitions (loop, outer_mask);

  if (mask_all & GOMP_DIM_MASK (GOMP_DIM_MAX))
    {
      mask_all ^= GOMP_DIM_MASK (GOMP_DIM_MAX);
      mask_all |= oacc_loop_auto_partitions (loop, outer_mask, false);
    }
  return mask_all;
}
1724 /* Default fork/join early expander. Delete the function calls if
1725 there is no RTL expander. */
1727 bool
1728 default_goacc_fork_join (gcall *ARG_UNUSED (call),
1729 const int *ARG_UNUSED (dims), bool is_fork)
1731 if (is_fork)
1732 return targetm.have_oacc_fork ();
1733 else
1734 return targetm.have_oacc_join ();
/* Default goacc.reduction early expander.

   LHS-opt = IFN_REDUCTION (KIND, RES_PTR, VAR, LEVEL, OP, OFFSET)
   If RES_PTR is not integer-zerop:
       SETUP - emit 'LHS = *RES_PTR', LHS = NULL
       TEARDOWN - emit '*RES_PTR = VAR'
   If LHS is not NULL
       emit 'LHS = VAR'

   The built sequence replaces CALL in the IL.  */

void
default_goacc_reduction (gcall *call)
{
  unsigned code = (unsigned)TREE_INT_CST_LOW (gimple_call_arg (call, 0));
  gimple_stmt_iterator gsi = gsi_for_stmt (call);
  tree lhs = gimple_call_lhs (call);
  tree var = gimple_call_arg (call, 2);
  gimple_seq seq = NULL;

  if (code == IFN_GOACC_REDUCTION_SETUP
      || code == IFN_GOACC_REDUCTION_TEARDOWN)
    {
      /* Setup and Teardown need to copy from/to the receiver object,
         if there is one.  */
      tree ref_to_res = gimple_call_arg (call, 1);

      if (!integer_zerop (ref_to_res))
        {
          tree dst = build_simple_mem_ref (ref_to_res);
          tree src = var;

          if (code == IFN_GOACC_REDUCTION_SETUP)
            {
              /* SETUP copies the other way, and the final
                 'LHS = VAR' copy below is suppressed.  */
              src = dst;
              dst = lhs;
              lhs = NULL;
            }
          gimple_seq_add_stmt (&seq, gimple_build_assign (dst, src));
        }
    }

  /* Copy VAR to LHS, if there is an LHS.  */
  if (lhs)
    gimple_seq_add_stmt (&seq, gimple_build_assign (lhs, var));

  gsi_replace_with_seq (&gsi, seq, true);
}
/* State passed to oacc_rewrite_var_decl via walk_stmt_info.info.  */

struct var_decl_rewrite_info
{
  gimple *stmt;                        /* Statement being rewritten.  */
  hash_map<tree, tree> *adjusted_vars; /* Maps old VAR_DECL -> replacement.  */
  bool avoid_pointer_conversion;       /* Rewrite ADDR_EXPRs in place rather
                                          than via inserted SSA copies.  */
  bool modified;                       /* Set when any rewrite occurred.  */
};
/* Helper function for execute_oacc_device_lower.  Rewrite VAR_DECLs (by
   themselves or wrapped in various other nodes) according to ADJUSTED_VARS in
   the var_decl_rewrite_info pointed to via DATA.  Used as part of coercing
   gang-private variables in OpenACC offload regions to reside in GPU shared
   memory.  Walk_tree callback; returns NULL_TREE to continue walking.  */

static tree
oacc_rewrite_var_decl (tree *tp, int *walk_subtrees, void *data)
{
  walk_stmt_info *wi = (walk_stmt_info *) data;
  var_decl_rewrite_info *info = (var_decl_rewrite_info *) wi->info;

  if (TREE_CODE (*tp) == ADDR_EXPR)
    {
      tree arg = TREE_OPERAND (*tp, 0);
      tree *new_arg = info->adjusted_vars->get (arg);

      if (new_arg)
        {
          if (info->avoid_pointer_conversion)
            {
              /* Replace the address expression directly.  */
              *tp = build_fold_addr_expr (*new_arg);
              info->modified = true;
              *walk_subtrees = 0;
            }
          else
            {
              /* Take the new variable's address into an SSA name, then
                 convert it to the original pointer type, inserting both
                 statements before INFO->stmt.  */
              gimple_stmt_iterator gsi = gsi_for_stmt (info->stmt);
              tree repl = build_fold_addr_expr (*new_arg);
              gimple *stmt1
                = gimple_build_assign (make_ssa_name (TREE_TYPE (repl)), repl);
              tree conv = convert_to_pointer (TREE_TYPE (*tp),
                                              gimple_assign_lhs (stmt1));
              gimple *stmt2
                = gimple_build_assign (make_ssa_name (TREE_TYPE (*tp)), conv);
              gsi_insert_before (&gsi, stmt1, GSI_SAME_STMT);
              gsi_insert_before (&gsi, stmt2, GSI_SAME_STMT);
              *tp = gimple_assign_lhs (stmt2);
              info->modified = true;
              *walk_subtrees = 0;
            }
        }
    }
  else if (TREE_CODE (*tp) == COMPONENT_REF || TREE_CODE (*tp) == ARRAY_REF)
    {
      /* Find the innermost base of a (possibly nested) component/array
         reference.  */
      tree *base = &TREE_OPERAND (*tp, 0);

      while (TREE_CODE (*base) == COMPONENT_REF
             || TREE_CODE (*base) == ARRAY_REF)
        base = &TREE_OPERAND (*base, 0);

      if (TREE_CODE (*base) != VAR_DECL)
        return NULL;

      tree *new_decl = info->adjusted_vars->get (*base);
      if (!new_decl)
        return NULL;

      int base_quals = TYPE_QUALS (TREE_TYPE (*new_decl));
      tree field = TREE_OPERAND (*tp, 1);

      /* Adjust the type of the field.  */
      int field_quals = TYPE_QUALS (TREE_TYPE (field));
      if (TREE_CODE (field) == FIELD_DECL && field_quals != base_quals)
        {
          tree *field_type = &TREE_TYPE (field);
          while (TREE_CODE (*field_type) == ARRAY_TYPE)
            field_type = &TREE_TYPE (*field_type);
          field_quals |= base_quals;
          *field_type = build_qualified_type (*field_type, field_quals);
        }

      /* Adjust the type of the component ref itself.  */
      tree comp_type = TREE_TYPE (*tp);
      int comp_quals = TYPE_QUALS (comp_type);
      if (TREE_CODE (*tp) == COMPONENT_REF && comp_quals != base_quals)
        {
          comp_quals |= base_quals;
          TREE_TYPE (*tp)
            = build_qualified_type (comp_type, comp_quals);
        }

      *base = *new_decl;
      info->modified = true;
    }
  else if (TREE_CODE (*tp) == VAR_DECL)
    {
      tree *new_decl = info->adjusted_vars->get (*tp);
      if (new_decl)
        {
          *tp = *new_decl;
          info->modified = true;
        }
    }

  return NULL_TREE;
}
/* Return TRUE if CALL is a call to a builtin atomic/sync operation.
   The case labels are generated from sync-builtins.def via the
   DEF_SYNC_BUILTIN macro-include idiom.  */

static bool
is_sync_builtin_call (gcall *call)
{
  tree callee = gimple_call_fndecl (call);

  if (callee != NULL_TREE
      && gimple_call_builtin_p (call, BUILT_IN_NORMAL))
    switch (DECL_FUNCTION_CODE (callee))
      {
#undef DEF_SYNC_BUILTIN
#define DEF_SYNC_BUILTIN(ENUM, NAME, TYPE, ATTRS) case ENUM:
#include "sync-builtins.def"
#undef DEF_SYNC_BUILTIN
	return true;

      default:
	;
      }

  return false;
}
/* Main entry point for oacc transformations which run on the device
   compiler after LTO, so we know what the target device is at this
   point (including the host fallback).  Classifies the current
   offloaded function, discovers and partitions its OpenACC loops,
   validates the launch dimensions, and rewrites the loop markers.
   Returns a TODO_* flag set (TODO_discard_function for discarded
   'nohost' routines on the host), or 0.  */

static unsigned int
execute_oacc_loop_designation ()
{
  tree attrs = oacc_get_fn_attrib (current_function_decl);

  if (!attrs)
    /* Not an offloaded function.  */
    return 0;

  /* Parse the default dim argument exactly once.  flag_openacc_dims is
     repointed at itself afterwards as a "already parsed" sentinel.  */
  if ((const void *)flag_openacc_dims != &flag_openacc_dims)
    {
      oacc_parse_default_dims (flag_openacc_dims);
      flag_openacc_dims = (char *)&flag_openacc_dims;
    }

  /* Exactly one of the following construct kinds applies.  */
  bool is_oacc_parallel
    = (lookup_attribute ("oacc parallel",
                         DECL_ATTRIBUTES (current_function_decl)) != NULL);
  bool is_oacc_kernels
    = (lookup_attribute ("oacc kernels",
                         DECL_ATTRIBUTES (current_function_decl)) != NULL);
  bool is_oacc_serial
    = (lookup_attribute ("oacc serial",
                         DECL_ATTRIBUTES (current_function_decl)) != NULL);
  bool is_oacc_parallel_kernels_parallelized
    = (lookup_attribute ("oacc parallel_kernels_parallelized",
                         DECL_ATTRIBUTES (current_function_decl)) != NULL);
  bool is_oacc_parallel_kernels_gang_single
    = (lookup_attribute ("oacc parallel_kernels_gang_single",
                         DECL_ATTRIBUTES (current_function_decl)) != NULL);
  int fn_level = oacc_fn_attrib_level (attrs);
  bool is_oacc_routine = (fn_level >= 0);
  gcc_checking_assert (is_oacc_parallel
                       + is_oacc_kernels
                       + is_oacc_serial
                       + is_oacc_parallel_kernels_parallelized
                       + is_oacc_parallel_kernels_gang_single
                       + is_oacc_routine
                       == 1);

  bool is_oacc_kernels_parallelized
    = (lookup_attribute ("oacc kernels parallelized",
                         DECL_ATTRIBUTES (current_function_decl)) != NULL);
  if (is_oacc_kernels_parallelized)
    gcc_checking_assert (is_oacc_kernels);

  if (dump_file)
    {
      if (is_oacc_parallel)
        fprintf (dump_file, "Function is OpenACC parallel offload\n");
      else if (is_oacc_kernels)
        fprintf (dump_file, "Function is %s OpenACC kernels offload\n",
                 (is_oacc_kernels_parallelized
                  ? "parallelized" : "unparallelized"));
      else if (is_oacc_serial)
        fprintf (dump_file, "Function is OpenACC serial offload\n");
      else if (is_oacc_parallel_kernels_parallelized)
        fprintf (dump_file, "Function is %s OpenACC kernels offload\n",
                 "parallel_kernels_parallelized");
      else if (is_oacc_parallel_kernels_gang_single)
        fprintf (dump_file, "Function is %s OpenACC kernels offload\n",
                 "parallel_kernels_gang_single");
      else if (is_oacc_routine)
        fprintf (dump_file, "Function is OpenACC routine level %d\n",
                 fn_level);
      else
        gcc_unreachable ();
    }

  /* This doesn't belong into 'pass_oacc_loop_designation' conceptually, but
     it's a convenient place, so...  */
  if (is_oacc_routine)
    {
      tree attr = lookup_attribute ("omp declare target",
                                    DECL_ATTRIBUTES (current_function_decl));
      gcc_checking_assert (attr);
      tree clauses = TREE_VALUE (attr);
      gcc_checking_assert (clauses);

      /* Should this OpenACC routine be discarded?  */
      bool discard = false;

      tree clause_nohost = omp_find_clause (clauses, OMP_CLAUSE_NOHOST);
      if (dump_file)
        fprintf (dump_file,
                 "OpenACC routine '%s' %s '%s' clause.\n",
                 lang_hooks.decl_printable_name (current_function_decl, 2),
                 clause_nohost ? "has" : "doesn't have",
                 omp_clause_code_name[OMP_CLAUSE_NOHOST]);
      /* Host compiler, 'nohost' clause?  */
#ifndef ACCEL_COMPILER
      if (clause_nohost)
        discard = true;
#endif

      if (dump_file)
        fprintf (dump_file,
                 "OpenACC routine '%s' %sdiscarded.\n",
                 lang_hooks.decl_printable_name (current_function_decl, 2),
                 discard ? "" : "not ");
      if (discard)
        {
          /* Pretend the function was emitted so nothing else tries.  */
          TREE_ASM_WRITTEN (current_function_decl) = 1;
          return TODO_discard_function;
        }
    }

  /* Unparallelized OpenACC kernels constructs must get launched as 1 x 1 x 1
     kernels, so remove the parallelism dimensions function attributes
     potentially set earlier on.  */
  if (is_oacc_kernels && !is_oacc_kernels_parallelized)
    {
      oacc_set_fn_attrib (current_function_decl, NULL, NULL);
      attrs = oacc_get_fn_attrib (current_function_decl);
    }

  /* Discover, partition and process the loops.  */
  oacc_loop *loops = oacc_loop_discovery ();

  unsigned outer_mask = 0;
  if (is_oacc_routine)
    outer_mask = GOMP_DIM_MASK (fn_level) - 1;
  unsigned used_mask = oacc_loop_partition (loops, outer_mask);
  /* OpenACC kernels constructs are special: they currently don't use the
     generic oacc_loop infrastructure and attribute/dimension processing.  */
  if (is_oacc_kernels && is_oacc_kernels_parallelized)
    {
      /* Parallelized OpenACC kernels constructs use gang parallelism.  See
         also tree-parloops.c:create_parallel_loop.  */
      used_mask |= GOMP_DIM_MASK (GOMP_DIM_GANG);
    }

  int dims[GOMP_DIM_MAX];
  oacc_validate_dims (current_function_decl, attrs, dims, fn_level, used_mask);

  if (dump_file)
    {
      const char *comma = "Compute dimensions [";
      for (int ix = 0; ix != GOMP_DIM_MAX; ix++, comma = ", ")
        fprintf (dump_file, "%s%d", comma, dims[ix]);
      fprintf (dump_file, "]\n");
    }

  /* Verify that for OpenACC 'kernels' decomposed "gang-single" parts we launch
     a single gang only.  */
  if (is_oacc_parallel_kernels_gang_single)
    gcc_checking_assert (dims[GOMP_DIM_GANG] == 1);

  oacc_loop_process (loops);
  if (dump_file)
    {
      fprintf (dump_file, "OpenACC loops\n");
      dump_oacc_loop (dump_file, loops, 0);
      fprintf (dump_file, "\n");
    }
  if (dump_enabled_p ())
    {
      oacc_loop *l = loops;
      /* OpenACC kernels constructs are special: they currently don't use the
         generic oacc_loop infrastructure.  */
      if (is_oacc_kernels)
        {
          /* Create a fake oacc_loop for diagnostic purposes.  */
          l = new_oacc_loop_raw (NULL,
                                 DECL_SOURCE_LOCATION (current_function_decl));
          l->mask = used_mask;
        }
      else
        {
          /* Skip the outermost, dummy OpenACC loop.  */
          l = l->child;
        }
      if (l)
        inform_oacc_loop (l);
      if (is_oacc_kernels)
        free_oacc_loop (l);
    }

  free_oacc_loop (loops);

  return 0;
}
2102 static unsigned int
2103 execute_oacc_device_lower ()
2105 tree attrs = oacc_get_fn_attrib (current_function_decl);
2107 if (!attrs)
2108 /* Not an offloaded function. */
2109 return 0;
2111 int dims[GOMP_DIM_MAX];
2112 for (unsigned i = 0; i < GOMP_DIM_MAX; i++)
2113 dims[i] = oacc_get_fn_dim_size (current_function_decl, i);
2115 hash_map<tree, tree> adjusted_vars;
2117 /* Now lower internal loop functions to target-specific code
2118 sequences. */
2119 basic_block bb;
2120 FOR_ALL_BB_FN (bb, cfun)
2121 for (gimple_stmt_iterator gsi = gsi_start_bb (bb); !gsi_end_p (gsi);)
2123 gimple *stmt = gsi_stmt (gsi);
2124 if (!is_gimple_call (stmt))
2126 gsi_next (&gsi);
2127 continue;
2130 gcall *call = as_a <gcall *> (stmt);
2131 if (!gimple_call_internal_p (call))
2133 gsi_next (&gsi);
2134 continue;
2137 /* Rewind to allow rescan. */
2138 gsi_prev (&gsi);
2139 bool rescan = false, remove = false;
2140 enum internal_fn ifn_code = gimple_call_internal_fn (call);
2142 switch (ifn_code)
2144 default: break;
2146 case IFN_GOACC_TILE:
2147 oacc_xform_tile (call);
2148 rescan = true;
2149 break;
2151 case IFN_GOACC_LOOP:
2152 oacc_xform_loop (call);
2153 rescan = true;
2154 break;
2156 case IFN_GOACC_REDUCTION:
2157 /* Mark the function for SSA renaming. */
2158 mark_virtual_operands_for_renaming (cfun);
2160 /* If the level is -1, this ended up being an unused
2161 axis. Handle as a default. */
2162 if (integer_minus_onep (gimple_call_arg (call, 3)))
2163 default_goacc_reduction (call);
2164 else
2165 targetm.goacc.reduction (call);
2166 rescan = true;
2167 break;
2169 case IFN_UNIQUE:
2171 enum ifn_unique_kind kind
2172 = ((enum ifn_unique_kind)
2173 TREE_INT_CST_LOW (gimple_call_arg (call, 0)));
2175 switch (kind)
2177 default:
2178 break;
2180 case IFN_UNIQUE_OACC_FORK:
2181 case IFN_UNIQUE_OACC_JOIN:
2182 if (integer_minus_onep (gimple_call_arg (call, 2)))
2183 remove = true;
2184 else if (!targetm.goacc.fork_join
2185 (call, dims, kind == IFN_UNIQUE_OACC_FORK))
2186 remove = true;
2187 break;
2189 case IFN_UNIQUE_OACC_HEAD_MARK:
2190 case IFN_UNIQUE_OACC_TAIL_MARK:
2191 remove = true;
2192 break;
2194 case IFN_UNIQUE_OACC_PRIVATE:
2196 dump_flags_t l_dump_flags
2197 = get_openacc_privatization_dump_flags ();
2199 location_t loc = gimple_location (stmt);
2200 if (LOCATION_LOCUS (loc) == UNKNOWN_LOCATION)
2201 loc = DECL_SOURCE_LOCATION (current_function_decl);
2202 const dump_user_location_t d_u_loc
2203 = dump_user_location_t::from_location_t (loc);
2205 HOST_WIDE_INT level
2206 = TREE_INT_CST_LOW (gimple_call_arg (call, 2));
2207 gcc_checking_assert (level == -1
2208 || (level >= 0
2209 && level < GOMP_DIM_MAX));
2210 for (unsigned i = 3;
2211 i < gimple_call_num_args (call);
2212 i++)
2214 static char const *const axes[] =
2215 /* Must be kept in sync with GOMP_DIM enumeration. */
2216 { "gang", "worker", "vector" };
2218 tree arg = gimple_call_arg (call, i);
2219 gcc_checking_assert (TREE_CODE (arg) == ADDR_EXPR);
2220 tree decl = TREE_OPERAND (arg, 0);
2221 if (dump_enabled_p ())
2222 /* PR100695 "Format decoder, quoting in 'dump_printf' etc." */
2223 #if __GNUC__ >= 10
2224 # pragma GCC diagnostic push
2225 # pragma GCC diagnostic ignored "-Wformat"
2226 #endif
2227 dump_printf_loc (l_dump_flags, d_u_loc,
2228 "variable %<%T%> ought to be"
2229 " adjusted for OpenACC"
2230 " privatization level: %qs\n",
2231 decl,
2232 (level == -1
2233 ? "UNKNOWN" : axes[level]));
2234 #if __GNUC__ >= 10
2235 # pragma GCC diagnostic pop
2236 #endif
2237 bool adjusted;
2238 if (level == -1)
2239 adjusted = false;
2240 else if (!targetm.goacc.adjust_private_decl)
2241 adjusted = false;
2242 else if (level == GOMP_DIM_VECTOR)
2244 /* That's the default behavior. */
2245 adjusted = true;
2247 else
2249 tree oldtype = TREE_TYPE (decl);
2250 tree newdecl
2251 = targetm.goacc.adjust_private_decl (loc, decl,
2252 level);
2253 adjusted = (TREE_TYPE (newdecl) != oldtype
2254 || newdecl != decl);
2255 if (adjusted)
2256 adjusted_vars.put (decl, newdecl);
2258 if (adjusted
2259 && dump_enabled_p ())
2260 /* PR100695 "Format decoder, quoting in 'dump_printf' etc." */
2261 #if __GNUC__ >= 10
2262 # pragma GCC diagnostic push
2263 # pragma GCC diagnostic ignored "-Wformat"
2264 #endif
2265 dump_printf_loc (l_dump_flags, d_u_loc,
2266 "variable %<%T%> adjusted for"
2267 " OpenACC privatization level:"
2268 " %qs\n",
2269 decl, axes[level]);
2270 #if __GNUC__ >= 10
2271 # pragma GCC diagnostic pop
2272 #endif
2274 remove = true;
2276 break;
2278 break;
2282 if (gsi_end_p (gsi))
2283 /* We rewound past the beginning of the BB. */
2284 gsi = gsi_start_bb (bb);
2285 else
2286 /* Undo the rewind. */
2287 gsi_next (&gsi);
2289 if (remove)
2291 if (gimple_vdef (call))
2292 replace_uses_by (gimple_vdef (call), gimple_vuse (call));
2293 if (gimple_call_lhs (call))
2295 /* Propagate the data dependency var. */
2296 gimple *ass = gimple_build_assign (gimple_call_lhs (call),
2297 gimple_call_arg (call, 1));
2298 gsi_replace (&gsi, ass, false);
2300 else
2301 gsi_remove (&gsi, true);
2303 else if (!rescan)
2304 /* If not rescanning, advance over the call. */
2305 gsi_next (&gsi);
2308 /* Regarding the OpenACC privatization level, we're currently only looking at
2309 making the gang-private level work. Regarding that, we have the following
2310 configurations:
2312 - GCN offloading: 'targetm.goacc.adjust_private_decl' does the work (in
2313 particular, change 'TREE_TYPE', etc.) and there is no
2314 'targetm.goacc.expand_var_decl'.
2316 - nvptx offloading: 'targetm.goacc.adjust_private_decl' only sets a
2317 marker and then 'targetm.goacc.expand_var_decl' does the work.
2319 Eventually (in particular, for worker-private level?), both
2320 'targetm.goacc.adjust_private_decl' and 'targetm.goacc.expand_var_decl'
2321 may need to do things, but that's currently not meant to be addressed, and
2322 thus not fully worked out and implemented, and thus untested. Hence,
2323 'assert' what currently is implemented/tested, only. */
2325 if (targetm.goacc.expand_var_decl)
2326 gcc_assert (adjusted_vars.is_empty ());
2328 /* Make adjustments to gang-private local variables if required by the
2329 target, e.g. forcing them into a particular address space. Afterwards,
2330 ADDR_EXPR nodes which have adjusted variables as their argument need to
2331 be modified in one of two ways:
2333 1. They can be recreated, making a pointer to the variable in the new
2334 address space, or
2336 2. The address of the variable in the new address space can be taken,
2337 converted to the default (original) address space, and the result of
2338 that conversion subsituted in place of the original ADDR_EXPR node.
2340 Which of these is done depends on the gimple statement being processed.
2341 At present atomic operations and inline asms use (1), and everything else
2342 uses (2). At least on AMD GCN, there are atomic operations that work
2343 directly in the LDS address space.
2345 COMPONENT_REFS, ARRAY_REFS and plain VAR_DECLs are also rewritten to use
2346 the new decl, adjusting types of appropriate tree nodes as necessary. */
2348 if (targetm.goacc.adjust_private_decl
2349 && !adjusted_vars.is_empty ())
2351 FOR_ALL_BB_FN (bb, cfun)
2352 for (gimple_stmt_iterator gsi = gsi_start_bb (bb);
2353 !gsi_end_p (gsi);
2354 gsi_next (&gsi))
2356 gimple *stmt = gsi_stmt (gsi);
2357 walk_stmt_info wi;
2358 var_decl_rewrite_info info;
2360 info.avoid_pointer_conversion
2361 = (is_gimple_call (stmt)
2362 && is_sync_builtin_call (as_a <gcall *> (stmt)))
2363 || gimple_code (stmt) == GIMPLE_ASM;
2364 info.stmt = stmt;
2365 info.modified = false;
2366 info.adjusted_vars = &adjusted_vars;
2368 memset (&wi, 0, sizeof (wi));
2369 wi.info = &info;
2371 walk_gimple_op (stmt, oacc_rewrite_var_decl, &wi);
2373 if (info.modified)
2374 update_stmt (stmt);
2378 return 0;
2381 /* Default launch dimension validator. Force everything to 1. A
2382 backend that wants to provide larger dimensions must override this
2383 hook. */
2385 bool
2386 default_goacc_validate_dims (tree ARG_UNUSED (decl), int *dims,
2387 int ARG_UNUSED (fn_level),
2388 unsigned ARG_UNUSED (used))
2390 bool changed = false;
2392 for (unsigned ix = 0; ix != GOMP_DIM_MAX; ix++)
2394 if (dims[ix] != 1)
2396 dims[ix] = 1;
2397 changed = true;
2401 return changed;
/* Default dimension bound is unknown on accelerator and 1 on host.
   Returns the maximum size of the launch dimension AXIS, with 0
   meaning "no compile-time bound".  */

int
default_goacc_dim_limit (int ARG_UNUSED (axis))
{
#ifdef ACCEL_COMPILER
  /* On the device, the actual dimension is only known at launch time.  */
  return 0;
#else
  /* The host fallback executes everything with a single element per axis.  */
  return 1;
#endif
}
2416 namespace {
/* Pass descriptor for the OpenACC loop-designation pass, instantiated by
   pass_oacc_loop_designation below.  */

const pass_data pass_data_oacc_loop_designation =
{
  GIMPLE_PASS, /* type */
  "oaccloops", /* name */
  OPTGROUP_OMP, /* optinfo_flags */
  TV_NONE, /* tv_id */
  PROP_cfg, /* properties_required */
  0 /* Possibly PROP_gimple_eomp.  */, /* properties_provided */
  0, /* properties_destroyed */
  0, /* todo_flags_start */
  TODO_update_ssa | TODO_cleanup_cfg, /* todo_flags_finish */
};
/* Gimple pass wrapping execute_oacc_loop_designation; only enabled when
   compiling with -fopenacc.  */

class pass_oacc_loop_designation : public gimple_opt_pass
{
public:
  pass_oacc_loop_designation (gcc::context *ctxt)
    : gimple_opt_pass (pass_data_oacc_loop_designation, ctxt)
  {}

  /* opt_pass methods: */
  virtual bool gate (function *) { return flag_openacc; };

  virtual unsigned int execute (function *)
    {
      return execute_oacc_loop_designation ();
    }

}; // class pass_oacc_loop_designation
/* Pass descriptor for the OpenACC device-lowering pass, instantiated by
   pass_oacc_device_lower below.  */

const pass_data pass_data_oacc_device_lower =
{
  GIMPLE_PASS, /* type */
  "oaccdevlow", /* name */
  OPTGROUP_OMP, /* optinfo_flags */
  TV_NONE, /* tv_id */
  PROP_cfg, /* properties_required */
  0 /* Possibly PROP_gimple_eomp.  */, /* properties_provided */
  0, /* properties_destroyed */
  0, /* todo_flags_start */
  TODO_update_ssa | TODO_cleanup_cfg, /* todo_flags_finish */
};
/* Gimple pass wrapping execute_oacc_device_lower; only enabled when
   compiling with -fopenacc.  */

class pass_oacc_device_lower : public gimple_opt_pass
{
public:
  pass_oacc_device_lower (gcc::context *ctxt)
    : gimple_opt_pass (pass_data_oacc_device_lower, ctxt)
  {}

  /* opt_pass methods: */
  virtual bool gate (function *) { return flag_openacc; };

  virtual unsigned int execute (function *)
    {
      return execute_oacc_device_lower ();
    }

}; // class pass_oacc_device_lower
2478 } // anon namespace
/* Factory registered with the pass manager: instantiate the OpenACC
   loop-designation pass.  */

gimple_opt_pass *
make_pass_oacc_loop_designation (gcc::context *ctxt)
{
  return new pass_oacc_loop_designation (ctxt);
}
/* Factory registered with the pass manager: instantiate the OpenACC
   device-lowering pass.  */

gimple_opt_pass *
make_pass_oacc_device_lower (gcc::context *ctxt)
{
  return new pass_oacc_device_lower (ctxt);
}
/* Rewrite GOMP_SIMT_ENTER_ALLOC call given by GSI and remove the preceding
   GOMP_SIMT_ENTER call identifying the privatized variables, which are
   turned to structure fields and receive a DECL_VALUE_EXPR accordingly.
   Set *REGIMPLIFY to true, except if no privatized variables were seen.  */

static void
ompdevlow_adjust_simt_enter (gimple_stmt_iterator *gsi, bool *regimplify)
{
  gimple *alloc_stmt = gsi_stmt (*gsi);
  tree simtrec = gimple_call_lhs (alloc_stmt);
  /* Arg 0 of the ALLOC call is the SSA name produced by the matching
     GOMP_SIMT_ENTER call; follow it back to that statement.  */
  tree simduid = gimple_call_arg (alloc_stmt, 0);
  gimple *enter_stmt = SSA_NAME_DEF_STMT (simduid);
  gcc_assert (gimple_call_internal_p (enter_stmt, IFN_GOMP_SIMT_ENTER));
  /* Build an artificial record type that will hold one field per
     privatized variable; SIMTREC becomes a pointer to it.  */
  tree rectype = lang_hooks.types.make_type (RECORD_TYPE);
  TYPE_ARTIFICIAL (rectype) = TYPE_NAMELESS (rectype) = 1;
  TREE_ADDRESSABLE (rectype) = 1;
  TREE_TYPE (simtrec) = build_pointer_type (rectype);
  /* Args 1..N of the ENTER call are &VAR addresses of the privatized
     variables (or null_pointer_node placeholders to skip).  */
  for (unsigned i = 1; i < gimple_call_num_args (enter_stmt); i++)
    {
      tree *argp = gimple_call_arg_ptr (enter_stmt, i);
      if (*argp == null_pointer_node)
	continue;
      gcc_assert (TREE_CODE (*argp) == ADDR_EXPR
		  && VAR_P (TREE_OPERAND (*argp, 0)));
      tree var = TREE_OPERAND (*argp, 0);

      /* Mirror the variable as a field, preserving alignment and
	 volatility.  */
      tree field = build_decl (DECL_SOURCE_LOCATION (var), FIELD_DECL,
			       DECL_NAME (var), TREE_TYPE (var));
      SET_DECL_ALIGN (field, DECL_ALIGN (var));
      DECL_USER_ALIGN (field) = DECL_USER_ALIGN (var);
      TREE_THIS_VOLATILE (field) = TREE_THIS_VOLATILE (var);

      insert_field_into_struct (rectype, field);

      /* Redirect all uses of VAR to SIMTREC->FIELD via DECL_VALUE_EXPR;
	 the caller must regimplify to materialize this.  */
      tree t = build_simple_mem_ref (simtrec);
      t = build3 (COMPONENT_REF, TREE_TYPE (var), t, field, NULL);
      TREE_THIS_VOLATILE (t) = TREE_THIS_VOLATILE (var);
      SET_DECL_VALUE_EXPR (var, t);
      DECL_HAS_VALUE_EXPR_P (var) = 1;
      *regimplify = true;
    }
  layout_type (rectype);
  tree size = TYPE_SIZE_UNIT (rectype);
  tree align = build_int_cst (TREE_TYPE (size), TYPE_ALIGN_UNIT (rectype));

  /* Replace the ALLOC call with one carrying the now-known size and
     alignment of the record.  */
  alloc_stmt
    = gimple_build_call_internal (IFN_GOMP_SIMT_ENTER_ALLOC, 2, size, align);
  gimple_call_set_lhs (alloc_stmt, simtrec);
  gsi_replace (gsi, alloc_stmt, false);
  /* Demote the ENTER call to a plain copy of its first argument, keeping
     SIMDUID's definition intact.  */
  gimple_stmt_iterator enter_gsi = gsi_for_stmt (enter_stmt);
  enter_stmt = gimple_build_assign (simduid, gimple_call_arg (enter_stmt, 0));
  gsi_replace (&enter_gsi, enter_stmt, false);

  use_operand_p use;
  gimple *exit_stmt;
  if (single_imm_use (simtrec, &use, &exit_stmt))
    {
      /* Clobber the record right before GOMP_SIMT_EXIT so its storage is
	 known dead past that point.  */
      gcc_assert (gimple_call_internal_p (exit_stmt, IFN_GOMP_SIMT_EXIT));
      gimple_stmt_iterator exit_gsi = gsi_for_stmt (exit_stmt);
      tree clobber = build_clobber (rectype);
      exit_stmt = gimple_build_assign (build_simple_mem_ref (simtrec), clobber);
      gsi_insert_before (&exit_gsi, exit_stmt, GSI_SAME_STMT);
    }
  else
    gcc_checking_assert (has_zero_uses (simtrec));
}
2560 /* Callback for walk_gimple_stmt used to scan for SIMT-privatized variables. */
2562 static tree
2563 find_simtpriv_var_op (tree *tp, int *walk_subtrees, void *)
2565 tree t = *tp;
2567 if (VAR_P (t)
2568 && DECL_HAS_VALUE_EXPR_P (t)
2569 && lookup_attribute ("omp simt private", DECL_ATTRIBUTES (t)))
2571 *walk_subtrees = 0;
2572 return t;
2574 return NULL_TREE;
/* Cleanup uses of SIMT placeholder internal functions: on non-SIMT targets,
   VF is 1 and LANE is 0; on SIMT targets, VF is folded to a constant, and
   LANE is kept to be expanded to RTL later on.  Also cleanup all other SIMT
   internal functions on non-SIMT targets, and likewise some SIMD internal
   functions on SIMT targets.  */

static unsigned int
execute_omp_device_lower ()
{
  /* VF == 1 means "not a SIMT target": all SIMT IFNs fold away.  */
  int vf = targetm.simt.vf ? targetm.simt.vf () : 1;
  bool regimplify = false;
  basic_block bb;
  gimple_stmt_iterator gsi;
  bool calls_declare_variant_alt
    = cgraph_node::get (cfun->decl)->calls_declare_variant_alt;
  FOR_EACH_BB_FN (bb, cfun)
    for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
      {
	gimple *stmt = gsi_stmt (gsi);
	if (!is_gimple_call (stmt))
	  continue;
	if (!gimple_call_internal_p (stmt))
	  {
	    /* Non-internal call: the only work here is late resolution of
	       'declare variant' callees, when this function has any.  */
	    if (calls_declare_variant_alt)
	      if (tree fndecl = gimple_call_fndecl (stmt))
		{
		  tree new_fndecl = omp_resolve_declare_variant (fndecl);
		  if (new_fndecl != fndecl)
		    {
		      gimple_call_set_fndecl (stmt, new_fndecl);
		      update_stmt (stmt);
		    }
		}
	    continue;
	  }
	/* RHS stays NULL_TREE when the IFN must be kept for later (SIMT)
	   expansion; otherwise the call is replaced by LHS = RHS.  */
	tree lhs = gimple_call_lhs (stmt), rhs = NULL_TREE;
	tree type = lhs ? TREE_TYPE (lhs) : integer_type_node;
	switch (gimple_call_internal_fn (stmt))
	  {
	  case IFN_GOMP_USE_SIMT:
	    rhs = vf == 1 ? integer_zero_node : integer_one_node;
	    break;
	  case IFN_GOMP_SIMT_ENTER:
	    rhs = vf == 1 ? gimple_call_arg (stmt, 0) : NULL_TREE;
	    goto simtreg_enter_exit;
	  case IFN_GOMP_SIMT_ENTER_ALLOC:
	    if (vf != 1)
	      ompdevlow_adjust_simt_enter (&gsi, &regimplify);
	    rhs = vf == 1 ? null_pointer_node : NULL_TREE;
	    goto simtreg_enter_exit;
	  case IFN_GOMP_SIMT_EXIT:
	  simtreg_enter_exit:
	    if (vf != 1)
	      continue;
	    /* The call goes away on non-SIMT targets; drop its vdef.  */
	    unlink_stmt_vdef (stmt);
	    break;
	  case IFN_GOMP_SIMT_LANE:
	  case IFN_GOMP_SIMT_LAST_LANE:
	    rhs = vf == 1 ? build_zero_cst (type) : NULL_TREE;
	    break;
	  case IFN_GOMP_SIMT_VF:
	    rhs = build_int_cst (type, vf);
	    break;
	  case IFN_GOMP_SIMT_ORDERED_PRED:
	    rhs = vf == 1 ? integer_zero_node : NULL_TREE;
	    if (rhs || !lhs)
	      unlink_stmt_vdef (stmt);
	    break;
	  case IFN_GOMP_SIMT_VOTE_ANY:
	  case IFN_GOMP_SIMT_XCHG_BFLY:
	  case IFN_GOMP_SIMT_XCHG_IDX:
	    rhs = vf == 1 ? gimple_call_arg (stmt, 0) : NULL_TREE;
	    break;
	  case IFN_GOMP_SIMD_LANE:
	  case IFN_GOMP_SIMD_LAST_LANE:
	    /* SIMD placeholders fold the opposite way: only on SIMT
	       targets.  */
	    rhs = vf != 1 ? build_zero_cst (type) : NULL_TREE;
	    break;
	  case IFN_GOMP_SIMD_VF:
	    rhs = vf != 1 ? build_one_cst (type) : NULL_TREE;
	    break;
	  default:
	    continue;
	  }
	if (lhs && !rhs)
	  continue;
	stmt = lhs ? gimple_build_assign (lhs, rhs) : gimple_build_nop ();
	gsi_replace (&gsi, stmt, false);
      }
  /* ompdevlow_adjust_simt_enter installed DECL_VALUE_EXPRs; walk backwards
     so removals do not invalidate the iterator, regimplifying statements
     that mention SIMT-privatized variables.  */
  if (regimplify)
    FOR_EACH_BB_REVERSE_FN (bb, cfun)
      for (gsi = gsi_last_bb (bb); !gsi_end_p (gsi); gsi_prev (&gsi))
	if (walk_gimple_stmt (&gsi, NULL, find_simtpriv_var_op, NULL))
	  {
	    if (gimple_clobber_p (gsi_stmt (gsi)))
	      gsi_remove (&gsi, true);
	    else
	      gimple_regimplify_operands (gsi_stmt (gsi), &gsi);
	  }
  if (vf != 1)
    cfun->has_force_vectorize_loops = false;
  return 0;
}
2680 namespace {
/* Pass descriptor for the OpenMP device-lowering pass, instantiated by
   pass_omp_device_lower below.  */

const pass_data pass_data_omp_device_lower =
{
  GIMPLE_PASS, /* type */
  "ompdevlow", /* name */
  OPTGROUP_OMP, /* optinfo_flags */
  TV_NONE, /* tv_id */
  PROP_cfg, /* properties_required */
  PROP_gimple_lomp_dev, /* properties_provided */
  0, /* properties_destroyed */
  0, /* todo_flags_start */
  TODO_update_ssa, /* todo_flags_finish */
};
/* Gimple pass wrapping execute_omp_device_lower.  */

class pass_omp_device_lower : public gimple_opt_pass
{
public:
  pass_omp_device_lower (gcc::context *ctxt)
    : gimple_opt_pass (pass_data_omp_device_lower, ctxt)
  {}

  /* opt_pass methods: */
  virtual bool gate (function *fun)
    {
      /* Run if the function has not yet been lowered for a device, or if
	 it still contains 'declare variant' calls to resolve.  */
      return (!(fun->curr_properties & PROP_gimple_lomp_dev)
	      || (flag_openmp
		  && cgraph_node::get (fun->decl)->calls_declare_variant_alt));
    }
  virtual unsigned int execute (function *)
    {
      return execute_omp_device_lower ();
    }
}; // class pass_omp_device_lower
2716 } // anon namespace
/* Factory registered with the pass manager: instantiate the OpenMP
   device-lowering pass.  */

gimple_opt_pass *
make_pass_omp_device_lower (gcc::context *ctxt)
{
  return new pass_omp_device_lower (ctxt);
}
2724 /* "omp declare target link" handling pass. */
2726 namespace {
/* Pass descriptor for the "omp declare target link" handling pass,
   instantiated by pass_omp_target_link below.  */

const pass_data pass_data_omp_target_link =
{
  GIMPLE_PASS, /* type */
  "omptargetlink", /* name */
  OPTGROUP_OMP, /* optinfo_flags */
  TV_NONE, /* tv_id */
  PROP_ssa, /* properties_required */
  0, /* properties_provided */
  0, /* properties_destroyed */
  0, /* todo_flags_start */
  TODO_update_ssa, /* todo_flags_finish */
};
/* Gimple pass handling "omp declare target link" variables; only active
   in the accelerator compiler, for offloaded functions.  */

class pass_omp_target_link : public gimple_opt_pass
{
public:
  pass_omp_target_link (gcc::context *ctxt)
    : gimple_opt_pass (pass_data_omp_target_link, ctxt)
  {}

  /* opt_pass methods: */
  virtual bool gate (function *fun)
    {
#ifdef ACCEL_COMPILER
      return offloading_function_p (fun->decl);
#else
      (void) fun;
      return false;
#endif
    }

  virtual unsigned execute (function *);
};
2762 /* Callback for walk_gimple_stmt used to scan for link var operands. */
2764 static tree
2765 find_link_var_op (tree *tp, int *walk_subtrees, void *)
2767 tree t = *tp;
2769 if (VAR_P (t)
2770 && DECL_HAS_VALUE_EXPR_P (t)
2771 && is_global_var (t)
2772 && lookup_attribute ("omp declare target link", DECL_ATTRIBUTES (t)))
2774 *walk_subtrees = 0;
2775 return t;
2778 return NULL_TREE;
2781 unsigned
2782 pass_omp_target_link::execute (function *fun)
2784 basic_block bb;
2785 FOR_EACH_BB_FN (bb, fun)
2787 gimple_stmt_iterator gsi;
2788 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
2790 if (gimple_call_builtin_p (gsi_stmt (gsi), BUILT_IN_GOMP_TARGET))
2792 /* Nullify the second argument of __builtin_GOMP_target_ext. */
2793 gimple_call_set_arg (gsi_stmt (gsi), 1, null_pointer_node);
2794 update_stmt (gsi_stmt (gsi));
2796 if (walk_gimple_stmt (&gsi, NULL, find_link_var_op, NULL))
2797 gimple_regimplify_operands (gsi_stmt (gsi), &gsi);
2801 return 0;
2804 } // anon namespace
/* Factory registered with the pass manager: instantiate the "omp declare
   target link" pass.  */

gimple_opt_pass *
make_pass_omp_target_link (gcc::context *ctxt)
{
  return new pass_omp_target_link (ctxt);
}