1 /* Bits of OpenMP and OpenACC handling that is specific to device offloading
2 and a lowering pass for OpenACC device directives.
4 Copyright (C) 2005-2024 Free Software Foundation, Inc.
6 This file is part of GCC.
8 GCC is free software; you can redistribute it and/or modify it under
9 the terms of the GNU General Public License as published by the Free
10 Software Foundation; either version 3, or (at your option) any later
13 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
14 WARRANTY; without even the implied warranty of MERCHANTABILITY or
15 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
18 You should have received a copy of the GNU General Public License
19 along with GCC; see the file COPYING3. If not see
20 <http://www.gnu.org/licenses/>. */
24 #include "coretypes.h"
29 #include "tree-pass.h"
32 #include "pretty-print.h"
33 #include "diagnostic-core.h"
34 #include "fold-const.h"
35 #include "internal-fn.h"
36 #include "langhooks.h"
38 #include "gimple-iterator.h"
39 #include "gimplify-me.h"
40 #include "gimple-walk.h"
42 #include "tree-into-ssa.h"
43 #include "tree-nested.h"
44 #include "stor-layout.h"
45 #include "common/common-target.h"
46 #include "omp-general.h"
47 #include "omp-offload.h"
48 #include "lto-section-names.h"
49 #include "gomp-constants.h"
50 #include "gimple-pretty-print.h"
52 #include "stringpool.h"
59 /* Describe the OpenACC looping structure of a function. The entire
60 function is held in a 'NULL' loop. */
64 oacc_loop
*parent
; /* Containing loop. */
66 oacc_loop
*child
; /* First inner loop. */
68 oacc_loop
*sibling
; /* Next loop within same parent. */
70 location_t loc
; /* Location of the loop start. */
72 gcall
*marker
; /* Initial head marker. */
74 gcall
*heads
[GOMP_DIM_MAX
]; /* Head marker functions. */
75 gcall
*tails
[GOMP_DIM_MAX
]; /* Tail marker functions. */
77 tree routine
; /* Pseudo-loop enclosing a routine. */
79 unsigned mask
; /* Partitioning mask. */
80 unsigned e_mask
; /* Partitioning of element loops (when tiling). */
81 unsigned inner
; /* Partitioning of inner loops. */
82 unsigned flags
; /* Partitioning flags. */
83 vec
<gcall
*> ifns
; /* Contained loop abstraction functions. */
84 tree chunk_size
; /* Chunk size. */
85 gcall
*head_end
; /* Final marker of head sequence. */
88 /* Holds offload tables with decls. */
89 vec
<tree
, va_gc
> *offload_funcs
, *offload_vars
, *offload_ind_funcs
;
91 /* Return level at which oacc routine may spawn a partitioned loop, or
92 -1 if it is not a routine (i.e. is an offload fn). */
95 oacc_fn_attrib_level (tree attr
)
97 tree pos
= TREE_VALUE (attr
);
99 if (!TREE_PURPOSE (pos
))
103 for (ix
= 0; ix
!= GOMP_DIM_MAX
;
104 ix
++, pos
= TREE_CHAIN (pos
))
105 if (!integer_zerop (TREE_PURPOSE (pos
)))
111 /* Helper function for omp_finish_file routine. Takes decls from V_DECLS and
112 adds their addresses and sizes to constructor-vector V_CTOR. */
115 add_decls_addresses_to_decl_constructor (vec
<tree
, va_gc
> *v_decls
,
116 vec
<constructor_elt
, va_gc
> *v_ctor
)
118 unsigned len
= vec_safe_length (v_decls
);
119 for (unsigned i
= 0; i
< len
; i
++)
121 tree it
= (*v_decls
)[i
];
122 bool is_var
= VAR_P (it
);
125 #ifdef ACCEL_COMPILER
126 && DECL_HAS_VALUE_EXPR_P (it
)
128 && lookup_attribute ("omp declare target link", DECL_ATTRIBUTES (it
));
130 /* See also omp_finish_file and output_offload_tables in lto-cgraph.cc. */
131 if (!in_lto_p
&& !symtab_node::get (it
))
134 tree size
= NULL_TREE
;
136 size
= fold_convert (const_ptr_type_node
, DECL_SIZE_UNIT (it
));
140 addr
= build_fold_addr_expr (it
);
143 #ifdef ACCEL_COMPILER
144 /* For "omp declare target link" vars add address of the pointer to
145 the target table, instead of address of the var. */
146 tree value_expr
= DECL_VALUE_EXPR (it
);
147 tree link_ptr_decl
= TREE_OPERAND (value_expr
, 0);
148 varpool_node::finalize_decl (link_ptr_decl
);
149 addr
= build_fold_addr_expr (link_ptr_decl
);
151 addr
= build_fold_addr_expr (it
);
154 /* Most significant bit of the size marks "omp declare target link"
155 vars in host and target tables. */
156 unsigned HOST_WIDE_INT isize
= tree_to_uhwi (size
);
157 isize
|= 1ULL << (int_size_in_bytes (const_ptr_type_node
)
158 * BITS_PER_UNIT
- 1);
159 size
= wide_int_to_tree (const_ptr_type_node
, isize
);
162 CONSTRUCTOR_APPEND_ELT (v_ctor
, NULL_TREE
, addr
);
164 CONSTRUCTOR_APPEND_ELT (v_ctor
, NULL_TREE
, size
);
168 /* Return true if DECL is a function for which its references should be analyzed. */
172 omp_declare_target_fn_p (tree decl
)
174 return (TREE_CODE (decl
) == FUNCTION_DECL
175 && lookup_attribute ("omp declare target", DECL_ATTRIBUTES (decl
))
176 && !lookup_attribute ("omp declare target host",
177 DECL_ATTRIBUTES (decl
))
179 || oacc_get_fn_attrib (decl
) == NULL_TREE
));
182 /* Return true if DECL is a variable for which its initializer references
183 should be analyzed. */
186 omp_declare_target_var_p (tree decl
)
189 && lookup_attribute ("omp declare target", DECL_ATTRIBUTES (decl
))
190 && !lookup_attribute ("omp declare target link",
191 DECL_ATTRIBUTES (decl
)));
194 /* Helper function for omp_discover_implicit_declare_target, called through
195 walk_tree. Mark referenced FUNCTION_DECLs implicitly as
196 declare target to. */
199 omp_discover_declare_target_tgt_fn_r (tree
*tp
, int *walk_subtrees
, void *data
)
201 if (TREE_CODE (*tp
) == CALL_EXPR
202 && CALL_EXPR_FN (*tp
)
203 && TREE_CODE (CALL_EXPR_FN (*tp
)) == ADDR_EXPR
204 && TREE_CODE (TREE_OPERAND (CALL_EXPR_FN (*tp
), 0)) == FUNCTION_DECL
205 && lookup_attribute ("omp declare variant base",
206 DECL_ATTRIBUTES (TREE_OPERAND (CALL_EXPR_FN (*tp
),
209 tree fn
= TREE_OPERAND (CALL_EXPR_FN (*tp
), 0);
210 for (tree attr
= DECL_ATTRIBUTES (fn
); attr
; attr
= TREE_CHAIN (attr
))
212 attr
= lookup_attribute ("omp declare variant base", attr
);
213 if (attr
== NULL_TREE
)
215 tree purpose
= TREE_PURPOSE (TREE_VALUE (attr
));
216 if (TREE_CODE (purpose
) == FUNCTION_DECL
)
217 omp_discover_declare_target_tgt_fn_r (&purpose
, walk_subtrees
, data
);
220 else if (TREE_CODE (*tp
) == FUNCTION_DECL
)
223 tree id
= get_identifier ("omp declare target");
224 symtab_node
*node
= symtab_node::get (*tp
);
227 while (node
->alias_target
228 && TREE_CODE (node
->alias_target
) == FUNCTION_DECL
)
230 if (!omp_declare_target_fn_p (node
->decl
)
231 && !lookup_attribute ("omp declare target host",
232 DECL_ATTRIBUTES (node
->decl
)))
234 node
->offloadable
= 1;
235 DECL_ATTRIBUTES (node
->decl
)
236 = tree_cons (id
, NULL_TREE
, DECL_ATTRIBUTES (node
->decl
));
238 node
= symtab_node::get (node
->alias_target
);
240 symtab_node
*new_node
= node
->ultimate_alias_target ();
241 decl
= new_node
->decl
;
242 while (node
!= new_node
)
244 if (!omp_declare_target_fn_p (node
->decl
)
245 && !lookup_attribute ("omp declare target host",
246 DECL_ATTRIBUTES (node
->decl
)))
248 node
->offloadable
= 1;
249 DECL_ATTRIBUTES (node
->decl
)
250 = tree_cons (id
, NULL_TREE
, DECL_ATTRIBUTES (node
->decl
));
252 gcc_assert (node
->alias
&& node
->analyzed
);
253 node
= node
->get_alias_target ();
255 node
->offloadable
= 1;
256 if (ENABLE_OFFLOADING
)
257 g
->have_offload
= true;
259 if (omp_declare_target_fn_p (decl
)
260 || lookup_attribute ("omp declare target host",
261 DECL_ATTRIBUTES (decl
)))
264 if (!DECL_EXTERNAL (decl
) && DECL_SAVED_TREE (decl
))
265 ((vec
<tree
> *) data
)->safe_push (decl
);
266 DECL_ATTRIBUTES (decl
) = tree_cons (id
, NULL_TREE
,
267 DECL_ATTRIBUTES (decl
));
269 else if (TYPE_P (*tp
))
271 else if (TREE_CODE (*tp
) == OMP_TARGET
)
273 tree c
= omp_find_clause (OMP_CLAUSES (*tp
), OMP_CLAUSE_DEVICE
);
274 if (c
&& OMP_CLAUSE_DEVICE_ANCESTOR (c
))
280 /* Similarly, but ignore references outside of OMP_TARGET regions. */
283 omp_discover_declare_target_fn_r (tree
*tp
, int *walk_subtrees
, void *data
)
285 if (TREE_CODE (*tp
) == OMP_TARGET
)
287 tree c
= omp_find_clause (OMP_CLAUSES (*tp
), OMP_CLAUSE_DEVICE
);
288 if (!c
|| !OMP_CLAUSE_DEVICE_ANCESTOR (c
))
289 walk_tree_without_duplicates (&OMP_TARGET_BODY (*tp
),
290 omp_discover_declare_target_tgt_fn_r
,
294 else if (TYPE_P (*tp
))
299 /* Helper function for omp_discover_implicit_declare_target, called through
300 walk_tree. Mark referenced FUNCTION_DECLs and global variables implicitly
301 as declare target to. */
304 omp_discover_declare_target_var_r (tree
*tp
, int *walk_subtrees
, void *data
)
306 if (TREE_CODE (*tp
) == FUNCTION_DECL
)
307 return omp_discover_declare_target_tgt_fn_r (tp
, walk_subtrees
, data
);
309 && is_global_var (*tp
)
310 && !omp_declare_target_var_p (*tp
))
312 tree id
= get_identifier ("omp declare target");
313 if (lookup_attribute ("omp declare target link", DECL_ATTRIBUTES (*tp
)))
315 error_at (DECL_SOURCE_LOCATION (*tp
),
316 "%qD specified both in declare target %<link%> and "
317 "implicitly in %<to%> clauses", *tp
);
318 DECL_ATTRIBUTES (*tp
)
319 = remove_attribute ("omp declare target link", DECL_ATTRIBUTES (*tp
));
321 if (TREE_STATIC (*tp
) && lang_hooks
.decls
.omp_get_decl_init (*tp
))
322 ((vec
<tree
> *) data
)->safe_push (*tp
);
323 DECL_ATTRIBUTES (*tp
) = tree_cons (id
, NULL_TREE
, DECL_ATTRIBUTES (*tp
));
324 symtab_node
*node
= symtab_node::get (*tp
);
325 if (node
!= NULL
&& !node
->offloadable
)
327 node
->offloadable
= 1;
328 if (ENABLE_OFFLOADING
)
330 g
->have_offload
= true;
331 if (is_a
<varpool_node
*> (node
))
332 vec_safe_push (offload_vars
, node
->decl
);
336 else if (TYPE_P (*tp
))
341 /* Perform the OpenMP implicit declare target to discovery. */
344 omp_discover_implicit_declare_target (void)
348 auto_vec
<tree
> worklist
;
350 FOR_EACH_DEFINED_FUNCTION (node
)
351 if (DECL_SAVED_TREE (node
->decl
))
353 struct cgraph_node
*cgn
;
354 if (lookup_attribute ("omp declare target indirect",
355 DECL_ATTRIBUTES (node
->decl
)))
356 vec_safe_push (offload_ind_funcs
, node
->decl
);
357 if (omp_declare_target_fn_p (node
->decl
))
358 worklist
.safe_push (node
->decl
);
359 else if (DECL_STRUCT_FUNCTION (node
->decl
)
360 && DECL_STRUCT_FUNCTION (node
->decl
)->has_omp_target
)
361 worklist
.safe_push (node
->decl
);
362 for (cgn
= first_nested_function (node
);
363 cgn
; cgn
= next_nested_function (cgn
))
364 if (omp_declare_target_fn_p (cgn
->decl
))
365 worklist
.safe_push (cgn
->decl
);
366 else if (DECL_STRUCT_FUNCTION (cgn
->decl
)
367 && DECL_STRUCT_FUNCTION (cgn
->decl
)->has_omp_target
)
368 worklist
.safe_push (cgn
->decl
);
370 FOR_EACH_VARIABLE (vnode
)
371 if (lang_hooks
.decls
.omp_get_decl_init (vnode
->decl
)
372 && omp_declare_target_var_p (vnode
->decl
))
373 worklist
.safe_push (vnode
->decl
);
374 while (!worklist
.is_empty ())
376 tree decl
= worklist
.pop ();
378 walk_tree_without_duplicates (lang_hooks
.decls
.omp_get_decl_init (decl
),
379 omp_discover_declare_target_var_r
,
381 else if (omp_declare_target_fn_p (decl
))
382 walk_tree_without_duplicates (&DECL_SAVED_TREE (decl
),
383 omp_discover_declare_target_tgt_fn_r
,
386 walk_tree_without_duplicates (&DECL_SAVED_TREE (decl
),
387 omp_discover_declare_target_fn_r
,
391 lang_hooks
.decls
.omp_finish_decl_inits ();
395 /* Create new symbols containing (address, size) pairs for global variables,
396 marked with "omp declare target" attribute, as well as addresses for the
397 functions, which are outlined offloading regions. */
399 omp_finish_file (void)
401 unsigned num_funcs
= vec_safe_length (offload_funcs
);
402 unsigned num_vars
= vec_safe_length (offload_vars
);
403 unsigned num_ind_funcs
= vec_safe_length (offload_ind_funcs
);
405 if (num_funcs
== 0 && num_vars
== 0 && num_ind_funcs
== 0)
408 if (targetm_common
.have_named_sections
)
410 vec
<constructor_elt
, va_gc
> *v_f
, *v_v
, *v_if
;
411 vec_alloc (v_f
, num_funcs
);
412 vec_alloc (v_v
, num_vars
* 2);
413 vec_alloc (v_if
, num_ind_funcs
);
415 add_decls_addresses_to_decl_constructor (offload_funcs
, v_f
);
416 add_decls_addresses_to_decl_constructor (offload_vars
, v_v
);
417 add_decls_addresses_to_decl_constructor (offload_ind_funcs
, v_if
);
419 tree vars_decl_type
= build_array_type_nelts (pointer_sized_int_node
,
420 vec_safe_length (v_v
));
421 tree funcs_decl_type
= build_array_type_nelts (pointer_sized_int_node
,
423 tree ind_funcs_decl_type
= build_array_type_nelts (pointer_sized_int_node
,
426 SET_TYPE_ALIGN (vars_decl_type
, TYPE_ALIGN (pointer_sized_int_node
));
427 SET_TYPE_ALIGN (funcs_decl_type
, TYPE_ALIGN (pointer_sized_int_node
));
428 SET_TYPE_ALIGN (ind_funcs_decl_type
, TYPE_ALIGN (pointer_sized_int_node
));
429 tree ctor_v
= build_constructor (vars_decl_type
, v_v
);
430 tree ctor_f
= build_constructor (funcs_decl_type
, v_f
);
431 tree ctor_if
= build_constructor (ind_funcs_decl_type
, v_if
);
432 TREE_CONSTANT (ctor_v
) = TREE_CONSTANT (ctor_f
) = TREE_CONSTANT (ctor_if
) = 1;
433 TREE_STATIC (ctor_v
) = TREE_STATIC (ctor_f
) = TREE_STATIC (ctor_if
) = 1;
434 tree funcs_decl
= build_decl (UNKNOWN_LOCATION
, VAR_DECL
,
435 get_identifier (".offload_func_table"),
437 tree vars_decl
= build_decl (UNKNOWN_LOCATION
, VAR_DECL
,
438 get_identifier (".offload_var_table"),
440 tree ind_funcs_decl
= build_decl (UNKNOWN_LOCATION
, VAR_DECL
,
441 get_identifier (".offload_ind_func_table"),
442 ind_funcs_decl_type
);
443 TREE_STATIC (funcs_decl
) = TREE_STATIC (ind_funcs_decl
) = 1;
444 TREE_STATIC (vars_decl
) = 1;
445 /* Do not align tables more than TYPE_ALIGN (pointer_sized_int_node),
446 otherwise a joint table in a binary will contain padding between
447 tables from multiple object files. */
448 DECL_USER_ALIGN (funcs_decl
) = DECL_USER_ALIGN (ind_funcs_decl
) = 1;
449 DECL_USER_ALIGN (vars_decl
) = 1;
450 SET_DECL_ALIGN (funcs_decl
, TYPE_ALIGN (funcs_decl_type
));
451 SET_DECL_ALIGN (vars_decl
, TYPE_ALIGN (vars_decl_type
));
452 SET_DECL_ALIGN (ind_funcs_decl
, TYPE_ALIGN (ind_funcs_decl_type
));
453 DECL_INITIAL (funcs_decl
) = ctor_f
;
454 DECL_INITIAL (vars_decl
) = ctor_v
;
455 DECL_INITIAL (ind_funcs_decl
) = ctor_if
;
456 set_decl_section_name (funcs_decl
, OFFLOAD_FUNC_TABLE_SECTION_NAME
);
457 set_decl_section_name (vars_decl
, OFFLOAD_VAR_TABLE_SECTION_NAME
);
458 set_decl_section_name (ind_funcs_decl
,
459 OFFLOAD_IND_FUNC_TABLE_SECTION_NAME
);
460 varpool_node::finalize_decl (vars_decl
);
461 varpool_node::finalize_decl (funcs_decl
);
462 varpool_node::finalize_decl (ind_funcs_decl
);
466 for (unsigned i
= 0; i
< num_funcs
; i
++)
468 tree it
= (*offload_funcs
)[i
];
469 /* See also add_decls_addresses_to_decl_constructor
470 and output_offload_tables in lto-cgraph.cc. */
471 if (!in_lto_p
&& !symtab_node::get (it
))
473 targetm
.record_offload_symbol (it
);
475 for (unsigned i
= 0; i
< num_vars
; i
++)
477 tree it
= (*offload_vars
)[i
];
478 if (!in_lto_p
&& !symtab_node::get (it
))
480 #ifdef ACCEL_COMPILER
481 if (DECL_HAS_VALUE_EXPR_P (it
)
482 && lookup_attribute ("omp declare target link",
483 DECL_ATTRIBUTES (it
)))
485 tree value_expr
= DECL_VALUE_EXPR (it
);
486 tree link_ptr_decl
= TREE_OPERAND (value_expr
, 0);
487 targetm
.record_offload_symbol (link_ptr_decl
);
488 varpool_node::finalize_decl (link_ptr_decl
);
492 targetm
.record_offload_symbol (it
);
494 for (unsigned i
= 0; i
< num_ind_funcs
; i
++)
496 tree it
= (*offload_ind_funcs
)[i
];
497 /* See also add_decls_addresses_to_decl_constructor
498 and output_offload_tables in lto-cgraph.cc. */
499 if (!in_lto_p
&& !symtab_node::get (it
))
501 targetm
.record_offload_symbol (it
);
506 /* Call dim_pos (POS == true) or dim_size (POS == false) builtins for
507 axis DIM. Return a tmp var holding the result. */
510 oacc_dim_call (bool pos
, int dim
, gimple_seq
*seq
)
512 tree arg
= build_int_cst (unsigned_type_node
, dim
);
513 tree size
= create_tmp_var (integer_type_node
);
514 enum internal_fn fn
= pos
? IFN_GOACC_DIM_POS
: IFN_GOACC_DIM_SIZE
;
515 gimple
*call
= gimple_build_call_internal (fn
, 1, arg
);
517 gimple_call_set_lhs (call
, size
);
518 gimple_seq_add_stmt (seq
, call
);
523 /* Find the number of threads (POS = false), or thread number (POS =
524 true) for an OpenACC region partitioned as MASK. Setup code
525 required for the calculation is added to SEQ. */
528 oacc_thread_numbers (bool pos
, int mask
, gimple_seq
*seq
)
530 tree res
= pos
? NULL_TREE
: build_int_cst (unsigned_type_node
, 1);
533 /* Start at gang level, and examine relevant dimension indices. */
534 for (ix
= GOMP_DIM_GANG
; ix
!= GOMP_DIM_MAX
; ix
++)
535 if (GOMP_DIM_MASK (ix
) & mask
)
539 /* We had an outer index, so scale that by the size of this dimension. */
541 tree n
= oacc_dim_call (false, ix
, seq
);
542 res
= fold_build2 (MULT_EXPR
, integer_type_node
, res
, n
);
546 /* Determine index in this dimension. */
547 tree id
= oacc_dim_call (true, ix
, seq
);
549 res
= fold_build2 (PLUS_EXPR
, integer_type_node
, res
, id
);
555 if (res
== NULL_TREE
)
556 res
= integer_zero_node
;
561 /* Transform IFN_GOACC_LOOP calls to actual code. See
562 expand_oacc_for for where these are generated. At the vector
563 level, we stride loops, such that each member of a warp will
564 operate on adjacent iterations. At the worker and gang level,
565 each gang/warp executes a set of contiguous iterations. Chunking
566 can override this such that each iteration engine executes a
567 contiguous chunk, and then moves on to stride to the next chunk. */
570 oacc_xform_loop (gcall
*call
)
572 gimple_stmt_iterator gsi
= gsi_for_stmt (call
);
573 enum ifn_goacc_loop_kind code
574 = (enum ifn_goacc_loop_kind
) TREE_INT_CST_LOW (gimple_call_arg (call
, 0));
575 tree dir
= gimple_call_arg (call
, 1);
576 tree range
= gimple_call_arg (call
, 2);
577 tree step
= gimple_call_arg (call
, 3);
578 tree chunk_size
= NULL_TREE
;
579 unsigned mask
= (unsigned) TREE_INT_CST_LOW (gimple_call_arg (call
, 5));
580 tree lhs
= gimple_call_lhs (call
);
581 tree type
= NULL_TREE
;
582 tree diff_type
= TREE_TYPE (range
);
584 gimple_seq seq
= NULL
;
585 bool chunking
= false, striding
= true;
586 unsigned outer_mask
= mask
& (~mask
+ 1); // Outermost partitioning
587 unsigned inner_mask
= mask
& ~outer_mask
; // Inner partitioning (if any)
589 /* Skip lowering if return value of IFN_GOACC_LOOP call is not used. */
592 gsi_replace_with_seq (&gsi
, seq
, true);
596 type
= TREE_TYPE (lhs
);
598 #ifdef ACCEL_COMPILER
599 chunk_size
= gimple_call_arg (call
, 4);
600 if (integer_minus_onep (chunk_size
) /* Force static allocation. */
601 || integer_zerop (chunk_size
)) /* Default (also static). */
603 /* If we're at the gang level, we want each to execute a
604 contiguous run of iterations. Otherwise we want each element to stride. */
606 striding
= !(outer_mask
& GOMP_DIM_MASK (GOMP_DIM_GANG
));
611 /* Chunk of size 1 is striding. */
612 striding
= integer_onep (chunk_size
);
613 chunking
= !striding
;
617 /* striding=true, chunking=true
618 -> invalid.
619 striding=true, chunking=false
620 -> chunks=1
621 striding=false,chunking=true
622 -> chunks=ceil (range/(chunksize*threads*step))
623 striding=false,chunking=false
624 -> chunk_size=ceil(range/(threads*step)),chunks=1 */
625 push_gimplify_context (true);
629 default: gcc_unreachable ();
631 case IFN_GOACC_LOOP_CHUNKS
:
633 r
= build_int_cst (type
, 1);
637 /* chunk_max = (range - dir) / (chunks * step * num_threads) + dir */
638 tree per
= oacc_thread_numbers (false, mask
, &seq
);
639 per
= fold_convert (type
, per
);
640 chunk_size
= fold_convert (type
, chunk_size
);
641 per
= fold_build2 (MULT_EXPR
, type
, per
, chunk_size
);
642 per
= fold_build2 (MULT_EXPR
, type
, per
, step
);
643 r
= build2 (MINUS_EXPR
, type
, range
, dir
);
644 r
= build2 (PLUS_EXPR
, type
, r
, per
);
645 r
= build2 (TRUNC_DIV_EXPR
, type
, r
, per
);
649 case IFN_GOACC_LOOP_STEP
:
651 /* If striding, step by the entire compute volume, otherwise
652 step by the inner volume. */
653 unsigned volume
= striding
? mask
: inner_mask
;
655 r
= oacc_thread_numbers (false, volume
, &seq
);
656 r
= build2 (MULT_EXPR
, type
, fold_convert (type
, r
), step
);
660 case IFN_GOACC_LOOP_OFFSET
:
661 /* Enable vectorization on non-SIMT targets. */
663 && outer_mask
== GOMP_DIM_MASK (GOMP_DIM_VECTOR
)
664 /* If not -fno-tree-loop-vectorize, hint that we want to vectorize the loop. */
666 && (flag_tree_loop_vectorize
667 || !OPTION_SET_P (flag_tree_loop_vectorize
)))
669 basic_block bb
= gsi_bb (gsi
);
670 class loop
*parent
= bb
->loop_father
;
671 class loop
*body
= parent
->inner
;
673 parent
->force_vectorize
= true;
674 parent
->safelen
= INT_MAX
;
676 /* "Chunking loops" may have inner loops. */
679 body
->force_vectorize
= true;
680 body
->safelen
= INT_MAX
;
683 cfun
->has_force_vectorize_loops
= true;
687 r
= oacc_thread_numbers (true, mask
, &seq
);
688 r
= fold_convert (diff_type
, r
);
692 tree inner_size
= oacc_thread_numbers (false, inner_mask
, &seq
);
693 tree outer_size
= oacc_thread_numbers (false, outer_mask
, &seq
);
694 tree volume
= fold_build2 (MULT_EXPR
, TREE_TYPE (inner_size
),
695 inner_size
, outer_size
);
697 volume
= fold_convert (diff_type
, volume
);
699 chunk_size
= fold_convert (diff_type
, chunk_size
);
702 tree per
= fold_build2 (MULT_EXPR
, diff_type
, volume
, step
);
704 chunk_size
= build2 (MINUS_EXPR
, diff_type
, range
, dir
);
705 chunk_size
= build2 (PLUS_EXPR
, diff_type
, chunk_size
, per
);
706 chunk_size
= build2 (TRUNC_DIV_EXPR
, diff_type
, chunk_size
, per
);
709 tree span
= build2 (MULT_EXPR
, diff_type
, chunk_size
,
710 fold_convert (diff_type
, inner_size
));
711 r
= oacc_thread_numbers (true, outer_mask
, &seq
);
712 r
= fold_convert (diff_type
, r
);
713 r
= build2 (MULT_EXPR
, diff_type
, r
, span
);
715 tree inner
= oacc_thread_numbers (true, inner_mask
, &seq
);
716 inner
= fold_convert (diff_type
, inner
);
717 r
= fold_build2 (PLUS_EXPR
, diff_type
, r
, inner
);
721 tree chunk
= fold_convert (diff_type
, gimple_call_arg (call
, 6));
723 = fold_build2 (MULT_EXPR
, diff_type
, volume
, chunk_size
);
724 per
= build2 (MULT_EXPR
, diff_type
, per
, chunk
);
726 r
= build2 (PLUS_EXPR
, diff_type
, r
, per
);
729 r
= fold_build2 (MULT_EXPR
, diff_type
, r
, step
);
730 if (type
!= diff_type
)
731 r
= fold_convert (type
, r
);
734 case IFN_GOACC_LOOP_BOUND
:
739 tree inner_size
= oacc_thread_numbers (false, inner_mask
, &seq
);
740 tree outer_size
= oacc_thread_numbers (false, outer_mask
, &seq
);
741 tree volume
= fold_build2 (MULT_EXPR
, TREE_TYPE (inner_size
),
742 inner_size
, outer_size
);
744 volume
= fold_convert (diff_type
, volume
);
746 chunk_size
= fold_convert (diff_type
, chunk_size
);
749 tree per
= fold_build2 (MULT_EXPR
, diff_type
, volume
, step
);
751 chunk_size
= build2 (MINUS_EXPR
, diff_type
, range
, dir
);
752 chunk_size
= build2 (PLUS_EXPR
, diff_type
, chunk_size
, per
);
753 chunk_size
= build2 (TRUNC_DIV_EXPR
, diff_type
, chunk_size
, per
);
756 tree span
= build2 (MULT_EXPR
, diff_type
, chunk_size
,
757 fold_convert (diff_type
, inner_size
));
759 r
= fold_build2 (MULT_EXPR
, diff_type
, span
, step
);
761 tree offset
= gimple_call_arg (call
, 6);
762 r
= build2 (PLUS_EXPR
, diff_type
, r
,
763 fold_convert (diff_type
, offset
));
764 r
= build2 (integer_onep (dir
) ? MIN_EXPR
: MAX_EXPR
,
765 diff_type
, r
, range
);
767 if (diff_type
!= type
)
768 r
= fold_convert (type
, r
);
772 gimplify_assign (lhs
, r
, &seq
);
774 pop_gimplify_context (NULL
);
776 gsi_replace_with_seq (&gsi
, seq
, true);
779 /* Transform a GOACC_TILE call. Determines the element loop span for
780 the specified loop of the nest. This is 1 if we're not tiling.
782 GOACC_TILE (collapse_count, loop_no, tile_arg, gwv_tile, gwv_element); */
785 oacc_xform_tile (gcall
*call
)
787 gimple_stmt_iterator gsi
= gsi_for_stmt (call
);
788 unsigned collapse
= tree_to_uhwi (gimple_call_arg (call
, 0));
789 /* Inner loops have higher loop_nos. */
790 unsigned loop_no
= tree_to_uhwi (gimple_call_arg (call
, 1));
791 tree tile_size
= gimple_call_arg (call
, 2);
792 unsigned e_mask
= tree_to_uhwi (gimple_call_arg (call
, 4));
793 tree lhs
= gimple_call_lhs (call
);
794 tree type
= TREE_TYPE (lhs
);
795 gimple_seq seq
= NULL
;
796 tree span
= build_int_cst (type
, 1);
799 & ~(GOMP_DIM_MASK (GOMP_DIM_VECTOR
)
800 | GOMP_DIM_MASK (GOMP_DIM_WORKER
))));
801 push_gimplify_context (!seen_error ());
803 #ifndef ACCEL_COMPILER
804 /* Partitioning disabled on host compilers. */
808 /* Not partitioning. */
809 span
= integer_one_node
;
810 else if (!integer_zerop (tile_size
))
811 /* User explicitly specified size. */
815 /* Pick a size based on the partitioning of the element loop and
816 the number of loop nests. */
817 tree first_size
= NULL_TREE
;
818 tree second_size
= NULL_TREE
;
820 if (e_mask
& GOMP_DIM_MASK (GOMP_DIM_VECTOR
))
821 first_size
= oacc_dim_call (false, GOMP_DIM_VECTOR
, &seq
);
822 if (e_mask
& GOMP_DIM_MASK (GOMP_DIM_WORKER
))
823 second_size
= oacc_dim_call (false, GOMP_DIM_WORKER
, &seq
);
827 first_size
= second_size
;
828 second_size
= NULL_TREE
;
831 if (loop_no
+ 1 == collapse
)
834 if (!loop_no
&& second_size
)
835 span
= fold_build2 (MULT_EXPR
, TREE_TYPE (span
),
838 else if (loop_no
+ 2 == collapse
)
844 /* There's no obvious element size for this loop. Options
845 are 1, first_size or some non-unity constant (32 is my
846 favourite). We should gather some statistics. */
850 span
= fold_convert (type
, span
);
851 gimplify_assign (lhs
, span
, &seq
);
853 pop_gimplify_context (NULL
);
855 gsi_replace_with_seq (&gsi
, seq
, true);
858 /* Default partitioned and minimum partitioned dimensions. */
860 static int oacc_default_dims
[GOMP_DIM_MAX
];
861 static int oacc_min_dims
[GOMP_DIM_MAX
];
864 oacc_get_default_dim (int dim
)
866 gcc_assert (0 <= dim
&& dim
< GOMP_DIM_MAX
);
867 return oacc_default_dims
[dim
];
871 oacc_get_min_dim (int dim
)
873 gcc_assert (0 <= dim
&& dim
< GOMP_DIM_MAX
);
874 return oacc_min_dims
[dim
];
877 /* Parse the default dimension parameter. This is a set of
878 :-separated optional compute dimensions. Each specified dimension
879 is a positive integer. When device type support is added, it is
880 planned to be a comma separated list of such compute dimensions,
881 with all but the first prefixed by the colon-terminated device type. */
885 oacc_parse_default_dims (const char *dims
)
889 for (ix
= GOMP_DIM_MAX
; ix
--;)
891 oacc_default_dims
[ix
] = -1;
892 oacc_min_dims
[ix
] = 1;
895 #ifndef ACCEL_COMPILER
896 /* Cannot be overridden on the host. */
901 const char *pos
= dims
;
903 for (ix
= 0; *pos
&& ix
!= GOMP_DIM_MAX
; ix
++)
918 val
= strtol (pos
, CONST_CAST (char **, &eptr
), 10);
919 if (errno
|| val
<= 0 || (int) val
!= val
)
922 oacc_default_dims
[ix
] = (int) val
;
928 error_at (UNKNOWN_LOCATION
,
929 "%<-fopenacc-dim%> operand is malformed at %qs", pos
);
933 /* Allow the backend to validate the dimensions. */
934 targetm
.goacc
.validate_dims (NULL_TREE
, oacc_default_dims
, -1, 0);
935 targetm
.goacc
.validate_dims (NULL_TREE
, oacc_min_dims
, -2, 0);
938 /* Validate and update the dimensions for offloaded FN. ATTRS is the
939 raw attribute. DIMS is an array of dimensions, which is filled in.
940 LEVEL is the partitioning level of a routine, or -1 for an offload
941 region itself. USED is the mask of partitioned execution in the function. */
945 oacc_validate_dims (tree fn
, tree attrs
, int *dims
, int level
, unsigned used
)
947 tree purpose
[GOMP_DIM_MAX
];
949 tree pos
= TREE_VALUE (attrs
);
951 /* Make sure the attribute creator attached the dimension information. */
955 for (ix
= 0; ix
!= GOMP_DIM_MAX
; ix
++)
957 purpose
[ix
] = TREE_PURPOSE (pos
);
958 tree val
= TREE_VALUE (pos
);
959 dims
[ix
] = val
? TREE_INT_CST_LOW (val
) : -1;
960 pos
= TREE_CHAIN (pos
);
964 #ifdef ACCEL_COMPILER
968 && warn_openacc_parallelism
969 && !lookup_attribute ("oacc kernels", DECL_ATTRIBUTES (fn
)))
971 static char const *const axes
[] =
972 /* Must be kept in sync with GOMP_DIM enumeration. */
973 { "gang", "worker", "vector" };
974 for (ix
= level
>= 0 ? level
: 0; ix
!= GOMP_DIM_MAX
; ix
++)
976 ; /* Defaulting axis. */
977 else if ((used
& GOMP_DIM_MASK (ix
)) && dims
[ix
] == 1)
978 /* There is partitioned execution, but the user requested a
979 dimension size of 1. They're probably confused. */
980 warning_at (DECL_SOURCE_LOCATION (fn
), OPT_Wopenacc_parallelism
,
981 "region contains %s partitioned code but"
982 " is not %s partitioned", axes
[ix
], axes
[ix
]);
983 else if (!(used
& GOMP_DIM_MASK (ix
)) && dims
[ix
] != 1)
984 /* The dimension is explicitly partitioned to non-unity, but
985 no use is made within the region. */
986 warning_at (DECL_SOURCE_LOCATION (fn
), OPT_Wopenacc_parallelism
,
987 "region is %s partitioned but"
988 " does not contain %s partitioned code",
992 bool changed
= targetm
.goacc
.validate_dims (fn
, dims
, level
, used
);
994 /* Default anything left to 1 or a partitioned default. */
995 for (ix
= 0; ix
!= GOMP_DIM_MAX
; ix
++)
998 /* The OpenACC spec says 'If the [num_gangs] clause is not
999 specified, an implementation-defined default will be used;
1000 the default may depend on the code within the construct.'
1001 (2.5.6). Thus an implementation is free to choose
1002 non-unity default for a parallel region that doesn't have
1003 any gang-partitioned loops. However, it appears that there
1004 is a sufficient body of user code that expects non-gang
1005 partitioned regions to not execute in gang-redundant mode.
1006 So we (a) don't warn about the non-portability and (b) pick
1007 the minimum permissible dimension size when there is no
1008 partitioned execution. Otherwise we pick the global
1009 default for the dimension, which the user can control. The
1010 same wording and logic applies to num_workers and
1011 vector_length, however the worker- or vector- single
1012 execution doesn't have the same impact as gang-redundant
1013 execution. (If the minimum gang-level partitioning is not 1,
1014 the target is probably too confusing.) */
1015 dims
[ix
] = (used
& GOMP_DIM_MASK (ix
)
1016 ? oacc_default_dims
[ix
] : oacc_min_dims
[ix
]);
1022 /* Replace the attribute with new values. */
1024 for (ix
= GOMP_DIM_MAX
; ix
--;)
1025 pos
= tree_cons (purpose
[ix
],
1026 build_int_cst (integer_type_node
, dims
[ix
]), pos
);
1027 oacc_replace_fn_attrib (fn
, pos
);
1031 /* Create an empty OpenACC loop structure at LOC. */
1034 new_oacc_loop_raw (oacc_loop
*parent
, location_t loc
)
1036 oacc_loop
*loop
= XCNEW (oacc_loop
);
1038 loop
->parent
= parent
;
1042 loop
->sibling
= parent
->child
;
1043 parent
->child
= loop
;
1050 /* Create an outermost, dummy OpenACC loop for offloaded function DECL. */
1054 new_oacc_loop_outer (tree decl
)
1056 return new_oacc_loop_raw (NULL
, DECL_SOURCE_LOCATION (decl
));
1059 /* Start a new OpenACC loop structure beginning at head marker MARKER.
1060 Link into PARENT loop. Return the new loop. */
1063 new_oacc_loop (oacc_loop
*parent
, gcall
*marker
)
1065 oacc_loop
*loop
= new_oacc_loop_raw (parent
, gimple_location (marker
));
1067 loop
->marker
= marker
;
1069 /* TODO: This is where device_type flattening would occur for the loop flags. */
1072 loop
->flags
= TREE_INT_CST_LOW (gimple_call_arg (marker
, 3));
1074 tree chunk_size
= integer_zero_node
;
1075 if (loop
->flags
& OLF_GANG_STATIC
)
1076 chunk_size
= gimple_call_arg (marker
, 4);
1077 loop
->chunk_size
= chunk_size
;
1082 /* Create a dummy loop encompassing a call to an OpenACC routine.
1083 Extract the routine's partitioning requirements. */
1086 new_oacc_loop_routine (oacc_loop
*parent
, gcall
*call
, tree decl
, tree attrs
)
1088 oacc_loop
*loop
= new_oacc_loop_raw (parent
, gimple_location (call
));
1089 int level
= oacc_fn_attrib_level (attrs
);
1091 gcc_assert (level
>= 0);
1093 loop
->marker
= call
;
1094 loop
->routine
= decl
;
1095 loop
->mask
= ((GOMP_DIM_MASK (GOMP_DIM_MAX
) - 1)
1096 ^ (GOMP_DIM_MASK (level
) - 1));
1099 /* Finish off the current OpenACC loop LOOP.
1100 Return the parent loop. */
1103 finish_oacc_loop (oacc_loop
*loop
)
1105 /* If the loop has been collapsed, don't partition it. */
1106 if (loop
->ifns
.is_empty ())
1107 loop
->mask
= loop
->flags
= 0;
1108 return loop
->parent
;
1111 /* Free all OpenACC loop structures within LOOP (inclusive). */
1114 free_oacc_loop (oacc_loop
*loop
)
1117 free_oacc_loop (loop
->sibling
);
1119 free_oacc_loop (loop
->child
);
1121 loop
->ifns
.release ();
1125 /* Dump out the OpenACC loop head or tail beginning at FROM. */
1128 dump_oacc_loop_part (FILE *file
, gcall
*from
, int depth
,
1129 const char *title
, int level
)
1131 enum ifn_unique_kind kind
1132 = (enum ifn_unique_kind
) TREE_INT_CST_LOW (gimple_call_arg (from
, 0));
1134 fprintf (file
, "%*s%s-%d:\n", depth
* 2, "", title
, level
);
1135 for (gimple_stmt_iterator gsi
= gsi_for_stmt (from
);;)
1137 gimple
*stmt
= gsi_stmt (gsi
);
1139 if (gimple_call_internal_p (stmt
, IFN_UNIQUE
))
1141 enum ifn_unique_kind k
1142 = ((enum ifn_unique_kind
) TREE_INT_CST_LOW
1143 (gimple_call_arg (stmt
, 0)));
1145 if (k
== kind
&& stmt
!= from
)
1148 print_gimple_stmt (file
, stmt
, depth
* 2 + 2);
1151 while (gsi_end_p (gsi
))
1152 gsi
= gsi_start_bb (single_succ (gsi_bb (gsi
)));
1156 /* Dump OpenACC loop LOOP, its children, and its siblings. */
1159 dump_oacc_loop (FILE *file
, oacc_loop
*loop
, int depth
)
1163 fprintf (file
, "%*sLoop %x(%x) %s:%u\n", depth
* 2, "",
1164 loop
->flags
, loop
->mask
,
1165 LOCATION_FILE (loop
->loc
), LOCATION_LINE (loop
->loc
));
1168 print_gimple_stmt (file
, loop
->marker
, depth
* 2);
1171 fprintf (file
, "%*sRoutine %s:%u:%s\n",
1172 depth
* 2, "", DECL_SOURCE_FILE (loop
->routine
),
1173 DECL_SOURCE_LINE (loop
->routine
),
1174 IDENTIFIER_POINTER (DECL_NAME (loop
->routine
)));
1176 for (ix
= GOMP_DIM_GANG
; ix
!= GOMP_DIM_MAX
; ix
++)
1177 if (loop
->heads
[ix
])
1178 dump_oacc_loop_part (file
, loop
->heads
[ix
], depth
, "Head", ix
);
1179 for (ix
= GOMP_DIM_MAX
; ix
--;)
1180 if (loop
->tails
[ix
])
1181 dump_oacc_loop_part (file
, loop
->tails
[ix
], depth
, "Tail", ix
);
1184 dump_oacc_loop (file
, loop
->child
, depth
+ 1);
1186 dump_oacc_loop (file
, loop
->sibling
, depth
);
1189 void debug_oacc_loop (oacc_loop
*);
1191 /* Dump loops to stderr. */
1194 debug_oacc_loop (oacc_loop
*loop
)
1196 dump_oacc_loop (stderr
, loop
, 0);
1199 /* Provide diagnostics on OpenACC loop LOOP, its children, and its
1203 inform_oacc_loop (const oacc_loop
*loop
)
1206 = loop
->mask
& GOMP_DIM_MASK (GOMP_DIM_GANG
) ? " gang" : "";
1208 = loop
->mask
& GOMP_DIM_MASK (GOMP_DIM_WORKER
) ? " worker" : "";
1210 = loop
->mask
& GOMP_DIM_MASK (GOMP_DIM_VECTOR
) ? " vector" : "";
1211 const char *seq
= loop
->mask
== 0 ? " seq" : "";
1212 const dump_user_location_t loc
1213 = dump_user_location_t::from_location_t (loop
->loc
);
1214 dump_printf_loc (MSG_OPTIMIZED_LOCATIONS
, loc
,
1215 "assigned OpenACC%s%s%s%s loop parallelism\n", gang
, worker
,
1219 inform_oacc_loop (loop
->child
);
1221 inform_oacc_loop (loop
->sibling
);
1224 /* DFS walk of basic blocks BB onwards, creating OpenACC loop
1225 structures as we go. By construction these loops are properly
nested. LOOP is the loop currently open: a head marker opens a new loop
inside it and a tail marker closes it again. While a head or tail marker
sequence is still open at the end of a block, scanning continues in that
block's single successor before the remaining successors are walked. */
1229 oacc_loop_discover_walk (oacc_loop
*loop
, basic_block bb
)
/* Each block is scanned at most once. */
1234 if (bb
->flags
& BB_VISITED
)
1238 bb
->flags
|= BB_VISITED
;
1240 /* Scan for loop markers. */
1241 for (gimple_stmt_iterator gsi
= gsi_start_bb (bb
); !gsi_end_p (gsi
);
1244 gimple
*stmt
= gsi_stmt (gsi
);
1246 if (!is_gimple_call (stmt
))
1249 gcall
*call
= as_a
<gcall
*> (stmt
);
1251 /* If this is a routine, make a dummy loop for it. */
1252 if (tree decl
= gimple_call_fndecl (call
))
1253 if (tree attrs
= oacc_get_fn_attrib (decl
))
1255 gcc_assert (!marker
);
1256 new_oacc_loop_routine (loop
, call
, decl
, attrs
);
1259 if (!gimple_call_internal_p (call
))
1262 switch (gimple_call_internal_fn (call
))
1267 case IFN_GOACC_LOOP
:
1268 case IFN_GOACC_TILE
:
1269 /* Record the abstraction function, so we can manipulate it
later. */
1271 loop
->ifns
.safe_push (call
);
1275 enum ifn_unique_kind kind
1276 = (enum ifn_unique_kind
) (TREE_INT_CST_LOW
1277 (gimple_call_arg (call
, 0)));
1278 if (kind
== IFN_UNIQUE_OACC_HEAD_MARK
1279 || kind
== IFN_UNIQUE_OACC_TAIL_MARK
)
/* A two-argument marker terminates the current head or tail sequence. */
1281 if (gimple_call_num_args (call
) == 2)
1283 gcc_assert (marker
&& !remaining
);
1285 if (kind
== IFN_UNIQUE_OACC_TAIL_MARK
)
1286 loop
= finish_oacc_loop (loop
);
1288 loop
->head_end
= call
;
/* Otherwise argument 2 carries a count that must agree with REMAINING. */
1292 int count
= TREE_INT_CST_LOW (gimple_call_arg (call
, 2));
1296 if (kind
== IFN_UNIQUE_OACC_HEAD_MARK
)
1297 loop
= new_oacc_loop (loop
, call
);
1300 gcc_assert (count
== remaining
);
1304 if (kind
== IFN_UNIQUE_OACC_HEAD_MARK
)
1305 loop
->heads
[marker
] = call
;
1307 loop
->tails
[remaining
] = call
;
/* A marker sequence is still open: it continues in the single successor
block, which must have a single predecessor and be unvisited. */
1314 if (remaining
|| marker
)
1316 bb
= single_succ (bb
);
1317 gcc_assert (single_pred_p (bb
) && !(bb
->flags
& BB_VISITED
));
1321 /* Walk successor blocks. */
1325 FOR_EACH_EDGE (e
, ei
, bb
->succs
)
1326 oacc_loop_discover_walk (loop
, e
->dest
);
1329 /* LOOP is the first sibling. Reverse the order in place and return
1330 the new first sibling. Recurse to child loops. */
1333 oacc_loop_sibling_nreverse (oacc_loop
*loop
)
1335 oacc_loop
*last
= NULL
;
1339 loop
->child
= oacc_loop_sibling_nreverse (loop
->child
);
1341 oacc_loop
*next
= loop
->sibling
;
1342 loop
->sibling
= last
;
1351 /* Discover the OpenACC loops marked up by HEAD and TAIL markers for
1352 the current function. */
1355 oacc_loop_discovery ()
1357 /* Clear basic block flags, in particular BB_VISITED which we're going to use
1358 in the following. */
1361 oacc_loop
*top
= new_oacc_loop_outer (current_function_decl
);
1362 oacc_loop_discover_walk (top
, ENTRY_BLOCK_PTR_FOR_FN (cfun
));
1364 /* The siblings were constructed in reverse order, reverse them so
1365 that diagnostics come out in an unsurprising order. */
1366 top
= oacc_loop_sibling_nreverse (top
);
1371 /* Transform the abstract internal function markers starting at FROM
1372 to be for partitioning level LEVEL. Stop when we meet another HEAD
1376 oacc_loop_xform_head_tail (gcall
*from
, int level
)
1378 enum ifn_unique_kind kind
1379 = (enum ifn_unique_kind
) TREE_INT_CST_LOW (gimple_call_arg (from
, 0));
1380 tree replacement
= build_int_cst (unsigned_type_node
, level
);
1382 for (gimple_stmt_iterator gsi
= gsi_for_stmt (from
);;)
1384 gimple
*stmt
= gsi_stmt (gsi
);
1386 if (gimple_call_internal_p (stmt
, IFN_UNIQUE
))
1388 enum ifn_unique_kind k
1389 = ((enum ifn_unique_kind
)
1390 TREE_INT_CST_LOW (gimple_call_arg (stmt
, 0)));
1392 if (k
== IFN_UNIQUE_OACC_FORK
1393 || k
== IFN_UNIQUE_OACC_JOIN
1394 || k
== IFN_UNIQUE_OACC_PRIVATE
)
1395 *gimple_call_arg_ptr (stmt
, 2) = replacement
;
1396 else if (k
== kind
&& stmt
!= from
)
1399 else if (gimple_call_internal_p (stmt
, IFN_GOACC_REDUCTION
))
1400 *gimple_call_arg_ptr (stmt
, 3) = replacement
;
1404 while (gsi_end_p (gsi
))
1405 gsi
= gsi_start_bb (single_succ (gsi_bb (gsi
)));
1409 /* Process the discovered OpenACC loops, setting the correct
1410 partitioning level etc. */
1413 oacc_loop_process (oacc_loop
*loop
, int fn_level
)
1416 oacc_loop_process (loop
->child
, fn_level
);
1418 if (loop
->mask
&& !loop
->routine
)
1421 tree mask_arg
= build_int_cst (unsigned_type_node
, loop
->mask
);
1422 tree e_mask_arg
= build_int_cst (unsigned_type_node
, loop
->e_mask
);
1423 tree chunk_arg
= loop
->chunk_size
;
1426 for (ix
= 0; loop
->ifns
.iterate (ix
, &call
); ix
++)
1428 switch (gimple_call_internal_fn (call
))
1430 case IFN_GOACC_LOOP
:
1432 bool is_e
= gimple_call_arg (call
, 5) == integer_minus_one_node
;
1433 gimple_call_set_arg (call
, 5, is_e
? e_mask_arg
: mask_arg
);
1435 gimple_call_set_arg (call
, 4, chunk_arg
);
1439 case IFN_GOACC_TILE
:
1440 gimple_call_set_arg (call
, 3, mask_arg
);
1441 gimple_call_set_arg (call
, 4, e_mask_arg
);
1450 unsigned dim
= GOMP_DIM_GANG
;
1451 unsigned mask
= loop
->mask
| loop
->e_mask
;
1452 for (ix
= 0; ix
!= GOMP_DIM_MAX
&& mask
; ix
++)
1454 while (!(GOMP_DIM_MASK (dim
) & mask
))
1457 oacc_loop_xform_head_tail (loop
->heads
[ix
], dim
);
1458 oacc_loop_xform_head_tail (loop
->tails
[ix
], dim
);
1460 mask
^= GOMP_DIM_MASK (dim
);
1465 oacc_loop_process (loop
->sibling
, fn_level
);
1468 /* OpenACC 2.6, 2.9.11. "reduction clause" places a restriction such that
1469 "The 'reduction' clause may not be specified on an orphaned 'loop'
1470 construct with the 'gang' clause, or on an orphaned 'loop' construct that
1471 will generate gang parallelism in a procedure that is compiled with the
1472 'routine gang' clause." */
1473 if (fn_level
== GOMP_DIM_GANG
1474 && (loop
->mask
& GOMP_DIM_MASK (GOMP_DIM_GANG
))
1475 && (loop
->flags
& OLF_REDUCTION
))
1476 error_at (loop
->loc
,
1477 "gang reduction on an orphan loop");
1480 /* Walk the OpenACC loop hierarchy checking and assigning the
1481 programmer-specified partitionings. OUTER_MASK is the partitioning
1482 this loop is contained within. Return mask of partitioning
1483 encountered. If any auto loops are discovered, set GOMP_DIM_MAX
bit. Conflicting or badly nested specifications are diagnosed with
error_at and the offending axes are dropped from the loop's mask. */
1487 oacc_loop_fixed_partitions (oacc_loop
*loop
, unsigned outer_mask
)
1489 unsigned this_mask
= loop
->mask
;
1490 unsigned mask_all
= 0;
1493 #ifdef ACCEL_COMPILER
1494 /* When device_type is supported, we want the device compiler to be
1495 noisy, if the loop parameters are device_type-specific. */
1501 bool auto_par
= (loop
->flags
& OLF_AUTO
) != 0;
1502 bool seq_par
= (loop
->flags
& OLF_SEQ
) != 0;
1503 bool tiling
= (loop
->flags
& OLF_TILE
) != 0;
/* The explicitly requested axes are stored in the loop flags, starting at
bit OLF_DIM_BASE. */
1505 this_mask
= ((loop
->flags
>> OLF_DIM_BASE
)
1506 & (GOMP_DIM_MASK (GOMP_DIM_MAX
) - 1));
1508 /* Apply auto partitioning if this is a non-partitioned regular
1509 loop, or (no more than) single axis tiled loop. */
1511 = !seq_par
&& this_mask
== (tiling
? this_mask
& -this_mask
: 0);
/* At most one of explicit axes, 'auto' and 'seq' may be given. */
1513 if ((this_mask
!= 0) + auto_par
+ seq_par
> 1)
1516 error_at (loop
->loc
,
1518 ? G_("%<seq%> overrides other OpenACC loop specifiers")
1519 : G_("%<auto%> conflicts with other OpenACC loop "
1522 loop
->flags
&= ~OLF_AUTO
;
1526 &= ~((GOMP_DIM_MASK (GOMP_DIM_MAX
) - 1) << OLF_DIM_BASE
);
/* An independent loop that may be auto-partitioned is flagged OLF_AUTO and
reported to the caller through the GOMP_DIM_MAX bit. */
1531 if (maybe_auto
&& (loop
->flags
& OLF_INDEPENDENT
))
1533 loop
->flags
|= OLF_AUTO
;
1534 mask_all
|= GOMP_DIM_MASK (GOMP_DIM_MAX
);
/* The loop asks for an axis that an enclosing loop or the containing
routine already uses. */
1538 if (this_mask
& outer_mask
)
1540 const oacc_loop
*outer
;
1541 for (outer
= loop
->parent
; outer
; outer
= outer
->parent
)
1542 if ((outer
->mask
| outer
->e_mask
) & this_mask
)
1549 error_at (loop
->loc
,
1551 ? G_("routine call uses same OpenACC parallelism"
1552 " as containing loop")
1553 : G_("inner loop uses same OpenACC parallelism"
1554 " as containing loop"));
1555 inform (outer
->loc
, "containing loop here");
1558 error_at (loop
->loc
,
1560 ? G_("routine call uses OpenACC parallelism disallowed"
1561 " by containing routine")
1562 : G_("loop uses OpenACC parallelism disallowed"
1563 " by containing routine"));
1566 inform (DECL_SOURCE_LOCATION (loop
->routine
),
1567 "routine %qD declared here", loop
->routine
);
1569 this_mask
&= ~outer_mask
;
/* The outermost requested axis must lie inside every axis already in use. */
1573 unsigned outermost
= least_bit_hwi (this_mask
);
1575 if (outermost
&& outermost
<= outer_mask
)
1579 error_at (loop
->loc
,
1580 "incorrectly nested OpenACC loop parallelism");
1582 const oacc_loop
*outer
;
1583 for (outer
= loop
->parent
;
1584 outer
->flags
&& outer
->flags
< outermost
;
1585 outer
= outer
->parent
)
1587 inform (outer
->loc
, "containing loop here");
1590 this_mask
&= ~outermost
;
1594 mask_all
|= this_mask
;
1596 if (loop
->flags
& OLF_TILE
)
1598 /* When tiling, vector goes to the element loop, and failing
1599 that we put worker there. The std doesn't contemplate
1600 specifying all three. We choose to put worker and vector on
1601 the element loops in that case. */
1602 unsigned this_e_mask
= this_mask
& GOMP_DIM_MASK (GOMP_DIM_VECTOR
);
1603 if (!this_e_mask
|| this_mask
& GOMP_DIM_MASK (GOMP_DIM_GANG
))
1604 this_e_mask
|= this_mask
& GOMP_DIM_MASK (GOMP_DIM_WORKER
);
1606 loop
->e_mask
= this_e_mask
;
1607 this_mask
^= this_e_mask
;
1610 loop
->mask
= this_mask
;
1613 fprintf (dump_file
, "Loop %s:%d user specified %d & %d\n",
1614 LOCATION_FILE (loop
->loc
), LOCATION_LINE (loop
->loc
),
1615 loop
->mask
, loop
->e_mask
);
/* Children are checked against everything in use down to and including this
loop; siblings are checked against the unchanged OUTER_MASK. */
1619 unsigned tmp_mask
= outer_mask
| this_mask
| loop
->e_mask
;
1620 loop
->inner
= oacc_loop_fixed_partitions (loop
->child
, tmp_mask
);
1621 mask_all
|= loop
->inner
;
1625 mask_all
|= oacc_loop_fixed_partitions (loop
->sibling
, outer_mask
);
1630 /* Walk the OpenACC loop hierarchy to assign auto-partitioned loops.
1631 OUTER_MASK is the partitioning this loop is contained within.
1632 OUTER_ASSIGN is true if an outer loop is being auto-partitioned.
1633 Return the cumulative partitioning used by this loop, siblings and
children. */
1637 oacc_loop_auto_partitions (oacc_loop
*loop
, unsigned outer_mask
,
/* Only loops that are both 'auto' and 'independent' get axes assigned here. */
1640 bool assign
= (loop
->flags
& OLF_AUTO
) && (loop
->flags
& OLF_INDEPENDENT
);
1642 bool tiling
= loop
->flags
& OLF_TILE
;
1644 #ifdef ACCEL_COMPILER
1645 /* When device_type is supported, we want the device compiler to be
1646 noisy, if the loop parameters are device_type-specific. */
1650 if (assign
&& (!outer_assign
|| loop
->inner
))
1652 /* Allocate outermost and non-innermost loops at the outermost
1653 non-innermost available level. */
1654 unsigned this_mask
= GOMP_DIM_MASK (GOMP_DIM_GANG
);
1656 /* Find the first outermost available partition. */
1657 while (this_mask
<= outer_mask
)
1660 /* Grab two axes if tiling, and we've not assigned anything */
1661 if (tiling
&& !(loop
->mask
| loop
->e_mask
))
1662 this_mask
|= this_mask
<< 1;
1664 /* Prohibit the innermost partitioning at the moment. */
1665 this_mask
&= GOMP_DIM_MASK (GOMP_DIM_MAX
- 1) - 1;
1667 /* Don't use any dimension explicitly claimed by an inner loop. */
1668 this_mask
&= ~loop
->inner
;
1670 if (tiling
&& !loop
->e_mask
)
1672 /* If we got two axes, allocate the inner one to the element
loop. */
1674 loop
->e_mask
= this_mask
& (this_mask
<< 1);
1675 this_mask
^= loop
->e_mask
;
1678 loop
->mask
|= this_mask
;
/* Recurse into the children with this loop's axes added to the outer mask. */
1683 unsigned tmp_mask
= outer_mask
| loop
->mask
| loop
->e_mask
;
1684 loop
->inner
= oacc_loop_auto_partitions (loop
->child
, tmp_mask
,
1685 outer_assign
| assign
);
1688 if (assign
&& (!loop
->mask
|| (tiling
&& !loop
->e_mask
) || !outer_assign
))
1690 /* Allocate the loop at the innermost available level. Note
1691 that we do this even if we already assigned this loop the
1692 outermost available level above. That way we'll partition
1693 this along 2 axes, if they are available. */
1694 unsigned this_mask
= 0;
1696 /* Determine the outermost partitioning used within this loop. */
1697 this_mask
= loop
->inner
| GOMP_DIM_MASK (GOMP_DIM_MAX
);
1698 this_mask
= least_bit_hwi (this_mask
);
1700 /* Pick the partitioning just inside that one. */
1703 /* And avoid picking one used by an outer loop. */
1704 this_mask
&= ~outer_mask
;
1706 /* If tiling and we failed completely above, grab the next one
1707 too. Making sure it doesn't hit an outer loop. */
1710 this_mask
&= ~(loop
->e_mask
| loop
->mask
);
1711 unsigned tile_mask
= ((this_mask
>> 1)
1712 & ~(outer_mask
| loop
->e_mask
| loop
->mask
));
1714 if (tile_mask
|| loop
->mask
)
1716 loop
->e_mask
|= this_mask
;
1717 this_mask
= tile_mask
;
1719 if (!loop
->e_mask
&& noisy
)
1720 warning_at (loop
->loc
, 0,
1721 "insufficient partitioning available"
1722 " to parallelize element loop");
1725 loop
->mask
|= this_mask
;
1726 if (!loop
->mask
&& noisy
)
1727 warning_at (loop
->loc
, 0,
1729 ? G_("insufficient partitioning available"
1730 " to parallelize tile loop")
1731 : G_("insufficient partitioning available"
1732 " to parallelize loop"));
1735 if (assign
&& dump_file
)
1736 fprintf (dump_file
, "Auto loop %s:%d assigned %d & %d\n",
1737 LOCATION_FILE (loop
->loc
), LOCATION_LINE (loop
->loc
),
1738 loop
->mask
, loop
->e_mask
);
/* Accumulate the axes used by the siblings and by this loop's subtree. */
1740 unsigned inner_mask
= 0;
1743 inner_mask
|= oacc_loop_auto_partitions (loop
->sibling
,
1744 outer_mask
, outer_assign
);
1746 inner_mask
|= loop
->inner
| loop
->mask
| loop
->e_mask
;
1751 /* Walk the OpenACC loop heirarchy to check and assign partitioning
1752 axes. Return mask of partitioning. */
1755 oacc_loop_partition (oacc_loop
*loop
, unsigned outer_mask
)
1757 unsigned mask_all
= oacc_loop_fixed_partitions (loop
, outer_mask
);
1759 if (mask_all
& GOMP_DIM_MASK (GOMP_DIM_MAX
))
1761 mask_all
^= GOMP_DIM_MASK (GOMP_DIM_MAX
);
1762 mask_all
|= oacc_loop_auto_partitions (loop
, outer_mask
, false);
1767 /* Default fork/join early expander. Delete the function calls if
1768 there is no RTL expander. */
1771 default_goacc_fork_join (gcall
*ARG_UNUSED (call
),
1772 const int *ARG_UNUSED (dims
), bool is_fork
)
1775 return targetm
.have_oacc_fork ();
1777 return targetm
.have_oacc_join ();
1780 /* Default goacc.reduction early expander.
1782 LHS-opt = IFN_REDUCTION (KIND, RES_PTR, VAR, LEVEL, OP, OFFSET)
1783 If RES_PTR is not integer-zerop:
1784 SETUP - emit 'LHS = *RES_PTR', LHS = NULL
1785 TEARDOWN - emit '*RES_PTR = VAR'
1790 default_goacc_reduction (gcall
*call
)
1792 unsigned code
= (unsigned)TREE_INT_CST_LOW (gimple_call_arg (call
, 0));
1793 gimple_stmt_iterator gsi
= gsi_for_stmt (call
);
1794 tree lhs
= gimple_call_lhs (call
);
1795 tree var
= gimple_call_arg (call
, 2);
1796 gimple_seq seq
= NULL
;
1798 if (code
== IFN_GOACC_REDUCTION_SETUP
1799 || code
== IFN_GOACC_REDUCTION_TEARDOWN
)
1801 /* Setup and Teardown need to copy from/to the receiver object,
1803 tree ref_to_res
= gimple_call_arg (call
, 1);
1805 if (!integer_zerop (ref_to_res
))
1807 tree dst
= build_simple_mem_ref (ref_to_res
);
1810 if (code
== IFN_GOACC_REDUCTION_SETUP
)
1816 gimple_seq_add_stmt (&seq
, gimple_build_assign (dst
, src
));
1820 /* Copy VAR to LHS, if there is an LHS. */
1822 gimple_seq_add_stmt (&seq
, gimple_build_assign (lhs
, var
));
1824 gsi_replace_with_seq (&gsi
, seq
, true);
/* State handed to oacc_rewrite_var_decl through walk_stmt_info's 'info'
   field while operands are rewritten. */
1827 struct var_decl_rewrite_info
/* Maps each adjusted variable to its replacement declaration. */
1830 hash_map
<tree
, tree
> *adjusted_vars
;
/* When true, an ADDR_EXPR of an adjusted variable is simply rebuilt on the
   replacement; otherwise the new address is converted back to the original
   pointer type. */
1831 bool avoid_pointer_conversion
;
1835 /* Helper function for execute_oacc_device_lower. Rewrite VAR_DECLs (by
1836 themselves or wrapped in various other nodes) according to ADJUSTED_VARS in
1837 the var_decl_rewrite_info pointed to via DATA. Used as part of coercing
1838 gang-private variables in OpenACC offload regions to reside in GPU shared
memory. This is a walk_gimple_op callback: TP is the operand being
visited and DATA is the walk_stmt_info whose 'info' field holds the
var_decl_rewrite_info. */
1842 oacc_rewrite_var_decl (tree
*tp
, int *walk_subtrees
, void *data
)
1844 walk_stmt_info
*wi
= (walk_stmt_info
*) data
;
1845 var_decl_rewrite_info
*info
= (var_decl_rewrite_info
*) wi
->info
;
/* Case 1: the address of a variable. */
1847 if (TREE_CODE (*tp
) == ADDR_EXPR
)
1849 tree arg
= TREE_OPERAND (*tp
, 0);
1850 tree
*new_arg
= info
->adjusted_vars
->get (arg
);
/* Take the address of the replacement directly. */
1854 if (info
->avoid_pointer_conversion
)
1856 *tp
= build_fold_addr_expr (*new_arg
);
1857 info
->modified
= true;
/* Otherwise emit two assignments before the statement: the address of the
replacement, then that address converted to the original pointer type. The
converted SSA name replaces the operand. */
1862 gimple_stmt_iterator gsi
= gsi_for_stmt (info
->stmt
);
1863 tree repl
= build_fold_addr_expr (*new_arg
);
1865 = gimple_build_assign (make_ssa_name (TREE_TYPE (repl
)), repl
);
1866 tree conv
= convert_to_pointer (TREE_TYPE (*tp
),
1867 gimple_assign_lhs (stmt1
));
1869 = gimple_build_assign (make_ssa_name (TREE_TYPE (*tp
)), conv
);
1870 gsi_insert_before (&gsi
, stmt1
, GSI_SAME_STMT
);
1871 gsi_insert_before (&gsi
, stmt2
, GSI_SAME_STMT
);
1872 *tp
= gimple_assign_lhs (stmt2
);
1873 info
->modified
= true;
/* Case 2: a component or array reference; find the innermost base. */
1878 else if (TREE_CODE (*tp
) == COMPONENT_REF
|| TREE_CODE (*tp
) == ARRAY_REF
)
1880 tree
*base
= &TREE_OPERAND (*tp
, 0);
1882 while (TREE_CODE (*base
) == COMPONENT_REF
1883 || TREE_CODE (*base
) == ARRAY_REF
)
1884 base
= &TREE_OPERAND (*base
, 0);
1886 if (TREE_CODE (*base
) != VAR_DECL
)
1889 tree
*new_decl
= info
->adjusted_vars
->get (*base
);
1893 int base_quals
= TYPE_QUALS (TREE_TYPE (*new_decl
));
1894 tree field
= TREE_OPERAND (*tp
, 1);
1896 /* Adjust the type of the field. */
1897 int field_quals
= TYPE_QUALS (TREE_TYPE (field
));
1898 if (TREE_CODE (field
) == FIELD_DECL
&& field_quals
!= base_quals
)
1900 tree
*field_type
= &TREE_TYPE (field
);
1901 while (TREE_CODE (*field_type
) == ARRAY_TYPE
)
1902 field_type
= &TREE_TYPE (*field_type
);
1903 field_quals
|= base_quals
;
1904 *field_type
= build_qualified_type (*field_type
, field_quals
);
1907 /* Adjust the type of the component ref itself. */
1908 tree comp_type
= TREE_TYPE (*tp
);
1909 int comp_quals
= TYPE_QUALS (comp_type
);
1910 if (TREE_CODE (*tp
) == COMPONENT_REF
&& comp_quals
!= base_quals
)
1912 comp_quals
|= base_quals
;
1914 = build_qualified_type (comp_type
, comp_quals
);
1918 info
->modified
= true;
/* Case 3: a plain variable. */
1920 else if (VAR_P (*tp
))
1922 tree
*new_decl
= info
->adjusted_vars
->get (*tp
);
1926 info
->modified
= true;
1933 /* Return TRUE if CALL is a call to a builtin atomic/sync operation. */
1936 is_sync_builtin_call (gcall
*call
)
1938 tree callee
= gimple_call_fndecl (call
);
1940 if (callee
!= NULL_TREE
1941 && gimple_call_builtin_p (call
, BUILT_IN_NORMAL
))
1942 switch (DECL_FUNCTION_CODE (callee
))
1944 #undef DEF_SYNC_BUILTIN
1945 #define DEF_SYNC_BUILTIN(ENUM, NAME, TYPE, ATTRS) case ENUM:
1946 #include "sync-builtins.def"
1947 #undef DEF_SYNC_BUILTIN
1957 /* Main entry point for oacc transformations which run on the device
1958 compiler after LTO, so we know what the target device is at this
1959 point (including the host fallback). Discovers the OpenACC loops of the
current function, assigns their partitioning, validates the launch
dimensions and processes the loops; a discarded OpenACC routine is instead
reported through TODO_discard_function. */
1962 execute_oacc_loop_designation ()
1964 tree attrs
= oacc_get_fn_attrib (current_function_decl
);
1967 /* Not an offloaded function. */
1970 /* Parse the default dim argument exactly once. */
/* The flag is pointed at itself below as the "already parsed" sentinel. */
1971 if ((const void *)flag_openacc_dims
!= &flag_openacc_dims
)
1973 oacc_parse_default_dims (flag_openacc_dims
);
1974 flag_openacc_dims
= (char *)&flag_openacc_dims
;
/* Classify the function by the OpenACC attributes attached to it. */
1977 bool is_oacc_parallel
1978 = (lookup_attribute ("oacc parallel",
1979 DECL_ATTRIBUTES (current_function_decl
)) != NULL
);
1980 bool is_oacc_kernels
1981 = (lookup_attribute ("oacc kernels",
1982 DECL_ATTRIBUTES (current_function_decl
)) != NULL
);
1984 = (lookup_attribute ("oacc serial",
1985 DECL_ATTRIBUTES (current_function_decl
)) != NULL
);
1986 bool is_oacc_parallel_kernels_parallelized
1987 = (lookup_attribute ("oacc parallel_kernels_parallelized",
1988 DECL_ATTRIBUTES (current_function_decl
)) != NULL
);
1989 bool is_oacc_parallel_kernels_gang_single
1990 = (lookup_attribute ("oacc parallel_kernels_gang_single",
1991 DECL_ATTRIBUTES (current_function_decl
)) != NULL
);
1992 int fn_level
= oacc_fn_attrib_level (attrs
);
1993 bool is_oacc_routine
= (fn_level
>= 0);
1994 gcc_checking_assert (is_oacc_parallel
1997 + is_oacc_parallel_kernels_parallelized
1998 + is_oacc_parallel_kernels_gang_single
2002 bool is_oacc_kernels_parallelized
2003 = (lookup_attribute ("oacc kernels parallelized",
2004 DECL_ATTRIBUTES (current_function_decl
)) != NULL
);
2005 if (is_oacc_kernels_parallelized
)
2006 gcc_checking_assert (is_oacc_kernels
);
2010 if (is_oacc_parallel
)
2011 fprintf (dump_file
, "Function is OpenACC parallel offload\n");
2012 else if (is_oacc_kernels
)
2013 fprintf (dump_file
, "Function is %s OpenACC kernels offload\n",
2014 (is_oacc_kernels_parallelized
2015 ? "parallelized" : "unparallelized"));
2016 else if (is_oacc_serial
)
2017 fprintf (dump_file
, "Function is OpenACC serial offload\n");
2018 else if (is_oacc_parallel_kernels_parallelized
)
2019 fprintf (dump_file
, "Function is %s OpenACC kernels offload\n",
2020 "parallel_kernels_parallelized");
2021 else if (is_oacc_parallel_kernels_gang_single
)
2022 fprintf (dump_file
, "Function is %s OpenACC kernels offload\n",
2023 "parallel_kernels_gang_single");
2024 else if (is_oacc_routine
)
2025 fprintf (dump_file
, "Function is OpenACC routine level %d\n",
2031 /* This doesn't belong into 'pass_oacc_loop_designation' conceptually, but
2032 it's a convenient place, so... */
2033 if (is_oacc_routine
)
2035 tree attr
= lookup_attribute ("omp declare target",
2036 DECL_ATTRIBUTES (current_function_decl
));
2037 gcc_checking_assert (attr
);
2038 tree clauses
= TREE_VALUE (attr
);
2039 gcc_checking_assert (clauses
);
2041 /* Should this OpenACC routine be discarded? */
2042 bool discard
= false;
2044 tree clause_nohost
= omp_find_clause (clauses
, OMP_CLAUSE_NOHOST
);
2047 "OpenACC routine '%s' %s '%s' clause.\n",
2048 lang_hooks
.decl_printable_name (current_function_decl
, 2),
2049 clause_nohost
? "has" : "doesn't have",
2050 omp_clause_code_name
[OMP_CLAUSE_NOHOST
]);
2051 /* Host compiler, 'nohost' clause? */
2052 #ifndef ACCEL_COMPILER
2059 "OpenACC routine '%s' %sdiscarded.\n",
2060 lang_hooks
.decl_printable_name (current_function_decl
, 2),
2061 discard
? "" : "not ");
2064 TREE_ASM_WRITTEN (current_function_decl
) = 1;
2065 return TODO_discard_function
;
2069 /* Unparallelized OpenACC kernels constructs must get launched as 1 x 1 x 1
2070 kernels, so remove the parallelism dimensions function attributes
2071 potentially set earlier on. */
2072 if (is_oacc_kernels
&& !is_oacc_kernels_parallelized
)
2074 oacc_set_fn_attrib (current_function_decl
, NULL
, NULL
);
2075 attrs
= oacc_get_fn_attrib (current_function_decl
);
2078 /* Discover, partition and process the loops. */
2079 oacc_loop
*loops
= oacc_loop_discovery ();
/* Inside a routine, every axis outside the routine's own level is off
limits. */
2081 unsigned outer_mask
= 0;
2082 if (is_oacc_routine
)
2083 outer_mask
= GOMP_DIM_MASK (fn_level
) - 1;
2084 unsigned used_mask
= oacc_loop_partition (loops
, outer_mask
);
2085 /* OpenACC kernels constructs are special: they currently don't use the
2086 generic oacc_loop infrastructure and attribute/dimension processing. */
2087 if (is_oacc_kernels
&& is_oacc_kernels_parallelized
)
2089 /* Parallelized OpenACC kernels constructs use gang parallelism. See
2090 also tree-parloops.cc:create_parallel_loop. */
2091 used_mask
|= GOMP_DIM_MASK (GOMP_DIM_GANG
);
2094 int dims
[GOMP_DIM_MAX
];
2095 oacc_validate_dims (current_function_decl
, attrs
, dims
, fn_level
, used_mask
);
2099 const char *comma
= "Compute dimensions [";
2100 for (int ix
= 0; ix
!= GOMP_DIM_MAX
; ix
++, comma
= ", ")
2101 fprintf (dump_file
, "%s%d", comma
, dims
[ix
]);
2102 fprintf (dump_file
, "]\n");
2105 /* Verify that for OpenACC 'kernels' decomposed "gang-single" parts we launch
2106 a single gang only. */
2107 if (is_oacc_parallel_kernels_gang_single
)
2108 gcc_checking_assert (dims
[GOMP_DIM_GANG
] == 1);
2110 oacc_loop_process (loops
, fn_level
);
2113 fprintf (dump_file
, "OpenACC loops\n");
2114 dump_oacc_loop (dump_file
, loops
, 0);
2115 fprintf (dump_file
, "\n");
2117 if (dump_enabled_p ())
2119 oacc_loop
*l
= loops
;
2120 /* OpenACC kernels constructs are special: they currently don't use the
2121 generic oacc_loop infrastructure. */
2122 if (is_oacc_kernels
)
2124 /* Create a fake oacc_loop for diagnostic purposes. */
2125 l
= new_oacc_loop_raw (NULL
,
2126 DECL_SOURCE_LOCATION (current_function_decl
));
2127 l
->mask
= used_mask
;
2131 /* Skip the outermost, dummy OpenACC loop */
2135 inform_oacc_loop (l
);
2136 if (is_oacc_kernels
)
2140 free_oacc_loop (loops
);
/* Lower the OpenACC internal functions of the current offloaded function:
   IFN_GOACC_TILE, IFN_GOACC_LOOP, IFN_GOACC_REDUCTION and the IFN_UNIQUE
   fork, join, head/tail mark and private kinds. Variables that the target
   adjusts for privatization are collected and references to them are
   rewritten afterwards. */
2146 execute_oacc_device_lower ()
2148 tree attrs
= oacc_get_fn_attrib (current_function_decl
);
2151 /* Not an offloaded function. */
/* Fetch the launch dimension recorded for each axis of this function. */
2154 int dims
[GOMP_DIM_MAX
];
2155 for (unsigned i
= 0; i
< GOMP_DIM_MAX
; i
++)
2156 dims
[i
] = oacc_get_fn_dim_size (current_function_decl
, i
);
/* Maps each privatized variable to the declaration the target returned. */
2158 hash_map
<tree
, tree
> adjusted_vars
;
2160 /* Now lower internal loop functions to target-specific code
sequences. */
2163 FOR_ALL_BB_FN (bb
, cfun
)
2164 for (gimple_stmt_iterator gsi
= gsi_start_bb (bb
); !gsi_end_p (gsi
);)
2166 gimple
*stmt
= gsi_stmt (gsi
);
2167 if (!is_gimple_call (stmt
))
2173 gcall
*call
= as_a
<gcall
*> (stmt
);
2174 if (!gimple_call_internal_p (call
))
2180 /* Rewind to allow rescan. */
2182 bool rescan
= false, remove
= false;
2183 enum internal_fn ifn_code
= gimple_call_internal_fn (call
);
2189 case IFN_GOACC_TILE
:
2190 oacc_xform_tile (call
);
2194 case IFN_GOACC_LOOP
:
2195 oacc_xform_loop (call
);
2199 case IFN_GOACC_REDUCTION
:
2200 /* Mark the function for SSA renaming. */
2201 mark_virtual_operands_for_renaming (cfun
);
2203 /* If the level is -1, this ended up being an unused
2204 axis. Handle as a default. */
2205 if (integer_minus_onep (gimple_call_arg (call
, 3)))
2206 default_goacc_reduction (call
);
2208 targetm
.goacc
.reduction (call
);
2214 enum ifn_unique_kind kind
2215 = ((enum ifn_unique_kind
)
2216 TREE_INT_CST_LOW (gimple_call_arg (call
, 0)));
2223 case IFN_UNIQUE_OACC_FORK
:
2224 case IFN_UNIQUE_OACC_JOIN
:
2225 if (integer_minus_onep (gimple_call_arg (call
, 2)))
2227 else if (!targetm
.goacc
.fork_join
2228 (call
, dims
, kind
== IFN_UNIQUE_OACC_FORK
))
2232 case IFN_UNIQUE_OACC_HEAD_MARK
:
2233 case IFN_UNIQUE_OACC_TAIL_MARK
:
2237 case IFN_UNIQUE_OACC_PRIVATE
:
2239 dump_flags_t l_dump_flags
2240 = get_openacc_privatization_dump_flags ();
/* Fall back to the function's location when the statement has none. */
2242 location_t loc
= gimple_location (stmt
);
2243 if (LOCATION_LOCUS (loc
) == UNKNOWN_LOCATION
)
2244 loc
= DECL_SOURCE_LOCATION (current_function_decl
);
2245 const dump_user_location_t d_u_loc
2246 = dump_user_location_t::from_location_t (loc
);
2249 = TREE_INT_CST_LOW (gimple_call_arg (call
, 2));
2250 gcc_checking_assert (level
== -1
2252 && level
< GOMP_DIM_MAX
));
/* Arguments from index 3 onwards are the addresses of the privatized
variables. */
2253 for (unsigned i
= 3;
2254 i
< gimple_call_num_args (call
);
2257 static char const *const axes
[] =
2258 /* Must be kept in sync with GOMP_DIM enumeration. */
2259 { "gang", "worker", "vector" };
2261 tree arg
= gimple_call_arg (call
, i
);
2262 gcc_checking_assert (TREE_CODE (arg
) == ADDR_EXPR
);
2263 tree decl
= TREE_OPERAND (arg
, 0);
2264 if (dump_enabled_p ())
2265 /* PR100695 "Format decoder, quoting in 'dump_printf' etc." */
2267 # pragma GCC diagnostic push
2268 # pragma GCC diagnostic ignored "-Wformat"
2270 dump_printf_loc (l_dump_flags
, d_u_loc
,
2271 "variable %<%T%> ought to be"
2272 " adjusted for OpenACC"
2273 " privatization level: %qs\n",
2276 ? "UNKNOWN" : axes
[level
]));
2278 # pragma GCC diagnostic pop
2283 else if (!targetm
.goacc
.adjust_private_decl
)
2285 else if (level
== GOMP_DIM_VECTOR
)
2287 /* That's the default behavior. */
2292 tree oldtype
= TREE_TYPE (decl
);
2294 = targetm
.goacc
.adjust_private_decl (loc
, decl
,
2296 adjusted
= (TREE_TYPE (newdecl
) != oldtype
2297 || newdecl
!= decl
);
2299 adjusted_vars
.put (decl
, newdecl
);
2302 && dump_enabled_p ())
2303 /* PR100695 "Format decoder, quoting in 'dump_printf' etc." */
2305 # pragma GCC diagnostic push
2306 # pragma GCC diagnostic ignored "-Wformat"
2308 dump_printf_loc (l_dump_flags
, d_u_loc
,
2309 "variable %<%T%> adjusted for"
2310 " OpenACC privatization level:"
2314 # pragma GCC diagnostic pop
2325 if (gsi_end_p (gsi
))
2326 /* We rewound past the beginning of the BB. */
2327 gsi
= gsi_start_bb (bb
);
2329 /* Undo the rewind. */
/* Removing the call: forward its virtual operand to the users first. */
2334 if (gimple_vdef (call
))
2335 replace_uses_by (gimple_vdef (call
), gimple_vuse (call
));
2336 if (gimple_call_lhs (call
))
2338 /* Propagate the data dependency var. */
2339 gimple
*ass
= gimple_build_assign (gimple_call_lhs (call
),
2340 gimple_call_arg (call
, 1));
2341 gsi_replace (&gsi
, ass
, false);
2344 gsi_remove (&gsi
, true);
2347 /* If not rescanning, advance over the call. */
2351 /* Regarding the OpenACC privatization level, we're currently only looking at
2352 making the gang-private level work. Regarding that, we have the following
2355 - GCN offloading: 'targetm.goacc.adjust_private_decl' does the work (in
2356 particular, change 'TREE_TYPE', etc.) and there is no
2357 'targetm.goacc.expand_var_decl'.
2359 - nvptx offloading: 'targetm.goacc.adjust_private_decl' only sets a
2360 marker and then 'targetm.goacc.expand_var_decl' does the work.
2362 Eventually (in particular, for worker-private level?), both
2363 'targetm.goacc.adjust_private_decl' and 'targetm.goacc.expand_var_decl'
2364 may need to do things, but that's currently not meant to be addressed, and
2365 thus not fully worked out and implemented, and thus untested. Hence,
2366 'assert' what currently is implemented/tested, only. */
2368 if (targetm
.goacc
.expand_var_decl
)
2369 gcc_assert (adjusted_vars
.is_empty ());
2371 /* Make adjustments to gang-private local variables if required by the
2372 target, e.g. forcing them into a particular address space. Afterwards,
2373 ADDR_EXPR nodes which have adjusted variables as their argument need to
2374 be modified in one of two ways:
2376 1. They can be recreated, making a pointer to the variable in the new
address space.
2379 2. The address of the variable in the new address space can be taken,
2380 converted to the default (original) address space, and the result of
2381 that conversion substituted in place of the original ADDR_EXPR node.
2383 Which of these is done depends on the gimple statement being processed.
2384 At present atomic operations and inline asms use (1), and everything else
2385 uses (2). At least on AMD GCN, there are atomic operations that work
2386 directly in the LDS address space.
2388 COMPONENT_REFS, ARRAY_REFS and plain VAR_DECLs are also rewritten to use
2389 the new decl, adjusting types of appropriate tree nodes as necessary. */
2391 if (targetm
.goacc
.adjust_private_decl
2392 && !adjusted_vars
.is_empty ())
2394 FOR_ALL_BB_FN (bb
, cfun
)
2395 for (gimple_stmt_iterator gsi
= gsi_start_bb (bb
);
2399 gimple
*stmt
= gsi_stmt (gsi
);
2401 var_decl_rewrite_info info
;
/* Sync builtins and inline asms get the rebuilt address without a pointer
conversion. */
2403 info
.avoid_pointer_conversion
2404 = (is_gimple_call (stmt
)
2405 && is_sync_builtin_call (as_a
<gcall
*> (stmt
)))
2406 || gimple_code (stmt
) == GIMPLE_ASM
;
2408 info
.modified
= false;
2409 info
.adjusted_vars
= &adjusted_vars
;
2411 memset (&wi
, 0, sizeof (wi
));
2414 walk_gimple_op (stmt
, oacc_rewrite_var_decl
, &wi
);
2424 /* Default launch dimension validator. Force everything to 1. A
2425 backend that wants to provide larger dimensions must override this hook. */
/* DECL, FN_LEVEL and USED are all marked ARG_UNUSED; DIMS is the only
   parameter this default implementation does not mark as unused.  */
2429 default_goacc_validate_dims (tree
ARG_UNUSED (decl
), int *dims
,
2430 int ARG_UNUSED (fn_level
),
2431 unsigned ARG_UNUSED (used
))
/* Starts out false.  NOTE(review): presumably set once an entry of DIMS is
   rewritten -- the loop body is not visible in this excerpt.  */
2433 bool changed
= false;
/* Visit each of the GOMP_DIM_MAX launch dimensions in turn.  */
2435 for (unsigned ix
= 0; ix
!= GOMP_DIM_MAX
; ix
++)
2447 /* Default dimension bound is unknown on accelerator and 1 on host. */
/* AXIS is marked ARG_UNUSED, so the same bound applies to every dimension.
   Which bound is used is chosen at build time by whether ACCEL_COMPILER is
   defined.  */
2450 default_goacc_dim_limit (int ARG_UNUSED (axis
))
2452 #ifdef ACCEL_COMPILER
/* Descriptor of the "oaccloops" GIMPLE pass: it requires a CFG and requests
   an SSA update plus a CFG cleanup once the pass has finished.  */
2461 const pass_data pass_data_oacc_loop_designation
=
2463 GIMPLE_PASS
, /* type */
2464 "oaccloops", /* name */
2465 OPTGROUP_OMP
, /* optinfo_flags */
2466 TV_NONE
, /* tv_id */
2467 PROP_cfg
, /* properties_required */
2468 0 /* Possibly PROP_gimple_eomp. */, /* properties_provided */
2469 0, /* properties_destroyed */
2470 0, /* todo_flags_start */
2471 TODO_update_ssa
| TODO_cleanup_cfg
, /* todo_flags_finish */
/* Pass object for "oaccloops".  It is constructed from
   pass_data_oacc_loop_designation, is gated on flag_openacc, and forwards
   its work to execute_oacc_loop_designation.  */
2474 class pass_oacc_loop_designation
: public gimple_opt_pass
2477 pass_oacc_loop_designation (gcc::context
*ctxt
)
2478 : gimple_opt_pass (pass_data_oacc_loop_designation
, ctxt
)
2481 /* opt_pass methods: */
/* Gate: the pass runs exactly when flag_openacc is nonzero.  */
2482 bool gate (function
*) final override
{ return flag_openacc
; };
/* Execute: the function argument is not named or used here.  */
2484 unsigned int execute (function
*) final override
2486 return execute_oacc_loop_designation ();
2489 }; // class pass_oacc_loop_designation
/* Descriptor of the "oaccdevlow" GIMPLE pass: it requires a CFG and requests
   an SSA update plus a CFG cleanup once the pass has finished.  */
2491 const pass_data pass_data_oacc_device_lower
=
2493 GIMPLE_PASS
, /* type */
2494 "oaccdevlow", /* name */
2495 OPTGROUP_OMP
, /* optinfo_flags */
2496 TV_NONE
, /* tv_id */
2497 PROP_cfg
, /* properties_required */
2498 0 /* Possibly PROP_gimple_eomp. */, /* properties_provided */
2499 0, /* properties_destroyed */
2500 0, /* todo_flags_start */
2501 TODO_update_ssa
| TODO_cleanup_cfg
, /* todo_flags_finish */
/* Pass object for "oaccdevlow".  It is constructed from
   pass_data_oacc_device_lower, is gated on flag_openacc, and forwards its
   work to execute_oacc_device_lower.  */
2504 class pass_oacc_device_lower
: public gimple_opt_pass
2507 pass_oacc_device_lower (gcc::context
*ctxt
)
2508 : gimple_opt_pass (pass_data_oacc_device_lower
, ctxt
)
2511 /* opt_pass methods: */
/* Gate: the pass runs exactly when flag_openacc is nonzero.  */
2512 bool gate (function
*) final override
{ return flag_openacc
; };
/* Execute: the function argument is not named or used here.  */
2514 unsigned int execute (function
*) final override
2516 return execute_oacc_device_lower ();
2519 }; // class pass_oacc_device_lower
/* Factory for the "oaccloops" pass: returns a freshly allocated
   pass_oacc_loop_designation constructed with CTXT.  */
2524 make_pass_oacc_loop_designation (gcc::context
*ctxt
)
2526 return new pass_oacc_loop_designation (ctxt
);
/* Factory for the "oaccdevlow" pass: returns a freshly allocated
   pass_oacc_device_lower constructed with CTXT.  */
2530 make_pass_oacc_device_lower (gcc::context
*ctxt
)
2532 return new pass_oacc_device_lower (ctxt
);
2536 /* Rewrite GOMP_SIMT_ENTER_ALLOC call given by GSI and remove the preceding
2537 GOMP_SIMT_ENTER call identifying the privatized variables, which are
2538 turned to structure fields and receive a DECL_VALUE_EXPR accordingly.
2539 Set *REGIMPLIFY to true, except if no privatized variables were seen. */
2542 ompdevlow_adjust_simt_enter (gimple_stmt_iterator
*gsi
, bool *regimplify
)
/* The statement at GSI is the ALLOC call.  Its LHS is the record pointer
   SIMTREC, and its first argument SIMDUID is the SSA name defined by the
   matching GOMP_SIMT_ENTER call (asserted below).  */
2544 gimple
*alloc_stmt
= gsi_stmt (*gsi
);
2545 tree simtrec
= gimple_call_lhs (alloc_stmt
);
2546 tree simduid
= gimple_call_arg (alloc_stmt
, 0);
2547 gimple
*enter_stmt
= SSA_NAME_DEF_STMT (simduid
);
2548 gcc_assert (gimple_call_internal_p (enter_stmt
, IFN_GOMP_SIMT_ENTER
));
/* Create an artificial, nameless, addressable record type for the
   privatized variables and retype SIMTREC as a pointer to it.  */
2549 tree rectype
= lang_hooks
.types
.make_type (RECORD_TYPE
);
2550 TYPE_ARTIFICIAL (rectype
) = TYPE_NAMELESS (rectype
) = 1;
2551 TREE_ADDRESSABLE (rectype
) = 1;
2552 TREE_TYPE (simtrec
) = build_pointer_type (rectype
);
/* Arguments of the ENTER call from index 1 onwards name the variables.  */
2553 for (unsigned i
= 1; i
< gimple_call_num_args (enter_stmt
); i
++)
2555 tree
*argp
= gimple_call_arg_ptr (enter_stmt
, i
);
/* NOTE(review): the statement controlled by this null-pointer test is not
   visible in this excerpt; presumably such arguments are skipped.  */
2556 if (*argp
== null_pointer_node
)
/* Every remaining argument must be the address of a variable.  */
2558 gcc_assert (TREE_CODE (*argp
) == ADDR_EXPR
2559 && VAR_P (TREE_OPERAND (*argp
, 0)));
2560 tree var
= TREE_OPERAND (*argp
, 0);
/* Create a field that mirrors VAR's location, name, type, alignment and
   volatility, and add it to the record.  */
2562 tree field
= build_decl (DECL_SOURCE_LOCATION (var
), FIELD_DECL
,
2563 DECL_NAME (var
), TREE_TYPE (var
));
2564 SET_DECL_ALIGN (field
, DECL_ALIGN (var
));
2565 DECL_USER_ALIGN (field
) = DECL_USER_ALIGN (var
);
2566 TREE_THIS_VOLATILE (field
) = TREE_THIS_VOLATILE (var
);
2568 insert_field_into_struct (rectype
, field
);
/* Give VAR a DECL_VALUE_EXPR that refers to that field of *SIMTREC.  */
2570 tree t
= build_simple_mem_ref (simtrec
);
2571 t
= build3 (COMPONENT_REF
, TREE_TYPE (var
), t
, field
, NULL
);
2572 TREE_THIS_VOLATILE (t
) = TREE_THIS_VOLATILE (var
);
2573 SET_DECL_VALUE_EXPR (var
, t
);
2574 DECL_HAS_VALUE_EXPR_P (var
) = 1;
/* Lay out the record, then build a replacement ALLOC call that takes the
   record's size and alignment (in units) as its two arguments.  */
2577 layout_type (rectype
);
2578 tree size
= TYPE_SIZE_UNIT (rectype
);
2579 tree align
= build_int_cst (TREE_TYPE (size
), TYPE_ALIGN_UNIT (rectype
));
2582 = gimple_build_call_internal (IFN_GOMP_SIMT_ENTER_ALLOC
, 2, size
, align
);
2583 gimple_call_set_lhs (alloc_stmt
, simtrec
);
2584 gsi_replace (gsi
, alloc_stmt
, false);
/* Replace the ENTER call by a plain assignment of its first argument to
   SIMDUID.  */
2585 gimple_stmt_iterator enter_gsi
= gsi_for_stmt (enter_stmt
);
2586 enter_stmt
= gimple_build_assign (simduid
, gimple_call_arg (enter_stmt
, 0));
2587 gsi_replace (&enter_gsi
, enter_stmt
, false);
/* If SIMTREC has a single immediate use, it must be the GOMP_SIMT_EXIT call;
   a clobber of the whole record is inserted right before that call.  */
2591 if (single_imm_use (simtrec
, &use
, &exit_stmt
))
2593 gcc_assert (gimple_call_internal_p (exit_stmt
, IFN_GOMP_SIMT_EXIT
));
2594 gimple_stmt_iterator exit_gsi
= gsi_for_stmt (exit_stmt
);
2595 tree clobber
= build_clobber (rectype
);
2596 exit_stmt
= gimple_build_assign (build_simple_mem_ref (simtrec
), clobber
);
2597 gsi_insert_before (&exit_gsi
, exit_stmt
, GSI_SAME_STMT
);
/* NOTE(review): presumably the alternative to the single-use case above (the
   'else' is not visible here); SIMTREC is then expected to have no uses.  */
2600 gcc_checking_assert (has_zero_uses (simtrec
));
2603 /* Callback for walk_gimple_stmt used to scan for SIMT-privatized variables. */
/* The visible part of the match condition requires a decl that has a value
   expression and carries the "omp simt private" attribute.
   NOTE(review): the start of the condition and the return statements are
   not visible in this excerpt.  */
2606 find_simtpriv_var_op (tree
*tp
, int *walk_subtrees
, void *)
2611 && DECL_HAS_VALUE_EXPR_P (t
)
2612 && lookup_attribute ("omp simt private", DECL_ATTRIBUTES (t
)))
2620 /* Cleanup uses of SIMT placeholder internal functions: on non-SIMT targets,
2621 VF is 1 and LANE is 0; on SIMT targets, VF is folded to a constant, and
2622 LANE is kept to be expanded to RTL later on. Also cleanup all other SIMT
2623 internal functions on non-SIMT targets, and likewise some SIMD internal
2624 functions on SIMT targets. */
2627 execute_omp_device_lower ()
/* VF comes from the target's simt.vf hook; when the target has no such hook
   it is 1, which the cases below treat as the non-SIMT situation.  */
2629 int vf
= targetm
.simt
.vf
? targetm
.simt
.vf () : 1;
/* Passed by address to ompdevlow_adjust_simt_enter, which sets it when
   privatized variables were seen.  */
2630 bool regimplify
= false;
2632 gimple_stmt_iterator gsi
;
2633 bool calls_declare_variant_alt
2634 = cgraph_node::get (cfun
->decl
)->calls_declare_variant_alt
;
2635 #ifdef ACCEL_COMPILER
/* Accelerator compiler only: indirect calls are redirected when at least one
   indirect offload function has been recorded.  */
2636 bool omp_redirect_indirect_calls
= vec_safe_length (offload_ind_funcs
) > 0;
2638 = builtin_decl_explicit (BUILT_IN_GOMP_TARGET_MAP_INDIRECT_PTR
);
/* First sweep: look at every statement of every basic block.  */
2640 FOR_EACH_BB_FN (bb
, cfun
)
2641 for (gsi
= gsi_start_bb (bb
); !gsi_end_p (gsi
); gsi_next (&gsi
))
2643 gimple
*stmt
= gsi_stmt (gsi
);
2644 if (!is_gimple_call (stmt
))
/* Calls that are not internal functions: 'declare variant' resolution and,
   on the accelerator, indirect-call redirection.  */
2646 if (!gimple_call_internal_p (stmt
))
2648 if (calls_declare_variant_alt
)
2649 if (tree fndecl
= gimple_call_fndecl (stmt
))
/* Switch the call to the resolved variant when it differs from FNDECL.  */
2651 tree new_fndecl
= omp_resolve_declare_variant (fndecl
);
2652 if (new_fndecl
!= fndecl
)
2654 gimple_call_set_fndecl (stmt
, new_fndecl
);
2658 #ifdef ACCEL_COMPILER
/* A call without a known fndecl is an indirect call: translate the callee
   pointer through MAP_PTR_FN first and call the translated pointer.  */
2659 if (omp_redirect_indirect_calls
2660 && gimple_call_fndecl (stmt
) == NULL_TREE
)
2662 gcall
*orig_call
= dyn_cast
<gcall
*> (stmt
);
2663 tree call_fn
= gimple_call_fn (stmt
);
2664 tree fn_ty
= TREE_TYPE (call_fn
);
/* An OBJ_TYPE_REF callee is first copied into a temporary register.  */
2666 if (TREE_CODE (call_fn
) == OBJ_TYPE_REF
)
2668 tree obj_ref
= create_tmp_reg (TREE_TYPE (call_fn
),
2670 gimple
*gassign
= gimple_build_assign (obj_ref
, call_fn
);
2671 gsi_insert_before (&gsi
, gassign
, GSI_SAME_STMT
);
/* Insert the mapping call before the original call, then retarget the
   original call to the mapped function pointer.  */
2674 tree mapped_fn
= create_tmp_reg (fn_ty
, ".ind_fn");
2676 gimple_build_call (map_ptr_fn
, 1, call_fn
);
2677 gimple_set_location (gcall
, gimple_location (stmt
));
2678 gimple_call_set_lhs (gcall
, mapped_fn
);
2679 gsi_insert_before (&gsi
, gcall
, GSI_SAME_STMT
);
2681 gimple_call_set_fn (orig_call
, mapped_fn
);
2682 update_stmt (orig_call
);
/* Internal-function calls: RHS is the value that replaces the call result,
   TYPE the type of the LHS (integer_type_node when there is no LHS).
   NOTE(review): RHS left as NULL_TREE presumably means the call is kept;
   the check that follows the switch is not visible in this excerpt.  */
2687 tree lhs
= gimple_call_lhs (stmt
), rhs
= NULL_TREE
;
2688 tree type
= lhs
? TREE_TYPE (lhs
) : integer_type_node
;
2689 switch (gimple_call_internal_fn (stmt
))
2691 case IFN_GOMP_TARGET_REV
:
2693 #ifndef ACCEL_COMPILER
/* Host compiler: the BUILT_IN_GOMP_TARGET call reached through GSI2 gets the
   marker call's first argument as its argument 1, and the matching
   offload_funcs entry is switched from OLD_DECL to NEW_DECL.  */
2694 gimple_stmt_iterator gsi2
= gsi
;
2696 gcc_assert (!gsi_end_p (gsi2
));
2697 gcc_assert (gimple_call_builtin_p (gsi_stmt (gsi2
),
2698 BUILT_IN_GOMP_TARGET
));
2700 = TREE_OPERAND (gimple_call_arg (gsi_stmt (gsi2
), 1), 0);
2701 tree new_decl
= gimple_call_arg (gsi_stmt (gsi
), 0);
2702 gimple_call_set_arg (gsi_stmt (gsi2
), 1, new_decl
);
2703 update_stmt (gsi_stmt (gsi2
));
2704 new_decl
= TREE_OPERAND (new_decl
, 0);
2706 unsigned num_funcs
= vec_safe_length (offload_funcs
);
2707 for (i
= 0; i
< num_funcs
; i
++)
2709 if ((*offload_funcs
)[i
] == old_decl
)
2711 (*offload_funcs
)[i
] = new_decl
;
2714 else if ((*offload_funcs
)[i
] == new_decl
)
2715 break; /* This can happen due to inlining. */
/* One of the two entries must have been found.  */
2717 gcc_assert (i
< num_funcs
);
/* NOTE(review): presumably the accelerator-side variant; the '#else' that
   separates it from the code above is not visible in this excerpt.  */
2719 tree old_decl
= TREE_OPERAND (gimple_call_arg (gsi_stmt (gsi
), 0),
2722 /* FIXME: Find a way to actually prevent outputting the empty-body
2723 old_decl as debug symbol + function in the assembly file. */
2724 cgraph_node
*node
= cgraph_node::get (old_decl
);
2725 node
->address_taken
= false;
2726 node
->need_lto_streaming
= false;
2727 node
->offloadable
= false;
2729 unlink_stmt_vdef (stmt
);
/* USE_SIMT folds to 0 when VF is 1 and to 1 otherwise.  */
2732 case IFN_GOMP_USE_SIMT
:
2733 rhs
= vf
== 1 ? integer_zero_node
: integer_one_node
;
2735 case IFN_GOMP_SIMT_ENTER
:
2736 rhs
= vf
== 1 ? gimple_call_arg (stmt
, 0) : NULL_TREE
;
2737 goto simtreg_enter_exit
;
2738 case IFN_GOMP_SIMT_ENTER_ALLOC
:
/* REGIMPLIFY is passed by address so that the helper can report whether
   privatized variables were rewritten.  */
2740 ompdevlow_adjust_simt_enter (&gsi
, &regimplify
);
2741 rhs
= vf
== 1 ? null_pointer_node
: NULL_TREE
;
2742 goto simtreg_enter_exit
;
2743 case IFN_GOMP_SIMT_EXIT
:
2747 unlink_stmt_vdef (stmt
);
/* With VF == 1 the lane queries fold to zero.  */
2749 case IFN_GOMP_SIMT_LANE
:
2750 case IFN_GOMP_SIMT_LAST_LANE
:
2751 rhs
= vf
== 1 ? build_zero_cst (type
) : NULL_TREE
;
/* SIMT_VF always folds to the constant VF.  */
2753 case IFN_GOMP_SIMT_VF
:
2754 rhs
= build_int_cst (type
, vf
);
2756 case IFN_GOMP_SIMT_ORDERED_PRED
:
2757 rhs
= vf
== 1 ? integer_zero_node
: NULL_TREE
;
2759 unlink_stmt_vdef (stmt
);
/* With VF == 1 the vote and exchange operations reduce to their first
   argument.  */
2761 case IFN_GOMP_SIMT_VOTE_ANY
:
2762 case IFN_GOMP_SIMT_XCHG_BFLY
:
2763 case IFN_GOMP_SIMT_XCHG_IDX
:
2764 rhs
= vf
== 1 ? gimple_call_arg (stmt
, 0) : NULL_TREE
;
/* The SIMD placeholders are folded in the opposite situation, VF != 1.  */
2766 case IFN_GOMP_SIMD_LANE
:
2767 case IFN_GOMP_SIMD_LAST_LANE
:
2768 rhs
= vf
!= 1 ? build_zero_cst (type
) : NULL_TREE
;
2770 case IFN_GOMP_SIMD_VF
:
2771 rhs
= vf
!= 1 ? build_one_cst (type
) : NULL_TREE
;
/* Replace the call by an assignment of RHS to LHS, or by a nop when the
   call has no LHS.  */
2778 stmt
= lhs
? gimple_build_assign (lhs
, rhs
) : gimple_build_nop ();
2779 gsi_replace (&gsi
, stmt
, false);
/* Second sweep, in reverse order: statements that mention SIMT-privatized
   variables are removed when they are clobbers and have their operands
   regimplified otherwise.  NOTE(review): presumably guarded by REGIMPLIFY;
   the guard is not visible in this excerpt.  */
2782 FOR_EACH_BB_REVERSE_FN (bb
, cfun
)
2783 for (gsi
= gsi_last_bb (bb
); !gsi_end_p (gsi
); gsi_prev (&gsi
))
2784 if (walk_gimple_stmt (&gsi
, NULL
, find_simtpriv_var_op
, NULL
))
2786 if (gimple_clobber_p (gsi_stmt (gsi
)))
2787 gsi_remove (&gsi
, true);
2789 gimple_regimplify_operands (gsi_stmt (gsi
), &gsi
);
2792 cfun
->has_force_vectorize_loops
= false;
/* Descriptor of the "ompdevlow" GIMPLE pass: it requires a CFG, provides
   PROP_gimple_lomp_dev and requests an SSA update once it has finished.  */
2798 const pass_data pass_data_omp_device_lower
=
2800 GIMPLE_PASS
, /* type */
2801 "ompdevlow", /* name */
2802 OPTGROUP_OMP
, /* optinfo_flags */
2803 TV_NONE
, /* tv_id */
2804 PROP_cfg
, /* properties_required */
2805 PROP_gimple_lomp_dev
, /* properties_provided */
2806 0, /* properties_destroyed */
2807 0, /* todo_flags_start */
2808 TODO_update_ssa
, /* todo_flags_finish */
/* Pass object for "ompdevlow".  It is constructed from
   pass_data_omp_device_lower and forwards its work to
   execute_omp_device_lower.  */
2811 class pass_omp_device_lower
: public gimple_opt_pass
2814 pass_omp_device_lower (gcc::context
*ctxt
)
2815 : gimple_opt_pass (pass_data_omp_device_lower
, ctxt
)
2818 /* opt_pass methods: */
/* Gate: the visible part of the condition looks at whether
   PROP_gimple_lomp_dev is still unset for FUN, whether FUN calls
   'declare variant' alternatives, and whether indirect offload functions
   exist (always false outside the accelerator compiler).
   NOTE(review): part of the condition is not visible in this excerpt.  */
2819 bool gate (function
*fun
) final override
2821 #ifdef ACCEL_COMPILER
2822 bool offload_ind_funcs_p
= vec_safe_length (offload_ind_funcs
) > 0;
2824 bool offload_ind_funcs_p
= false;
2826 return (!(fun
->curr_properties
& PROP_gimple_lomp_dev
)
2828 && (cgraph_node::get (fun
->decl
)->calls_declare_variant_alt
2829 || offload_ind_funcs_p
)));
/* Execute: the function argument is not named or used here.  */
2831 unsigned int execute (function
*) final override
2833 return execute_omp_device_lower ();
2836 }; // class pass_omp_device_lower
/* Factory for the "ompdevlow" pass: returns a freshly allocated
   pass_omp_device_lower constructed with CTXT.  */
2841 make_pass_omp_device_lower (gcc::context
*ctxt
)
2843 return new pass_omp_device_lower (ctxt
);
2846 /* "omp declare target link" handling pass. */
/* Descriptor of the "omptargetlink" GIMPLE pass: it requires SSA form and
   requests an SSA update once it has finished.  */
2850 const pass_data pass_data_omp_target_link
=
2852 GIMPLE_PASS
, /* type */
2853 "omptargetlink", /* name */
2854 OPTGROUP_OMP
, /* optinfo_flags */
2855 TV_NONE
, /* tv_id */
2856 PROP_ssa
, /* properties_required */
2857 0, /* properties_provided */
2858 0, /* properties_destroyed */
2859 0, /* todo_flags_start */
2860 TODO_update_ssa
, /* todo_flags_finish */
/* Pass object for "omptargetlink", constructed from
   pass_data_omp_target_link.  Its execute method is only declared here and
   is defined outside the class.  */
2863 class pass_omp_target_link
: public gimple_opt_pass
2866 pass_omp_target_link (gcc::context
*ctxt
)
2867 : gimple_opt_pass (pass_data_omp_target_link
, ctxt
)
2870 /* opt_pass methods: */
/* Gate: in the accelerator compiler the pass runs for exactly those
   functions for which offloading_function_p holds.
   NOTE(review): the non-accelerator branch is not visible in this
   excerpt.  */
2871 bool gate (function
*fun
) final override
2873 #ifdef ACCEL_COMPILER
2874 return offloading_function_p (fun
->decl
);
2881 unsigned execute (function
*) final override
;
2884 /* Callback for walk_gimple_stmt used to scan for link var operands. */
/* The visible part of the match condition requires a global variable that
   has a value expression and carries the "omp declare target link"
   attribute.  NOTE(review): the start of the condition and the return
   statements are not visible in this excerpt.  */
2887 find_link_var_op (tree
*tp
, int *walk_subtrees
, void *)
2892 && DECL_HAS_VALUE_EXPR_P (t
)
2893 && is_global_var (t
)
2894 && lookup_attribute ("omp declare target link", DECL_ATTRIBUTES (t
)))
/* Walk every statement of FUN.  BUILT_IN_GOMP_TARGET calls get their second
   argument replaced by a null pointer, except for the host-fallback case
   described below.  Statements that mention link variables, as detected by
   find_link_var_op, have their operands regimplified.  */
2904 pass_omp_target_link::execute (function
*fun
)
2907 FOR_EACH_BB_FN (bb
, fun
)
2909 gimple_stmt_iterator gsi
;
2910 for (gsi
= gsi_start_bb (bb
); !gsi_end_p (gsi
); gsi_next (&gsi
))
2912 if (gimple_call_builtin_p (gsi_stmt (gsi
), BUILT_IN_GOMP_TARGET
))
/* DEV is the call's first argument and FN its second; when FN has pointer
   type, its operand 0 is used for the attribute lookup instead.  */
2914 tree dev
= gimple_call_arg (gsi_stmt (gsi
), 0);
2915 tree fn
= gimple_call_arg (gsi_stmt (gsi
), 1);
2916 if (POINTER_TYPE_P (TREE_TYPE (fn
)))
2917 fn
= TREE_OPERAND (fn
, 0);
/* Leave the statement alone when DEV is the constant
   GOMP_DEVICE_HOST_FALLBACK and FN carries the
   "omp target device_ancestor_nohost" attribute.  The 'continue' also
   skips the link-variable walk below for this statement.  */
2918 if (TREE_CODE (dev
) == INTEGER_CST
2919 && wi::to_wide (dev
) == GOMP_DEVICE_HOST_FALLBACK
2920 && lookup_attribute ("omp target device_ancestor_nohost",
2921 DECL_ATTRIBUTES (fn
)) != NULL_TREE
)
2922 continue; /* ancestor:1 */
2923 /* Nullify the second argument of __builtin_GOMP_target_ext. */
2924 gimple_call_set_arg (gsi_stmt (gsi
), 1, null_pointer_node
);
2925 update_stmt (gsi_stmt (gsi
));
/* Regimplify the operands of any statement in which the walk finds a link
   variable.  */
2927 if (walk_gimple_stmt (&gsi
, NULL
, find_link_var_op
, NULL
))
2928 gimple_regimplify_operands (gsi_stmt (gsi
), &gsi
);
/* Factory for the "omptargetlink" pass: returns a freshly allocated
   pass_omp_target_link constructed with CTXT.  */
2938 make_pass_omp_target_link (gcc::context
*ctxt
)
2940 return new pass_omp_target_link (ctxt
);