1 /* Bits of OpenMP and OpenACC handling that is specific to device offloading
2 and a lowering pass for OpenACC device directives.
4 Copyright (C) 2005-2020 Free Software Foundation, Inc.
6 This file is part of GCC.
8 GCC is free software; you can redistribute it and/or modify it under
9 the terms of the GNU General Public License as published by the Free
10 Software Foundation; either version 3, or (at your option) any later
13 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
14 WARRANTY; without even the implied warranty of MERCHANTABILITY or
15 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
18 You should have received a copy of the GNU General Public License
19 along with GCC; see the file COPYING3. If not see
20 <http://www.gnu.org/licenses/>. */
24 #include "coretypes.h"
29 #include "tree-pass.h"
32 #include "pretty-print.h"
33 #include "diagnostic-core.h"
34 #include "fold-const.h"
35 #include "internal-fn.h"
36 #include "langhooks.h"
38 #include "gimple-iterator.h"
39 #include "gimplify-me.h"
40 #include "gimple-walk.h"
42 #include "tree-into-ssa.h"
43 #include "tree-nested.h"
44 #include "stor-layout.h"
45 #include "common/common-target.h"
46 #include "omp-general.h"
47 #include "omp-offload.h"
48 #include "lto-section-names.h"
49 #include "gomp-constants.h"
50 #include "gimple-pretty-print.h"
52 #include "stringpool.h"
57 /* Describe the OpenACC looping structure of a function. The entire
58 function is held in a 'NULL' loop. */
62 oacc_loop
*parent
; /* Containing loop. */
64 oacc_loop
*child
; /* First inner loop. */
66 oacc_loop
*sibling
; /* Next loop within same parent. */
68 location_t loc
; /* Location of the loop start. */
70 gcall
*marker
; /* Initial head marker. */
72 gcall
*heads
[GOMP_DIM_MAX
]; /* Head marker functions. */
73 gcall
*tails
[GOMP_DIM_MAX
]; /* Tail marker functions. */
75 tree routine
; /* Pseudo-loop enclosing a routine. */
77 unsigned mask
; /* Partitioning mask. */
78 unsigned e_mask
; /* Partitioning of element loops (when tiling). */
79 unsigned inner
; /* Partitioning of inner loops. */
80 unsigned flags
; /* Partitioning flags. */
81 vec
<gcall
*> ifns
; /* Contained loop abstraction functions. */
82 tree chunk_size
; /* Chunk size. */
83 gcall
*head_end
; /* Final marker of head sequence. */
86 /* Holds offload tables with decls. */
87 vec
<tree
, va_gc
> *offload_funcs
, *offload_vars
;
89 /* Return level at which oacc routine may spawn a partitioned loop, or
90 -1 if it is not a routine (i.e. is an offload fn). */
93 oacc_fn_attrib_level (tree attr
)
95 tree pos
= TREE_VALUE (attr
);
97 if (!TREE_PURPOSE (pos
))
101 for (ix
= 0; ix
!= GOMP_DIM_MAX
;
102 ix
++, pos
= TREE_CHAIN (pos
))
103 if (!integer_zerop (TREE_PURPOSE (pos
)))
109 /* Helper function for omp_finish_file routine. Takes decls from V_DECLS and
110 adds their addresses and sizes to constructor-vector V_CTOR. */
113 add_decls_addresses_to_decl_constructor (vec
<tree
, va_gc
> *v_decls
,
114 vec
<constructor_elt
, va_gc
> *v_ctor
)
116 unsigned len
= vec_safe_length (v_decls
);
117 for (unsigned i
= 0; i
< len
; i
++)
119 tree it
= (*v_decls
)[i
];
120 bool is_var
= VAR_P (it
);
123 #ifdef ACCEL_COMPILER
124 && DECL_HAS_VALUE_EXPR_P (it
)
126 && lookup_attribute ("omp declare target link", DECL_ATTRIBUTES (it
));
128 /* See also omp_finish_file and output_offload_tables in lto-cgraph.c. */
129 if (!in_lto_p
&& !symtab_node::get (it
))
132 tree size
= NULL_TREE
;
134 size
= fold_convert (const_ptr_type_node
, DECL_SIZE_UNIT (it
));
138 addr
= build_fold_addr_expr (it
);
141 #ifdef ACCEL_COMPILER
142 /* For "omp declare target link" vars add address of the pointer to
143 the target table, instead of address of the var. */
144 tree value_expr
= DECL_VALUE_EXPR (it
);
145 tree link_ptr_decl
= TREE_OPERAND (value_expr
, 0);
146 varpool_node::finalize_decl (link_ptr_decl
);
147 addr
= build_fold_addr_expr (link_ptr_decl
);
149 addr
= build_fold_addr_expr (it
);
152 /* Most significant bit of the size marks "omp declare target link"
153 vars in host and target tables. */
154 unsigned HOST_WIDE_INT isize
= tree_to_uhwi (size
);
155 isize
|= 1ULL << (int_size_in_bytes (const_ptr_type_node
)
156 * BITS_PER_UNIT
- 1);
157 size
= wide_int_to_tree (const_ptr_type_node
, isize
);
160 CONSTRUCTOR_APPEND_ELT (v_ctor
, NULL_TREE
, addr
);
162 CONSTRUCTOR_APPEND_ELT (v_ctor
, NULL_TREE
, size
);
166 /* Return true if DECL is a function for which its references should be
170 omp_declare_target_fn_p (tree decl
)
172 return (TREE_CODE (decl
) == FUNCTION_DECL
173 && lookup_attribute ("omp declare target", DECL_ATTRIBUTES (decl
))
174 && !lookup_attribute ("omp declare target host",
175 DECL_ATTRIBUTES (decl
))
177 || oacc_get_fn_attrib (decl
) == NULL_TREE
));
180 /* Return true if DECL Is a variable for which its initializer references
181 should be analyzed. */
184 omp_declare_target_var_p (tree decl
)
187 && lookup_attribute ("omp declare target", DECL_ATTRIBUTES (decl
))
188 && !lookup_attribute ("omp declare target link",
189 DECL_ATTRIBUTES (decl
)));
192 /* Helper function for omp_discover_implicit_declare_target, called through
193 walk_tree. Mark referenced FUNCTION_DECLs implicitly as
194 declare target to. */
197 omp_discover_declare_target_tgt_fn_r (tree
*tp
, int *walk_subtrees
, void *data
)
199 if (TREE_CODE (*tp
) == FUNCTION_DECL
)
202 tree id
= get_identifier ("omp declare target");
203 symtab_node
*node
= symtab_node::get (*tp
);
206 while (node
->alias_target
207 && TREE_CODE (node
->alias_target
) == FUNCTION_DECL
)
209 if (!omp_declare_target_fn_p (node
->decl
)
210 && !lookup_attribute ("omp declare target host",
211 DECL_ATTRIBUTES (node
->decl
)))
213 node
->offloadable
= 1;
214 DECL_ATTRIBUTES (node
->decl
)
215 = tree_cons (id
, NULL_TREE
, DECL_ATTRIBUTES (node
->decl
));
217 node
= symtab_node::get (node
->alias_target
);
219 symtab_node
*new_node
= node
->ultimate_alias_target ();
220 decl
= new_node
->decl
;
221 while (node
!= new_node
)
223 if (!omp_declare_target_fn_p (node
->decl
)
224 && !lookup_attribute ("omp declare target host",
225 DECL_ATTRIBUTES (node
->decl
)))
227 node
->offloadable
= 1;
228 DECL_ATTRIBUTES (node
->decl
)
229 = tree_cons (id
, NULL_TREE
, DECL_ATTRIBUTES (node
->decl
));
231 gcc_assert (node
->alias
&& node
->analyzed
);
232 node
= node
->get_alias_target ();
234 node
->offloadable
= 1;
235 if (ENABLE_OFFLOADING
)
236 g
->have_offload
= true;
238 if (omp_declare_target_fn_p (decl
)
239 || lookup_attribute ("omp declare target host",
240 DECL_ATTRIBUTES (decl
)))
243 if (!DECL_EXTERNAL (decl
) && DECL_SAVED_TREE (decl
))
244 ((vec
<tree
> *) data
)->safe_push (decl
);
245 DECL_ATTRIBUTES (decl
) = tree_cons (id
, NULL_TREE
,
246 DECL_ATTRIBUTES (decl
));
248 else if (TYPE_P (*tp
))
250 /* else if (TREE_CODE (*tp) == OMP_TARGET)
252 if (tree dev = omp_find_clause (OMP_TARGET_CLAUSES (*tp)))
253 if (OMP_DEVICE_ANCESTOR (dev))
259 /* Similarly, but ignore references outside of OMP_TARGET regions. */
262 omp_discover_declare_target_fn_r (tree
*tp
, int *walk_subtrees
, void *data
)
264 if (TREE_CODE (*tp
) == OMP_TARGET
)
266 /* And not OMP_DEVICE_ANCESTOR. */
267 walk_tree_without_duplicates (&OMP_TARGET_BODY (*tp
),
268 omp_discover_declare_target_tgt_fn_r
,
272 else if (TYPE_P (*tp
))
277 /* Helper function for omp_discover_implicit_declare_target, called through
278 walk_tree. Mark referenced FUNCTION_DECLs implicitly as
279 declare target to. */
282 omp_discover_declare_target_var_r (tree
*tp
, int *walk_subtrees
, void *data
)
284 if (TREE_CODE (*tp
) == FUNCTION_DECL
)
285 return omp_discover_declare_target_tgt_fn_r (tp
, walk_subtrees
, data
);
287 && is_global_var (*tp
)
288 && !omp_declare_target_var_p (*tp
))
290 tree id
= get_identifier ("omp declare target");
291 if (lookup_attribute ("omp declare target link", DECL_ATTRIBUTES (*tp
)))
293 error_at (DECL_SOURCE_LOCATION (*tp
),
294 "%qD specified both in declare target %<link%> and "
295 "implicitly in %<to%> clauses", *tp
);
296 DECL_ATTRIBUTES (*tp
)
297 = remove_attribute ("omp declare target link", DECL_ATTRIBUTES (*tp
));
299 if (TREE_STATIC (*tp
) && DECL_INITIAL (*tp
))
300 ((vec
<tree
> *) data
)->safe_push (*tp
);
301 DECL_ATTRIBUTES (*tp
) = tree_cons (id
, NULL_TREE
, DECL_ATTRIBUTES (*tp
));
302 symtab_node
*node
= symtab_node::get (*tp
);
303 if (node
!= NULL
&& !node
->offloadable
)
305 node
->offloadable
= 1;
306 if (ENABLE_OFFLOADING
)
308 g
->have_offload
= true;
309 if (is_a
<varpool_node
*> (node
))
310 vec_safe_push (offload_vars
, node
->decl
);
314 else if (TYPE_P (*tp
))
319 /* Perform the OpenMP implicit declare target to discovery. */
322 omp_discover_implicit_declare_target (void)
326 auto_vec
<tree
> worklist
;
328 FOR_EACH_DEFINED_FUNCTION (node
)
329 if (DECL_SAVED_TREE (node
->decl
))
331 struct cgraph_node
*cgn
;
332 if (omp_declare_target_fn_p (node
->decl
))
333 worklist
.safe_push (node
->decl
);
334 else if (DECL_STRUCT_FUNCTION (node
->decl
)
335 && DECL_STRUCT_FUNCTION (node
->decl
)->has_omp_target
)
336 worklist
.safe_push (node
->decl
);
337 for (cgn
= first_nested_function (node
);
338 cgn
; cgn
= next_nested_function (cgn
))
339 if (omp_declare_target_fn_p (cgn
->decl
))
340 worklist
.safe_push (cgn
->decl
);
341 else if (DECL_STRUCT_FUNCTION (cgn
->decl
)
342 && DECL_STRUCT_FUNCTION (cgn
->decl
)->has_omp_target
)
343 worklist
.safe_push (cgn
->decl
);
345 FOR_EACH_STATIC_INITIALIZER (vnode
)
346 if (omp_declare_target_var_p (vnode
->decl
))
347 worklist
.safe_push (vnode
->decl
);
348 while (!worklist
.is_empty ())
350 tree decl
= worklist
.pop ();
352 walk_tree_without_duplicates (&DECL_INITIAL (decl
),
353 omp_discover_declare_target_var_r
,
355 else if (omp_declare_target_fn_p (decl
))
356 walk_tree_without_duplicates (&DECL_SAVED_TREE (decl
),
357 omp_discover_declare_target_tgt_fn_r
,
360 walk_tree_without_duplicates (&DECL_SAVED_TREE (decl
),
361 omp_discover_declare_target_fn_r
,
367 /* Create new symbols containing (address, size) pairs for global variables,
368 marked with "omp declare target" attribute, as well as addresses for the
369 functions, which are outlined offloading regions. */
371 omp_finish_file (void)
373 unsigned num_funcs
= vec_safe_length (offload_funcs
);
374 unsigned num_vars
= vec_safe_length (offload_vars
);
376 if (num_funcs
== 0 && num_vars
== 0)
379 if (targetm_common
.have_named_sections
)
381 vec
<constructor_elt
, va_gc
> *v_f
, *v_v
;
382 vec_alloc (v_f
, num_funcs
);
383 vec_alloc (v_v
, num_vars
* 2);
385 add_decls_addresses_to_decl_constructor (offload_funcs
, v_f
);
386 add_decls_addresses_to_decl_constructor (offload_vars
, v_v
);
388 tree vars_decl_type
= build_array_type_nelts (pointer_sized_int_node
,
389 vec_safe_length (v_v
));
390 tree funcs_decl_type
= build_array_type_nelts (pointer_sized_int_node
,
392 SET_TYPE_ALIGN (vars_decl_type
, TYPE_ALIGN (pointer_sized_int_node
));
393 SET_TYPE_ALIGN (funcs_decl_type
, TYPE_ALIGN (pointer_sized_int_node
));
394 tree ctor_v
= build_constructor (vars_decl_type
, v_v
);
395 tree ctor_f
= build_constructor (funcs_decl_type
, v_f
);
396 TREE_CONSTANT (ctor_v
) = TREE_CONSTANT (ctor_f
) = 1;
397 TREE_STATIC (ctor_v
) = TREE_STATIC (ctor_f
) = 1;
398 tree funcs_decl
= build_decl (UNKNOWN_LOCATION
, VAR_DECL
,
399 get_identifier (".offload_func_table"),
401 tree vars_decl
= build_decl (UNKNOWN_LOCATION
, VAR_DECL
,
402 get_identifier (".offload_var_table"),
404 TREE_STATIC (funcs_decl
) = TREE_STATIC (vars_decl
) = 1;
405 /* Do not align tables more than TYPE_ALIGN (pointer_sized_int_node),
406 otherwise a joint table in a binary will contain padding between
407 tables from multiple object files. */
408 DECL_USER_ALIGN (funcs_decl
) = DECL_USER_ALIGN (vars_decl
) = 1;
409 SET_DECL_ALIGN (funcs_decl
, TYPE_ALIGN (funcs_decl_type
));
410 SET_DECL_ALIGN (vars_decl
, TYPE_ALIGN (vars_decl_type
));
411 DECL_INITIAL (funcs_decl
) = ctor_f
;
412 DECL_INITIAL (vars_decl
) = ctor_v
;
413 set_decl_section_name (funcs_decl
, OFFLOAD_FUNC_TABLE_SECTION_NAME
);
414 set_decl_section_name (vars_decl
, OFFLOAD_VAR_TABLE_SECTION_NAME
);
416 varpool_node::finalize_decl (vars_decl
);
417 varpool_node::finalize_decl (funcs_decl
);
421 for (unsigned i
= 0; i
< num_funcs
; i
++)
423 tree it
= (*offload_funcs
)[i
];
424 /* See also add_decls_addresses_to_decl_constructor
425 and output_offload_tables in lto-cgraph.c. */
426 if (!in_lto_p
&& !symtab_node::get (it
))
428 targetm
.record_offload_symbol (it
);
430 for (unsigned i
= 0; i
< num_vars
; i
++)
432 tree it
= (*offload_vars
)[i
];
433 if (!in_lto_p
&& !symtab_node::get (it
))
435 #ifdef ACCEL_COMPILER
436 if (DECL_HAS_VALUE_EXPR_P (it
)
437 && lookup_attribute ("omp declare target link",
438 DECL_ATTRIBUTES (it
)))
440 tree value_expr
= DECL_VALUE_EXPR (it
);
441 tree link_ptr_decl
= TREE_OPERAND (value_expr
, 0);
442 targetm
.record_offload_symbol (link_ptr_decl
);
443 varpool_node::finalize_decl (link_ptr_decl
);
447 targetm
.record_offload_symbol (it
);
452 /* Call dim_pos (POS == true) or dim_size (POS == false) builtins for
453 axis DIM. Return a tmp var holding the result. */
456 oacc_dim_call (bool pos
, int dim
, gimple_seq
*seq
)
458 tree arg
= build_int_cst (unsigned_type_node
, dim
);
459 tree size
= create_tmp_var (integer_type_node
);
460 enum internal_fn fn
= pos
? IFN_GOACC_DIM_POS
: IFN_GOACC_DIM_SIZE
;
461 gimple
*call
= gimple_build_call_internal (fn
, 1, arg
);
463 gimple_call_set_lhs (call
, size
);
464 gimple_seq_add_stmt (seq
, call
);
469 /* Find the number of threads (POS = false), or thread number (POS =
470 true) for an OpenACC region partitioned as MASK. Setup code
471 required for the calculation is added to SEQ. */
474 oacc_thread_numbers (bool pos
, int mask
, gimple_seq
*seq
)
476 tree res
= pos
? NULL_TREE
: build_int_cst (unsigned_type_node
, 1);
479 /* Start at gang level, and examine relevant dimension indices. */
480 for (ix
= GOMP_DIM_GANG
; ix
!= GOMP_DIM_MAX
; ix
++)
481 if (GOMP_DIM_MASK (ix
) & mask
)
485 /* We had an outer index, so scale that by the size of
487 tree n
= oacc_dim_call (false, ix
, seq
);
488 res
= fold_build2 (MULT_EXPR
, integer_type_node
, res
, n
);
492 /* Determine index in this dimension. */
493 tree id
= oacc_dim_call (true, ix
, seq
);
495 res
= fold_build2 (PLUS_EXPR
, integer_type_node
, res
, id
);
501 if (res
== NULL_TREE
)
502 res
= integer_zero_node
;
507 /* Transform IFN_GOACC_LOOP calls to actual code. See
508 expand_oacc_for for where these are generated. At the vector
509 level, we stride loops, such that each member of a warp will
510 operate on adjacent iterations. At the worker and gang level,
511 each gang/warp executes a set of contiguous iterations. Chunking
512 can override this such that each iteration engine executes a
513 contiguous chunk, and then moves on to stride to the next chunk. */
516 oacc_xform_loop (gcall
*call
)
518 gimple_stmt_iterator gsi
= gsi_for_stmt (call
);
519 enum ifn_goacc_loop_kind code
520 = (enum ifn_goacc_loop_kind
) TREE_INT_CST_LOW (gimple_call_arg (call
, 0));
521 tree dir
= gimple_call_arg (call
, 1);
522 tree range
= gimple_call_arg (call
, 2);
523 tree step
= gimple_call_arg (call
, 3);
524 tree chunk_size
= NULL_TREE
;
525 unsigned mask
= (unsigned) TREE_INT_CST_LOW (gimple_call_arg (call
, 5));
526 tree lhs
= gimple_call_lhs (call
);
527 tree type
= NULL_TREE
;
528 tree diff_type
= TREE_TYPE (range
);
530 gimple_seq seq
= NULL
;
531 bool chunking
= false, striding
= true;
532 unsigned outer_mask
= mask
& (~mask
+ 1); // Outermost partitioning
533 unsigned inner_mask
= mask
& ~outer_mask
; // Inner partitioning (if any)
535 /* Skip lowering if return value of IFN_GOACC_LOOP call is not used. */
538 gsi_replace_with_seq (&gsi
, seq
, true);
542 type
= TREE_TYPE (lhs
);
544 #ifdef ACCEL_COMPILER
545 chunk_size
= gimple_call_arg (call
, 4);
546 if (integer_minus_onep (chunk_size
) /* Force static allocation. */
547 || integer_zerop (chunk_size
)) /* Default (also static). */
549 /* If we're at the gang level, we want each to execute a
550 contiguous run of iterations. Otherwise we want each element
552 striding
= !(outer_mask
& GOMP_DIM_MASK (GOMP_DIM_GANG
));
557 /* Chunk of size 1 is striding. */
558 striding
= integer_onep (chunk_size
);
559 chunking
= !striding
;
563 /* striding=true, chunking=true
565 striding=true, chunking=false
567 striding=false,chunking=true
568 -> chunks=ceil (range/(chunksize*threads*step))
569 striding=false,chunking=false
570 -> chunk_size=ceil(range/(threads*step)),chunks=1 */
571 push_gimplify_context (true);
575 default: gcc_unreachable ();
577 case IFN_GOACC_LOOP_CHUNKS
:
579 r
= build_int_cst (type
, 1);
583 = (range - dir) / (chunks * step * num_threads) + dir */
584 tree per
= oacc_thread_numbers (false, mask
, &seq
);
585 per
= fold_convert (type
, per
);
586 chunk_size
= fold_convert (type
, chunk_size
);
587 per
= fold_build2 (MULT_EXPR
, type
, per
, chunk_size
);
588 per
= fold_build2 (MULT_EXPR
, type
, per
, step
);
589 r
= build2 (MINUS_EXPR
, type
, range
, dir
);
590 r
= build2 (PLUS_EXPR
, type
, r
, per
);
591 r
= build2 (TRUNC_DIV_EXPR
, type
, r
, per
);
595 case IFN_GOACC_LOOP_STEP
:
597 /* If striding, step by the entire compute volume, otherwise
598 step by the inner volume. */
599 unsigned volume
= striding
? mask
: inner_mask
;
601 r
= oacc_thread_numbers (false, volume
, &seq
);
602 r
= build2 (MULT_EXPR
, type
, fold_convert (type
, r
), step
);
606 case IFN_GOACC_LOOP_OFFSET
:
607 /* Enable vectorization on non-SIMT targets. */
609 && outer_mask
== GOMP_DIM_MASK (GOMP_DIM_VECTOR
)
610 /* If not -fno-tree-loop-vectorize, hint that we want to vectorize
612 && (flag_tree_loop_vectorize
613 || !global_options_set
.x_flag_tree_loop_vectorize
))
615 basic_block bb
= gsi_bb (gsi
);
616 class loop
*parent
= bb
->loop_father
;
617 class loop
*body
= parent
->inner
;
619 parent
->force_vectorize
= true;
620 parent
->safelen
= INT_MAX
;
622 /* "Chunking loops" may have inner loops. */
625 body
->force_vectorize
= true;
626 body
->safelen
= INT_MAX
;
629 cfun
->has_force_vectorize_loops
= true;
633 r
= oacc_thread_numbers (true, mask
, &seq
);
634 r
= fold_convert (diff_type
, r
);
638 tree inner_size
= oacc_thread_numbers (false, inner_mask
, &seq
);
639 tree outer_size
= oacc_thread_numbers (false, outer_mask
, &seq
);
640 tree volume
= fold_build2 (MULT_EXPR
, TREE_TYPE (inner_size
),
641 inner_size
, outer_size
);
643 volume
= fold_convert (diff_type
, volume
);
645 chunk_size
= fold_convert (diff_type
, chunk_size
);
648 tree per
= fold_build2 (MULT_EXPR
, diff_type
, volume
, step
);
650 chunk_size
= build2 (MINUS_EXPR
, diff_type
, range
, dir
);
651 chunk_size
= build2 (PLUS_EXPR
, diff_type
, chunk_size
, per
);
652 chunk_size
= build2 (TRUNC_DIV_EXPR
, diff_type
, chunk_size
, per
);
655 tree span
= build2 (MULT_EXPR
, diff_type
, chunk_size
,
656 fold_convert (diff_type
, inner_size
));
657 r
= oacc_thread_numbers (true, outer_mask
, &seq
);
658 r
= fold_convert (diff_type
, r
);
659 r
= build2 (MULT_EXPR
, diff_type
, r
, span
);
661 tree inner
= oacc_thread_numbers (true, inner_mask
, &seq
);
662 inner
= fold_convert (diff_type
, inner
);
663 r
= fold_build2 (PLUS_EXPR
, diff_type
, r
, inner
);
667 tree chunk
= fold_convert (diff_type
, gimple_call_arg (call
, 6));
669 = fold_build2 (MULT_EXPR
, diff_type
, volume
, chunk_size
);
670 per
= build2 (MULT_EXPR
, diff_type
, per
, chunk
);
672 r
= build2 (PLUS_EXPR
, diff_type
, r
, per
);
675 r
= fold_build2 (MULT_EXPR
, diff_type
, r
, step
);
676 if (type
!= diff_type
)
677 r
= fold_convert (type
, r
);
680 case IFN_GOACC_LOOP_BOUND
:
685 tree inner_size
= oacc_thread_numbers (false, inner_mask
, &seq
);
686 tree outer_size
= oacc_thread_numbers (false, outer_mask
, &seq
);
687 tree volume
= fold_build2 (MULT_EXPR
, TREE_TYPE (inner_size
),
688 inner_size
, outer_size
);
690 volume
= fold_convert (diff_type
, volume
);
692 chunk_size
= fold_convert (diff_type
, chunk_size
);
695 tree per
= fold_build2 (MULT_EXPR
, diff_type
, volume
, step
);
697 chunk_size
= build2 (MINUS_EXPR
, diff_type
, range
, dir
);
698 chunk_size
= build2 (PLUS_EXPR
, diff_type
, chunk_size
, per
);
699 chunk_size
= build2 (TRUNC_DIV_EXPR
, diff_type
, chunk_size
, per
);
702 tree span
= build2 (MULT_EXPR
, diff_type
, chunk_size
,
703 fold_convert (diff_type
, inner_size
));
705 r
= fold_build2 (MULT_EXPR
, diff_type
, span
, step
);
707 tree offset
= gimple_call_arg (call
, 6);
708 r
= build2 (PLUS_EXPR
, diff_type
, r
,
709 fold_convert (diff_type
, offset
));
710 r
= build2 (integer_onep (dir
) ? MIN_EXPR
: MAX_EXPR
,
711 diff_type
, r
, range
);
713 if (diff_type
!= type
)
714 r
= fold_convert (type
, r
);
718 gimplify_assign (lhs
, r
, &seq
);
720 pop_gimplify_context (NULL
);
722 gsi_replace_with_seq (&gsi
, seq
, true);
725 /* Transform a GOACC_TILE call. Determines the element loop span for
726 the specified loop of the nest. This is 1 if we're not tiling.
728 GOACC_TILE (collapse_count, loop_no, tile_arg, gwv_tile, gwv_element); */
731 oacc_xform_tile (gcall
*call
)
733 gimple_stmt_iterator gsi
= gsi_for_stmt (call
);
734 unsigned collapse
= tree_to_uhwi (gimple_call_arg (call
, 0));
735 /* Inner loops have higher loop_nos. */
736 unsigned loop_no
= tree_to_uhwi (gimple_call_arg (call
, 1));
737 tree tile_size
= gimple_call_arg (call
, 2);
738 unsigned e_mask
= tree_to_uhwi (gimple_call_arg (call
, 4));
739 tree lhs
= gimple_call_lhs (call
);
740 tree type
= TREE_TYPE (lhs
);
741 gimple_seq seq
= NULL
;
742 tree span
= build_int_cst (type
, 1);
745 & ~(GOMP_DIM_MASK (GOMP_DIM_VECTOR
)
746 | GOMP_DIM_MASK (GOMP_DIM_WORKER
))));
747 push_gimplify_context (!seen_error ());
749 #ifndef ACCEL_COMPILER
750 /* Partitioning disabled on host compilers. */
754 /* Not paritioning. */
755 span
= integer_one_node
;
756 else if (!integer_zerop (tile_size
))
757 /* User explicitly specified size. */
761 /* Pick a size based on the paritioning of the element loop and
762 the number of loop nests. */
763 tree first_size
= NULL_TREE
;
764 tree second_size
= NULL_TREE
;
766 if (e_mask
& GOMP_DIM_MASK (GOMP_DIM_VECTOR
))
767 first_size
= oacc_dim_call (false, GOMP_DIM_VECTOR
, &seq
);
768 if (e_mask
& GOMP_DIM_MASK (GOMP_DIM_WORKER
))
769 second_size
= oacc_dim_call (false, GOMP_DIM_WORKER
, &seq
);
773 first_size
= second_size
;
774 second_size
= NULL_TREE
;
777 if (loop_no
+ 1 == collapse
)
780 if (!loop_no
&& second_size
)
781 span
= fold_build2 (MULT_EXPR
, TREE_TYPE (span
),
784 else if (loop_no
+ 2 == collapse
)
790 /* There's no obvious element size for this loop. Options
791 are 1, first_size or some non-unity constant (32 is my
792 favourite). We should gather some statistics. */
796 span
= fold_convert (type
, span
);
797 gimplify_assign (lhs
, span
, &seq
);
799 pop_gimplify_context (NULL
);
801 gsi_replace_with_seq (&gsi
, seq
, true);
804 /* Default partitioned and minimum partitioned dimensions. */
806 static int oacc_default_dims
[GOMP_DIM_MAX
];
807 static int oacc_min_dims
[GOMP_DIM_MAX
];
810 oacc_get_default_dim (int dim
)
812 gcc_assert (0 <= dim
&& dim
< GOMP_DIM_MAX
);
813 return oacc_default_dims
[dim
];
817 oacc_get_min_dim (int dim
)
819 gcc_assert (0 <= dim
&& dim
< GOMP_DIM_MAX
);
820 return oacc_min_dims
[dim
];
823 /* Parse the default dimension parameter. This is a set of
824 :-separated optional compute dimensions. Each specified dimension
825 is a positive integer. When device type support is added, it is
826 planned to be a comma separated list of such compute dimensions,
827 with all but the first prefixed by the colon-terminated device
831 oacc_parse_default_dims (const char *dims
)
835 for (ix
= GOMP_DIM_MAX
; ix
--;)
837 oacc_default_dims
[ix
] = -1;
838 oacc_min_dims
[ix
] = 1;
841 #ifndef ACCEL_COMPILER
842 /* Cannot be overridden on the host. */
847 const char *pos
= dims
;
849 for (ix
= 0; *pos
&& ix
!= GOMP_DIM_MAX
; ix
++)
864 val
= strtol (pos
, CONST_CAST (char **, &eptr
), 10);
865 if (errno
|| val
<= 0 || (int) val
!= val
)
868 oacc_default_dims
[ix
] = (int) val
;
874 error_at (UNKNOWN_LOCATION
,
875 "%<-fopenacc-dim%> operand is malformed at %qs", pos
);
879 /* Allow the backend to validate the dimensions. */
880 targetm
.goacc
.validate_dims (NULL_TREE
, oacc_default_dims
, -1, 0);
881 targetm
.goacc
.validate_dims (NULL_TREE
, oacc_min_dims
, -2, 0);
884 /* Validate and update the dimensions for offloaded FN. ATTRS is the
885 raw attribute. DIMS is an array of dimensions, which is filled in.
886 LEVEL is the partitioning level of a routine, or -1 for an offload
887 region itself. USED is the mask of partitioned execution in the
891 oacc_validate_dims (tree fn
, tree attrs
, int *dims
, int level
, unsigned used
)
893 tree purpose
[GOMP_DIM_MAX
];
895 tree pos
= TREE_VALUE (attrs
);
897 /* Make sure the attribute creator attached the dimension
901 for (ix
= 0; ix
!= GOMP_DIM_MAX
; ix
++)
903 purpose
[ix
] = TREE_PURPOSE (pos
);
904 tree val
= TREE_VALUE (pos
);
905 dims
[ix
] = val
? TREE_INT_CST_LOW (val
) : -1;
906 pos
= TREE_CHAIN (pos
);
909 bool changed
= targetm
.goacc
.validate_dims (fn
, dims
, level
, used
);
911 /* Default anything left to 1 or a partitioned default. */
912 for (ix
= 0; ix
!= GOMP_DIM_MAX
; ix
++)
915 /* The OpenACC spec says 'If the [num_gangs] clause is not
916 specified, an implementation-defined default will be used;
917 the default may depend on the code within the construct.'
918 (2.5.6). Thus an implementation is free to choose
919 non-unity default for a parallel region that doesn't have
920 any gang-partitioned loops. However, it appears that there
921 is a sufficient body of user code that expects non-gang
922 partitioned regions to not execute in gang-redundant mode.
923 So we (a) don't warn about the non-portability and (b) pick
924 the minimum permissible dimension size when there is no
925 partitioned execution. Otherwise we pick the global
926 default for the dimension, which the user can control. The
927 same wording and logic applies to num_workers and
928 vector_length, however the worker- or vector- single
929 execution doesn't have the same impact as gang-redundant
930 execution. (If the minimum gang-level partioning is not 1,
931 the target is probably too confusing.) */
932 dims
[ix
] = (used
& GOMP_DIM_MASK (ix
)
933 ? oacc_default_dims
[ix
] : oacc_min_dims
[ix
]);
939 /* Replace the attribute with new values. */
941 for (ix
= GOMP_DIM_MAX
; ix
--;)
942 pos
= tree_cons (purpose
[ix
],
943 build_int_cst (integer_type_node
, dims
[ix
]), pos
);
944 oacc_replace_fn_attrib (fn
, pos
);
948 /* Create an empty OpenACC loop structure at LOC. */
951 new_oacc_loop_raw (oacc_loop
*parent
, location_t loc
)
953 oacc_loop
*loop
= XCNEW (oacc_loop
);
955 loop
->parent
= parent
;
959 loop
->sibling
= parent
->child
;
960 parent
->child
= loop
;
967 /* Create an outermost, dummy OpenACC loop for offloaded function
971 new_oacc_loop_outer (tree decl
)
973 return new_oacc_loop_raw (NULL
, DECL_SOURCE_LOCATION (decl
));
976 /* Start a new OpenACC loop structure beginning at head marker HEAD.
977 Link into PARENT loop. Return the new loop. */
980 new_oacc_loop (oacc_loop
*parent
, gcall
*marker
)
982 oacc_loop
*loop
= new_oacc_loop_raw (parent
, gimple_location (marker
));
984 loop
->marker
= marker
;
986 /* TODO: This is where device_type flattening would occur for the loop
989 loop
->flags
= TREE_INT_CST_LOW (gimple_call_arg (marker
, 3));
991 tree chunk_size
= integer_zero_node
;
992 if (loop
->flags
& OLF_GANG_STATIC
)
993 chunk_size
= gimple_call_arg (marker
, 4);
994 loop
->chunk_size
= chunk_size
;
999 /* Create a dummy loop encompassing a call to a openACC routine.
1000 Extract the routine's partitioning requirements. */
1003 new_oacc_loop_routine (oacc_loop
*parent
, gcall
*call
, tree decl
, tree attrs
)
1005 oacc_loop
*loop
= new_oacc_loop_raw (parent
, gimple_location (call
));
1006 int level
= oacc_fn_attrib_level (attrs
);
1008 gcc_assert (level
>= 0);
1010 loop
->marker
= call
;
1011 loop
->routine
= decl
;
1012 loop
->mask
= ((GOMP_DIM_MASK (GOMP_DIM_MAX
) - 1)
1013 ^ (GOMP_DIM_MASK (level
) - 1));
1016 /* Finish off the current OpenACC loop ending at tail marker TAIL.
1017 Return the parent loop. */
1020 finish_oacc_loop (oacc_loop
*loop
)
1022 /* If the loop has been collapsed, don't partition it. */
1023 if (loop
->ifns
.is_empty ())
1024 loop
->mask
= loop
->flags
= 0;
1025 return loop
->parent
;
1028 /* Free all OpenACC loop structures within LOOP (inclusive). */
1031 free_oacc_loop (oacc_loop
*loop
)
1034 free_oacc_loop (loop
->sibling
);
1036 free_oacc_loop (loop
->child
);
1038 loop
->ifns
.release ();
1042 /* Dump out the OpenACC loop head or tail beginning at FROM. */
1045 dump_oacc_loop_part (FILE *file
, gcall
*from
, int depth
,
1046 const char *title
, int level
)
1048 enum ifn_unique_kind kind
1049 = (enum ifn_unique_kind
) TREE_INT_CST_LOW (gimple_call_arg (from
, 0));
1051 fprintf (file
, "%*s%s-%d:\n", depth
* 2, "", title
, level
);
1052 for (gimple_stmt_iterator gsi
= gsi_for_stmt (from
);;)
1054 gimple
*stmt
= gsi_stmt (gsi
);
1056 if (gimple_call_internal_p (stmt
, IFN_UNIQUE
))
1058 enum ifn_unique_kind k
1059 = ((enum ifn_unique_kind
) TREE_INT_CST_LOW
1060 (gimple_call_arg (stmt
, 0)));
1062 if (k
== kind
&& stmt
!= from
)
1065 print_gimple_stmt (file
, stmt
, depth
* 2 + 2);
1068 while (gsi_end_p (gsi
))
1069 gsi
= gsi_start_bb (single_succ (gsi_bb (gsi
)));
1073 /* Dump OpenACC loop LOOP, its children, and its siblings. */
1076 dump_oacc_loop (FILE *file
, oacc_loop
*loop
, int depth
)
1080 fprintf (file
, "%*sLoop %x(%x) %s:%u\n", depth
* 2, "",
1081 loop
->flags
, loop
->mask
,
1082 LOCATION_FILE (loop
->loc
), LOCATION_LINE (loop
->loc
));
1085 print_gimple_stmt (file
, loop
->marker
, depth
* 2);
1088 fprintf (file
, "%*sRoutine %s:%u:%s\n",
1089 depth
* 2, "", DECL_SOURCE_FILE (loop
->routine
),
1090 DECL_SOURCE_LINE (loop
->routine
),
1091 IDENTIFIER_POINTER (DECL_NAME (loop
->routine
)));
1093 for (ix
= GOMP_DIM_GANG
; ix
!= GOMP_DIM_MAX
; ix
++)
1094 if (loop
->heads
[ix
])
1095 dump_oacc_loop_part (file
, loop
->heads
[ix
], depth
, "Head", ix
);
1096 for (ix
= GOMP_DIM_MAX
; ix
--;)
1097 if (loop
->tails
[ix
])
1098 dump_oacc_loop_part (file
, loop
->tails
[ix
], depth
, "Tail", ix
);
1101 dump_oacc_loop (file
, loop
->child
, depth
+ 1);
1103 dump_oacc_loop (file
, loop
->sibling
, depth
);
1106 void debug_oacc_loop (oacc_loop
*);
1108 /* Dump loops to stderr. */
1111 debug_oacc_loop (oacc_loop
*loop
)
1113 dump_oacc_loop (stderr
, loop
, 0);
1116 /* Provide diagnostics on OpenACC loop LOOP, its children, and its
1120 inform_oacc_loop (const oacc_loop
*loop
)
1123 = loop
->mask
& GOMP_DIM_MASK (GOMP_DIM_GANG
) ? " gang" : "";
1125 = loop
->mask
& GOMP_DIM_MASK (GOMP_DIM_WORKER
) ? " worker" : "";
1127 = loop
->mask
& GOMP_DIM_MASK (GOMP_DIM_VECTOR
) ? " vector" : "";
1128 const char *seq
= loop
->mask
== 0 ? " seq" : "";
1129 const dump_user_location_t loc
1130 = dump_user_location_t::from_location_t (loop
->loc
);
1131 dump_printf_loc (MSG_OPTIMIZED_LOCATIONS
, loc
,
1132 "assigned OpenACC%s%s%s%s loop parallelism\n", gang
, worker
,
1136 inform_oacc_loop (loop
->child
);
1138 inform_oacc_loop (loop
->sibling
);
1141 /* DFS walk of basic blocks BB onwards, creating OpenACC loop
1142 structures as we go. By construction these loops are properly
/* NOTE(review): mangled fragment with residual line numbers; several
   original lines (braces, local declarations such as 'marker' and
   'remaining') are missing from this view.  The walk marks blocks
   BB_VISITED, scans each block for IFN_UNIQUE head/tail loop markers
   and GOACC_LOOP/GOACC_TILE abstraction calls, opening/closing
   oacc_loop nodes as markers are met, then recurses over successor
   edges.  */
1146 oacc_loop_discover_walk (oacc_loop
*loop
, basic_block bb
)
/* Already visited?  (Early-return guard; the return itself is not
   visible in this fragment.)  */
1151 if (bb
->flags
& BB_VISITED
)
1155 bb
->flags
|= BB_VISITED
;
1157 /* Scan for loop markers. */
1158 for (gimple_stmt_iterator gsi
= gsi_start_bb (bb
); !gsi_end_p (gsi
);
1161 gimple
*stmt
= gsi_stmt (gsi
);
1163 if (!is_gimple_call (stmt
))
1166 gcall
*call
= as_a
<gcall
*> (stmt
);
1168 /* If this is a routine, make a dummy loop for it. */
1169 if (tree decl
= gimple_call_fndecl (call
))
1170 if (tree attrs
= oacc_get_fn_attrib (decl
))
1172 gcc_assert (!marker
);
1173 new_oacc_loop_routine (loop
, call
, decl
, attrs
);
1176 if (!gimple_call_internal_p (call
))
1179 switch (gimple_call_internal_fn (call
))
/* GOACC_LOOP/GOACC_TILE calls are recorded on the enclosing loop so
   oacc_loop_process can rewrite their mask/chunk arguments later.  */
1184 case IFN_GOACC_LOOP
:
1185 case IFN_GOACC_TILE
:
1186 /* Record the abstraction function, so we can manipulate it
1188 loop
->ifns
.safe_push (call
);
/* IFN_UNIQUE markers: arg 0 distinguishes HEAD_MARK/TAIL_MARK.  */
1192 enum ifn_unique_kind kind
1193 = (enum ifn_unique_kind
) (TREE_INT_CST_LOW
1194 (gimple_call_arg (call
, 0)));
1195 if (kind
== IFN_UNIQUE_OACC_HEAD_MARK
1196 || kind
== IFN_UNIQUE_OACC_TAIL_MARK
)
/* A 2-argument marker terminates a head/tail sequence; a TAIL_MARK
   additionally closes the current loop.  */
1198 if (gimple_call_num_args (call
) == 2)
1200 gcc_assert (marker
&& !remaining
);
1202 if (kind
== IFN_UNIQUE_OACC_TAIL_MARK
)
1203 loop
= finish_oacc_loop (loop
);
1205 loop
->head_end
= call
;
/* First marker of a sequence: arg 2 carries the expected count; a
   HEAD_MARK opens a fresh loop node.  */
1209 int count
= TREE_INT_CST_LOW (gimple_call_arg (call
, 2));
1213 if (kind
== IFN_UNIQUE_OACC_HEAD_MARK
)
1214 loop
= new_oacc_loop (loop
, call
);
1217 gcc_assert (count
== remaining
);
/* File the marker in the per-dimension heads[]/tails[] slots.  */
1221 if (kind
== IFN_UNIQUE_OACC_HEAD_MARK
)
1222 loop
->heads
[marker
] = call
;
1224 loop
->tails
[remaining
] = call
;
/* Mid-sequence: markers must continue in the single fall-through
   successor block, which must be unvisited.  */
1231 if (remaining
|| marker
)
1233 bb
= single_succ (bb
);
1234 gcc_assert (single_pred_p (bb
) && !(bb
->flags
& BB_VISITED
));
1238 /* Walk successor blocks. */
1242 FOR_EACH_EDGE (e
, ei
, bb
->succs
)
1243 oacc_loop_discover_walk (loop
, e
->dest
);
1246 /* LOOP is the first sibling. Reverse the order in place and return
1247 the new first sibling. Recurse to child loops. */
/* NOTE(review): mangled fragment; the loop's tail (advancing to
   'next' and the final return of 'last') is not visible here.
   Classic in-place singly-linked-list reversal over the sibling
   chain, recursing into each node's child list first.  */
1250 oacc_loop_sibling_nreverse (oacc_loop
*loop
)
1252 oacc_loop
*last
= NULL
;
1256 loop
->child
= oacc_loop_sibling_nreverse (loop
->child
);
1258 oacc_loop
*next
= loop
->sibling
;
1259 loop
->sibling
= last
;
1268 /* Discover the OpenACC loops marked up by HEAD and TAIL markers for
1269 the current function. */
/* NOTE(review): mangled fragment; the BB-flag clearing loop and the
   final 'return top;' are not visible.  Builds the outermost dummy
   loop, walks the CFG from the entry block collecting markers, then
   un-reverses the sibling chains for stable diagnostics order.  */
1272 oacc_loop_discovery ()
1274 /* Clear basic block flags, in particular BB_VISITED which we're going to use
1275 in the following. */
1278 oacc_loop
*top
= new_oacc_loop_outer (current_function_decl
);
1279 oacc_loop_discover_walk (top
, ENTRY_BLOCK_PTR_FOR_FN (cfun
));
1281 /* The siblings were constructed in reverse order, reverse them so
1282 that diagnostics come out in an unsurprising order. */
1283 top
= oacc_loop_sibling_nreverse (top
);
1288 /* Transform the abstract internal function markers starting at FROM
1289 to be for partitioning level LEVEL. Stop when we meet another HEAD
/* NOTE(review): mangled fragment; the iterator advance and the loop
   termination 'break' are not visible.  Walks statements from FROM,
   rewriting the level argument of FORK/JOIN (arg 2) and
   GOACC_REDUCTION (arg 3) calls to LEVEL, crossing into the single
   successor block when a BB ends.  */
1293 oacc_loop_xform_head_tail (gcall
*from
, int level
)
1295 enum ifn_unique_kind kind
1296 = (enum ifn_unique_kind
) TREE_INT_CST_LOW (gimple_call_arg (from
, 0));
1297 tree replacement
= build_int_cst (unsigned_type_node
, level
);
1299 for (gimple_stmt_iterator gsi
= gsi_for_stmt (from
);;)
1301 gimple
*stmt
= gsi_stmt (gsi
);
1303 if (gimple_call_internal_p (stmt
, IFN_UNIQUE
))
1305 enum ifn_unique_kind k
1306 = ((enum ifn_unique_kind
)
1307 TREE_INT_CST_LOW (gimple_call_arg (stmt
, 0)));
1309 if (k
== IFN_UNIQUE_OACC_FORK
|| k
== IFN_UNIQUE_OACC_JOIN
)
1310 *gimple_call_arg_ptr (stmt
, 2) = replacement
;
/* A second marker of the same kind (not FROM itself) ends the
   head/tail sequence.  */
1311 else if (k
== kind
&& stmt
!= from
)
1314 else if (gimple_call_internal_p (stmt
, IFN_GOACC_REDUCTION
))
1315 *gimple_call_arg_ptr (stmt
, 3) = replacement
;
/* Fell off the end of a block: continue in its single successor.  */
1318 while (gsi_end_p (gsi
))
1319 gsi
= gsi_start_bb (single_succ (gsi_bb (gsi
)));
1323 /* Process the discovered OpenACC loops, setting the correct
1324 partitioning level etc. */
/* NOTE(review): mangled fragment with residual line numbers.  After
   recursing into children, patches the recorded GOACC_LOOP/GOACC_TILE
   calls with the final mask/e_mask/chunk values, and rewrites each
   head/tail marker pair to its concrete partitioning dimension.  */
1327 oacc_loop_process (oacc_loop
*loop
)
1330 oacc_loop_process (loop
->child
);
/* Routines carry no markers of their own; only partitioned real
   loops need patching.  */
1332 if (loop
->mask
&& !loop
->routine
)
1335 tree mask_arg
= build_int_cst (unsigned_type_node
, loop
->mask
);
1336 tree e_mask_arg
= build_int_cst (unsigned_type_node
, loop
->e_mask
);
1337 tree chunk_arg
= loop
->chunk_size
;
1340 for (ix
= 0; loop
->ifns
.iterate (ix
, &call
); ix
++)
1341 switch (gimple_call_internal_fn (call
))
1343 case IFN_GOACC_LOOP
:
/* Arg 5 == -1 flags the element-loop variant; it receives e_mask,
   otherwise the ordinary mask.  Arg 4 is the chunk size.  */
1345 bool is_e
= gimple_call_arg (call
, 5) == integer_minus_one_node
;
1346 gimple_call_set_arg (call
, 5, is_e
? e_mask_arg
: mask_arg
);
1348 gimple_call_set_arg (call
, 4, chunk_arg
);
1352 case IFN_GOACC_TILE
:
1353 gimple_call_set_arg (call
, 3, mask_arg
);
1354 gimple_call_set_arg (call
, 4, e_mask_arg
);
/* Map each used head/tail slot to the next set bit (dimension) in
   the combined mask, clearing bits as they are consumed.  */
1361 unsigned dim
= GOMP_DIM_GANG
;
1362 unsigned mask
= loop
->mask
| loop
->e_mask
;
1363 for (ix
= 0; ix
!= GOMP_DIM_MAX
&& mask
; ix
++)
1365 while (!(GOMP_DIM_MASK (dim
) & mask
))
1368 oacc_loop_xform_head_tail (loop
->heads
[ix
], dim
);
1369 oacc_loop_xform_head_tail (loop
->tails
[ix
], dim
);
1371 mask
^= GOMP_DIM_MASK (dim
);
1376 oacc_loop_process (loop
->sibling
);
1379 /* Walk the OpenACC loop heirarchy checking and assigning the
1380 programmer-specified partitionings. OUTER_MASK is the partitioning
1381 this loop is contained within. Return mask of partitioning
1382 encountered. If any auto loops are discovered, set GOMP_DIM_MAX
/* NOTE(review): mangled fragment; declarations of 'noisy' and
   'maybe_auto' plus some control-flow lines are missing from this
   view.  Validates user-specified gang/worker/vector assignments
   against containing loops/routines, diagnoses conflicts, splits the
   vector/worker axes onto the element loop when tiling, and returns
   the cumulative mask (with the GOMP_DIM_MAX bit flagging that auto
   partitioning is still needed).  */
1386 oacc_loop_fixed_partitions (oacc_loop
*loop
, unsigned outer_mask
)
1388 unsigned this_mask
= loop
->mask
;
1389 unsigned mask_all
= 0;
1392 #ifdef ACCEL_COMPILER
1393 /* When device_type is supported, we want the device compiler to be
1394 noisy, if the loop parameters are device_type-specific. */
1400 bool auto_par
= (loop
->flags
& OLF_AUTO
) != 0;
1401 bool seq_par
= (loop
->flags
& OLF_SEQ
) != 0;
1402 bool tiling
= (loop
->flags
& OLF_TILE
) != 0;
1404 this_mask
= ((loop
->flags
>> OLF_DIM_BASE
)
1405 & (GOMP_DIM_MASK (GOMP_DIM_MAX
) - 1));
1407 /* Apply auto partitioning if this is a non-partitioned regular
1408 loop, or (no more than) single axis tiled loop. */
1410 = !seq_par
&& this_mask
== (tiling
? this_mask
& -this_mask
: 0);
/* seq/auto/explicit-axis clauses are mutually exclusive.  */
1412 if ((this_mask
!= 0) + auto_par
+ seq_par
> 1)
1415 error_at (loop
->loc
,
1417 ? G_("%<seq%> overrides other OpenACC loop specifiers")
1418 : G_("%<auto%> conflicts with other OpenACC loop "
1421 loop
->flags
&= ~OLF_AUTO
;
1425 &= ~((GOMP_DIM_MASK (GOMP_DIM_MAX
) - 1) << OLF_DIM_BASE
);
/* Independent + auto-capable: mark for later auto assignment via the
   GOMP_DIM_MAX sentinel bit.  */
1430 if (maybe_auto
&& (loop
->flags
& OLF_INDEPENDENT
))
1432 loop
->flags
|= OLF_AUTO
;
1433 mask_all
|= GOMP_DIM_MASK (GOMP_DIM_MAX
);
/* Reusing an axis already claimed by a containing loop (or
   disallowed by a containing routine) is an error.  */
1437 if (this_mask
& outer_mask
)
1439 const oacc_loop
*outer
;
1440 for (outer
= loop
->parent
; outer
; outer
= outer
->parent
)
1441 if ((outer
->mask
| outer
->e_mask
) & this_mask
)
1448 error_at (loop
->loc
,
1450 ? G_("routine call uses same OpenACC parallelism"
1451 " as containing loop")
1452 : G_("inner loop uses same OpenACC parallelism"
1453 " as containing loop"));
1454 inform (outer
->loc
, "containing loop here");
1457 error_at (loop
->loc
,
1459 ? G_("routine call uses OpenACC parallelism disallowed"
1460 " by containing routine")
1461 : G_("loop uses OpenACC parallelism disallowed"
1462 " by containing routine"));
1465 inform (DECL_SOURCE_LOCATION (loop
->routine
),
1466 "routine %qD declared here", loop
->routine
);
1468 this_mask
&= ~outer_mask
;
/* An inner loop may not use an axis outside (less than) one already
   in use by its context: nesting order is gang > worker > vector.  */
1472 unsigned outermost
= least_bit_hwi (this_mask
);
1474 if (outermost
&& outermost
<= outer_mask
)
1478 error_at (loop
->loc
,
1479 "incorrectly nested OpenACC loop parallelism");
1481 const oacc_loop
*outer
;
1482 for (outer
= loop
->parent
;
1483 outer
->flags
&& outer
->flags
< outermost
;
1484 outer
= outer
->parent
)
1486 inform (outer
->loc
, "containing loop here");
1489 this_mask
&= ~outermost
;
1493 mask_all
|= this_mask
;
1495 if (loop
->flags
& OLF_TILE
)
1497 /* When tiling, vector goes to the element loop, and failing
1498 that we put worker there. The std doesn't contemplate
1499 specifying all three. We choose to put worker and vector on
1500 the element loops in that case. */
1501 unsigned this_e_mask
= this_mask
& GOMP_DIM_MASK (GOMP_DIM_VECTOR
);
1502 if (!this_e_mask
|| this_mask
& GOMP_DIM_MASK (GOMP_DIM_GANG
))
1503 this_e_mask
|= this_mask
& GOMP_DIM_MASK (GOMP_DIM_WORKER
);
1505 loop
->e_mask
= this_e_mask
;
1506 this_mask
^= this_e_mask
;
1509 loop
->mask
= this_mask
;
1512 fprintf (dump_file
, "Loop %s:%d user specified %d & %d\n",
1513 LOCATION_FILE (loop
->loc
), LOCATION_LINE (loop
->loc
),
1514 loop
->mask
, loop
->e_mask
);
/* Recurse into children with the widened mask; fold child and
   sibling results into the cumulative mask.  */
1518 unsigned tmp_mask
= outer_mask
| this_mask
| loop
->e_mask
;
1519 loop
->inner
= oacc_loop_fixed_partitions (loop
->child
, tmp_mask
);
1520 mask_all
|= loop
->inner
;
1524 mask_all
|= oacc_loop_fixed_partitions (loop
->sibling
, outer_mask
);
1529 /* Walk the OpenACC loop heirarchy to assign auto-partitioned loops.
1530 OUTER_MASK is the partitioning this loop is contained within.
1531 OUTER_ASSIGN is true if an outer loop is being auto-partitioned.
1532 Return the cumulative partitioning used by this loop, siblings and
/* NOTE(review): mangled fragment; the 'noisy' declaration, some
   shifts, and a few control-flow lines are missing from this view.
   Two-phase auto assignment: outermost candidate axes are taken
   before recursing into children, then (post-recursion) the loop is
   also given the innermost axis still free, with special handling to
   grab a second axis for tiled loops' element loop.  */
1536 oacc_loop_auto_partitions (oacc_loop
*loop
, unsigned outer_mask
,
1539 bool assign
= (loop
->flags
& OLF_AUTO
) && (loop
->flags
& OLF_INDEPENDENT
);
1541 bool tiling
= loop
->flags
& OLF_TILE
;
1543 #ifdef ACCEL_COMPILER
1544 /* When device_type is supported, we want the device compiler to be
1545 noisy, if the loop parameters are device_type-specific. */
/* Phase 1: outermost-first assignment, only for the first auto loop
   in a nest or when inner loops already claimed axes.  */
1549 if (assign
&& (!outer_assign
|| loop
->inner
))
1551 /* Allocate outermost and non-innermost loops at the outermost
1552 non-innermost available level. */
1553 unsigned this_mask
= GOMP_DIM_MASK (GOMP_DIM_GANG
);
1555 /* Find the first outermost available partition. */
1556 while (this_mask
<= outer_mask
)
1559 /* Grab two axes if tiling, and we've not assigned anything */
1560 if (tiling
&& !(loop
->mask
| loop
->e_mask
))
1561 this_mask
|= this_mask
<< 1;
1563 /* Prohibit the innermost partitioning at the moment. */
1564 this_mask
&= GOMP_DIM_MASK (GOMP_DIM_MAX
- 1) - 1;
1566 /* Don't use any dimension explicitly claimed by an inner loop. */
1567 this_mask
&= ~loop
->inner
;
1569 if (tiling
&& !loop
->e_mask
)
1571 /* If we got two axes, allocate the inner one to the element
1573 loop
->e_mask
= this_mask
& (this_mask
<< 1);
1574 this_mask
^= loop
->e_mask
;
1577 loop
->mask
|= this_mask
;
/* Recurse into children with the updated claim set.  */
1582 unsigned tmp_mask
= outer_mask
| loop
->mask
| loop
->e_mask
;
1583 loop
->inner
= oacc_loop_auto_partitions (loop
->child
, tmp_mask
,
1584 outer_assign
| assign
);
/* Phase 2: innermost assignment (possibly a second axis for this
   loop, giving 2-axis partitioning when available).  */
1587 if (assign
&& (!loop
->mask
|| (tiling
&& !loop
->e_mask
) || !outer_assign
))
1589 /* Allocate the loop at the innermost available level. Note
1590 that we do this even if we already assigned this loop the
1591 outermost available level above. That way we'll partition
1592 this along 2 axes, if they are available. */
1593 unsigned this_mask
= 0;
1595 /* Determine the outermost partitioning used within this loop. */
1596 this_mask
= loop
->inner
| GOMP_DIM_MASK (GOMP_DIM_MAX
);
1597 this_mask
= least_bit_hwi (this_mask
);
1599 /* Pick the partitioning just inside that one. */
1602 /* And avoid picking one use by an outer loop. */
1603 this_mask
&= ~outer_mask
;
1605 /* If tiling and we failed completely above, grab the next one
1606 too. Making sure it doesn't hit an outer loop. */
1609 this_mask
&= ~(loop
->e_mask
| loop
->mask
);
1610 unsigned tile_mask
= ((this_mask
>> 1)
1611 & ~(outer_mask
| loop
->e_mask
| loop
->mask
));
1613 if (tile_mask
|| loop
->mask
)
1615 loop
->e_mask
|= this_mask
;
1616 this_mask
= tile_mask
;
1618 if (!loop
->e_mask
&& noisy
)
1619 warning_at (loop
->loc
, 0,
1620 "insufficient partitioning available"
1621 " to parallelize element loop");
1624 loop
->mask
|= this_mask
;
1625 if (!loop
->mask
&& noisy
)
1626 warning_at (loop
->loc
, 0,
1628 ? G_("insufficient partitioning available"
1629 " to parallelize tile loop")
1630 : G_("insufficient partitioning available"
1631 " to parallelize loop"));
1634 if (assign
&& dump_file
)
1635 fprintf (dump_file
, "Auto loop %s:%d assigned %d & %d\n",
1636 LOCATION_FILE (loop
->loc
), LOCATION_LINE (loop
->loc
),
1637 loop
->mask
, loop
->e_mask
);
/* Fold in sibling results and return the cumulative usage.  */
1639 unsigned inner_mask
= 0;
1642 inner_mask
|= oacc_loop_auto_partitions (loop
->sibling
,
1643 outer_mask
, outer_assign
);
1645 inner_mask
|= loop
->inner
| loop
->mask
| loop
->e_mask
;
1650 /* Walk the OpenACC loop heirarchy to check and assign partitioning
1651 axes. Return mask of partitioning. */
/* NOTE(review): mangled fragment; the final return is not visible.
   Driver: run the fixed-partition check, and if it flagged auto
   loops (GOMP_DIM_MAX sentinel bit), clear the sentinel and run the
   auto-partition pass as well.  */
1654 oacc_loop_partition (oacc_loop
*loop
, unsigned outer_mask
)
1656 unsigned mask_all
= oacc_loop_fixed_partitions (loop
, outer_mask
);
1658 if (mask_all
& GOMP_DIM_MASK (GOMP_DIM_MAX
))
1660 mask_all
^= GOMP_DIM_MASK (GOMP_DIM_MAX
);
1661 mask_all
|= oacc_loop_auto_partitions (loop
, outer_mask
, false);
1666 /* Default fork/join early expander. Delete the function calls if
1667 there is no RTL expander. */
/* NOTE(review): mangled fragment; the is_fork branch structure is
   only partly visible.  Returns whether the target provides an RTL
   expander for oacc fork (when IS_FORK) or join; without one the
   call is to be deleted by the caller.  */
1670 default_goacc_fork_join (gcall
*ARG_UNUSED (call
),
1671 const int *ARG_UNUSED (dims
), bool is_fork
)
1674 return targetm
.have_oacc_fork ();
1676 return targetm
.have_oacc_join ();
1679 /* Default goacc.reduction early expander.
1681 LHS-opt = IFN_REDUCTION (KIND, RES_PTR, VAR, LEVEL, OP, OFFSET)
1682 If RES_PTR is not integer-zerop:
1683 SETUP - emit 'LHS = *RES_PTR', LHS = NULL
1684 TEARDOWN - emit '*RES_PTR = VAR'
/* NOTE(review): mangled fragment; the SETUP-specific src/dst swap
   lines are only partly visible.  Lowers a GOACC_REDUCTION call on
   hosts/targets with no special reduction support: copy through the
   receiver object for SETUP/TEARDOWN, then forward VAR to LHS, and
   splice the generated sequence in place of the call.  */
1689 default_goacc_reduction (gcall
*call
)
1691 unsigned code
= (unsigned)TREE_INT_CST_LOW (gimple_call_arg (call
, 0));
1692 gimple_stmt_iterator gsi
= gsi_for_stmt (call
);
1693 tree lhs
= gimple_call_lhs (call
);
1694 tree var
= gimple_call_arg (call
, 2);
1695 gimple_seq seq
= NULL
;
1697 if (code
== IFN_GOACC_REDUCTION_SETUP
1698 || code
== IFN_GOACC_REDUCTION_TEARDOWN
)
1700 /* Setup and Teardown need to copy from/to the receiver object,
1702 tree ref_to_res
= gimple_call_arg (call
, 1);
1704 if (!integer_zerop (ref_to_res
))
1706 tree dst
= build_simple_mem_ref (ref_to_res
);
1709 if (code
== IFN_GOACC_REDUCTION_SETUP
)
1715 gimple_seq_add_stmt (&seq
, gimple_build_assign (dst
, src
));
1719 /* Copy VAR to LHS, if there is an LHS. */
1721 gimple_seq_add_stmt (&seq
, gimple_build_assign (lhs
, var
));
1723 gsi_replace_with_seq (&gsi
, seq
, true);
1726 /* Main entry point for oacc transformations which run on the device
1727 compiler after LTO, so we know what the target device is at this
1728 point (including the host fallback). */
/* NOTE(review): mangled fragment with residual line numbers; early
   returns, some braces, and loop scaffolding are missing from this
   view.  Pipeline: parse default dims once, normalize kernels
   attributes, discover/partition/validate/process the OpenACC loop
   structure, emit dump/opt-info diagnostics, then lower the abstract
   internal functions (GOACC_TILE/LOOP/REDUCTION, IFN_UNIQUE markers)
   to target-specific form, cleaning up SSA as calls are removed.  */
1731 execute_oacc_device_lower ()
1733 tree attrs
= oacc_get_fn_attrib (current_function_decl
);
1736 /* Not an offloaded function. */
1739 /* Parse the default dim argument exactly once. */
/* Sentinel trick: flag_openacc_dims is pointed at itself once
   parsed, so reprocessing is skipped.  */
1740 if ((const void *)flag_openacc_dims
!= &flag_openacc_dims
)
1742 oacc_parse_default_dims (flag_openacc_dims
);
1743 flag_openacc_dims
= (char *)&flag_openacc_dims
;
1746 bool is_oacc_kernels
1747 = (lookup_attribute ("oacc kernels",
1748 DECL_ATTRIBUTES (current_function_decl
)) != NULL
);
1749 bool is_oacc_kernels_parallelized
1750 = (lookup_attribute ("oacc kernels parallelized",
1751 DECL_ATTRIBUTES (current_function_decl
)) != NULL
);
1753 /* Unparallelized OpenACC kernels constructs must get launched as 1 x 1 x 1
1754 kernels, so remove the parallelism dimensions function attributes
1755 potentially set earlier on. */
1756 if (is_oacc_kernels
&& !is_oacc_kernels_parallelized
)
1758 oacc_set_fn_attrib (current_function_decl
, NULL
, NULL
);
1759 attrs
= oacc_get_fn_attrib (current_function_decl
);
1762 /* Discover, partition and process the loops. */
1763 oacc_loop
*loops
= oacc_loop_discovery ();
1764 int fn_level
= oacc_fn_attrib_level (attrs
);
1769 fprintf (dump_file
, "Function is OpenACC routine level %d\n",
1771 else if (is_oacc_kernels
)
1772 fprintf (dump_file
, "Function is %s OpenACC kernels offload\n",
1773 (is_oacc_kernels_parallelized
1774 ? "parallelized" : "unparallelized"));
1776 fprintf (dump_file
, "Function is OpenACC parallel offload\n");
1779 unsigned outer_mask
= fn_level
>= 0 ? GOMP_DIM_MASK (fn_level
) - 1 : 0;
1780 unsigned used_mask
= oacc_loop_partition (loops
, outer_mask
);
1781 /* OpenACC kernels constructs are special: they currently don't use the
1782 generic oacc_loop infrastructure and attribute/dimension processing. */
1783 if (is_oacc_kernels
&& is_oacc_kernels_parallelized
)
1785 /* Parallelized OpenACC kernels constructs use gang parallelism. See
1786 also tree-parloops.c:create_parallel_loop. */
1787 used_mask
|= GOMP_DIM_MASK (GOMP_DIM_GANG
);
1790 int dims
[GOMP_DIM_MAX
];
1791 oacc_validate_dims (current_function_decl
, attrs
, dims
, fn_level
, used_mask
);
1795 const char *comma
= "Compute dimensions [";
1796 for (int ix
= 0; ix
!= GOMP_DIM_MAX
; ix
++, comma
= ", ")
1797 fprintf (dump_file
, "%s%d", comma
, dims
[ix
]);
1798 fprintf (dump_file
, "]\n");
1801 oacc_loop_process (loops
);
1804 fprintf (dump_file
, "OpenACC loops\n");
1805 dump_oacc_loop (dump_file
, loops
, 0);
1806 fprintf (dump_file
, "\n");
1808 if (dump_enabled_p ())
1810 oacc_loop
*l
= loops
;
1811 /* OpenACC kernels constructs are special: they currently don't use the
1812 generic oacc_loop infrastructure. */
1813 if (is_oacc_kernels
)
1815 /* Create a fake oacc_loop for diagnostic purposes. */
1816 l
= new_oacc_loop_raw (NULL
,
1817 DECL_SOURCE_LOCATION (current_function_decl
));
1818 l
->mask
= used_mask
;
1822 /* Skip the outermost, dummy OpenACC loop */
1826 inform_oacc_loop (l
);
1827 if (is_oacc_kernels
)
1831 /* Offloaded targets may introduce new basic blocks, which require
1832 dominance information to update SSA. */
1833 calculate_dominance_info (CDI_DOMINATORS
);
1835 /* Now lower internal loop functions to target-specific code
/* Per-statement lowering loop: each abstraction call is expanded or
   deleted; 'rescan'/'remove' control iterator rewind and call
   removal (with vdef/lhs fixups) below.  */
1838 FOR_ALL_BB_FN (bb
, cfun
)
1839 for (gimple_stmt_iterator gsi
= gsi_start_bb (bb
); !gsi_end_p (gsi
);)
1841 gimple
*stmt
= gsi_stmt (gsi
);
1842 if (!is_gimple_call (stmt
))
1848 gcall
*call
= as_a
<gcall
*> (stmt
);
1849 if (!gimple_call_internal_p (call
))
1855 /* Rewind to allow rescan. */
1857 bool rescan
= false, remove
= false;
1858 enum internal_fn ifn_code
= gimple_call_internal_fn (call
);
1864 case IFN_GOACC_TILE
:
1865 oacc_xform_tile (call
);
1869 case IFN_GOACC_LOOP
:
1870 oacc_xform_loop (call
);
1874 case IFN_GOACC_REDUCTION
:
1875 /* Mark the function for SSA renaming. */
1876 mark_virtual_operands_for_renaming (cfun
);
1878 /* If the level is -1, this ended up being an unused
1879 axis. Handle as a default. */
1880 if (integer_minus_onep (gimple_call_arg (call
, 3)))
1881 default_goacc_reduction (call
);
1883 targetm
.goacc
.reduction (call
);
1889 enum ifn_unique_kind kind
1890 = ((enum ifn_unique_kind
)
1891 TREE_INT_CST_LOW (gimple_call_arg (call
, 0)));
1898 case IFN_UNIQUE_OACC_FORK
:
1899 case IFN_UNIQUE_OACC_JOIN
:
/* Level -1 means an unused axis: drop the marker.  Otherwise let
   the target hook decide whether the call survives to RTL.  */
1900 if (integer_minus_onep (gimple_call_arg (call
, 2)))
1902 else if (!targetm
.goacc
.fork_join
1903 (call
, dims
, kind
== IFN_UNIQUE_OACC_FORK
))
1907 case IFN_UNIQUE_OACC_HEAD_MARK
:
1908 case IFN_UNIQUE_OACC_TAIL_MARK
:
1916 if (gsi_end_p (gsi
))
1917 /* We rewound past the beginning of the BB. */
1918 gsi
= gsi_start_bb (bb
);
1920 /* Undo the rewind. */
/* Removing a call: re-route its virtual def and propagate the
   data-dependency value through any LHS before deleting.  */
1925 if (gimple_vdef (call
))
1926 replace_uses_by (gimple_vdef (call
), gimple_vuse (call
));
1927 if (gimple_call_lhs (call
))
1929 /* Propagate the data dependency var. */
1930 gimple
*ass
= gimple_build_assign (gimple_call_lhs (call
),
1931 gimple_call_arg (call
, 1));
1932 gsi_replace (&gsi
, ass
, false);
1935 gsi_remove (&gsi
, true);
1938 /* If not rescanning, advance over the call. */
1942 free_oacc_loop (loops
);
1947 /* Default launch dimension validator. Force everything to 1. A
1948 backend that wants to provide larger dimensions must override this
/* NOTE(review): mangled fragment; the per-dimension body forcing
   dims[ix] to 1 and the return of 'changed' are not visible here.  */
1952 default_goacc_validate_dims (tree
ARG_UNUSED (decl
), int *dims
,
1953 int ARG_UNUSED (fn_level
),
1954 unsigned ARG_UNUSED (used
))
1956 bool changed
= false;
1958 for (unsigned ix
= 0; ix
!= GOMP_DIM_MAX
; ix
++)
1970 /* Default dimension bound is unknown on accelerator and 1 on host. */
/* NOTE(review): mangled fragment; the two return statements selected
   by the #ifdef are not visible here.  */
1973 default_goacc_dim_limit (int ARG_UNUSED (axis
))
1975 #ifdef ACCEL_COMPILER
/* Pass descriptor for the "oaccdevlow" GIMPLE pass: requires a CFG,
   schedules SSA update and CFG cleanup afterwards.  */
1984 const pass_data pass_data_oacc_device_lower
=
1986 GIMPLE_PASS
, /* type */
1987 "oaccdevlow", /* name */
1988 OPTGROUP_OMP
, /* optinfo_flags */
1989 TV_NONE
, /* tv_id */
1990 PROP_cfg
, /* properties_required */
1991 0 /* Possibly PROP_gimple_eomp. */, /* properties_provided */
1992 0, /* properties_destroyed */
1993 0, /* todo_flags_start */
1994 TODO_update_ssa
| TODO_cleanup_cfg
, /* todo_flags_finish */
/* Pass wrapper: gate on -fopenacc, delegate to
   execute_oacc_device_lower.  */
1997 class pass_oacc_device_lower
: public gimple_opt_pass
2000 pass_oacc_device_lower (gcc::context
*ctxt
)
2001 : gimple_opt_pass (pass_data_oacc_device_lower
, ctxt
)
2004 /* opt_pass methods: */
2005 virtual bool gate (function
*) { return flag_openacc
; };
2007 virtual unsigned int execute (function
*)
2009 return execute_oacc_device_lower ();
2012 }; // class pass_oacc_device_lower
/* Pass-manager factory for the oaccdevlow pass.  */
2017 make_pass_oacc_device_lower (gcc::context
*ctxt
)
2019 return new pass_oacc_device_lower (ctxt
);
2023 /* Rewrite GOMP_SIMT_ENTER_ALLOC call given by GSI and remove the preceding
2024 GOMP_SIMT_ENTER call identifying the privatized variables, which are
2025 turned to structure fields and receive a DECL_VALUE_EXPR accordingly.
2026 Set *REGIMPLIFY to true, except if no privatized variables were seen. */
/* NOTE(review): mangled fragment; the *regimplify assignment and a
   few scaffolding lines are missing from this view.  Builds an
   artificial RECORD_TYPE holding one field per privatized variable,
   redirects each variable through a DECL_VALUE_EXPR into that record,
   rewrites the alloc call to carry the record's size/alignment, and
   clobbers the record at the matching SIMT_EXIT.  */
2029 ompdevlow_adjust_simt_enter (gimple_stmt_iterator
*gsi
, bool *regimplify
)
2031 gimple
*alloc_stmt
= gsi_stmt (*gsi
);
2032 tree simtrec
= gimple_call_lhs (alloc_stmt
);
2033 tree simduid
= gimple_call_arg (alloc_stmt
, 0);
2034 gimple
*enter_stmt
= SSA_NAME_DEF_STMT (simduid
);
2035 gcc_assert (gimple_call_internal_p (enter_stmt
, IFN_GOMP_SIMT_ENTER
));
2036 tree rectype
= lang_hooks
.types
.make_type (RECORD_TYPE
);
2037 TYPE_ARTIFICIAL (rectype
) = TYPE_NAMELESS (rectype
) = 1;
2038 TREE_ADDRESSABLE (rectype
) = 1;
2039 TREE_TYPE (simtrec
) = build_pointer_type (rectype
);
/* Args 1..N of the ENTER call are &VAR addresses of the privatized
   variables (null entries are skipped).  */
2040 for (unsigned i
= 1; i
< gimple_call_num_args (enter_stmt
); i
++)
2042 tree
*argp
= gimple_call_arg_ptr (enter_stmt
, i
);
2043 if (*argp
== null_pointer_node
)
2045 gcc_assert (TREE_CODE (*argp
) == ADDR_EXPR
2046 && VAR_P (TREE_OPERAND (*argp
, 0)));
2047 tree var
= TREE_OPERAND (*argp
, 0);
2049 tree field
= build_decl (DECL_SOURCE_LOCATION (var
), FIELD_DECL
,
2050 DECL_NAME (var
), TREE_TYPE (var
));
2051 SET_DECL_ALIGN (field
, DECL_ALIGN (var
));
2052 DECL_USER_ALIGN (field
) = DECL_USER_ALIGN (var
);
2053 TREE_THIS_VOLATILE (field
) = TREE_THIS_VOLATILE (var
);
2055 insert_field_into_struct (rectype
, field
);
/* VAR now lives as (*simtrec).field via DECL_VALUE_EXPR.  */
2057 tree t
= build_simple_mem_ref (simtrec
);
2058 t
= build3 (COMPONENT_REF
, TREE_TYPE (var
), t
, field
, NULL
);
2059 TREE_THIS_VOLATILE (t
) = TREE_THIS_VOLATILE (var
);
2060 SET_DECL_VALUE_EXPR (var
, t
);
2061 DECL_HAS_VALUE_EXPR_P (var
) = 1;
2064 layout_type (rectype
);
2065 tree size
= TYPE_SIZE_UNIT (rectype
);
2066 tree align
= build_int_cst (TREE_TYPE (size
), TYPE_ALIGN_UNIT (rectype
));
/* Replace the alloc call with one carrying the finished record's
   size and alignment; the ENTER call degenerates to a copy of its
   first argument.  */
2069 = gimple_build_call_internal (IFN_GOMP_SIMT_ENTER_ALLOC
, 2, size
, align
);
2070 gimple_call_set_lhs (alloc_stmt
, simtrec
);
2071 gsi_replace (gsi
, alloc_stmt
, false);
2072 gimple_stmt_iterator enter_gsi
= gsi_for_stmt (enter_stmt
);
2073 enter_stmt
= gimple_build_assign (simduid
, gimple_call_arg (enter_stmt
, 0));
2074 gsi_replace (&enter_gsi
, enter_stmt
, false);
/* Clobber the record at the single SIMT_EXIT use, marking the end
   of its lifetime.  */
2078 if (single_imm_use (simtrec
, &use
, &exit_stmt
))
2080 gcc_assert (gimple_call_internal_p (exit_stmt
, IFN_GOMP_SIMT_EXIT
));
2081 gimple_stmt_iterator exit_gsi
= gsi_for_stmt (exit_stmt
);
2082 tree clobber
= build_clobber (rectype
);
2083 exit_stmt
= gimple_build_assign (build_simple_mem_ref (simtrec
), clobber
);
2084 gsi_insert_before (&exit_gsi
, exit_stmt
, GSI_SAME_STMT
);
2087 gcc_checking_assert (has_zero_uses (simtrec
));
2090 /* Callback for walk_gimple_stmt used to scan for SIMT-privatized variables. */
/* NOTE(review): mangled fragment; the declaration of 't' and the
   return statements are not visible here.  Matches VAR_DECLs that
   carry a DECL_VALUE_EXPR and the "omp simt private" attribute.  */
2093 find_simtpriv_var_op (tree
*tp
, int *walk_subtrees
, void *)
2098 && DECL_HAS_VALUE_EXPR_P (t
)
2099 && lookup_attribute ("omp simt private", DECL_ATTRIBUTES (t
)))
2107 /* Cleanup uses of SIMT placeholder internal functions: on non-SIMT targets,
2108 VF is 1 and LANE is 0; on SIMT targets, VF is folded to a constant, and
2109 LANE is kept to be expanded to RTL later on. Also cleanup all other SIMT
2110 internal functions on non-SIMT targets, and likewise some SIMD internal
2111 functions on SIMT targets. */
/* NOTE(review): mangled fragment with residual line numbers; some
   braces, 'continue'/'break' statements and the final return are not
   visible.  Folds each GOMP_SIMT_*/GOMP_SIMD_* placeholder call to a
   constant (or its pass-through argument) depending on the target's
   SIMT vectorization factor VF, resolves declare-variant calls, then
   regimplifies statements referencing SIMT-privatized variables.  */
2114 execute_omp_device_lower ()
2116 int vf
= targetm
.simt
.vf
? targetm
.simt
.vf () : 1;
2117 bool regimplify
= false;
2119 gimple_stmt_iterator gsi
;
2120 bool calls_declare_variant_alt
2121 = cgraph_node::get (cfun
->decl
)->calls_declare_variant_alt
;
2122 FOR_EACH_BB_FN (bb
, cfun
)
2123 for (gsi
= gsi_start_bb (bb
); !gsi_end_p (gsi
); gsi_next (&gsi
))
2125 gimple
*stmt
= gsi_stmt (gsi
);
2126 if (!is_gimple_call (stmt
))
2128 if (!gimple_call_internal_p (stmt
))
/* Non-internal calls: only declare-variant resolution applies.  */
2130 if (calls_declare_variant_alt
)
2131 if (tree fndecl
= gimple_call_fndecl (stmt
))
2133 tree new_fndecl
= omp_resolve_declare_variant (fndecl
);
2134 if (new_fndecl
!= fndecl
)
2136 gimple_call_set_fndecl (stmt
, new_fndecl
);
/* RHS selected below replaces the call; NULL_TREE means keep the
   call for later RTL expansion (SIMT targets).  */
2142 tree lhs
= gimple_call_lhs (stmt
), rhs
= NULL_TREE
;
2143 tree type
= lhs
? TREE_TYPE (lhs
) : integer_type_node
;
2144 switch (gimple_call_internal_fn (stmt
))
2146 case IFN_GOMP_USE_SIMT
:
2147 rhs
= vf
== 1 ? integer_zero_node
: integer_one_node
;
2149 case IFN_GOMP_SIMT_ENTER
:
2150 rhs
= vf
== 1 ? gimple_call_arg (stmt
, 0) : NULL_TREE
;
2151 goto simtreg_enter_exit
;
2152 case IFN_GOMP_SIMT_ENTER_ALLOC
:
2154 ompdevlow_adjust_simt_enter (&gsi
, &regimplify
);
2155 rhs
= vf
== 1 ? null_pointer_node
: NULL_TREE
;
2156 goto simtreg_enter_exit
;
2157 case IFN_GOMP_SIMT_EXIT
:
2161 unlink_stmt_vdef (stmt
);
2163 case IFN_GOMP_SIMT_LANE
:
2164 case IFN_GOMP_SIMT_LAST_LANE
:
2165 rhs
= vf
== 1 ? build_zero_cst (type
) : NULL_TREE
;
2167 case IFN_GOMP_SIMT_VF
:
2168 rhs
= build_int_cst (type
, vf
);
2170 case IFN_GOMP_SIMT_ORDERED_PRED
:
2171 rhs
= vf
== 1 ? integer_zero_node
: NULL_TREE
;
2173 unlink_stmt_vdef (stmt
);
2175 case IFN_GOMP_SIMT_VOTE_ANY
:
2176 case IFN_GOMP_SIMT_XCHG_BFLY
:
2177 case IFN_GOMP_SIMT_XCHG_IDX
:
2178 rhs
= vf
== 1 ? gimple_call_arg (stmt
, 0) : NULL_TREE
;
/* SIMD placeholders fold the opposite way: trivial when VF != 1.  */
2180 case IFN_GOMP_SIMD_LANE
:
2181 case IFN_GOMP_SIMD_LAST_LANE
:
2182 rhs
= vf
!= 1 ? build_zero_cst (type
) : NULL_TREE
;
2184 case IFN_GOMP_SIMD_VF
:
2185 rhs
= vf
!= 1 ? build_one_cst (type
) : NULL_TREE
;
2192 stmt
= lhs
? gimple_build_assign (lhs
, rhs
) : gimple_build_nop ();
2193 gsi_replace (&gsi
, stmt
, false);
/* Reverse walk: regimplify statements that mention SIMT-privatized
   variables (now value-exprs into the record built above); clobbers
   of such variables are simply dropped.  */
2196 FOR_EACH_BB_REVERSE_FN (bb
, cfun
)
2197 for (gsi
= gsi_last_bb (bb
); !gsi_end_p (gsi
); gsi_prev (&gsi
))
2198 if (walk_gimple_stmt (&gsi
, NULL
, find_simtpriv_var_op
, NULL
))
2200 if (gimple_clobber_p (gsi_stmt (gsi
)))
2201 gsi_remove (&gsi
, true);
2203 gimple_regimplify_operands (gsi_stmt (gsi
), &gsi
);
2206 cfun
->has_force_vectorize_loops
= false;
/* Pass descriptor for the "ompdevlow" GIMPLE pass: requires a CFG,
   provides PROP_gimple_lomp_dev, schedules SSA update afterwards.  */
2212 const pass_data pass_data_omp_device_lower
=
2214 GIMPLE_PASS
, /* type */
2215 "ompdevlow", /* name */
2216 OPTGROUP_OMP
, /* optinfo_flags */
2217 TV_NONE
, /* tv_id */
2218 PROP_cfg
, /* properties_required */
2219 PROP_gimple_lomp_dev
, /* properties_provided */
2220 0, /* properties_destroyed */
2221 0, /* todo_flags_start */
2222 TODO_update_ssa
, /* todo_flags_finish */
/* Pass wrapper: runs when PROP_gimple_lomp_dev is not yet provided
   (or declare-variant calls remain); delegates to
   execute_omp_device_lower.  */
2225 class pass_omp_device_lower
: public gimple_opt_pass
2228 pass_omp_device_lower (gcc::context
*ctxt
)
2229 : gimple_opt_pass (pass_data_omp_device_lower
, ctxt
)
2232 /* opt_pass methods: */
2233 virtual bool gate (function
*fun
)
2235 return (!(fun
->curr_properties
& PROP_gimple_lomp_dev
)
2237 && cgraph_node::get (fun
->decl
)->calls_declare_variant_alt
));
2239 virtual unsigned int execute (function
*)
2241 return execute_omp_device_lower ();
2244 }; // class pass_expand_omp_ssa
/* Pass-manager factory for the ompdevlow pass.  */
2249 make_pass_omp_device_lower (gcc::context
*ctxt
)
2251 return new pass_omp_device_lower (ctxt
);
2254 /* "omp declare target link" handling pass. */
/* Pass descriptor for the "omptargetlink" GIMPLE pass: requires SSA,
   schedules SSA update afterwards.  */
2258 const pass_data pass_data_omp_target_link
=
2260 GIMPLE_PASS
, /* type */
2261 "omptargetlink", /* name */
2262 OPTGROUP_OMP
, /* optinfo_flags */
2263 TV_NONE
, /* tv_id */
2264 PROP_ssa
, /* properties_required */
2265 0, /* properties_provided */
2266 0, /* properties_destroyed */
2267 0, /* todo_flags_start */
2268 TODO_update_ssa
, /* todo_flags_finish */
/* Pass wrapper: gated on being an offloading function when built as
   an accelerator compiler (the non-ACCEL branch of the gate is not
   visible in this fragment).  */
2271 class pass_omp_target_link
: public gimple_opt_pass
2274 pass_omp_target_link (gcc::context
*ctxt
)
2275 : gimple_opt_pass (pass_data_omp_target_link
, ctxt
)
2278 /* opt_pass methods: */
2279 virtual bool gate (function
*fun
)
2281 #ifdef ACCEL_COMPILER
2282 return offloading_function_p (fun
->decl
);
2289 virtual unsigned execute (function
*);
2292 /* Callback for walk_gimple_stmt used to scan for link var operands. */
/* NOTE(review): mangled fragment; the declaration of 't' and the
   return statements are not visible here.  Matches global variables
   carrying a DECL_VALUE_EXPR and the "omp declare target link"
   attribute.  */
2295 find_link_var_op (tree
*tp
, int *walk_subtrees
, void *)
2300 && DECL_HAS_VALUE_EXPR_P (t
)
2301 && is_global_var (t
)
2302 && lookup_attribute ("omp declare target link", DECL_ATTRIBUTES (t
)))
/* NOTE(review): mangled fragment; the basic_block declaration and
   the return value are not visible.  Regimplify every statement that
   references an "omp declare target link" variable, so its
   DECL_VALUE_EXPR indirection is expanded.  */
2312 pass_omp_target_link::execute (function
*fun
)
2315 FOR_EACH_BB_FN (bb
, fun
)
2317 gimple_stmt_iterator gsi
;
2318 for (gsi
= gsi_start_bb (bb
); !gsi_end_p (gsi
); gsi_next (&gsi
))
2319 if (walk_gimple_stmt (&gsi
, NULL
, find_link_var_op
, NULL
))
2320 gimple_regimplify_operands (gsi_stmt (gsi
), &gsi
);
2329 make_pass_omp_target_link (gcc::context
*ctxt
)
2331 return new pass_omp_target_link (ctxt
);