testsuite: remove SPE tests.
[official-gcc.git] / gcc / omp-offload.c
blob32c2485abd422339105bd3deae5b9a21a03e5db6
1 /* Bits of OpenMP and OpenACC handling that is specific to device offloading
2 and a lowering pass for OpenACC device directives.
4 Copyright (C) 2005-2020 Free Software Foundation, Inc.
6 This file is part of GCC.
8 GCC is free software; you can redistribute it and/or modify it under
9 the terms of the GNU General Public License as published by the Free
10 Software Foundation; either version 3, or (at your option) any later
11 version.
13 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
14 WARRANTY; without even the implied warranty of MERCHANTABILITY or
15 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
16 for more details.
18 You should have received a copy of the GNU General Public License
19 along with GCC; see the file COPYING3. If not see
20 <http://www.gnu.org/licenses/>. */
22 #include "config.h"
23 #include "system.h"
24 #include "coretypes.h"
25 #include "backend.h"
26 #include "target.h"
27 #include "tree.h"
28 #include "gimple.h"
29 #include "tree-pass.h"
30 #include "ssa.h"
31 #include "cgraph.h"
32 #include "pretty-print.h"
33 #include "diagnostic-core.h"
34 #include "fold-const.h"
35 #include "internal-fn.h"
36 #include "langhooks.h"
37 #include "gimplify.h"
38 #include "gimple-iterator.h"
39 #include "gimplify-me.h"
40 #include "gimple-walk.h"
41 #include "tree-cfg.h"
42 #include "tree-into-ssa.h"
43 #include "tree-nested.h"
44 #include "stor-layout.h"
45 #include "common/common-target.h"
46 #include "omp-general.h"
47 #include "omp-offload.h"
48 #include "lto-section-names.h"
49 #include "gomp-constants.h"
50 #include "gimple-pretty-print.h"
51 #include "intl.h"
52 #include "stringpool.h"
53 #include "attribs.h"
54 #include "cfgloop.h"
55 #include "context.h"
57 /* Describe the OpenACC looping structure of a function. The entire
58 function is held in a 'NULL' loop. */
60 struct oacc_loop
62 oacc_loop *parent; /* Containing loop. */
64 oacc_loop *child; /* First inner loop. */
66 oacc_loop *sibling; /* Next loop within same parent. */
68 location_t loc; /* Location of the loop start. */
70 gcall *marker; /* Initial head marker. */
72 gcall *heads[GOMP_DIM_MAX]; /* Head marker functions. */
73 gcall *tails[GOMP_DIM_MAX]; /* Tail marker functions. */
75 tree routine; /* Pseudo-loop enclosing a routine. */
77 unsigned mask; /* Partitioning mask. */
78 unsigned e_mask; /* Partitioning of element loops (when tiling). */
79 unsigned inner; /* Partitioning of inner loops. */
80 unsigned flags; /* Partitioning flags. */
81 vec<gcall *> ifns; /* Contained loop abstraction functions. */
82 tree chunk_size; /* Chunk size. */
83 gcall *head_end; /* Final marker of head sequence. */
86 /* Holds offload tables with decls. */
87 vec<tree, va_gc> *offload_funcs, *offload_vars;
89 /* Return level at which oacc routine may spawn a partitioned loop, or
90 -1 if it is not a routine (i.e. is an offload fn). */
92 int
93 oacc_fn_attrib_level (tree attr)
95 tree pos = TREE_VALUE (attr);
97 if (!TREE_PURPOSE (pos))
98 return -1;
100 int ix = 0;
101 for (ix = 0; ix != GOMP_DIM_MAX;
102 ix++, pos = TREE_CHAIN (pos))
103 if (!integer_zerop (TREE_PURPOSE (pos)))
104 break;
106 return ix;
109 /* Helper function for omp_finish_file routine. Takes decls from V_DECLS and
110 adds their addresses and sizes to constructor-vector V_CTOR. */
112 static void
113 add_decls_addresses_to_decl_constructor (vec<tree, va_gc> *v_decls,
114 vec<constructor_elt, va_gc> *v_ctor)
116 unsigned len = vec_safe_length (v_decls);
117 for (unsigned i = 0; i < len; i++)
119 tree it = (*v_decls)[i];
120 bool is_var = VAR_P (it);
121 bool is_link_var
122 = is_var
123 #ifdef ACCEL_COMPILER
124 && DECL_HAS_VALUE_EXPR_P (it)
125 #endif
126 && lookup_attribute ("omp declare target link", DECL_ATTRIBUTES (it));
128 /* See also omp_finish_file and output_offload_tables in lto-cgraph.c. */
129 if (!in_lto_p && !symtab_node::get (it))
130 continue;
132 tree size = NULL_TREE;
133 if (is_var)
134 size = fold_convert (const_ptr_type_node, DECL_SIZE_UNIT (it));
136 tree addr;
137 if (!is_link_var)
138 addr = build_fold_addr_expr (it);
139 else
141 #ifdef ACCEL_COMPILER
142 /* For "omp declare target link" vars add address of the pointer to
143 the target table, instead of address of the var. */
144 tree value_expr = DECL_VALUE_EXPR (it);
145 tree link_ptr_decl = TREE_OPERAND (value_expr, 0);
146 varpool_node::finalize_decl (link_ptr_decl);
147 addr = build_fold_addr_expr (link_ptr_decl);
148 #else
149 addr = build_fold_addr_expr (it);
150 #endif
152 /* Most significant bit of the size marks "omp declare target link"
153 vars in host and target tables. */
154 unsigned HOST_WIDE_INT isize = tree_to_uhwi (size);
155 isize |= 1ULL << (int_size_in_bytes (const_ptr_type_node)
156 * BITS_PER_UNIT - 1);
157 size = wide_int_to_tree (const_ptr_type_node, isize);
160 CONSTRUCTOR_APPEND_ELT (v_ctor, NULL_TREE, addr);
161 if (is_var)
162 CONSTRUCTOR_APPEND_ELT (v_ctor, NULL_TREE, size);
166 /* Return true if DECL is a function for which its references should be
167 analyzed. */
169 static bool
170 omp_declare_target_fn_p (tree decl)
172 return (TREE_CODE (decl) == FUNCTION_DECL
173 && lookup_attribute ("omp declare target", DECL_ATTRIBUTES (decl))
174 && !lookup_attribute ("omp declare target host",
175 DECL_ATTRIBUTES (decl))
176 && (!flag_openacc
177 || oacc_get_fn_attrib (decl) == NULL_TREE));
180 /* Return true if DECL Is a variable for which its initializer references
181 should be analyzed. */
183 static bool
184 omp_declare_target_var_p (tree decl)
186 return (VAR_P (decl)
187 && lookup_attribute ("omp declare target", DECL_ATTRIBUTES (decl))
188 && !lookup_attribute ("omp declare target link",
189 DECL_ATTRIBUTES (decl)));
192 /* Helper function for omp_discover_implicit_declare_target, called through
193 walk_tree. Mark referenced FUNCTION_DECLs implicitly as
194 declare target to. */
196 static tree
197 omp_discover_declare_target_tgt_fn_r (tree *tp, int *walk_subtrees, void *data)
199 if (TREE_CODE (*tp) == FUNCTION_DECL
200 && !omp_declare_target_fn_p (*tp)
201 && !lookup_attribute ("omp declare target host", DECL_ATTRIBUTES (*tp)))
203 tree id = get_identifier ("omp declare target");
204 if (!DECL_EXTERNAL (*tp) && DECL_SAVED_TREE (*tp))
205 ((vec<tree> *) data)->safe_push (*tp);
206 DECL_ATTRIBUTES (*tp) = tree_cons (id, NULL_TREE, DECL_ATTRIBUTES (*tp));
207 symtab_node *node = symtab_node::get (*tp);
208 if (node != NULL)
210 node->offloadable = 1;
211 if (ENABLE_OFFLOADING)
212 g->have_offload = true;
215 else if (TYPE_P (*tp))
216 *walk_subtrees = 0;
217 /* else if (TREE_CODE (*tp) == OMP_TARGET)
219 if (tree dev = omp_find_clause (OMP_TARGET_CLAUSES (*tp)))
220 if (OMP_DEVICE_ANCESTOR (dev))
221 *walk_subtrees = 0;
222 } */
223 return NULL_TREE;
226 /* Similarly, but ignore references outside of OMP_TARGET regions. */
228 static tree
229 omp_discover_declare_target_fn_r (tree *tp, int *walk_subtrees, void *data)
231 if (TREE_CODE (*tp) == OMP_TARGET)
233 /* And not OMP_DEVICE_ANCESTOR. */
234 walk_tree_without_duplicates (&OMP_TARGET_BODY (*tp),
235 omp_discover_declare_target_tgt_fn_r,
236 data);
237 *walk_subtrees = 0;
239 else if (TYPE_P (*tp))
240 *walk_subtrees = 0;
241 return NULL_TREE;
244 /* Helper function for omp_discover_implicit_declare_target, called through
245 walk_tree. Mark referenced FUNCTION_DECLs implicitly as
246 declare target to. */
248 static tree
249 omp_discover_declare_target_var_r (tree *tp, int *walk_subtrees, void *data)
251 if (TREE_CODE (*tp) == FUNCTION_DECL)
252 return omp_discover_declare_target_tgt_fn_r (tp, walk_subtrees, data);
253 else if (VAR_P (*tp)
254 && is_global_var (*tp)
255 && !omp_declare_target_var_p (*tp))
257 tree id = get_identifier ("omp declare target");
258 if (lookup_attribute ("omp declare target link", DECL_ATTRIBUTES (*tp)))
260 error_at (DECL_SOURCE_LOCATION (*tp),
261 "%qD specified both in declare target %<link%> and "
262 "implicitly in %<to%> clauses", *tp);
263 DECL_ATTRIBUTES (*tp)
264 = remove_attribute ("omp declare target link", DECL_ATTRIBUTES (*tp));
266 if (TREE_STATIC (*tp) && DECL_INITIAL (*tp))
267 ((vec<tree> *) data)->safe_push (*tp);
268 DECL_ATTRIBUTES (*tp) = tree_cons (id, NULL_TREE, DECL_ATTRIBUTES (*tp));
269 symtab_node *node = symtab_node::get (*tp);
270 if (node != NULL && !node->offloadable)
272 node->offloadable = 1;
273 if (ENABLE_OFFLOADING)
275 g->have_offload = true;
276 if (is_a <varpool_node *> (node))
277 vec_safe_push (offload_vars, node->decl);
281 else if (TYPE_P (*tp))
282 *walk_subtrees = 0;
283 return NULL_TREE;
286 /* Perform the OpenMP implicit declare target to discovery. */
288 void
289 omp_discover_implicit_declare_target (void)
291 cgraph_node *node;
292 varpool_node *vnode;
293 auto_vec<tree> worklist;
295 FOR_EACH_DEFINED_FUNCTION (node)
296 if (DECL_SAVED_TREE (node->decl))
298 if (omp_declare_target_fn_p (node->decl))
299 worklist.safe_push (node->decl);
300 else if (DECL_STRUCT_FUNCTION (node->decl)
301 && DECL_STRUCT_FUNCTION (node->decl)->has_omp_target)
302 worklist.safe_push (node->decl);
304 FOR_EACH_STATIC_INITIALIZER (vnode)
305 if (omp_declare_target_var_p (vnode->decl))
306 worklist.safe_push (vnode->decl);
307 while (!worklist.is_empty ())
309 tree decl = worklist.pop ();
310 if (VAR_P (decl))
311 walk_tree_without_duplicates (&DECL_INITIAL (decl),
312 omp_discover_declare_target_var_r,
313 &worklist);
314 else if (omp_declare_target_fn_p (decl))
315 walk_tree_without_duplicates (&DECL_SAVED_TREE (decl),
316 omp_discover_declare_target_tgt_fn_r,
317 &worklist);
318 else
319 walk_tree_without_duplicates (&DECL_SAVED_TREE (decl),
320 omp_discover_declare_target_fn_r,
321 &worklist);
326 /* Create new symbols containing (address, size) pairs for global variables,
327 marked with "omp declare target" attribute, as well as addresses for the
328 functions, which are outlined offloading regions. */
329 void
330 omp_finish_file (void)
332 unsigned num_funcs = vec_safe_length (offload_funcs);
333 unsigned num_vars = vec_safe_length (offload_vars);
335 if (num_funcs == 0 && num_vars == 0)
336 return;
338 if (targetm_common.have_named_sections)
340 vec<constructor_elt, va_gc> *v_f, *v_v;
341 vec_alloc (v_f, num_funcs);
342 vec_alloc (v_v, num_vars * 2);
344 add_decls_addresses_to_decl_constructor (offload_funcs, v_f);
345 add_decls_addresses_to_decl_constructor (offload_vars, v_v);
347 tree vars_decl_type = build_array_type_nelts (pointer_sized_int_node,
348 vec_safe_length (v_v));
349 tree funcs_decl_type = build_array_type_nelts (pointer_sized_int_node,
350 num_funcs);
351 SET_TYPE_ALIGN (vars_decl_type, TYPE_ALIGN (pointer_sized_int_node));
352 SET_TYPE_ALIGN (funcs_decl_type, TYPE_ALIGN (pointer_sized_int_node));
353 tree ctor_v = build_constructor (vars_decl_type, v_v);
354 tree ctor_f = build_constructor (funcs_decl_type, v_f);
355 TREE_CONSTANT (ctor_v) = TREE_CONSTANT (ctor_f) = 1;
356 TREE_STATIC (ctor_v) = TREE_STATIC (ctor_f) = 1;
357 tree funcs_decl = build_decl (UNKNOWN_LOCATION, VAR_DECL,
358 get_identifier (".offload_func_table"),
359 funcs_decl_type);
360 tree vars_decl = build_decl (UNKNOWN_LOCATION, VAR_DECL,
361 get_identifier (".offload_var_table"),
362 vars_decl_type);
363 TREE_STATIC (funcs_decl) = TREE_STATIC (vars_decl) = 1;
364 /* Do not align tables more than TYPE_ALIGN (pointer_sized_int_node),
365 otherwise a joint table in a binary will contain padding between
366 tables from multiple object files. */
367 DECL_USER_ALIGN (funcs_decl) = DECL_USER_ALIGN (vars_decl) = 1;
368 SET_DECL_ALIGN (funcs_decl, TYPE_ALIGN (funcs_decl_type));
369 SET_DECL_ALIGN (vars_decl, TYPE_ALIGN (vars_decl_type));
370 DECL_INITIAL (funcs_decl) = ctor_f;
371 DECL_INITIAL (vars_decl) = ctor_v;
372 set_decl_section_name (funcs_decl, OFFLOAD_FUNC_TABLE_SECTION_NAME);
373 set_decl_section_name (vars_decl, OFFLOAD_VAR_TABLE_SECTION_NAME);
375 varpool_node::finalize_decl (vars_decl);
376 varpool_node::finalize_decl (funcs_decl);
378 else
380 for (unsigned i = 0; i < num_funcs; i++)
382 tree it = (*offload_funcs)[i];
383 /* See also add_decls_addresses_to_decl_constructor
384 and output_offload_tables in lto-cgraph.c. */
385 if (!in_lto_p && !symtab_node::get (it))
386 continue;
387 targetm.record_offload_symbol (it);
389 for (unsigned i = 0; i < num_vars; i++)
391 tree it = (*offload_vars)[i];
392 if (!in_lto_p && !symtab_node::get (it))
393 continue;
394 #ifdef ACCEL_COMPILER
395 if (DECL_HAS_VALUE_EXPR_P (it)
396 && lookup_attribute ("omp declare target link",
397 DECL_ATTRIBUTES (it)))
399 tree value_expr = DECL_VALUE_EXPR (it);
400 tree link_ptr_decl = TREE_OPERAND (value_expr, 0);
401 targetm.record_offload_symbol (link_ptr_decl);
402 varpool_node::finalize_decl (link_ptr_decl);
404 else
405 #endif
406 targetm.record_offload_symbol (it);
411 /* Call dim_pos (POS == true) or dim_size (POS == false) builtins for
412 axis DIM. Return a tmp var holding the result. */
414 static tree
415 oacc_dim_call (bool pos, int dim, gimple_seq *seq)
417 tree arg = build_int_cst (unsigned_type_node, dim);
418 tree size = create_tmp_var (integer_type_node);
419 enum internal_fn fn = pos ? IFN_GOACC_DIM_POS : IFN_GOACC_DIM_SIZE;
420 gimple *call = gimple_build_call_internal (fn, 1, arg);
422 gimple_call_set_lhs (call, size);
423 gimple_seq_add_stmt (seq, call);
425 return size;
428 /* Find the number of threads (POS = false), or thread number (POS =
429 true) for an OpenACC region partitioned as MASK. Setup code
430 required for the calculation is added to SEQ. */
432 static tree
433 oacc_thread_numbers (bool pos, int mask, gimple_seq *seq)
435 tree res = pos ? NULL_TREE : build_int_cst (unsigned_type_node, 1);
436 unsigned ix;
438 /* Start at gang level, and examine relevant dimension indices. */
439 for (ix = GOMP_DIM_GANG; ix != GOMP_DIM_MAX; ix++)
440 if (GOMP_DIM_MASK (ix) & mask)
442 if (res)
444 /* We had an outer index, so scale that by the size of
445 this dimension. */
446 tree n = oacc_dim_call (false, ix, seq);
447 res = fold_build2 (MULT_EXPR, integer_type_node, res, n);
449 if (pos)
451 /* Determine index in this dimension. */
452 tree id = oacc_dim_call (true, ix, seq);
453 if (res)
454 res = fold_build2 (PLUS_EXPR, integer_type_node, res, id);
455 else
456 res = id;
460 if (res == NULL_TREE)
461 res = integer_zero_node;
463 return res;
466 /* Transform IFN_GOACC_LOOP calls to actual code. See
467 expand_oacc_for for where these are generated. At the vector
468 level, we stride loops, such that each member of a warp will
469 operate on adjacent iterations. At the worker and gang level,
470 each gang/warp executes a set of contiguous iterations. Chunking
471 can override this such that each iteration engine executes a
472 contiguous chunk, and then moves on to stride to the next chunk. */
474 static void
475 oacc_xform_loop (gcall *call)
477 gimple_stmt_iterator gsi = gsi_for_stmt (call);
478 enum ifn_goacc_loop_kind code
479 = (enum ifn_goacc_loop_kind) TREE_INT_CST_LOW (gimple_call_arg (call, 0));
480 tree dir = gimple_call_arg (call, 1);
481 tree range = gimple_call_arg (call, 2);
482 tree step = gimple_call_arg (call, 3);
483 tree chunk_size = NULL_TREE;
484 unsigned mask = (unsigned) TREE_INT_CST_LOW (gimple_call_arg (call, 5));
485 tree lhs = gimple_call_lhs (call);
486 tree type = NULL_TREE;
487 tree diff_type = TREE_TYPE (range);
488 tree r = NULL_TREE;
489 gimple_seq seq = NULL;
490 bool chunking = false, striding = true;
491 unsigned outer_mask = mask & (~mask + 1); // Outermost partitioning
492 unsigned inner_mask = mask & ~outer_mask; // Inner partitioning (if any)
494 /* Skip lowering if return value of IFN_GOACC_LOOP call is not used. */
495 if (!lhs)
497 gsi_replace_with_seq (&gsi, seq, true);
498 return;
501 type = TREE_TYPE (lhs);
503 #ifdef ACCEL_COMPILER
504 chunk_size = gimple_call_arg (call, 4);
505 if (integer_minus_onep (chunk_size) /* Force static allocation. */
506 || integer_zerop (chunk_size)) /* Default (also static). */
508 /* If we're at the gang level, we want each to execute a
509 contiguous run of iterations. Otherwise we want each element
510 to stride. */
511 striding = !(outer_mask & GOMP_DIM_MASK (GOMP_DIM_GANG));
512 chunking = false;
514 else
516 /* Chunk of size 1 is striding. */
517 striding = integer_onep (chunk_size);
518 chunking = !striding;
520 #endif
522 /* striding=true, chunking=true
523 -> invalid.
524 striding=true, chunking=false
525 -> chunks=1
526 striding=false,chunking=true
527 -> chunks=ceil (range/(chunksize*threads*step))
528 striding=false,chunking=false
529 -> chunk_size=ceil(range/(threads*step)),chunks=1 */
530 push_gimplify_context (true);
532 switch (code)
534 default: gcc_unreachable ();
536 case IFN_GOACC_LOOP_CHUNKS:
537 if (!chunking)
538 r = build_int_cst (type, 1);
539 else
541 /* chunk_max
542 = (range - dir) / (chunks * step * num_threads) + dir */
543 tree per = oacc_thread_numbers (false, mask, &seq);
544 per = fold_convert (type, per);
545 chunk_size = fold_convert (type, chunk_size);
546 per = fold_build2 (MULT_EXPR, type, per, chunk_size);
547 per = fold_build2 (MULT_EXPR, type, per, step);
548 r = build2 (MINUS_EXPR, type, range, dir);
549 r = build2 (PLUS_EXPR, type, r, per);
550 r = build2 (TRUNC_DIV_EXPR, type, r, per);
552 break;
554 case IFN_GOACC_LOOP_STEP:
556 /* If striding, step by the entire compute volume, otherwise
557 step by the inner volume. */
558 unsigned volume = striding ? mask : inner_mask;
560 r = oacc_thread_numbers (false, volume, &seq);
561 r = build2 (MULT_EXPR, type, fold_convert (type, r), step);
563 break;
565 case IFN_GOACC_LOOP_OFFSET:
566 /* Enable vectorization on non-SIMT targets. */
567 if (!targetm.simt.vf
568 && outer_mask == GOMP_DIM_MASK (GOMP_DIM_VECTOR)
569 /* If not -fno-tree-loop-vectorize, hint that we want to vectorize
570 the loop. */
571 && (flag_tree_loop_vectorize
572 || !global_options_set.x_flag_tree_loop_vectorize))
574 basic_block bb = gsi_bb (gsi);
575 class loop *parent = bb->loop_father;
576 class loop *body = parent->inner;
578 parent->force_vectorize = true;
579 parent->safelen = INT_MAX;
581 /* "Chunking loops" may have inner loops. */
582 if (parent->inner)
584 body->force_vectorize = true;
585 body->safelen = INT_MAX;
588 cfun->has_force_vectorize_loops = true;
590 if (striding)
592 r = oacc_thread_numbers (true, mask, &seq);
593 r = fold_convert (diff_type, r);
595 else
597 tree inner_size = oacc_thread_numbers (false, inner_mask, &seq);
598 tree outer_size = oacc_thread_numbers (false, outer_mask, &seq);
599 tree volume = fold_build2 (MULT_EXPR, TREE_TYPE (inner_size),
600 inner_size, outer_size);
602 volume = fold_convert (diff_type, volume);
603 if (chunking)
604 chunk_size = fold_convert (diff_type, chunk_size);
605 else
607 tree per = fold_build2 (MULT_EXPR, diff_type, volume, step);
609 chunk_size = build2 (MINUS_EXPR, diff_type, range, dir);
610 chunk_size = build2 (PLUS_EXPR, diff_type, chunk_size, per);
611 chunk_size = build2 (TRUNC_DIV_EXPR, diff_type, chunk_size, per);
614 tree span = build2 (MULT_EXPR, diff_type, chunk_size,
615 fold_convert (diff_type, inner_size));
616 r = oacc_thread_numbers (true, outer_mask, &seq);
617 r = fold_convert (diff_type, r);
618 r = build2 (MULT_EXPR, diff_type, r, span);
620 tree inner = oacc_thread_numbers (true, inner_mask, &seq);
621 inner = fold_convert (diff_type, inner);
622 r = fold_build2 (PLUS_EXPR, diff_type, r, inner);
624 if (chunking)
626 tree chunk = fold_convert (diff_type, gimple_call_arg (call, 6));
627 tree per
628 = fold_build2 (MULT_EXPR, diff_type, volume, chunk_size);
629 per = build2 (MULT_EXPR, diff_type, per, chunk);
631 r = build2 (PLUS_EXPR, diff_type, r, per);
634 r = fold_build2 (MULT_EXPR, diff_type, r, step);
635 if (type != diff_type)
636 r = fold_convert (type, r);
637 break;
639 case IFN_GOACC_LOOP_BOUND:
640 if (striding)
641 r = range;
642 else
644 tree inner_size = oacc_thread_numbers (false, inner_mask, &seq);
645 tree outer_size = oacc_thread_numbers (false, outer_mask, &seq);
646 tree volume = fold_build2 (MULT_EXPR, TREE_TYPE (inner_size),
647 inner_size, outer_size);
649 volume = fold_convert (diff_type, volume);
650 if (chunking)
651 chunk_size = fold_convert (diff_type, chunk_size);
652 else
654 tree per = fold_build2 (MULT_EXPR, diff_type, volume, step);
656 chunk_size = build2 (MINUS_EXPR, diff_type, range, dir);
657 chunk_size = build2 (PLUS_EXPR, diff_type, chunk_size, per);
658 chunk_size = build2 (TRUNC_DIV_EXPR, diff_type, chunk_size, per);
661 tree span = build2 (MULT_EXPR, diff_type, chunk_size,
662 fold_convert (diff_type, inner_size));
664 r = fold_build2 (MULT_EXPR, diff_type, span, step);
666 tree offset = gimple_call_arg (call, 6);
667 r = build2 (PLUS_EXPR, diff_type, r,
668 fold_convert (diff_type, offset));
669 r = build2 (integer_onep (dir) ? MIN_EXPR : MAX_EXPR,
670 diff_type, r, range);
672 if (diff_type != type)
673 r = fold_convert (type, r);
674 break;
677 gimplify_assign (lhs, r, &seq);
679 pop_gimplify_context (NULL);
681 gsi_replace_with_seq (&gsi, seq, true);
684 /* Transform a GOACC_TILE call. Determines the element loop span for
685 the specified loop of the nest. This is 1 if we're not tiling.
687 GOACC_TILE (collapse_count, loop_no, tile_arg, gwv_tile, gwv_element); */
689 static void
690 oacc_xform_tile (gcall *call)
692 gimple_stmt_iterator gsi = gsi_for_stmt (call);
693 unsigned collapse = tree_to_uhwi (gimple_call_arg (call, 0));
694 /* Inner loops have higher loop_nos. */
695 unsigned loop_no = tree_to_uhwi (gimple_call_arg (call, 1));
696 tree tile_size = gimple_call_arg (call, 2);
697 unsigned e_mask = tree_to_uhwi (gimple_call_arg (call, 4));
698 tree lhs = gimple_call_lhs (call);
699 tree type = TREE_TYPE (lhs);
700 gimple_seq seq = NULL;
701 tree span = build_int_cst (type, 1);
703 gcc_assert (!(e_mask
704 & ~(GOMP_DIM_MASK (GOMP_DIM_VECTOR)
705 | GOMP_DIM_MASK (GOMP_DIM_WORKER))));
706 push_gimplify_context (!seen_error ());
708 #ifndef ACCEL_COMPILER
709 /* Partitioning disabled on host compilers. */
710 e_mask = 0;
711 #endif
712 if (!e_mask)
713 /* Not paritioning. */
714 span = integer_one_node;
715 else if (!integer_zerop (tile_size))
716 /* User explicitly specified size. */
717 span = tile_size;
718 else
720 /* Pick a size based on the paritioning of the element loop and
721 the number of loop nests. */
722 tree first_size = NULL_TREE;
723 tree second_size = NULL_TREE;
725 if (e_mask & GOMP_DIM_MASK (GOMP_DIM_VECTOR))
726 first_size = oacc_dim_call (false, GOMP_DIM_VECTOR, &seq);
727 if (e_mask & GOMP_DIM_MASK (GOMP_DIM_WORKER))
728 second_size = oacc_dim_call (false, GOMP_DIM_WORKER, &seq);
730 if (!first_size)
732 first_size = second_size;
733 second_size = NULL_TREE;
736 if (loop_no + 1 == collapse)
738 span = first_size;
739 if (!loop_no && second_size)
740 span = fold_build2 (MULT_EXPR, TREE_TYPE (span),
741 span, second_size);
743 else if (loop_no + 2 == collapse)
744 span = second_size;
745 else
746 span = NULL_TREE;
748 if (!span)
749 /* There's no obvious element size for this loop. Options
750 are 1, first_size or some non-unity constant (32 is my
751 favourite). We should gather some statistics. */
752 span = first_size;
755 span = fold_convert (type, span);
756 gimplify_assign (lhs, span, &seq);
758 pop_gimplify_context (NULL);
760 gsi_replace_with_seq (&gsi, seq, true);
763 /* Default partitioned and minimum partitioned dimensions. */
765 static int oacc_default_dims[GOMP_DIM_MAX];
766 static int oacc_min_dims[GOMP_DIM_MAX];
769 oacc_get_default_dim (int dim)
771 gcc_assert (0 <= dim && dim < GOMP_DIM_MAX);
772 return oacc_default_dims[dim];
776 oacc_get_min_dim (int dim)
778 gcc_assert (0 <= dim && dim < GOMP_DIM_MAX);
779 return oacc_min_dims[dim];
782 /* Parse the default dimension parameter. This is a set of
783 :-separated optional compute dimensions. Each specified dimension
784 is a positive integer. When device type support is added, it is
785 planned to be a comma separated list of such compute dimensions,
786 with all but the first prefixed by the colon-terminated device
787 type. */
789 static void
790 oacc_parse_default_dims (const char *dims)
792 int ix;
794 for (ix = GOMP_DIM_MAX; ix--;)
796 oacc_default_dims[ix] = -1;
797 oacc_min_dims[ix] = 1;
800 #ifndef ACCEL_COMPILER
801 /* Cannot be overridden on the host. */
802 dims = NULL;
803 #endif
804 if (dims)
806 const char *pos = dims;
808 for (ix = 0; *pos && ix != GOMP_DIM_MAX; ix++)
810 if (ix)
812 if (*pos != ':')
813 goto malformed;
814 pos++;
817 if (*pos != ':')
819 long val;
820 const char *eptr;
822 errno = 0;
823 val = strtol (pos, CONST_CAST (char **, &eptr), 10);
824 if (errno || val <= 0 || (int) val != val)
825 goto malformed;
826 pos = eptr;
827 oacc_default_dims[ix] = (int) val;
830 if (*pos)
832 malformed:
833 error_at (UNKNOWN_LOCATION,
834 "%<-fopenacc-dim%> operand is malformed at %qs", pos);
838 /* Allow the backend to validate the dimensions. */
839 targetm.goacc.validate_dims (NULL_TREE, oacc_default_dims, -1, 0);
840 targetm.goacc.validate_dims (NULL_TREE, oacc_min_dims, -2, 0);
843 /* Validate and update the dimensions for offloaded FN. ATTRS is the
844 raw attribute. DIMS is an array of dimensions, which is filled in.
845 LEVEL is the partitioning level of a routine, or -1 for an offload
846 region itself. USED is the mask of partitioned execution in the
847 function. */
849 static void
850 oacc_validate_dims (tree fn, tree attrs, int *dims, int level, unsigned used)
852 tree purpose[GOMP_DIM_MAX];
853 unsigned ix;
854 tree pos = TREE_VALUE (attrs);
856 /* Make sure the attribute creator attached the dimension
857 information. */
858 gcc_assert (pos);
860 for (ix = 0; ix != GOMP_DIM_MAX; ix++)
862 purpose[ix] = TREE_PURPOSE (pos);
863 tree val = TREE_VALUE (pos);
864 dims[ix] = val ? TREE_INT_CST_LOW (val) : -1;
865 pos = TREE_CHAIN (pos);
868 bool changed = targetm.goacc.validate_dims (fn, dims, level, used);
870 /* Default anything left to 1 or a partitioned default. */
871 for (ix = 0; ix != GOMP_DIM_MAX; ix++)
872 if (dims[ix] < 0)
874 /* The OpenACC spec says 'If the [num_gangs] clause is not
875 specified, an implementation-defined default will be used;
876 the default may depend on the code within the construct.'
877 (2.5.6). Thus an implementation is free to choose
878 non-unity default for a parallel region that doesn't have
879 any gang-partitioned loops. However, it appears that there
880 is a sufficient body of user code that expects non-gang
881 partitioned regions to not execute in gang-redundant mode.
882 So we (a) don't warn about the non-portability and (b) pick
883 the minimum permissible dimension size when there is no
884 partitioned execution. Otherwise we pick the global
885 default for the dimension, which the user can control. The
886 same wording and logic applies to num_workers and
887 vector_length, however the worker- or vector- single
888 execution doesn't have the same impact as gang-redundant
889 execution. (If the minimum gang-level partioning is not 1,
890 the target is probably too confusing.) */
891 dims[ix] = (used & GOMP_DIM_MASK (ix)
892 ? oacc_default_dims[ix] : oacc_min_dims[ix]);
893 changed = true;
896 if (changed)
898 /* Replace the attribute with new values. */
899 pos = NULL_TREE;
900 for (ix = GOMP_DIM_MAX; ix--;)
901 pos = tree_cons (purpose[ix],
902 build_int_cst (integer_type_node, dims[ix]), pos);
903 oacc_replace_fn_attrib (fn, pos);
907 /* Create an empty OpenACC loop structure at LOC. */
909 static oacc_loop *
910 new_oacc_loop_raw (oacc_loop *parent, location_t loc)
912 oacc_loop *loop = XCNEW (oacc_loop);
914 loop->parent = parent;
916 if (parent)
918 loop->sibling = parent->child;
919 parent->child = loop;
922 loop->loc = loc;
923 return loop;
926 /* Create an outermost, dummy OpenACC loop for offloaded function
927 DECL. */
929 static oacc_loop *
930 new_oacc_loop_outer (tree decl)
932 return new_oacc_loop_raw (NULL, DECL_SOURCE_LOCATION (decl));
935 /* Start a new OpenACC loop structure beginning at head marker HEAD.
936 Link into PARENT loop. Return the new loop. */
938 static oacc_loop *
939 new_oacc_loop (oacc_loop *parent, gcall *marker)
941 oacc_loop *loop = new_oacc_loop_raw (parent, gimple_location (marker));
943 loop->marker = marker;
945 /* TODO: This is where device_type flattening would occur for the loop
946 flags. */
948 loop->flags = TREE_INT_CST_LOW (gimple_call_arg (marker, 3));
950 tree chunk_size = integer_zero_node;
951 if (loop->flags & OLF_GANG_STATIC)
952 chunk_size = gimple_call_arg (marker, 4);
953 loop->chunk_size = chunk_size;
955 return loop;
958 /* Create a dummy loop encompassing a call to a openACC routine.
959 Extract the routine's partitioning requirements. */
961 static void
962 new_oacc_loop_routine (oacc_loop *parent, gcall *call, tree decl, tree attrs)
964 oacc_loop *loop = new_oacc_loop_raw (parent, gimple_location (call));
965 int level = oacc_fn_attrib_level (attrs);
967 gcc_assert (level >= 0);
969 loop->marker = call;
970 loop->routine = decl;
971 loop->mask = ((GOMP_DIM_MASK (GOMP_DIM_MAX) - 1)
972 ^ (GOMP_DIM_MASK (level) - 1));
975 /* Finish off the current OpenACC loop ending at tail marker TAIL.
976 Return the parent loop. */
978 static oacc_loop *
979 finish_oacc_loop (oacc_loop *loop)
981 /* If the loop has been collapsed, don't partition it. */
982 if (loop->ifns.is_empty ())
983 loop->mask = loop->flags = 0;
984 return loop->parent;
987 /* Free all OpenACC loop structures within LOOP (inclusive). */
989 static void
990 free_oacc_loop (oacc_loop *loop)
992 if (loop->sibling)
993 free_oacc_loop (loop->sibling);
994 if (loop->child)
995 free_oacc_loop (loop->child);
997 loop->ifns.release ();
998 free (loop);
1001 /* Dump out the OpenACC loop head or tail beginning at FROM. */
1003 static void
1004 dump_oacc_loop_part (FILE *file, gcall *from, int depth,
1005 const char *title, int level)
1007 enum ifn_unique_kind kind
1008 = (enum ifn_unique_kind) TREE_INT_CST_LOW (gimple_call_arg (from, 0));
1010 fprintf (file, "%*s%s-%d:\n", depth * 2, "", title, level);
1011 for (gimple_stmt_iterator gsi = gsi_for_stmt (from);;)
1013 gimple *stmt = gsi_stmt (gsi);
1015 if (gimple_call_internal_p (stmt, IFN_UNIQUE))
1017 enum ifn_unique_kind k
1018 = ((enum ifn_unique_kind) TREE_INT_CST_LOW
1019 (gimple_call_arg (stmt, 0)));
1021 if (k == kind && stmt != from)
1022 break;
1024 print_gimple_stmt (file, stmt, depth * 2 + 2);
1026 gsi_next (&gsi);
1027 while (gsi_end_p (gsi))
1028 gsi = gsi_start_bb (single_succ (gsi_bb (gsi)));
1032 /* Dump OpenACC loop LOOP, its children, and its siblings. */
1034 static void
1035 dump_oacc_loop (FILE *file, oacc_loop *loop, int depth)
1037 int ix;
1039 fprintf (file, "%*sLoop %x(%x) %s:%u\n", depth * 2, "",
1040 loop->flags, loop->mask,
1041 LOCATION_FILE (loop->loc), LOCATION_LINE (loop->loc));
1043 if (loop->marker)
1044 print_gimple_stmt (file, loop->marker, depth * 2);
1046 if (loop->routine)
1047 fprintf (file, "%*sRoutine %s:%u:%s\n",
1048 depth * 2, "", DECL_SOURCE_FILE (loop->routine),
1049 DECL_SOURCE_LINE (loop->routine),
1050 IDENTIFIER_POINTER (DECL_NAME (loop->routine)));
1052 for (ix = GOMP_DIM_GANG; ix != GOMP_DIM_MAX; ix++)
1053 if (loop->heads[ix])
1054 dump_oacc_loop_part (file, loop->heads[ix], depth, "Head", ix);
1055 for (ix = GOMP_DIM_MAX; ix--;)
1056 if (loop->tails[ix])
1057 dump_oacc_loop_part (file, loop->tails[ix], depth, "Tail", ix);
1059 if (loop->child)
1060 dump_oacc_loop (file, loop->child, depth + 1);
1061 if (loop->sibling)
1062 dump_oacc_loop (file, loop->sibling, depth);
1065 void debug_oacc_loop (oacc_loop *);
1067 /* Dump loops to stderr. */
1069 DEBUG_FUNCTION void
1070 debug_oacc_loop (oacc_loop *loop)
1072 dump_oacc_loop (stderr, loop, 0);
1075 /* Provide diagnostics on OpenACC loop LOOP, its children, and its
1076 siblings. */
1078 static void
1079 inform_oacc_loop (const oacc_loop *loop)
1081 const char *gang
1082 = loop->mask & GOMP_DIM_MASK (GOMP_DIM_GANG) ? " gang" : "";
1083 const char *worker
1084 = loop->mask & GOMP_DIM_MASK (GOMP_DIM_WORKER) ? " worker" : "";
1085 const char *vector
1086 = loop->mask & GOMP_DIM_MASK (GOMP_DIM_VECTOR) ? " vector" : "";
1087 const char *seq = loop->mask == 0 ? " seq" : "";
1088 const dump_user_location_t loc
1089 = dump_user_location_t::from_location_t (loop->loc);
1090 dump_printf_loc (MSG_OPTIMIZED_LOCATIONS, loc,
1091 "assigned OpenACC%s%s%s%s loop parallelism\n", gang, worker,
1092 vector, seq);
1094 if (loop->child)
1095 inform_oacc_loop (loop->child);
1096 if (loop->sibling)
1097 inform_oacc_loop (loop->sibling);
/* DFS walk of basic blocks BB onwards, creating OpenACC loop
   structures as we go.  By construction these loops are properly
   nested.  */

static void
oacc_loop_discover_walk (oacc_loop *loop, basic_block bb)
{
  /* MARKER counts head/tail marker IFNs seen in the current group;
     REMAINING counts how many of that group are still expected.  */
  int marker = 0;
  int remaining = 0;

  if (bb->flags & BB_VISITED)
    return;

 follow:
  bb->flags |= BB_VISITED;

  /* Scan for loop markers.  */
  for (gimple_stmt_iterator gsi = gsi_start_bb (bb); !gsi_end_p (gsi);
       gsi_next (&gsi))
    {
      gimple *stmt = gsi_stmt (gsi);

      if (!is_gimple_call (stmt))
	continue;

      gcall *call = as_a <gcall *> (stmt);

      /* If this is a routine, make a dummy loop for it.  */
      if (tree decl = gimple_call_fndecl (call))
	if (tree attrs = oacc_get_fn_attrib (decl))
	  {
	    gcc_assert (!marker);
	    new_oacc_loop_routine (loop, call, decl, attrs);
	  }

      if (!gimple_call_internal_p (call))
	continue;

      switch (gimple_call_internal_fn (call))
	{
	default:
	  break;

	case IFN_GOACC_LOOP:
	case IFN_GOACC_TILE:
	  /* Record the abstraction function, so we can manipulate it
	     later.  */
	  loop->ifns.safe_push (call);
	  break;

	case IFN_UNIQUE:
	  enum ifn_unique_kind kind
	    = (enum ifn_unique_kind) (TREE_INT_CST_LOW
				      (gimple_call_arg (call, 0)));
	  if (kind == IFN_UNIQUE_OACC_HEAD_MARK
	      || kind == IFN_UNIQUE_OACC_TAIL_MARK)
	    {
	      /* A two-argument marker terminates a head/tail group.  */
	      if (gimple_call_num_args (call) == 2)
		{
		  gcc_assert (marker && !remaining);
		  marker = 0;
		  if (kind == IFN_UNIQUE_OACC_TAIL_MARK)
		    loop = finish_oacc_loop (loop);
		  else
		    loop->head_end = call;
		}
	      else
		{
		  /* Argument 2 carries the total marker count of the
		     group; it must agree across all of them.  */
		  int count = TREE_INT_CST_LOW (gimple_call_arg (call, 2));

		  if (!marker)
		    {
		      /* First marker of a head group opens a new loop.  */
		      if (kind == IFN_UNIQUE_OACC_HEAD_MARK)
			loop = new_oacc_loop (loop, call);
		      remaining = count;
		    }
		  gcc_assert (count == remaining);
		  if (remaining)
		    {
		      remaining--;
		      /* Heads are recorded outside-in, tails inside-out.  */
		      if (kind == IFN_UNIQUE_OACC_HEAD_MARK)
			loop->heads[marker] = call;
		      else
			loop->tails[remaining] = call;
		    }
		  marker++;
		}
	    }
	}
    }
  if (remaining || marker)
    {
      /* A marker group continues into the (unique) successor block.  */
      bb = single_succ (bb);
      gcc_assert (single_pred_p (bb) && !(bb->flags & BB_VISITED));
      goto follow;
    }

  /* Walk successor blocks.  */
  edge e;
  edge_iterator ei;

  FOR_EACH_EDGE (e, ei, bb->succs)
    oacc_loop_discover_walk (loop, e->dest);
}
1205 /* LOOP is the first sibling. Reverse the order in place and return
1206 the new first sibling. Recurse to child loops. */
1208 static oacc_loop *
1209 oacc_loop_sibling_nreverse (oacc_loop *loop)
1211 oacc_loop *last = NULL;
1214 if (loop->child)
1215 loop->child = oacc_loop_sibling_nreverse (loop->child);
1217 oacc_loop *next = loop->sibling;
1218 loop->sibling = last;
1219 last = loop;
1220 loop = next;
1222 while (loop);
1224 return last;
1227 /* Discover the OpenACC loops marked up by HEAD and TAIL markers for
1228 the current function. */
1230 static oacc_loop *
1231 oacc_loop_discovery ()
1233 /* Clear basic block flags, in particular BB_VISITED which we're going to use
1234 in the following. */
1235 clear_bb_flags ();
1237 oacc_loop *top = new_oacc_loop_outer (current_function_decl);
1238 oacc_loop_discover_walk (top, ENTRY_BLOCK_PTR_FOR_FN (cfun));
1240 /* The siblings were constructed in reverse order, reverse them so
1241 that diagnostics come out in an unsurprising order. */
1242 top = oacc_loop_sibling_nreverse (top);
1244 return top;
/* Transform the abstract internal function markers starting at FROM
   to be for partitioning level LEVEL.  Stop when we meet another HEAD
   or TAIL marker of the same kind as FROM.  */

static void
oacc_loop_xform_head_tail (gcall *from, int level)
{
  enum ifn_unique_kind kind
    = (enum ifn_unique_kind) TREE_INT_CST_LOW (gimple_call_arg (from, 0));
  tree replacement = build_int_cst (unsigned_type_node, level);

  for (gimple_stmt_iterator gsi = gsi_for_stmt (from);;)
    {
      gimple *stmt = gsi_stmt (gsi);

      if (gimple_call_internal_p (stmt, IFN_UNIQUE))
	{
	  enum ifn_unique_kind k
	    = ((enum ifn_unique_kind)
	       TREE_INT_CST_LOW (gimple_call_arg (stmt, 0)));

	  /* Fork/join calls receive the level in argument 2.  */
	  if (k == IFN_UNIQUE_OACC_FORK || k == IFN_UNIQUE_OACC_JOIN)
	    *gimple_call_arg_ptr (stmt, 2) = replacement;
	  else if (k == kind && stmt != from)
	    /* Next marker of the same kind ends this head/tail
	       sequence.  */
	    break;
	}
      else if (gimple_call_internal_p (stmt, IFN_GOACC_REDUCTION))
	/* Reduction calls carry the level in argument 3.  */
	*gimple_call_arg_ptr (stmt, 3) = replacement;

      gsi_next (&gsi);
      /* The marker sequence may span several blocks; follow the
	 single-successor chain when we fall off the end of one.  */
      while (gsi_end_p (gsi))
	gsi = gsi_start_bb (single_succ (gsi_bb (gsi)));
    }
}
/* Process the discovered OpenACC loops, setting the correct
   partitioning level etc.  Children are processed before the loop
   itself; siblings afterwards.  */

static void
oacc_loop_process (oacc_loop *loop)
{
  if (loop->child)
    oacc_loop_process (loop->child);

  /* Routines are dummy loops and carry no partitioning of their own.  */
  if (loop->mask && !loop->routine)
    {
      int ix;
      tree mask_arg = build_int_cst (unsigned_type_node, loop->mask);
      tree e_mask_arg = build_int_cst (unsigned_type_node, loop->e_mask);
      tree chunk_arg = loop->chunk_size;
      gcall *call;

      /* Patch the recorded abstraction functions with the final
	 masks and chunk size.  */
      for (ix = 0; loop->ifns.iterate (ix, &call); ix++)
	switch (gimple_call_internal_fn (call))
	  {
	  case IFN_GOACC_LOOP:
	    {
	      /* Argument 5 == -1 marks the element-loop variant.  */
	      bool is_e = gimple_call_arg (call, 5) == integer_minus_one_node;
	      gimple_call_set_arg (call, 5, is_e ? e_mask_arg : mask_arg);
	      if (!is_e)
		gimple_call_set_arg (call, 4, chunk_arg);
	    }
	    break;

	  case IFN_GOACC_TILE:
	    gimple_call_set_arg (call, 3, mask_arg);
	    gimple_call_set_arg (call, 4, e_mask_arg);
	    break;

	  default:
	    gcc_unreachable ();
	  }

      /* Assign one partitioning dimension, outermost first, to each
	 recorded head/tail marker pair.  */
      unsigned dim = GOMP_DIM_GANG;
      unsigned mask = loop->mask | loop->e_mask;
      for (ix = 0; ix != GOMP_DIM_MAX && mask; ix++)
	{
	  while (!(GOMP_DIM_MASK (dim) & mask))
	    dim++;

	  oacc_loop_xform_head_tail (loop->heads[ix], dim);
	  oacc_loop_xform_head_tail (loop->tails[ix], dim);

	  mask ^= GOMP_DIM_MASK (dim);
	}
    }

  if (loop->sibling)
    oacc_loop_process (loop->sibling);
}
/* Walk the OpenACC loop hierarchy checking and assigning the
   programmer-specified partitionings.  OUTER_MASK is the partitioning
   this loop is contained within.  Return mask of partitioning
   encountered.  If any auto loops are discovered, set GOMP_DIM_MAX
   bit.  */

static unsigned
oacc_loop_fixed_partitions (oacc_loop *loop, unsigned outer_mask)
{
  unsigned this_mask = loop->mask;
  unsigned mask_all = 0;
  bool noisy = true;

#ifdef ACCEL_COMPILER
  /* When device_type is supported, we want the device compiler to be
     noisy, if the loop parameters are device_type-specific.  */
  noisy = false;
#endif

  if (!loop->routine)
    {
      bool auto_par = (loop->flags & OLF_AUTO) != 0;
      bool seq_par = (loop->flags & OLF_SEQ) != 0;
      bool tiling = (loop->flags & OLF_TILE) != 0;

      this_mask = ((loop->flags >> OLF_DIM_BASE)
		   & (GOMP_DIM_MASK (GOMP_DIM_MAX) - 1));

      /* Apply auto partitioning if this is a non-partitioned regular
	 loop, or (no more than) single axis tiled loop.  */
      bool maybe_auto
	= !seq_par && this_mask == (tiling ? this_mask & -this_mask : 0);

      /* Explicit partitioning, auto and seq are mutually exclusive.  */
      if ((this_mask != 0) + auto_par + seq_par > 1)
	{
	  if (noisy)
	    error_at (loop->loc,
		      seq_par
		      ? G_("%<seq%> overrides other OpenACC loop specifiers")
		      : G_("%<auto%> conflicts with other OpenACC loop "
			   "specifiers"));
	  maybe_auto = false;
	  loop->flags &= ~OLF_AUTO;
	  if (seq_par)
	    {
	      loop->flags
		&= ~((GOMP_DIM_MASK (GOMP_DIM_MAX) - 1) << OLF_DIM_BASE);
	      this_mask = 0;
	    }
	}

      if (maybe_auto && (loop->flags & OLF_INDEPENDENT))
	{
	  loop->flags |= OLF_AUTO;
	  /* GOMP_DIM_MAX bit signals the caller that auto
	     partitioning is wanted somewhere.  */
	  mask_all |= GOMP_DIM_MASK (GOMP_DIM_MAX);
	}
    }

  if (this_mask & outer_mask)
    {
      /* This loop requests an axis already used further out; find the
	 loop that claimed it, for the diagnostic.  */
      const oacc_loop *outer;
      for (outer = loop->parent; outer; outer = outer->parent)
	if ((outer->mask | outer->e_mask) & this_mask)
	  break;

      if (noisy)
	{
	  if (outer)
	    {
	      error_at (loop->loc,
			loop->routine
			? G_("routine call uses same OpenACC parallelism"
			     " as containing loop")
			: G_("inner loop uses same OpenACC parallelism"
			     " as containing loop"));
	      inform (outer->loc, "containing loop here");
	    }
	  else
	    error_at (loop->loc,
		      loop->routine
		      ? G_("routine call uses OpenACC parallelism disallowed"
			   " by containing routine")
		      : G_("loop uses OpenACC parallelism disallowed"
			   " by containing routine"));

	  if (loop->routine)
	    inform (DECL_SOURCE_LOCATION (loop->routine),
		    "routine %qD declared here", loop->routine);
	}
      this_mask &= ~outer_mask;
    }
  else
    {
      unsigned outermost = least_bit_hwi (this_mask);

      /* The outermost axis requested here must be strictly inside
	 everything already claimed by containing loops.  */
      if (outermost && outermost <= outer_mask)
	{
	  if (noisy)
	    {
	      error_at (loop->loc,
			"incorrectly nested OpenACC loop parallelism");

	      const oacc_loop *outer;
	      for (outer = loop->parent;
		   outer->flags && outer->flags < outermost;
		   outer = outer->parent)
		continue;
	      inform (outer->loc, "containing loop here");
	    }

	  this_mask &= ~outermost;
	}
    }

  mask_all |= this_mask;

  if (loop->flags & OLF_TILE)
    {
      /* When tiling, vector goes to the element loop, and failing
	 that we put worker there.  The std doesn't contemplate
	 specifying all three.  We choose to put worker and vector on
	 the element loops in that case.  */
      unsigned this_e_mask = this_mask & GOMP_DIM_MASK (GOMP_DIM_VECTOR);
      if (!this_e_mask || this_mask & GOMP_DIM_MASK (GOMP_DIM_GANG))
	this_e_mask |= this_mask & GOMP_DIM_MASK (GOMP_DIM_WORKER);

      loop->e_mask = this_e_mask;
      this_mask ^= this_e_mask;
    }

  loop->mask = this_mask;

  if (dump_file)
    fprintf (dump_file, "Loop %s:%d user specified %d & %d\n",
	     LOCATION_FILE (loop->loc), LOCATION_LINE (loop->loc),
	     loop->mask, loop->e_mask);

  if (loop->child)
    {
      unsigned tmp_mask = outer_mask | this_mask | loop->e_mask;
      loop->inner = oacc_loop_fixed_partitions (loop->child, tmp_mask);
      mask_all |= loop->inner;
    }

  if (loop->sibling)
    mask_all |= oacc_loop_fixed_partitions (loop->sibling, outer_mask);

  return mask_all;
}
/* Walk the OpenACC loop hierarchy to assign auto-partitioned loops.
   OUTER_MASK is the partitioning this loop is contained within.
   OUTER_ASSIGN is true if an outer loop is being auto-partitioned.
   Return the cumulative partitioning used by this loop, siblings and
   children.  */

static unsigned
oacc_loop_auto_partitions (oacc_loop *loop, unsigned outer_mask,
			   bool outer_assign)
{
  bool assign = (loop->flags & OLF_AUTO) && (loop->flags & OLF_INDEPENDENT);
  bool noisy = true;
  bool tiling = loop->flags & OLF_TILE;

#ifdef ACCEL_COMPILER
  /* When device_type is supported, we want the device compiler to be
     noisy, if the loop parameters are device_type-specific.  */
  noisy = false;
#endif

  if (assign && (!outer_assign || loop->inner))
    {
      /* Allocate outermost and non-innermost loops at the outermost
	 non-innermost available level.  */
      unsigned this_mask = GOMP_DIM_MASK (GOMP_DIM_GANG);

      /* Find the first outermost available partition.  */
      while (this_mask <= outer_mask)
	this_mask <<= 1;

      /* Grab two axes if tiling, and we've not assigned anything  */
      if (tiling && !(loop->mask | loop->e_mask))
	this_mask |= this_mask << 1;

      /* Prohibit the innermost partitioning at the moment.  */
      this_mask &= GOMP_DIM_MASK (GOMP_DIM_MAX - 1) - 1;

      /* Don't use any dimension explicitly claimed by an inner loop.  */
      this_mask &= ~loop->inner;

      if (tiling && !loop->e_mask)
	{
	  /* If we got two axes, allocate the inner one to the element
	     loop.  */
	  loop->e_mask = this_mask & (this_mask << 1);
	  this_mask ^= loop->e_mask;
	}

      loop->mask |= this_mask;
    }

  if (loop->child)
    {
      unsigned tmp_mask = outer_mask | loop->mask | loop->e_mask;
      loop->inner = oacc_loop_auto_partitions (loop->child, tmp_mask,
					       outer_assign | assign);
    }

  if (assign && (!loop->mask || (tiling && !loop->e_mask) || !outer_assign))
    {
      /* Allocate the loop at the innermost available level.  Note
	 that we do this even if we already assigned this loop the
	 outermost available level above.  That way we'll partition
	 this along 2 axes, if they are available.  */
      unsigned this_mask = 0;

      /* Determine the outermost partitioning used within this loop.  */
      this_mask = loop->inner | GOMP_DIM_MASK (GOMP_DIM_MAX);
      this_mask = least_bit_hwi (this_mask);

      /* Pick the partitioning just inside that one.  */
      this_mask >>= 1;

      /* And avoid picking one use by an outer loop.  */
      this_mask &= ~outer_mask;

      /* If tiling and we failed completely above, grab the next one
	 too.  Making sure it doesn't hit an outer loop.  */
      if (tiling)
	{
	  this_mask &= ~(loop->e_mask | loop->mask);
	  unsigned tile_mask = ((this_mask >> 1)
				& ~(outer_mask | loop->e_mask | loop->mask));

	  if (tile_mask || loop->mask)
	    {
	      loop->e_mask |= this_mask;
	      this_mask = tile_mask;
	    }
	  if (!loop->e_mask && noisy)
	    warning_at (loop->loc, 0,
			"insufficient partitioning available"
			" to parallelize element loop");
	}

      loop->mask |= this_mask;
      if (!loop->mask && noisy)
	warning_at (loop->loc, 0,
		    tiling
		    ? G_("insufficient partitioning available"
			 " to parallelize tile loop")
		    : G_("insufficient partitioning available"
			 " to parallelize loop"));
    }

  if (assign && dump_file)
    fprintf (dump_file, "Auto loop %s:%d assigned %d & %d\n",
	     LOCATION_FILE (loop->loc), LOCATION_LINE (loop->loc),
	     loop->mask, loop->e_mask);

  unsigned inner_mask = 0;

  if (loop->sibling)
    inner_mask |= oacc_loop_auto_partitions (loop->sibling,
					     outer_mask, outer_assign);

  inner_mask |= loop->inner | loop->mask | loop->e_mask;

  return inner_mask;
}
1609 /* Walk the OpenACC loop heirarchy to check and assign partitioning
1610 axes. Return mask of partitioning. */
1612 static unsigned
1613 oacc_loop_partition (oacc_loop *loop, unsigned outer_mask)
1615 unsigned mask_all = oacc_loop_fixed_partitions (loop, outer_mask);
1617 if (mask_all & GOMP_DIM_MASK (GOMP_DIM_MAX))
1619 mask_all ^= GOMP_DIM_MASK (GOMP_DIM_MAX);
1620 mask_all |= oacc_loop_auto_partitions (loop, outer_mask, false);
1622 return mask_all;
1625 /* Default fork/join early expander. Delete the function calls if
1626 there is no RTL expander. */
1628 bool
1629 default_goacc_fork_join (gcall *ARG_UNUSED (call),
1630 const int *ARG_UNUSED (dims), bool is_fork)
1632 if (is_fork)
1633 return targetm.have_oacc_fork ();
1634 else
1635 return targetm.have_oacc_join ();
/* Default goacc.reduction early expander.

   LHS-opt = IFN_REDUCTION (KIND, RES_PTR, VAR, LEVEL, OP, OFFSET)
   If RES_PTR is not integer-zerop:
       SETUP - emit 'LHS = *RES_PTR', LHS = NULL
       TEARDOWN - emit '*RES_PTR = VAR'
   If LHS is not NULL
       emit 'LHS = VAR'  */

void
default_goacc_reduction (gcall *call)
{
  unsigned code = (unsigned)TREE_INT_CST_LOW (gimple_call_arg (call, 0));
  gimple_stmt_iterator gsi = gsi_for_stmt (call);
  tree lhs = gimple_call_lhs (call);
  tree var = gimple_call_arg (call, 2);
  gimple_seq seq = NULL;

  if (code == IFN_GOACC_REDUCTION_SETUP
      || code == IFN_GOACC_REDUCTION_TEARDOWN)
    {
      /* Setup and Teardown need to copy from/to the receiver object,
	 if there is one.  */
      tree ref_to_res = gimple_call_arg (call, 1);

      if (!integer_zerop (ref_to_res))
	{
	  tree dst = build_simple_mem_ref (ref_to_res);
	  tree src = var;

	  if (code == IFN_GOACC_REDUCTION_SETUP)
	    {
	      /* SETUP copies in the opposite direction and already
		 produces the final LHS value.  */
	      src = dst;
	      dst = lhs;
	      lhs = NULL;
	    }
	  gimple_seq_add_stmt (&seq, gimple_build_assign (dst, src));
	}
    }

  /* Copy VAR to LHS, if there is an LHS.  */
  if (lhs)
    gimple_seq_add_stmt (&seq, gimple_build_assign (lhs, var));

  /* Replace the call with the generated sequence (possibly empty,
     which simply deletes the call).  */
  gsi_replace_with_seq (&gsi, seq, true);
}
/* Main entry point for oacc transformations which run on the device
   compiler after LTO, so we know what the target device is at this
   point (including the host fallback).  */

static unsigned int
execute_oacc_device_lower ()
{
  tree attrs = oacc_get_fn_attrib (current_function_decl);

  if (!attrs)
    /* Not an offloaded function.  */
    return 0;

  /* Parse the default dim argument exactly once; the flag is then
     repointed at itself as an already-parsed sentinel.  */
  if ((const void *)flag_openacc_dims != &flag_openacc_dims)
    {
      oacc_parse_default_dims (flag_openacc_dims);
      flag_openacc_dims = (char *)&flag_openacc_dims;
    }

  bool is_oacc_kernels
    = (lookup_attribute ("oacc kernels",
			 DECL_ATTRIBUTES (current_function_decl)) != NULL);
  bool is_oacc_kernels_parallelized
    = (lookup_attribute ("oacc kernels parallelized",
			 DECL_ATTRIBUTES (current_function_decl)) != NULL);

  /* Unparallelized OpenACC kernels constructs must get launched as 1 x 1 x 1
     kernels, so remove the parallelism dimensions function attributes
     potentially set earlier on.  */
  if (is_oacc_kernels && !is_oacc_kernels_parallelized)
    {
      oacc_set_fn_attrib (current_function_decl, NULL, NULL);
      attrs = oacc_get_fn_attrib (current_function_decl);
    }

  /* Discover, partition and process the loops.  */
  oacc_loop *loops = oacc_loop_discovery ();
  int fn_level = oacc_fn_attrib_level (attrs);

  if (dump_file)
    {
      if (fn_level >= 0)
	fprintf (dump_file, "Function is OpenACC routine level %d\n",
		 fn_level);
      else if (is_oacc_kernels)
	fprintf (dump_file, "Function is %s OpenACC kernels offload\n",
		 (is_oacc_kernels_parallelized
		  ? "parallelized" : "unparallelized"));
      else
	fprintf (dump_file, "Function is OpenACC parallel offload\n");
    }

  unsigned outer_mask = fn_level >= 0 ? GOMP_DIM_MASK (fn_level) - 1 : 0;
  unsigned used_mask = oacc_loop_partition (loops, outer_mask);
  /* OpenACC kernels constructs are special: they currently don't use the
     generic oacc_loop infrastructure and attribute/dimension processing.  */
  if (is_oacc_kernels && is_oacc_kernels_parallelized)
    {
      /* Parallelized OpenACC kernels constructs use gang parallelism.  See
	 also tree-parloops.c:create_parallel_loop.  */
      used_mask |= GOMP_DIM_MASK (GOMP_DIM_GANG);
    }

  int dims[GOMP_DIM_MAX];
  oacc_validate_dims (current_function_decl, attrs, dims, fn_level, used_mask);

  if (dump_file)
    {
      const char *comma = "Compute dimensions [";
      for (int ix = 0; ix != GOMP_DIM_MAX; ix++, comma = ", ")
	fprintf (dump_file, "%s%d", comma, dims[ix]);
      fprintf (dump_file, "]\n");
    }

  oacc_loop_process (loops);
  if (dump_file)
    {
      fprintf (dump_file, "OpenACC loops\n");
      dump_oacc_loop (dump_file, loops, 0);
      fprintf (dump_file, "\n");
    }
  if (dump_enabled_p ())
    {
      oacc_loop *l = loops;
      /* OpenACC kernels constructs are special: they currently don't use the
	 generic oacc_loop infrastructure.  */
      if (is_oacc_kernels)
	{
	  /* Create a fake oacc_loop for diagnostic purposes.  */
	  l = new_oacc_loop_raw (NULL,
				 DECL_SOURCE_LOCATION (current_function_decl));
	  l->mask = used_mask;
	}
      else
	{
	  /* Skip the outermost, dummy OpenACC loop  */
	  l = l->child;
	}

      if (l)
	inform_oacc_loop (l);
      if (is_oacc_kernels)
	free_oacc_loop (l);
    }

  /* Offloaded targets may introduce new basic blocks, which require
     dominance information to update SSA.  */
  calculate_dominance_info (CDI_DOMINATORS);

  /* Now lower internal loop functions to target-specific code
     sequences.  */
  basic_block bb;
  FOR_ALL_BB_FN (bb, cfun)
    for (gimple_stmt_iterator gsi = gsi_start_bb (bb); !gsi_end_p (gsi);)
      {
	gimple *stmt = gsi_stmt (gsi);
	if (!is_gimple_call (stmt))
	  {
	    gsi_next (&gsi);
	    continue;
	  }

	gcall *call = as_a <gcall *> (stmt);
	if (!gimple_call_internal_p (call))
	  {
	    gsi_next (&gsi);
	    continue;
	  }

	/* Rewind to allow rescan.  */
	gsi_prev (&gsi);
	bool rescan = false, remove = false;
	enum internal_fn ifn_code = gimple_call_internal_fn (call);

	switch (ifn_code)
	  {
	  default: break;

	  case IFN_GOACC_TILE:
	    oacc_xform_tile (call);
	    rescan = true;
	    break;

	  case IFN_GOACC_LOOP:
	    oacc_xform_loop (call);
	    rescan = true;
	    break;

	  case IFN_GOACC_REDUCTION:
	    /* Mark the function for SSA renaming.  */
	    mark_virtual_operands_for_renaming (cfun);

	    /* If the level is -1, this ended up being an unused
	       axis.  Handle as a default.  */
	    if (integer_minus_onep (gimple_call_arg (call, 3)))
	      default_goacc_reduction (call);
	    else
	      targetm.goacc.reduction (call);
	    rescan = true;
	    break;

	  case IFN_UNIQUE:
	    {
	      enum ifn_unique_kind kind
		= ((enum ifn_unique_kind)
		   TREE_INT_CST_LOW (gimple_call_arg (call, 0)));

	      switch (kind)
		{
		default:
		  break;

		case IFN_UNIQUE_OACC_FORK:
		case IFN_UNIQUE_OACC_JOIN:
		  /* Level -1 means an unused axis; otherwise ask the
		     target whether it expands fork/join itself.  */
		  if (integer_minus_onep (gimple_call_arg (call, 2)))
		    remove = true;
		  else if (!targetm.goacc.fork_join
			   (call, dims, kind == IFN_UNIQUE_OACC_FORK))
		    remove = true;
		  break;

		case IFN_UNIQUE_OACC_HEAD_MARK:
		case IFN_UNIQUE_OACC_TAIL_MARK:
		  remove = true;
		  break;
		}
	      break;
	    }
	  }

	if (gsi_end_p (gsi))
	  /* We rewound past the beginning of the BB.  */
	  gsi = gsi_start_bb (bb);
	else
	  /* Undo the rewind.  */
	  gsi_next (&gsi);

	if (remove)
	  {
	    if (gimple_vdef (call))
	      replace_uses_by (gimple_vdef (call), gimple_vuse (call));
	    if (gimple_call_lhs (call))
	      {
		/* Propagate the data dependency var.  */
		gimple *ass = gimple_build_assign (gimple_call_lhs (call),
						   gimple_call_arg (call, 1));
		gsi_replace (&gsi, ass, false);
	      }
	    else
	      gsi_remove (&gsi, true);
	  }
	else if (!rescan)
	  /* If not rescanning, advance over the call.  */
	  gsi_next (&gsi);
      }

  free_oacc_loop (loops);

  return 0;
}
1906 /* Default launch dimension validator. Force everything to 1. A
1907 backend that wants to provide larger dimensions must override this
1908 hook. */
1910 bool
1911 default_goacc_validate_dims (tree ARG_UNUSED (decl), int *dims,
1912 int ARG_UNUSED (fn_level),
1913 unsigned ARG_UNUSED (used))
1915 bool changed = false;
1917 for (unsigned ix = 0; ix != GOMP_DIM_MAX; ix++)
1919 if (dims[ix] != 1)
1921 dims[ix] = 1;
1922 changed = true;
1926 return changed;
1929 /* Default dimension bound is unknown on accelerator and 1 on host. */
1932 default_goacc_dim_limit (int ARG_UNUSED (axis))
1934 #ifdef ACCEL_COMPILER
1935 return 0;
1936 #else
1937 return 1;
1938 #endif
namespace {

/* Pass descriptor for the OpenACC device lowering pass.  */

const pass_data pass_data_oacc_device_lower =
{
  GIMPLE_PASS, /* type */
  "oaccdevlow", /* name */
  OPTGROUP_OMP, /* optinfo_flags */
  TV_NONE, /* tv_id */
  PROP_cfg, /* properties_required */
  0 /* Possibly PROP_gimple_eomp.  */, /* properties_provided */
  0, /* properties_destroyed */
  0, /* todo_flags_start */
  TODO_update_ssa | TODO_cleanup_cfg, /* todo_flags_finish */
};

/* Device-side lowering of OpenACC offload functions; gated on
   -fopenacc.  */

class pass_oacc_device_lower : public gimple_opt_pass
{
public:
  pass_oacc_device_lower (gcc::context *ctxt)
    : gimple_opt_pass (pass_data_oacc_device_lower, ctxt)
  {}

  /* opt_pass methods: */
  virtual bool gate (function *) { return flag_openacc; };

  virtual unsigned int execute (function *)
    {
      return execute_oacc_device_lower ();
    }

}; // class pass_oacc_device_lower

} // anon namespace
/* Instantiate the OpenACC device lowering pass.  */

gimple_opt_pass *
make_pass_oacc_device_lower (gcc::context *ctxt)
{
  return new pass_oacc_device_lower (ctxt);
}
/* Rewrite GOMP_SIMT_ENTER_ALLOC call given by GSI and remove the preceding
   GOMP_SIMT_ENTER call identifying the privatized variables, which are
   turned to structure fields and receive a DECL_VALUE_EXPR accordingly.
   Set *REGIMPLIFY to true, except if no privatized variables were seen.  */

static void
ompdevlow_adjust_simt_enter (gimple_stmt_iterator *gsi, bool *regimplify)
{
  gimple *alloc_stmt = gsi_stmt (*gsi);
  tree simtrec = gimple_call_lhs (alloc_stmt);
  tree simduid = gimple_call_arg (alloc_stmt, 0);
  gimple *enter_stmt = SSA_NAME_DEF_STMT (simduid);
  gcc_assert (gimple_call_internal_p (enter_stmt, IFN_GOMP_SIMT_ENTER));
  /* Build an artificial record type to hold all privatized variables.  */
  tree rectype = lang_hooks.types.make_type (RECORD_TYPE);
  TYPE_ARTIFICIAL (rectype) = TYPE_NAMELESS (rectype) = 1;
  TREE_ADDRESSABLE (rectype) = 1;
  TREE_TYPE (simtrec) = build_pointer_type (rectype);
  /* Arguments 1.. of the GOMP_SIMT_ENTER call are the addresses of the
     privatized variables (or null_pointer_node placeholders).  */
  for (unsigned i = 1; i < gimple_call_num_args (enter_stmt); i++)
    {
      tree *argp = gimple_call_arg_ptr (enter_stmt, i);
      if (*argp == null_pointer_node)
	continue;
      gcc_assert (TREE_CODE (*argp) == ADDR_EXPR
		  && VAR_P (TREE_OPERAND (*argp, 0)));
      tree var = TREE_OPERAND (*argp, 0);

      /* Create a field mirroring the variable's name, type, alignment
	 and volatility.  */
      tree field = build_decl (DECL_SOURCE_LOCATION (var), FIELD_DECL,
			       DECL_NAME (var), TREE_TYPE (var));
      SET_DECL_ALIGN (field, DECL_ALIGN (var));
      DECL_USER_ALIGN (field) = DECL_USER_ALIGN (var);
      TREE_THIS_VOLATILE (field) = TREE_THIS_VOLATILE (var);

      insert_field_into_struct (rectype, field);

      /* Redirect all uses of VAR to SIMTREC->FIELD via a value-expr.  */
      tree t = build_simple_mem_ref (simtrec);
      t = build3 (COMPONENT_REF, TREE_TYPE (var), t, field, NULL);
      TREE_THIS_VOLATILE (t) = TREE_THIS_VOLATILE (var);
      SET_DECL_VALUE_EXPR (var, t);
      DECL_HAS_VALUE_EXPR_P (var) = 1;
      *regimplify = true;
    }
  layout_type (rectype);
  tree size = TYPE_SIZE_UNIT (rectype);
  tree align = build_int_cst (TREE_TYPE (size), TYPE_ALIGN_UNIT (rectype));

  /* Replace the alloc call with one carrying the now-known size and
     alignment of the record.  */
  alloc_stmt
    = gimple_build_call_internal (IFN_GOMP_SIMT_ENTER_ALLOC, 2, size, align);
  gimple_call_set_lhs (alloc_stmt, simtrec);
  gsi_replace (gsi, alloc_stmt, false);
  /* Reduce the GOMP_SIMT_ENTER call to a plain copy of its first
     argument, keeping SIMDUID's definition intact.  */
  gimple_stmt_iterator enter_gsi = gsi_for_stmt (enter_stmt);
  enter_stmt = gimple_build_assign (simduid, gimple_call_arg (enter_stmt, 0));
  gsi_replace (&enter_gsi, enter_stmt, false);

  use_operand_p use;
  gimple *exit_stmt;
  if (single_imm_use (simtrec, &use, &exit_stmt))
    {
      /* Emit a clobber of the record just before the matching exit, so
	 the variables' lifetime ends there.  */
      gcc_assert (gimple_call_internal_p (exit_stmt, IFN_GOMP_SIMT_EXIT));
      gimple_stmt_iterator exit_gsi = gsi_for_stmt (exit_stmt);
      tree clobber = build_clobber (rectype);
      exit_stmt = gimple_build_assign (build_simple_mem_ref (simtrec), clobber);
      gsi_insert_before (&exit_gsi, exit_stmt, GSI_SAME_STMT);
    }
  else
    gcc_checking_assert (has_zero_uses (simtrec));
}
2049 /* Callback for walk_gimple_stmt used to scan for SIMT-privatized variables. */
2051 static tree
2052 find_simtpriv_var_op (tree *tp, int *walk_subtrees, void *)
2054 tree t = *tp;
2056 if (VAR_P (t)
2057 && DECL_HAS_VALUE_EXPR_P (t)
2058 && lookup_attribute ("omp simt private", DECL_ATTRIBUTES (t)))
2060 *walk_subtrees = 0;
2061 return t;
2063 return NULL_TREE;
/* Cleanup uses of SIMT placeholder internal functions: on non-SIMT targets,
   VF is 1 and LANE is 0; on SIMT targets, VF is folded to a constant, and
   LANE is kept to be expanded to RTL later on.  Also cleanup all other SIMT
   internal functions on non-SIMT targets, and likewise some SIMD internal
   functions on SIMT targets.  */

static unsigned int
execute_omp_device_lower ()
{
  /* VF == 1 means "not a SIMT target".  */
  int vf = targetm.simt.vf ? targetm.simt.vf () : 1;
  bool regimplify = false;
  basic_block bb;
  gimple_stmt_iterator gsi;
  bool calls_declare_variant_alt
    = cgraph_node::get (cfun->decl)->calls_declare_variant_alt;
  FOR_EACH_BB_FN (bb, cfun)
    for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
      {
	gimple *stmt = gsi_stmt (gsi);
	if (!is_gimple_call (stmt))
	  continue;
	if (!gimple_call_internal_p (stmt))
	  {
	    /* Ordinary calls: possibly redirect to the resolved
	       'declare variant' alternative.  */
	    if (calls_declare_variant_alt)
	      if (tree fndecl = gimple_call_fndecl (stmt))
		{
		  tree new_fndecl = omp_resolve_declare_variant (fndecl);
		  if (new_fndecl != fndecl)
		    {
		      gimple_call_set_fndecl (stmt, new_fndecl);
		      update_stmt (stmt);
		    }
		}
	    continue;
	  }
	/* RHS, when set, is the replacement value for the call's LHS;
	   the call is then replaced by a plain assignment (or nop).  */
	tree lhs = gimple_call_lhs (stmt), rhs = NULL_TREE;
	tree type = lhs ? TREE_TYPE (lhs) : integer_type_node;
	switch (gimple_call_internal_fn (stmt))
	  {
	  case IFN_GOMP_USE_SIMT:
	    rhs = vf == 1 ? integer_zero_node : integer_one_node;
	    break;
	  case IFN_GOMP_SIMT_ENTER:
	    rhs = vf == 1 ? gimple_call_arg (stmt, 0) : NULL_TREE;
	    goto simtreg_enter_exit;
	  case IFN_GOMP_SIMT_ENTER_ALLOC:
	    if (vf != 1)
	      ompdevlow_adjust_simt_enter (&gsi, &regimplify);
	    rhs = vf == 1 ? null_pointer_node : NULL_TREE;
	    goto simtreg_enter_exit;
	  case IFN_GOMP_SIMT_EXIT:
	  simtreg_enter_exit:
	    /* On SIMT targets these survive to RTL expansion.  */
	    if (vf != 1)
	      continue;
	    unlink_stmt_vdef (stmt);
	    break;
	  case IFN_GOMP_SIMT_LANE:
	  case IFN_GOMP_SIMT_LAST_LANE:
	    rhs = vf == 1 ? build_zero_cst (type) : NULL_TREE;
	    break;
	  case IFN_GOMP_SIMT_VF:
	    rhs = build_int_cst (type, vf);
	    break;
	  case IFN_GOMP_SIMT_ORDERED_PRED:
	    rhs = vf == 1 ? integer_zero_node : NULL_TREE;
	    if (rhs || !lhs)
	      unlink_stmt_vdef (stmt);
	    break;
	  case IFN_GOMP_SIMT_VOTE_ANY:
	  case IFN_GOMP_SIMT_XCHG_BFLY:
	  case IFN_GOMP_SIMT_XCHG_IDX:
	    rhs = vf == 1 ? gimple_call_arg (stmt, 0) : NULL_TREE;
	    break;
	  case IFN_GOMP_SIMD_LANE:
	  case IFN_GOMP_SIMD_LAST_LANE:
	    rhs = vf != 1 ? build_zero_cst (type) : NULL_TREE;
	    break;
	  case IFN_GOMP_SIMD_VF:
	    rhs = vf != 1 ? build_one_cst (type) : NULL_TREE;
	    break;
	  default:
	    continue;
	  }
	if (lhs && !rhs)
	  continue;
	stmt = lhs ? gimple_build_assign (lhs, rhs) : gimple_build_nop ();
	gsi_replace (&gsi, stmt, false);
      }
  if (regimplify)
    /* Walk backwards so uses are regimplified before the clobbers of
       the SIMT record are removed.  */
    FOR_EACH_BB_REVERSE_FN (bb, cfun)
      for (gsi = gsi_last_bb (bb); !gsi_end_p (gsi); gsi_prev (&gsi))
	if (walk_gimple_stmt (&gsi, NULL, find_simtpriv_var_op, NULL))
	  {
	    if (gimple_clobber_p (gsi_stmt (gsi)))
	      gsi_remove (&gsi, true);
	    else
	      gimple_regimplify_operands (gsi_stmt (gsi), &gsi);
	  }
  if (vf != 1)
    cfun->has_force_vectorize_loops = false;
  return 0;
}
namespace {

/* Pass descriptor for the OpenMP device lowering pass.  */

const pass_data pass_data_omp_device_lower =
{
  GIMPLE_PASS, /* type */
  "ompdevlow", /* name */
  OPTGROUP_OMP, /* optinfo_flags */
  TV_NONE, /* tv_id */
  PROP_cfg, /* properties_required */
  PROP_gimple_lomp_dev, /* properties_provided */
  0, /* properties_destroyed */
  0, /* todo_flags_start */
  TODO_update_ssa, /* todo_flags_finish */
};

/* Lowering of SIMT/SIMD placeholder internal functions and 'declare
   variant' call redirection on the device compiler.  */

class pass_omp_device_lower : public gimple_opt_pass
{
public:
  pass_omp_device_lower (gcc::context *ctxt)
    : gimple_opt_pass (pass_data_omp_device_lower, ctxt)
  {}

  /* opt_pass methods: */
  virtual bool gate (function *fun)
    {
      return (!(fun->curr_properties & PROP_gimple_lomp_dev)
	      || (flag_openmp
		  && cgraph_node::get (fun->decl)->calls_declare_variant_alt));
    }
  virtual unsigned int execute (function *)
    {
      return execute_omp_device_lower ();
    }

}; // class pass_omp_device_lower

} // anon namespace
/* Instantiate the OpenMP device lowering pass.  */

gimple_opt_pass *
make_pass_omp_device_lower (gcc::context *ctxt)
{
  return new pass_omp_device_lower (ctxt);
}
/* "omp declare target link" handling pass.  */

namespace {

/* Pass descriptor for the target-link regimplification pass.  */

const pass_data pass_data_omp_target_link =
{
  GIMPLE_PASS, /* type */
  "omptargetlink", /* name */
  OPTGROUP_OMP, /* optinfo_flags */
  TV_NONE, /* tv_id */
  PROP_ssa, /* properties_required */
  0, /* properties_provided */
  0, /* properties_destroyed */
  0, /* todo_flags_start */
  TODO_update_ssa, /* todo_flags_finish */
};

/* Regimplify statements referencing "omp declare target link"
   variables; only runs in the accelerator compiler, on offloaded
   functions.  */

class pass_omp_target_link : public gimple_opt_pass
{
public:
  pass_omp_target_link (gcc::context *ctxt)
    : gimple_opt_pass (pass_data_omp_target_link, ctxt)
  {}

  /* opt_pass methods: */
  virtual bool gate (function *fun)
    {
#ifdef ACCEL_COMPILER
      return offloading_function_p (fun->decl);
#else
      (void) fun;
      return false;
#endif
    }

  virtual unsigned execute (function *);
};
2251 /* Callback for walk_gimple_stmt used to scan for link var operands. */
2253 static tree
2254 find_link_var_op (tree *tp, int *walk_subtrees, void *)
2256 tree t = *tp;
2258 if (VAR_P (t)
2259 && DECL_HAS_VALUE_EXPR_P (t)
2260 && is_global_var (t)
2261 && lookup_attribute ("omp declare target link", DECL_ATTRIBUTES (t)))
2263 *walk_subtrees = 0;
2264 return t;
2267 return NULL_TREE;
2270 unsigned
2271 pass_omp_target_link::execute (function *fun)
2273 basic_block bb;
2274 FOR_EACH_BB_FN (bb, fun)
2276 gimple_stmt_iterator gsi;
2277 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
2278 if (walk_gimple_stmt (&gsi, NULL, find_link_var_op, NULL))
2279 gimple_regimplify_operands (gsi_stmt (gsi), &gsi);
2282 return 0;
2285 } // anon namespace
2287 gimple_opt_pass *
2288 make_pass_omp_target_link (gcc::context *ctxt)
2290 return new pass_omp_target_link (ctxt);