gcc/omp-offload.c
1 /* Bits of OpenMP and OpenACC handling that is specific to device offloading
2 and a lowering pass for OpenACC device directives.
4 Copyright (C) 2005-2017 Free Software Foundation, Inc.
6 This file is part of GCC.
8 GCC is free software; you can redistribute it and/or modify it under
9 the terms of the GNU General Public License as published by the Free
10 Software Foundation; either version 3, or (at your option) any later
11 version.
13 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
14 WARRANTY; without even the implied warranty of MERCHANTABILITY or
15 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
16 for more details.
18 You should have received a copy of the GNU General Public License
19 along with GCC; see the file COPYING3. If not see
20 <http://www.gnu.org/licenses/>. */
22 #include "config.h"
23 #include "system.h"
24 #include "coretypes.h"
25 #include "backend.h"
26 #include "target.h"
27 #include "tree.h"
28 #include "gimple.h"
29 #include "tree-pass.h"
30 #include "ssa.h"
31 #include "cgraph.h"
32 #include "pretty-print.h"
33 #include "diagnostic-core.h"
34 #include "fold-const.h"
35 #include "internal-fn.h"
36 #include "gimplify.h"
37 #include "gimple-iterator.h"
38 #include "gimplify-me.h"
39 #include "gimple-walk.h"
40 #include "tree-cfg.h"
41 #include "tree-into-ssa.h"
42 #include "common/common-target.h"
43 #include "omp-general.h"
44 #include "omp-offload.h"
45 #include "lto-section-names.h"
46 #include "gomp-constants.h"
47 #include "gimple-pretty-print.h"
49 /* Describe the OpenACC looping structure of a function. The entire
50 function is held in a 'NULL' loop. */
52 struct oacc_loop
54 oacc_loop *parent; /* Containing loop. */
56 oacc_loop *child; /* First inner loop. */
58 oacc_loop *sibling; /* Next loop within same parent. */
60 location_t loc; /* Location of the loop start. */
62 gcall *marker; /* Initial head marker. */
64 gcall *heads[GOMP_DIM_MAX]; /* Head marker functions. */
65 gcall *tails[GOMP_DIM_MAX]; /* Tail marker functions. */
67 tree routine; /* Pseudo-loop enclosing a routine. */
69 unsigned mask; /* Partitioning mask. */
70 unsigned e_mask; /* Partitioning of element loops (when tiling). */
71 unsigned inner; /* Partitioning of inner loops. */
72 unsigned flags; /* Partitioning flags. */
73 vec<gcall *> ifns; /* Contained loop abstraction functions. */
74 tree chunk_size; /* Chunk size. */
75 gcall *head_end; /* Final marker of head sequence. */
78 /* Holds offload tables with decls. */
79 vec<tree, va_gc> *offload_funcs, *offload_vars;
81 /* Return the level at which an OpenACC routine may spawn a partitioned loop, or
82 -1 if it is not a routine (i.e. is an offload fn). */
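/* For instance (based on how the routine attribute is built elsewhere):
   the attribute for '#pragma acc routine worker' has a zero TREE_PURPOSE
   for the gang axis and a non-zero one for the worker axis, so this
   returns 1 (GOMP_DIM_WORKER); an offload region's attribute has no
   TREE_PURPOSE at all and yields -1.  */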
84 static int
85 oacc_fn_attrib_level (tree attr)
87 tree pos = TREE_VALUE (attr);
89 if (!TREE_PURPOSE (pos))
90 return -1;
92 int ix = 0;
93 for (ix = 0; ix != GOMP_DIM_MAX;
94 ix++, pos = TREE_CHAIN (pos))
95 if (!integer_zerop (TREE_PURPOSE (pos)))
96 break;
98 return ix;
101 /* Helper function for omp_finish_file routine. Takes decls from V_DECLS and
102 adds their addresses and sizes to constructor-vector V_CTOR. */
104 static void
105 add_decls_addresses_to_decl_constructor (vec<tree, va_gc> *v_decls,
106 vec<constructor_elt, va_gc> *v_ctor)
108 unsigned len = vec_safe_length (v_decls);
109 for (unsigned i = 0; i < len; i++)
111 tree it = (*v_decls)[i];
112 bool is_var = VAR_P (it);
113 bool is_link_var
114 = is_var
115 #ifdef ACCEL_COMPILER
116 && DECL_HAS_VALUE_EXPR_P (it)
117 #endif
118 && lookup_attribute ("omp declare target link", DECL_ATTRIBUTES (it));
120 tree size = NULL_TREE;
121 if (is_var)
122 size = fold_convert (const_ptr_type_node, DECL_SIZE_UNIT (it));
124 tree addr;
125 if (!is_link_var)
126 addr = build_fold_addr_expr (it);
127 else
129 #ifdef ACCEL_COMPILER
130 /* For "omp declare target link" vars add address of the pointer to
131 the target table, instead of address of the var. */
132 tree value_expr = DECL_VALUE_EXPR (it);
133 tree link_ptr_decl = TREE_OPERAND (value_expr, 0);
134 varpool_node::finalize_decl (link_ptr_decl);
135 addr = build_fold_addr_expr (link_ptr_decl);
136 #else
137 addr = build_fold_addr_expr (it);
138 #endif
140 /* Most significant bit of the size marks "omp declare target link"
141 vars in host and target tables. */
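/* Illustrative example: on a 64-bit target (8-byte pointers) a 4-byte
   "declare target link" variable is therefore recorded with size
   0x8000000000000004.  */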
142 unsigned HOST_WIDE_INT isize = tree_to_uhwi (size);
143 isize |= 1ULL << (int_size_in_bytes (const_ptr_type_node)
144 * BITS_PER_UNIT - 1);
145 size = wide_int_to_tree (const_ptr_type_node, isize);
148 CONSTRUCTOR_APPEND_ELT (v_ctor, NULL_TREE, addr);
149 if (is_var)
150 CONSTRUCTOR_APPEND_ELT (v_ctor, NULL_TREE, size);
154 /* Create new symbols containing (address, size) pairs for global variables,
155 marked with "omp declare target" attribute, as well as addresses for the
156 functions, which are outlined offloading regions. */
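/* Sketch of the emitted layout (assuming named-section support):
     .offload_func_table: one pointer-sized entry (address) per function;
     .offload_var_table:  two entries (address, size) per variable,
                          hence the NUM_VARS * 2 elements below.  */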
157 void
158 omp_finish_file (void)
160 unsigned num_funcs = vec_safe_length (offload_funcs);
161 unsigned num_vars = vec_safe_length (offload_vars);
163 if (num_funcs == 0 && num_vars == 0)
164 return;
166 if (targetm_common.have_named_sections)
168 vec<constructor_elt, va_gc> *v_f, *v_v;
169 vec_alloc (v_f, num_funcs);
170 vec_alloc (v_v, num_vars * 2);
172 add_decls_addresses_to_decl_constructor (offload_funcs, v_f);
173 add_decls_addresses_to_decl_constructor (offload_vars, v_v);
175 tree vars_decl_type = build_array_type_nelts (pointer_sized_int_node,
176 num_vars * 2);
177 tree funcs_decl_type = build_array_type_nelts (pointer_sized_int_node,
178 num_funcs);
179 SET_TYPE_ALIGN (vars_decl_type, TYPE_ALIGN (pointer_sized_int_node));
180 SET_TYPE_ALIGN (funcs_decl_type, TYPE_ALIGN (pointer_sized_int_node));
181 tree ctor_v = build_constructor (vars_decl_type, v_v);
182 tree ctor_f = build_constructor (funcs_decl_type, v_f);
183 TREE_CONSTANT (ctor_v) = TREE_CONSTANT (ctor_f) = 1;
184 TREE_STATIC (ctor_v) = TREE_STATIC (ctor_f) = 1;
185 tree funcs_decl = build_decl (UNKNOWN_LOCATION, VAR_DECL,
186 get_identifier (".offload_func_table"),
187 funcs_decl_type);
188 tree vars_decl = build_decl (UNKNOWN_LOCATION, VAR_DECL,
189 get_identifier (".offload_var_table"),
190 vars_decl_type);
191 TREE_STATIC (funcs_decl) = TREE_STATIC (vars_decl) = 1;
192 /* Do not align tables more than TYPE_ALIGN (pointer_sized_int_node),
193 otherwise a joint table in a binary will contain padding between
194 tables from multiple object files. */
195 DECL_USER_ALIGN (funcs_decl) = DECL_USER_ALIGN (vars_decl) = 1;
196 SET_DECL_ALIGN (funcs_decl, TYPE_ALIGN (funcs_decl_type));
197 SET_DECL_ALIGN (vars_decl, TYPE_ALIGN (vars_decl_type));
198 DECL_INITIAL (funcs_decl) = ctor_f;
199 DECL_INITIAL (vars_decl) = ctor_v;
200 set_decl_section_name (funcs_decl, OFFLOAD_FUNC_TABLE_SECTION_NAME);
201 set_decl_section_name (vars_decl, OFFLOAD_VAR_TABLE_SECTION_NAME);
203 varpool_node::finalize_decl (vars_decl);
204 varpool_node::finalize_decl (funcs_decl);
206 else
208 for (unsigned i = 0; i < num_funcs; i++)
210 tree it = (*offload_funcs)[i];
211 targetm.record_offload_symbol (it);
213 for (unsigned i = 0; i < num_vars; i++)
215 tree it = (*offload_vars)[i];
216 targetm.record_offload_symbol (it);
221 /* Call dim_pos (POS == true) or dim_size (POS == false) builtins for
222 axis DIM. Return a tmp var holding the result. */
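/* E.g. oacc_dim_call (true, GOMP_DIM_VECTOR, &seq) appends something
   like "tmp = GOACC_DIM_POS (2)" to SEQ and returns TMP (dump syntax
   approximate).  */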
224 static tree
225 oacc_dim_call (bool pos, int dim, gimple_seq *seq)
227 tree arg = build_int_cst (unsigned_type_node, dim);
228 tree size = create_tmp_var (integer_type_node);
229 enum internal_fn fn = pos ? IFN_GOACC_DIM_POS : IFN_GOACC_DIM_SIZE;
230 gimple *call = gimple_build_call_internal (fn, 1, arg);
232 gimple_call_set_lhs (call, size);
233 gimple_seq_add_stmt (seq, call);
235 return size;
238 /* Find the number of threads (POS = false), or thread number (POS =
239 true) for an OpenACC region partitioned as MASK. Setup code
240 required for the calculation is added to SEQ. */
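/* For example, with MASK covering gang and vector and POS true, the
   returned expression computes gang_pos * vector_size + vector_pos;
   with POS false it computes gang_size * vector_size.  */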
242 static tree
243 oacc_thread_numbers (bool pos, int mask, gimple_seq *seq)
245 tree res = pos ? NULL_TREE : build_int_cst (unsigned_type_node, 1);
246 unsigned ix;
248 /* Start at gang level, and examine relevant dimension indices. */
249 for (ix = GOMP_DIM_GANG; ix != GOMP_DIM_MAX; ix++)
250 if (GOMP_DIM_MASK (ix) & mask)
252 if (res)
254 /* We had an outer index, so scale that by the size of
255 this dimension. */
256 tree n = oacc_dim_call (false, ix, seq);
257 res = fold_build2 (MULT_EXPR, integer_type_node, res, n);
259 if (pos)
261 /* Determine index in this dimension. */
262 tree id = oacc_dim_call (true, ix, seq);
263 if (res)
264 res = fold_build2 (PLUS_EXPR, integer_type_node, res, id);
265 else
266 res = id;
270 if (res == NULL_TREE)
271 res = integer_zero_node;
273 return res;
276 /* Transform IFN_GOACC_LOOP calls to actual code. See
277 expand_oacc_for for where these are generated. At the vector
278 level, we stride loops, such that each member of a warp will
279 operate on adjacent iterations. At the worker and gang level,
280 each gang/warp executes a set of contiguous iterations. Chunking
281 can override this such that each iteration engine executes a
282 contiguous chunk, and then moves on to stride to the next chunk. */
284 static void
285 oacc_xform_loop (gcall *call)
287 gimple_stmt_iterator gsi = gsi_for_stmt (call);
288 enum ifn_goacc_loop_kind code
289 = (enum ifn_goacc_loop_kind) TREE_INT_CST_LOW (gimple_call_arg (call, 0));
290 tree dir = gimple_call_arg (call, 1);
291 tree range = gimple_call_arg (call, 2);
292 tree step = gimple_call_arg (call, 3);
293 tree chunk_size = NULL_TREE;
294 unsigned mask = (unsigned) TREE_INT_CST_LOW (gimple_call_arg (call, 5));
295 tree lhs = gimple_call_lhs (call);
296 tree type = TREE_TYPE (lhs);
297 tree diff_type = TREE_TYPE (range);
298 tree r = NULL_TREE;
299 gimple_seq seq = NULL;
300 bool chunking = false, striding = true;
301 unsigned outer_mask = mask & (~mask + 1); // Outermost partitioning
302 unsigned inner_mask = mask & ~outer_mask; // Inner partitioning (if any)
304 #ifdef ACCEL_COMPILER
305 chunk_size = gimple_call_arg (call, 4);
306 if (integer_minus_onep (chunk_size) /* Force static allocation. */
307 || integer_zerop (chunk_size)) /* Default (also static). */
309 /* If we're at the gang level, we want each gang to execute a
310 contiguous run of iterations. Otherwise we want each element
311 to stride. */
312 striding = !(outer_mask & GOMP_DIM_MASK (GOMP_DIM_GANG));
313 chunking = false;
315 else
317 /* Chunk of size 1 is striding. */
318 striding = integer_onep (chunk_size);
319 chunking = !striding;
321 #endif
323 /* striding=true, chunking=true
324 -> invalid.
325 striding=true, chunking=false
326 -> chunks=1
327 striding=false,chunking=true
328 -> chunks=ceil (range/(chunksize*threads*step))
329 striding=false,chunking=false
330 -> chunk_size=ceil(range/(threads*step)),chunks=1 */
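/* Worked example (striding, no chunking): for a vector(32)-only loop
   with unit STEP, this expands to CHUNKS = 1, GOACC_LOOP_STEP = 32,
   OFFSET = vector_pos and BOUND = RANGE, so lane L visits iterations
   L, L+32, L+64, ...  (illustrative).  */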
331 push_gimplify_context (true);
333 switch (code)
335 default: gcc_unreachable ();
337 case IFN_GOACC_LOOP_CHUNKS:
338 if (!chunking)
339 r = build_int_cst (type, 1);
340 else
342 /* chunk_max
343 = (range - dir) / (chunks * step * num_threads) + dir */
344 tree per = oacc_thread_numbers (false, mask, &seq);
345 per = fold_convert (type, per);
346 chunk_size = fold_convert (type, chunk_size);
347 per = fold_build2 (MULT_EXPR, type, per, chunk_size);
348 per = fold_build2 (MULT_EXPR, type, per, step);
349 r = build2 (MINUS_EXPR, type, range, dir);
350 r = build2 (PLUS_EXPR, type, r, per);
351 r = build2 (TRUNC_DIV_EXPR, type, r, per);
353 break;
355 case IFN_GOACC_LOOP_STEP:
357 /* If striding, step by the entire compute volume, otherwise
358 step by the inner volume. */
359 unsigned volume = striding ? mask : inner_mask;
361 r = oacc_thread_numbers (false, volume, &seq);
362 r = build2 (MULT_EXPR, type, fold_convert (type, r), step);
364 break;
366 case IFN_GOACC_LOOP_OFFSET:
367 if (striding)
369 r = oacc_thread_numbers (true, mask, &seq);
370 r = fold_convert (diff_type, r);
372 else
374 tree inner_size = oacc_thread_numbers (false, inner_mask, &seq);
375 tree outer_size = oacc_thread_numbers (false, outer_mask, &seq);
376 tree volume = fold_build2 (MULT_EXPR, TREE_TYPE (inner_size),
377 inner_size, outer_size);
379 volume = fold_convert (diff_type, volume);
380 if (chunking)
381 chunk_size = fold_convert (diff_type, chunk_size);
382 else
384 tree per = fold_build2 (MULT_EXPR, diff_type, volume, step);
386 chunk_size = build2 (MINUS_EXPR, diff_type, range, dir);
387 chunk_size = build2 (PLUS_EXPR, diff_type, chunk_size, per);
388 chunk_size = build2 (TRUNC_DIV_EXPR, diff_type, chunk_size, per);
391 tree span = build2 (MULT_EXPR, diff_type, chunk_size,
392 fold_convert (diff_type, inner_size));
393 r = oacc_thread_numbers (true, outer_mask, &seq);
394 r = fold_convert (diff_type, r);
395 r = build2 (MULT_EXPR, diff_type, r, span);
397 tree inner = oacc_thread_numbers (true, inner_mask, &seq);
398 inner = fold_convert (diff_type, inner);
399 r = fold_build2 (PLUS_EXPR, diff_type, r, inner);
401 if (chunking)
403 tree chunk = fold_convert (diff_type, gimple_call_arg (call, 6));
404 tree per
405 = fold_build2 (MULT_EXPR, diff_type, volume, chunk_size);
406 per = build2 (MULT_EXPR, diff_type, per, chunk);
408 r = build2 (PLUS_EXPR, diff_type, r, per);
411 r = fold_build2 (MULT_EXPR, diff_type, r, step);
412 if (type != diff_type)
413 r = fold_convert (type, r);
414 break;
416 case IFN_GOACC_LOOP_BOUND:
417 if (striding)
418 r = range;
419 else
421 tree inner_size = oacc_thread_numbers (false, inner_mask, &seq);
422 tree outer_size = oacc_thread_numbers (false, outer_mask, &seq);
423 tree volume = fold_build2 (MULT_EXPR, TREE_TYPE (inner_size),
424 inner_size, outer_size);
426 volume = fold_convert (diff_type, volume);
427 if (chunking)
428 chunk_size = fold_convert (diff_type, chunk_size);
429 else
431 tree per = fold_build2 (MULT_EXPR, diff_type, volume, step);
433 chunk_size = build2 (MINUS_EXPR, diff_type, range, dir);
434 chunk_size = build2 (PLUS_EXPR, diff_type, chunk_size, per);
435 chunk_size = build2 (TRUNC_DIV_EXPR, diff_type, chunk_size, per);
438 tree span = build2 (MULT_EXPR, diff_type, chunk_size,
439 fold_convert (diff_type, inner_size));
441 r = fold_build2 (MULT_EXPR, diff_type, span, step);
443 tree offset = gimple_call_arg (call, 6);
444 r = build2 (PLUS_EXPR, diff_type, r,
445 fold_convert (diff_type, offset));
446 r = build2 (integer_onep (dir) ? MIN_EXPR : MAX_EXPR,
447 diff_type, r, range);
449 if (diff_type != type)
450 r = fold_convert (type, r);
451 break;
454 gimplify_assign (lhs, r, &seq);
456 pop_gimplify_context (NULL);
458 gsi_replace_with_seq (&gsi, seq, true);
461 /* Transform a GOACC_TILE call. Determines the element loop span for
462 the specified loop of the nest. This is 1 if we're not tiling.
464 GOACC_TILE (collapse_count, loop_no, tile_arg, gwv_tile, gwv_element); */
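/* For instance, '#pragma acc loop tile(8)' gives span 8 directly, while
   a tile size of '*' (passed as zero here) lets the element
   partitioning choose the span, e.g. the vector length for a
   vector-partitioned element loop (illustrative).  */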
466 static void
467 oacc_xform_tile (gcall *call)
469 gimple_stmt_iterator gsi = gsi_for_stmt (call);
470 unsigned collapse = tree_to_uhwi (gimple_call_arg (call, 0));
471 /* Inner loops have higher loop_nos. */
472 unsigned loop_no = tree_to_uhwi (gimple_call_arg (call, 1));
473 tree tile_size = gimple_call_arg (call, 2);
474 unsigned e_mask = tree_to_uhwi (gimple_call_arg (call, 4));
475 tree lhs = gimple_call_lhs (call);
476 tree type = TREE_TYPE (lhs);
477 gimple_seq seq = NULL;
478 tree span = build_int_cst (type, 1);
480 gcc_assert (!(e_mask
481 & ~(GOMP_DIM_MASK (GOMP_DIM_VECTOR)
482 | GOMP_DIM_MASK (GOMP_DIM_WORKER))));
483 push_gimplify_context (!seen_error ());
485 #ifndef ACCEL_COMPILER
486 /* Partitioning disabled on host compilers. */
487 e_mask = 0;
488 #endif
489 if (!e_mask)
490 /* Not partitioning. */
491 span = integer_one_node;
492 else if (!integer_zerop (tile_size))
493 /* User explicitly specified size. */
494 span = tile_size;
495 else
497 /* Pick a size based on the partitioning of the element loop and
498 the number of loop nests. */
499 tree first_size = NULL_TREE;
500 tree second_size = NULL_TREE;
502 if (e_mask & GOMP_DIM_MASK (GOMP_DIM_VECTOR))
503 first_size = oacc_dim_call (false, GOMP_DIM_VECTOR, &seq);
504 if (e_mask & GOMP_DIM_MASK (GOMP_DIM_WORKER))
505 second_size = oacc_dim_call (false, GOMP_DIM_WORKER, &seq);
507 if (!first_size)
509 first_size = second_size;
510 second_size = NULL_TREE;
513 if (loop_no + 1 == collapse)
515 span = first_size;
516 if (!loop_no && second_size)
517 span = fold_build2 (MULT_EXPR, TREE_TYPE (span),
518 span, second_size);
520 else if (loop_no + 2 == collapse)
521 span = second_size;
522 else
523 span = NULL_TREE;
525 if (!span)
526 /* There's no obvious element size for this loop. Options
527 are 1, first_size or some non-unity constant (32 is my
528 favourite). We should gather some statistics. */
529 span = first_size;
532 span = fold_convert (type, span);
533 gimplify_assign (lhs, span, &seq);
535 pop_gimplify_context (NULL);
537 gsi_replace_with_seq (&gsi, seq, true);
540 /* Default partitioned and minimum partitioned dimensions. */
542 static int oacc_default_dims[GOMP_DIM_MAX];
543 static int oacc_min_dims[GOMP_DIM_MAX];
545 /* Parse the default dimension parameter. This is a set of
546 :-separated optional compute dimensions. Each specified dimension
547 is a positive integer. When device type support is added, it is
548 planned to be a comma separated list of such compute dimensions,
549 with all but the first prefixed by the colon-terminated device
550 type. */
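/* For example, -fopenacc-dim=5120:1:32 requests 5120 gangs, 1 worker
   and a vector length of 32, in GOMP_DIM_* order (gang:worker:vector);
   an entry may be left empty, as in -fopenacc-dim=::32, to keep that
   dimension's default.  */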
552 static void
553 oacc_parse_default_dims (const char *dims)
555 int ix;
557 for (ix = GOMP_DIM_MAX; ix--;)
559 oacc_default_dims[ix] = -1;
560 oacc_min_dims[ix] = 1;
563 #ifndef ACCEL_COMPILER
564 /* Cannot be overridden on the host. */
565 dims = NULL;
566 #endif
567 if (dims)
569 const char *pos = dims;
571 for (ix = 0; *pos && ix != GOMP_DIM_MAX; ix++)
573 if (ix)
575 if (*pos != ':')
576 goto malformed;
577 pos++;
580 if (*pos != ':')
582 long val;
583 const char *eptr;
585 errno = 0;
586 val = strtol (pos, CONST_CAST (char **, &eptr), 10);
587 if (errno || val <= 0 || (int) val != val)
588 goto malformed;
589 pos = eptr;
590 oacc_default_dims[ix] = (int) val;
593 if (*pos)
595 malformed:
596 error_at (UNKNOWN_LOCATION,
597 "-fopenacc-dim operand is malformed at '%s'", pos);
601 /* Allow the backend to validate the dimensions. */
602 targetm.goacc.validate_dims (NULL_TREE, oacc_default_dims, -1);
603 targetm.goacc.validate_dims (NULL_TREE, oacc_min_dims, -2);
606 /* Validate and update the dimensions for offloaded FN. ATTRS is the
607 raw attribute. DIMS is an array of dimensions, which is filled in.
608 LEVEL is the partitioning level of a routine, or -1 for an offload
609 region itself. USED is the mask of partitioned execution in the
610 function. */
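/* E.g. for an offload region (LEVEL == -1) whose loops only use gang
   and vector partitioning, an unspecified num_workers falls back to
   oacc_min_dims[GOMP_DIM_WORKER] (1, unless the target raises it),
   while the used dimensions take the -fopenacc-dim / target defaults
   (illustrative).  */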
612 static void
613 oacc_validate_dims (tree fn, tree attrs, int *dims, int level, unsigned used)
615 tree purpose[GOMP_DIM_MAX];
616 unsigned ix;
617 tree pos = TREE_VALUE (attrs);
618 bool is_kernel = oacc_fn_attrib_kernels_p (attrs);
620 /* Make sure the attribute creator attached the dimension
621 information. */
622 gcc_assert (pos);
624 for (ix = 0; ix != GOMP_DIM_MAX; ix++)
626 purpose[ix] = TREE_PURPOSE (pos);
627 tree val = TREE_VALUE (pos);
628 dims[ix] = val ? TREE_INT_CST_LOW (val) : -1;
629 pos = TREE_CHAIN (pos);
632 bool changed = targetm.goacc.validate_dims (fn, dims, level);
634 /* Default anything left to 1 or a partitioned default. */
635 for (ix = 0; ix != GOMP_DIM_MAX; ix++)
636 if (dims[ix] < 0)
638 /* The OpenACC spec says 'If the [num_gangs] clause is not
639 specified, an implementation-defined default will be used;
640 the default may depend on the code within the construct.'
641 (2.5.6). Thus an implementation is free to choose
642 non-unity default for a parallel region that doesn't have
643 any gang-partitioned loops. However, it appears that there
644 is a sufficient body of user code that expects non-gang
645 partitioned regions to not execute in gang-redundant mode.
646 So we (a) don't warn about the non-portability and (b) pick
647 the minimum permissible dimension size when there is no
648 partitioned execution. Otherwise we pick the global
649 default for the dimension, which the user can control. The
650 same wording and logic applies to num_workers and
651 vector_length, however the worker- or vector- single
652 execution doesn't have the same impact as gang-redundant
653 execution. (If the minimum gang-level partitioning is not 1,
654 the target is probably too confusing.) */
655 dims[ix] = (used & GOMP_DIM_MASK (ix)
656 ? oacc_default_dims[ix] : oacc_min_dims[ix]);
657 changed = true;
660 if (changed)
662 /* Replace the attribute with new values. */
663 pos = NULL_TREE;
664 for (ix = GOMP_DIM_MAX; ix--;)
666 pos = tree_cons (purpose[ix],
667 build_int_cst (integer_type_node, dims[ix]),
668 pos);
669 if (is_kernel)
670 TREE_PUBLIC (pos) = 1;
672 oacc_replace_fn_attrib (fn, pos);
676 /* Create an empty OpenACC loop structure at LOC. */
678 static oacc_loop *
679 new_oacc_loop_raw (oacc_loop *parent, location_t loc)
681 oacc_loop *loop = XCNEW (oacc_loop);
683 loop->parent = parent;
684 loop->child = loop->sibling = NULL;
686 if (parent)
688 loop->sibling = parent->child;
689 parent->child = loop;
692 loop->loc = loc;
693 loop->marker = NULL;
694 memset (loop->heads, 0, sizeof (loop->heads));
695 memset (loop->tails, 0, sizeof (loop->tails));
696 loop->routine = NULL_TREE;
698 loop->mask = loop->e_mask = loop->flags = loop->inner = 0;
699 loop->chunk_size = 0;
700 loop->head_end = NULL;
702 return loop;
705 /* Create an outermost, dummy OpenACC loop for offloaded function
706 DECL. */
708 static oacc_loop *
709 new_oacc_loop_outer (tree decl)
711 return new_oacc_loop_raw (NULL, DECL_SOURCE_LOCATION (decl));
714 /* Start a new OpenACC loop structure beginning at head marker HEAD.
715 Link into PARENT loop. Return the new loop. */
717 static oacc_loop *
718 new_oacc_loop (oacc_loop *parent, gcall *marker)
720 oacc_loop *loop = new_oacc_loop_raw (parent, gimple_location (marker));
722 loop->marker = marker;
724 /* TODO: This is where device_type flattening would occur for the loop
725 flags. */
727 loop->flags = TREE_INT_CST_LOW (gimple_call_arg (marker, 3));
729 tree chunk_size = integer_zero_node;
730 if (loop->flags & OLF_GANG_STATIC)
731 chunk_size = gimple_call_arg (marker, 4);
732 loop->chunk_size = chunk_size;
734 return loop;
737 /* Create a dummy loop encompassing a call to an OpenACC routine.
738 Extract the routine's partitioning requirements. */
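/* E.g. for a 'routine worker' (LEVEL == GOMP_DIM_WORKER), the mask
   computed below becomes worker|vector: the levels of parallelism the
   routine itself may use internally.  */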
740 static void
741 new_oacc_loop_routine (oacc_loop *parent, gcall *call, tree decl, tree attrs)
743 oacc_loop *loop = new_oacc_loop_raw (parent, gimple_location (call));
744 int level = oacc_fn_attrib_level (attrs);
746 gcc_assert (level >= 0);
748 loop->marker = call;
749 loop->routine = decl;
750 loop->mask = ((GOMP_DIM_MASK (GOMP_DIM_MAX) - 1)
751 ^ (GOMP_DIM_MASK (level) - 1));
754 /* Finish off the current OpenACC loop ending at tail marker TAIL.
755 Return the parent loop. */
757 static oacc_loop *
758 finish_oacc_loop (oacc_loop *loop)
760 /* If the loop has been collapsed, don't partition it. */
761 if (loop->ifns.is_empty ())
762 loop->mask = loop->flags = 0;
763 return loop->parent;
766 /* Free all OpenACC loop structures within LOOP (inclusive). */
768 static void
769 free_oacc_loop (oacc_loop *loop)
771 if (loop->sibling)
772 free_oacc_loop (loop->sibling);
773 if (loop->child)
774 free_oacc_loop (loop->child);
776 free (loop);
779 /* Dump out the OpenACC loop head or tail beginning at FROM. */
781 static void
782 dump_oacc_loop_part (FILE *file, gcall *from, int depth,
783 const char *title, int level)
785 enum ifn_unique_kind kind
786 = (enum ifn_unique_kind) TREE_INT_CST_LOW (gimple_call_arg (from, 0));
788 fprintf (file, "%*s%s-%d:\n", depth * 2, "", title, level);
789 for (gimple_stmt_iterator gsi = gsi_for_stmt (from);;)
791 gimple *stmt = gsi_stmt (gsi);
793 if (gimple_call_internal_p (stmt, IFN_UNIQUE))
795 enum ifn_unique_kind k
796 = ((enum ifn_unique_kind) TREE_INT_CST_LOW
797 (gimple_call_arg (stmt, 0)));
799 if (k == kind && stmt != from)
800 break;
802 print_gimple_stmt (file, stmt, depth * 2 + 2, 0);
804 gsi_next (&gsi);
805 while (gsi_end_p (gsi))
806 gsi = gsi_start_bb (single_succ (gsi_bb (gsi)));
810 /* Dump OpenACC loops LOOP, its siblings and its children. */
812 static void
813 dump_oacc_loop (FILE *file, oacc_loop *loop, int depth)
815 int ix;
817 fprintf (file, "%*sLoop %x(%x) %s:%u\n", depth * 2, "",
818 loop->flags, loop->mask,
819 LOCATION_FILE (loop->loc), LOCATION_LINE (loop->loc));
821 if (loop->marker)
822 print_gimple_stmt (file, loop->marker, depth * 2, 0);
824 if (loop->routine)
825 fprintf (file, "%*sRoutine %s:%u:%s\n",
826 depth * 2, "", DECL_SOURCE_FILE (loop->routine),
827 DECL_SOURCE_LINE (loop->routine),
828 IDENTIFIER_POINTER (DECL_NAME (loop->routine)));
830 for (ix = GOMP_DIM_GANG; ix != GOMP_DIM_MAX; ix++)
831 if (loop->heads[ix])
832 dump_oacc_loop_part (file, loop->heads[ix], depth, "Head", ix);
833 for (ix = GOMP_DIM_MAX; ix--;)
834 if (loop->tails[ix])
835 dump_oacc_loop_part (file, loop->tails[ix], depth, "Tail", ix);
837 if (loop->child)
838 dump_oacc_loop (file, loop->child, depth + 1);
839 if (loop->sibling)
840 dump_oacc_loop (file, loop->sibling, depth);
843 void debug_oacc_loop (oacc_loop *);
845 /* Dump loops to stderr. */
847 DEBUG_FUNCTION void
848 debug_oacc_loop (oacc_loop *loop)
850 dump_oacc_loop (stderr, loop, 0);
853 /* DFS walk of basic blocks BB onwards, creating OpenACC loop
854 structures as we go. By construction these loops are properly
855 nested. */
857 static void
858 oacc_loop_discover_walk (oacc_loop *loop, basic_block bb)
860 int marker = 0;
861 int remaining = 0;
863 if (bb->flags & BB_VISITED)
864 return;
866 follow:
867 bb->flags |= BB_VISITED;
869 /* Scan for loop markers. */
870 for (gimple_stmt_iterator gsi = gsi_start_bb (bb); !gsi_end_p (gsi);
871 gsi_next (&gsi))
873 gimple *stmt = gsi_stmt (gsi);
875 if (!is_gimple_call (stmt))
876 continue;
878 gcall *call = as_a <gcall *> (stmt);
880 /* If this is a routine, make a dummy loop for it. */
881 if (tree decl = gimple_call_fndecl (call))
882 if (tree attrs = oacc_get_fn_attrib (decl))
884 gcc_assert (!marker);
885 new_oacc_loop_routine (loop, call, decl, attrs);
888 if (!gimple_call_internal_p (call))
889 continue;
891 switch (gimple_call_internal_fn (call))
893 default:
894 break;
896 case IFN_GOACC_LOOP:
897 case IFN_GOACC_TILE:
898 /* Record the abstraction function, so we can manipulate it
899 later. */
900 loop->ifns.safe_push (call);
901 break;
903 case IFN_UNIQUE:
904 enum ifn_unique_kind kind
905 = (enum ifn_unique_kind) (TREE_INT_CST_LOW
906 (gimple_call_arg (call, 0)));
907 if (kind == IFN_UNIQUE_OACC_HEAD_MARK
908 || kind == IFN_UNIQUE_OACC_TAIL_MARK)
910 if (gimple_call_num_args (call) == 2)
912 gcc_assert (marker && !remaining);
913 marker = 0;
914 if (kind == IFN_UNIQUE_OACC_TAIL_MARK)
915 loop = finish_oacc_loop (loop);
916 else
917 loop->head_end = call;
919 else
921 int count = TREE_INT_CST_LOW (gimple_call_arg (call, 2));
923 if (!marker)
925 if (kind == IFN_UNIQUE_OACC_HEAD_MARK)
926 loop = new_oacc_loop (loop, call);
927 remaining = count;
929 gcc_assert (count == remaining);
930 if (remaining)
932 remaining--;
933 if (kind == IFN_UNIQUE_OACC_HEAD_MARK)
934 loop->heads[marker] = call;
935 else
936 loop->tails[remaining] = call;
938 marker++;
943 if (remaining || marker)
945 bb = single_succ (bb);
946 gcc_assert (single_pred_p (bb) && !(bb->flags & BB_VISITED));
947 goto follow;
950 /* Walk successor blocks. */
951 edge e;
952 edge_iterator ei;
954 FOR_EACH_EDGE (e, ei, bb->succs)
955 oacc_loop_discover_walk (loop, e->dest);
958 /* LOOP is the first sibling. Reverse the order in place and return
959 the new first sibling. Recurse to child loops. */
961 static oacc_loop *
962 oacc_loop_sibling_nreverse (oacc_loop *loop)
964 oacc_loop *last = NULL;
967 if (loop->child)
968 loop->child = oacc_loop_sibling_nreverse (loop->child);
970 oacc_loop *next = loop->sibling;
971 loop->sibling = last;
972 last = loop;
973 loop = next;
975 while (loop);
977 return last;
980 /* Discover the OpenACC loops marked up by HEAD and TAIL markers for
981 the current function. */
983 static oacc_loop *
984 oacc_loop_discovery ()
986 /* Clear basic block flags, in particular BB_VISITED which we're going to use
987 in the following. */
988 clear_bb_flags ();
990 oacc_loop *top = new_oacc_loop_outer (current_function_decl);
991 oacc_loop_discover_walk (top, ENTRY_BLOCK_PTR_FOR_FN (cfun));
993 /* The siblings were constructed in reverse order, reverse them so
994 that diagnostics come out in an unsurprising order. */
995 top = oacc_loop_sibling_nreverse (top);
997 return top;
1000 /* Transform the abstract internal function markers starting at FROM
1001 to be for partitioning level LEVEL. Stop when we meet another HEAD
1002 or TAIL marker. */
1004 static void
1005 oacc_loop_xform_head_tail (gcall *from, int level)
1007 enum ifn_unique_kind kind
1008 = (enum ifn_unique_kind) TREE_INT_CST_LOW (gimple_call_arg (from, 0));
1009 tree replacement = build_int_cst (unsigned_type_node, level);
1011 for (gimple_stmt_iterator gsi = gsi_for_stmt (from);;)
1013 gimple *stmt = gsi_stmt (gsi);
1015 if (gimple_call_internal_p (stmt, IFN_UNIQUE))
1017 enum ifn_unique_kind k
1018 = ((enum ifn_unique_kind)
1019 TREE_INT_CST_LOW (gimple_call_arg (stmt, 0)));
1021 if (k == IFN_UNIQUE_OACC_FORK || k == IFN_UNIQUE_OACC_JOIN)
1022 *gimple_call_arg_ptr (stmt, 2) = replacement;
1023 else if (k == kind && stmt != from)
1024 break;
1026 else if (gimple_call_internal_p (stmt, IFN_GOACC_REDUCTION))
1027 *gimple_call_arg_ptr (stmt, 3) = replacement;
1029 gsi_next (&gsi);
1030 while (gsi_end_p (gsi))
1031 gsi = gsi_start_bb (single_succ (gsi_bb (gsi)));
1035 /* Process the discovered OpenACC loops, setting the correct
1036 partitioning level etc. */
1038 static void
1039 oacc_loop_process (oacc_loop *loop)
1041 if (loop->child)
1042 oacc_loop_process (loop->child);
1044 if (loop->mask && !loop->routine)
1046 int ix;
1047 tree mask_arg = build_int_cst (unsigned_type_node, loop->mask);
1048 tree e_mask_arg = build_int_cst (unsigned_type_node, loop->e_mask);
1049 tree chunk_arg = loop->chunk_size;
1050 gcall *call;
1052 for (ix = 0; loop->ifns.iterate (ix, &call); ix++)
1053 switch (gimple_call_internal_fn (call))
1055 case IFN_GOACC_LOOP:
1057 bool is_e = gimple_call_arg (call, 5) == integer_minus_one_node;
1058 gimple_call_set_arg (call, 5, is_e ? e_mask_arg : mask_arg);
1059 if (!is_e)
1060 gimple_call_set_arg (call, 4, chunk_arg);
1062 break;
1064 case IFN_GOACC_TILE:
1065 gimple_call_set_arg (call, 3, mask_arg);
1066 gimple_call_set_arg (call, 4, e_mask_arg);
1067 break;
1069 default:
1070 gcc_unreachable ();
1073 unsigned dim = GOMP_DIM_GANG;
1074 unsigned mask = loop->mask | loop->e_mask;
1075 for (ix = 0; ix != GOMP_DIM_MAX && mask; ix++)
1077 while (!(GOMP_DIM_MASK (dim) & mask))
1078 dim++;
1080 oacc_loop_xform_head_tail (loop->heads[ix], dim);
1081 oacc_loop_xform_head_tail (loop->tails[ix], dim);
1083 mask ^= GOMP_DIM_MASK (dim);
1087 if (loop->sibling)
1088 oacc_loop_process (loop->sibling);
1091 /* Walk the OpenACC loop hierarchy checking and assigning the
1092 programmer-specified partitionings. OUTER_MASK is the partitioning
1093 this loop is contained within. Return mask of partitioning
1094 encountered. If any auto loops are discovered, set GOMP_DIM_MAX
1095 bit. */
1097 static unsigned
1098 oacc_loop_fixed_partitions (oacc_loop *loop, unsigned outer_mask)
1100 unsigned this_mask = loop->mask;
1101 unsigned mask_all = 0;
1102 bool noisy = true;
1104 #ifdef ACCEL_COMPILER
1105 /* When device_type is supported, we want the device compiler to be
1106 noisy, if the loop parameters are device_type-specific. */
1107 noisy = false;
1108 #endif
1110 if (!loop->routine)
1112 bool auto_par = (loop->flags & OLF_AUTO) != 0;
1113 bool seq_par = (loop->flags & OLF_SEQ) != 0;
1114 bool tiling = (loop->flags & OLF_TILE) != 0;
1116 this_mask = ((loop->flags >> OLF_DIM_BASE)
1117 & (GOMP_DIM_MASK (GOMP_DIM_MAX) - 1));
1119 /* Apply auto partitioning if this is a non-partitioned regular
1120 loop, or a (no more than) single-axis tiled loop. */
1121 bool maybe_auto
1122 = !seq_par && this_mask == (tiling ? this_mask & -this_mask : 0);
1124 if ((this_mask != 0) + auto_par + seq_par > 1)
1126 if (noisy)
1127 error_at (loop->loc,
1128 seq_par
1129 ? "%<seq%> overrides other OpenACC loop specifiers"
1130 : "%<auto%> conflicts with other OpenACC loop "
1131 "specifiers");
1132 maybe_auto = false;
1133 loop->flags &= ~OLF_AUTO;
1134 if (seq_par)
1136 loop->flags
1137 &= ~((GOMP_DIM_MASK (GOMP_DIM_MAX) - 1) << OLF_DIM_BASE);
1138 this_mask = 0;
1142 if (maybe_auto && (loop->flags & OLF_INDEPENDENT))
1144 loop->flags |= OLF_AUTO;
1145 mask_all |= GOMP_DIM_MASK (GOMP_DIM_MAX);
1149 if (this_mask & outer_mask)
1151 const oacc_loop *outer;
1152 for (outer = loop->parent; outer; outer = outer->parent)
1153 if ((outer->mask | outer->e_mask) & this_mask)
1154 break;
1156 if (noisy)
1158 if (outer)
1160 error_at (loop->loc,
1161 "%s uses same OpenACC parallelism as containing loop",
1162 loop->routine ? "routine call" : "inner loop");
1163 inform (outer->loc, "containing loop here");
1165 else
1166 error_at (loop->loc,
1167 "%s uses OpenACC parallelism disallowed by containing "
1168 "routine", loop->routine ? "routine call" : "loop");
1170 if (loop->routine)
1171 inform (DECL_SOURCE_LOCATION (loop->routine),
1172 "routine %qD declared here", loop->routine);
1174 this_mask &= ~outer_mask;
1176 else
1178 unsigned outermost = least_bit_hwi (this_mask);
1180 if (outermost && outermost <= outer_mask)
1182 if (noisy)
1184 error_at (loop->loc,
1185 "incorrectly nested OpenACC loop parallelism");
1187 const oacc_loop *outer;
1188 for (outer = loop->parent;
1189 outer->flags && outer->flags < outermost;
1190 outer = outer->parent)
1191 continue;
1192 inform (outer->loc, "containing loop here");
1195 this_mask &= ~outermost;
1199 mask_all |= this_mask;
1201 if (loop->flags & OLF_TILE)
1203 /* When tiling, vector goes to the element loop, and failing
1204 that we put worker there. The std doesn't contemplate
1205 specifying all three. We choose to put worker and vector on
1206 the element loops in that case. */
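/* E.g. 'gang worker vector tile(...)': vector goes to the element
   loops and, because gang is present, worker joins it there, leaving
   only gang on the tile loop.  */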
1207 unsigned this_e_mask = this_mask & GOMP_DIM_MASK (GOMP_DIM_VECTOR);
1208 if (!this_e_mask || this_mask & GOMP_DIM_MASK (GOMP_DIM_GANG))
1209 this_e_mask |= this_mask & GOMP_DIM_MASK (GOMP_DIM_WORKER);
1211 loop->e_mask = this_e_mask;
1212 this_mask ^= this_e_mask;
1215 loop->mask = this_mask;
1217 if (dump_file)
1218 fprintf (dump_file, "Loop %s:%d user specified %d & %d\n",
1219 LOCATION_FILE (loop->loc), LOCATION_LINE (loop->loc),
1220 loop->mask, loop->e_mask);
1222 if (loop->child)
1224 unsigned tmp_mask = outer_mask | this_mask | loop->e_mask;
1225 loop->inner = oacc_loop_fixed_partitions (loop->child, tmp_mask);
1226 mask_all |= loop->inner;
1229 if (loop->sibling)
1230 mask_all |= oacc_loop_fixed_partitions (loop->sibling, outer_mask);
1232 return mask_all;
1235 /* Walk the OpenACC loop hierarchy to assign auto-partitioned loops.
1236 OUTER_MASK is the partitioning this loop is contained within.
1237 OUTER_ASSIGN is true if an outer loop is being auto-partitioned.
1238 Return the cumulative partitioning used by this loop, siblings and
1239 children. */
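/* For instance, two nested independent 'auto' loops inside a
   gang-partitioned region would typically end up worker-partitioned
   (outer) and vector-partitioned (inner) by the assignment below.  */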
1241 static unsigned
1242 oacc_loop_auto_partitions (oacc_loop *loop, unsigned outer_mask,
1243 bool outer_assign)
1245 bool assign = (loop->flags & OLF_AUTO) && (loop->flags & OLF_INDEPENDENT);
1246 bool noisy = true;
1247 bool tiling = loop->flags & OLF_TILE;
1249 #ifdef ACCEL_COMPILER
1250 /* When device_type is supported, we want the device compiler to be
1251 noisy, if the loop parameters are device_type-specific. */
1252 noisy = false;
1253 #endif
1255 if (assign && (!outer_assign || loop->inner))
1257 /* Allocate outermost and non-innermost loops at the outermost
1258 non-innermost available level. */
1259 unsigned this_mask = GOMP_DIM_MASK (GOMP_DIM_GANG);
1261 /* Find the first outermost available partition. */
1262 while (this_mask <= outer_mask)
1263 this_mask <<= 1;
1265 /* Grab two axes if tiling, and we've not assigned anything.  */
1266 if (tiling && !(loop->mask | loop->e_mask))
1267 this_mask |= this_mask << 1;
1269 /* Prohibit the innermost partitioning at the moment. */
1270 this_mask &= GOMP_DIM_MASK (GOMP_DIM_MAX - 1) - 1;
1272 /* Don't use any dimension explicitly claimed by an inner loop. */
1273 this_mask &= ~loop->inner;
1275 if (tiling && !loop->e_mask)
1277 /* If we got two axes, allocate the inner one to the element
1278 loop. */
1279 loop->e_mask = this_mask & (this_mask << 1);
1280 this_mask ^= loop->e_mask;
1283 loop->mask |= this_mask;
1286 if (loop->child)
1288 unsigned tmp_mask = outer_mask | loop->mask | loop->e_mask;
1289 loop->inner = oacc_loop_auto_partitions (loop->child, tmp_mask,
1290 outer_assign | assign);
1293 if (assign && (!loop->mask || (tiling && !loop->e_mask) || !outer_assign))
1295 /* Allocate the loop at the innermost available level. Note
1296 that we do this even if we already assigned this loop the
1297 outermost available level above. That way we'll partition
1298 this along 2 axes, if they are available. */
1299 unsigned this_mask = 0;
1301 /* Determine the outermost partitioning used within this loop. */
1302 this_mask = loop->inner | GOMP_DIM_MASK (GOMP_DIM_MAX);
1303 this_mask = least_bit_hwi (this_mask);
1305 /* Pick the partitioning just inside that one. */
1306 this_mask >>= 1;
1308 /* And avoid picking one already used by an outer loop. */
1309 this_mask &= ~outer_mask;
1311 /* If tiling and we failed completely above, grab the next one
1312 too, making sure it doesn't hit an outer loop. */
1313 if (tiling)
1315 this_mask &= ~(loop->e_mask | loop->mask);
1316 unsigned tile_mask = ((this_mask >> 1)
1317 & ~(outer_mask | loop->e_mask | loop->mask));
1319 if (tile_mask || loop->mask)
1321 loop->e_mask |= this_mask;
1322 this_mask = tile_mask;
1324 if (!loop->e_mask && noisy)
1325 warning_at (loop->loc, 0,
1326 "insufficient partitioning available"
1327 " to parallelize element loop");
1330 loop->mask |= this_mask;
1331 if (!loop->mask && noisy)
1332 warning_at (loop->loc, 0,
1333 "insufficient partitioning available"
1334 " to parallelize%s loop", tiling ? " tile" : "");
1337 if (assign && dump_file)
1338 fprintf (dump_file, "Auto loop %s:%d assigned %d & %d\n",
1339 LOCATION_FILE (loop->loc), LOCATION_LINE (loop->loc),
1340 loop->mask, loop->e_mask);
1342 unsigned inner_mask = 0;
1344 if (loop->sibling)
1345 inner_mask |= oacc_loop_auto_partitions (loop->sibling,
1346 outer_mask, outer_assign);
1348 inner_mask |= loop->inner | loop->mask | loop->e_mask;
1350 return inner_mask;
1353 /* Walk the OpenACC loop hierarchy to check and assign partitioning
1354 axes. Return mask of partitioning. */
1356 static unsigned
1357 oacc_loop_partition (oacc_loop *loop, unsigned outer_mask)
1359 unsigned mask_all = oacc_loop_fixed_partitions (loop, outer_mask);
1361 if (mask_all & GOMP_DIM_MASK (GOMP_DIM_MAX))
1363 mask_all ^= GOMP_DIM_MASK (GOMP_DIM_MAX);
1364 mask_all |= oacc_loop_auto_partitions (loop, outer_mask, false);
1366 return mask_all;
1369 /* Default fork/join early expander. Delete the function calls if
1370 there is no RTL expander. */
1372 bool
1373 default_goacc_fork_join (gcall *ARG_UNUSED (call),
1374 const int *ARG_UNUSED (dims), bool is_fork)
1376 if (is_fork)
1377 return targetm.have_oacc_fork ();
1378 else
1379 return targetm.have_oacc_join ();
1382 /* Default goacc.reduction early expander.
1384 LHS-opt = IFN_REDUCTION (KIND, RES_PTR, VAR, LEVEL, OP, OFFSET)
1385 If RES_PTR is not integer-zerop:
1386 SETUP - emit 'LHS = *RES_PTR', LHS = NULL
1387 TEARDOWN - emit '*RES_PTR = VAR'
1388 If LHS is not NULL
1389 emit 'LHS = VAR' */
1391 void
1392 default_goacc_reduction (gcall *call)
1394 unsigned code = (unsigned)TREE_INT_CST_LOW (gimple_call_arg (call, 0));
1395 gimple_stmt_iterator gsi = gsi_for_stmt (call);
1396 tree lhs = gimple_call_lhs (call);
1397 tree var = gimple_call_arg (call, 2);
1398 gimple_seq seq = NULL;
1400 if (code == IFN_GOACC_REDUCTION_SETUP
1401 || code == IFN_GOACC_REDUCTION_TEARDOWN)
1403 /* Setup and Teardown need to copy from/to the receiver object,
1404 if there is one. */
1405 tree ref_to_res = gimple_call_arg (call, 1);
1407 if (!integer_zerop (ref_to_res))
1409 tree dst = build_simple_mem_ref (ref_to_res);
1410 tree src = var;
1412 if (code == IFN_GOACC_REDUCTION_SETUP)
1414 src = dst;
1415 dst = lhs;
1416 lhs = NULL;
1418 gimple_seq_add_stmt (&seq, gimple_build_assign (dst, src));
1422 /* Copy VAR to LHS, if there is an LHS. */
1423 if (lhs)
1424 gimple_seq_add_stmt (&seq, gimple_build_assign (lhs, var));
1426 gsi_replace_with_seq (&gsi, seq, true);
1429 /* Main entry point for oacc transformations which run on the device
1430 compiler after LTO, so we know what the target device is at this
1431 point (including the host fallback). */
1433 static unsigned int
1434 execute_oacc_device_lower ()
1436 tree attrs = oacc_get_fn_attrib (current_function_decl);
1438 if (!attrs)
1439 /* Not an offloaded function. */
1440 return 0;
1442 /* Parse the default dim argument exactly once. */
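/* (After the first parse, flag_openacc_dims is redirected to point at
   itself, so the comparison below fails and the string is not parsed
   again for later offloaded functions.)  */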
1443 if ((const void *)flag_openacc_dims != &flag_openacc_dims)
1445 oacc_parse_default_dims (flag_openacc_dims);
1446 flag_openacc_dims = (char *)&flag_openacc_dims;
1449 /* Discover, partition and process the loops. */
1450 oacc_loop *loops = oacc_loop_discovery ();
1451 int fn_level = oacc_fn_attrib_level (attrs);
1453 if (dump_file)
1454 fprintf (dump_file, oacc_fn_attrib_kernels_p (attrs)
1455 ? "Function is kernels offload\n"
1456 : fn_level < 0 ? "Function is parallel offload\n"
1457 : "Function is routine level %d\n", fn_level);
1459 unsigned outer_mask = fn_level >= 0 ? GOMP_DIM_MASK (fn_level) - 1 : 0;
1460 unsigned used_mask = oacc_loop_partition (loops, outer_mask);
1461 int dims[GOMP_DIM_MAX];
1463 oacc_validate_dims (current_function_decl, attrs, dims, fn_level, used_mask);
1465 if (dump_file)
1467 const char *comma = "Compute dimensions [";
1468 for (int ix = 0; ix != GOMP_DIM_MAX; ix++, comma = ", ")
1469 fprintf (dump_file, "%s%d", comma, dims[ix]);
1470 fprintf (dump_file, "]\n");
1473 oacc_loop_process (loops);
1474 if (dump_file)
1476 fprintf (dump_file, "OpenACC loops\n");
1477 dump_oacc_loop (dump_file, loops, 0);
1478 fprintf (dump_file, "\n");
1481 /* Offloaded targets may introduce new basic blocks, which require
1482 dominance information to update SSA. */
1483 calculate_dominance_info (CDI_DOMINATORS);
1485 /* Now lower internal loop functions to target-specific code
1486 sequences. */
1487 basic_block bb;
1488 FOR_ALL_BB_FN (bb, cfun)
1489 for (gimple_stmt_iterator gsi = gsi_start_bb (bb); !gsi_end_p (gsi);)
1491 gimple *stmt = gsi_stmt (gsi);
1492 if (!is_gimple_call (stmt))
1494 gsi_next (&gsi);
1495 continue;
1498 gcall *call = as_a <gcall *> (stmt);
1499 if (!gimple_call_internal_p (call))
1501 gsi_next (&gsi);
1502 continue;
1505 /* Rewind to allow rescan. */
1506 gsi_prev (&gsi);
1507 bool rescan = false, remove = false;
1508 enum internal_fn ifn_code = gimple_call_internal_fn (call);
1510 switch (ifn_code)
1512 default: break;
1514 case IFN_GOACC_TILE:
1515 oacc_xform_tile (call);
1516 rescan = true;
1517 break;
1519 case IFN_GOACC_LOOP:
1520 oacc_xform_loop (call);
1521 rescan = true;
1522 break;
1524 case IFN_GOACC_REDUCTION:
1525 /* Mark the function for SSA renaming. */
1526 mark_virtual_operands_for_renaming (cfun);
1528 /* If the level is -1, this ended up being an unused
1529 axis. Handle as a default. */
1530 if (integer_minus_onep (gimple_call_arg (call, 3)))
1531 default_goacc_reduction (call);
1532 else
1533 targetm.goacc.reduction (call);
1534 rescan = true;
1535 break;
1537 case IFN_UNIQUE:
1539 enum ifn_unique_kind kind
1540 = ((enum ifn_unique_kind)
1541 TREE_INT_CST_LOW (gimple_call_arg (call, 0)));
1543 switch (kind)
1545 default:
1546 break;
1548 case IFN_UNIQUE_OACC_FORK:
1549 case IFN_UNIQUE_OACC_JOIN:
1550 if (integer_minus_onep (gimple_call_arg (call, 2)))
1551 remove = true;
1552 else if (!targetm.goacc.fork_join
1553 (call, dims, kind == IFN_UNIQUE_OACC_FORK))
1554 remove = true;
1555 break;
1557 case IFN_UNIQUE_OACC_HEAD_MARK:
1558 case IFN_UNIQUE_OACC_TAIL_MARK:
1559 remove = true;
1560 break;
1562 break;
1566 if (gsi_end_p (gsi))
1567 /* We rewound past the beginning of the BB. */
1568 gsi = gsi_start_bb (bb);
1569 else
1570 /* Undo the rewind. */
1571 gsi_next (&gsi);
1573 if (remove)
1575 if (gimple_vdef (call))
1576 replace_uses_by (gimple_vdef (call), gimple_vuse (call));
1577 if (gimple_call_lhs (call))
1579 /* Propagate the data dependency var. */
1580 gimple *ass = gimple_build_assign (gimple_call_lhs (call),
1581 gimple_call_arg (call, 1));
1582 gsi_replace (&gsi, ass, false);
1584 else
1585 gsi_remove (&gsi, true);
1587 else if (!rescan)
1588 /* If not rescanning, advance over the call. */
1589 gsi_next (&gsi);
1592 free_oacc_loop (loops);
1594 return 0;
1597 /* Default launch dimension validator. Force everything to 1. A
1598 backend that wants to provide larger dimensions must override this
1599 hook. */
1601 bool
1602 default_goacc_validate_dims (tree ARG_UNUSED (decl), int *dims,
1603 int ARG_UNUSED (fn_level))
1605 bool changed = false;
1607 for (unsigned ix = 0; ix != GOMP_DIM_MAX; ix++)
1609 if (dims[ix] != 1)
1611 dims[ix] = 1;
1612 changed = true;
1616 return changed;
1619 /* Default dimension bound is unknown on accelerator and 1 on host. */
1621 int
1622 default_goacc_dim_limit (int ARG_UNUSED (axis))
1624 #ifdef ACCEL_COMPILER
1625 return 0;
1626 #else
1627 return 1;
1628 #endif
1631 namespace {
1633 const pass_data pass_data_oacc_device_lower =
1635 GIMPLE_PASS, /* type */
1636 "oaccdevlow", /* name */
1637 OPTGROUP_OPENMP, /* optinfo_flags */
1638 TV_NONE, /* tv_id */
1639 PROP_cfg, /* properties_required */
1640 0 /* Possibly PROP_gimple_eomp. */, /* properties_provided */
1641 0, /* properties_destroyed */
1642 0, /* todo_flags_start */
1643 TODO_update_ssa | TODO_cleanup_cfg, /* todo_flags_finish */
1646 class pass_oacc_device_lower : public gimple_opt_pass
1648 public:
1649 pass_oacc_device_lower (gcc::context *ctxt)
1650 : gimple_opt_pass (pass_data_oacc_device_lower, ctxt)
1653 /* opt_pass methods: */
1654 virtual bool gate (function *) { return flag_openacc; };
1656 virtual unsigned int execute (function *)
1658 return execute_oacc_device_lower ();
1661 }; // class pass_oacc_device_lower
1663 } // anon namespace
1665 gimple_opt_pass *
1666 make_pass_oacc_device_lower (gcc::context *ctxt)
1668 return new pass_oacc_device_lower (ctxt);
1671 /* Cleanup uses of SIMT placeholder internal functions: on non-SIMT targets,
1672 VF is 1 and LANE is 0; on SIMT targets, VF is folded to a constant, and
1673 LANE is kept to be expanded to RTL later on. Also cleanup all other SIMT
1674 internal functions on non-SIMT targets, and likewise some SIMD internal
1675 functions on SIMT targets. */
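/* E.g. on a non-SIMT target (VF == 1), GOMP_SIMT_VF () folds to 1 and
   GOMP_SIMT_LANE () to 0, while GOMP_SIMD_VF () is kept for the
   vectorizer; on a SIMT target the GOMP_SIMD_* variants fold instead
   and GOMP_SIMT_LANE () survives to RTL expansion.  */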
1677 static unsigned int
1678 execute_omp_device_lower ()
1680 int vf = targetm.simt.vf ? targetm.simt.vf () : 1;
1681 basic_block bb;
1682 gimple_stmt_iterator gsi;
1683 FOR_EACH_BB_FN (bb, cfun)
1684 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
1686 gimple *stmt = gsi_stmt (gsi);
1687 if (!is_gimple_call (stmt) || !gimple_call_internal_p (stmt))
1688 continue;
1689 tree lhs = gimple_call_lhs (stmt), rhs = NULL_TREE;
1690 tree type = lhs ? TREE_TYPE (lhs) : integer_type_node;
1691 switch (gimple_call_internal_fn (stmt))
1693 case IFN_GOMP_USE_SIMT:
1694 rhs = vf == 1 ? integer_zero_node : integer_one_node;
1695 break;
1696 case IFN_GOMP_SIMT_LANE:
1697 case IFN_GOMP_SIMT_LAST_LANE:
1698 rhs = vf == 1 ? build_zero_cst (type) : NULL_TREE;
1699 break;
1700 case IFN_GOMP_SIMT_VF:
1701 rhs = build_int_cst (type, vf);
1702 break;
1703 case IFN_GOMP_SIMT_ORDERED_PRED:
1704 rhs = vf == 1 ? integer_zero_node : NULL_TREE;
1705 if (rhs || !lhs)
1706 unlink_stmt_vdef (stmt);
1707 break;
1708 case IFN_GOMP_SIMT_VOTE_ANY:
1709 case IFN_GOMP_SIMT_XCHG_BFLY:
1710 case IFN_GOMP_SIMT_XCHG_IDX:
1711 rhs = vf == 1 ? gimple_call_arg (stmt, 0) : NULL_TREE;
1712 break;
1713 case IFN_GOMP_SIMD_LANE:
1714 case IFN_GOMP_SIMD_LAST_LANE:
1715 rhs = vf != 1 ? build_zero_cst (type) : NULL_TREE;
1716 break;
1717 case IFN_GOMP_SIMD_VF:
1718 rhs = vf != 1 ? build_one_cst (type) : NULL_TREE;
1719 break;
1720 default:
1721 continue;
1723 if (lhs && !rhs)
1724 continue;
1725 stmt = lhs ? gimple_build_assign (lhs, rhs) : gimple_build_nop ();
1726 gsi_replace (&gsi, stmt, false);
1728 if (vf != 1)
1729 cfun->has_force_vectorize_loops = false;
1730 return 0;
1733 namespace {
1735 const pass_data pass_data_omp_device_lower =
1737 GIMPLE_PASS, /* type */
1738 "ompdevlow", /* name */
1739 OPTGROUP_OPENMP, /* optinfo_flags */
1740 TV_NONE, /* tv_id */
1741 PROP_cfg, /* properties_required */
1742 PROP_gimple_lomp_dev, /* properties_provided */
1743 0, /* properties_destroyed */
1744 0, /* todo_flags_start */
1745 TODO_update_ssa, /* todo_flags_finish */
1748 class pass_omp_device_lower : public gimple_opt_pass
1750 public:
1751 pass_omp_device_lower (gcc::context *ctxt)
1752 : gimple_opt_pass (pass_data_omp_device_lower, ctxt)
1755 /* opt_pass methods: */
1756 virtual bool gate (function *fun)
1758 return !(fun->curr_properties & PROP_gimple_lomp_dev);
1760 virtual unsigned int execute (function *)
1762 return execute_omp_device_lower ();
1765 }; // class pass_omp_device_lower
1767 } // anon namespace
1769 gimple_opt_pass *
1770 make_pass_omp_device_lower (gcc::context *ctxt)
1772 return new pass_omp_device_lower (ctxt);
1775 /* "omp declare target link" handling pass. */
1777 namespace {
1779 const pass_data pass_data_omp_target_link =
1781 GIMPLE_PASS, /* type */
1782 "omptargetlink", /* name */
1783 OPTGROUP_OPENMP, /* optinfo_flags */
1784 TV_NONE, /* tv_id */
1785 PROP_ssa, /* properties_required */
1786 0, /* properties_provided */
1787 0, /* properties_destroyed */
1788 0, /* todo_flags_start */
1789 TODO_update_ssa, /* todo_flags_finish */
1792 class pass_omp_target_link : public gimple_opt_pass
1794 public:
1795 pass_omp_target_link (gcc::context *ctxt)
1796 : gimple_opt_pass (pass_data_omp_target_link, ctxt)
1799 /* opt_pass methods: */
1800 virtual bool gate (function *fun)
1802 #ifdef ACCEL_COMPILER
1803 tree attrs = DECL_ATTRIBUTES (fun->decl);
1804 return lookup_attribute ("omp declare target", attrs)
1805 || lookup_attribute ("omp target entrypoint", attrs);
1806 #else
1807 (void) fun;
1808 return false;
1809 #endif
1812 virtual unsigned execute (function *);
1815 /* Callback for walk_gimple_stmt used to scan for link var operands. */
1817 static tree
1818 find_link_var_op (tree *tp, int *walk_subtrees, void *)
1820 tree t = *tp;
1822 if (VAR_P (t)
1823 && DECL_HAS_VALUE_EXPR_P (t)
1824 && is_global_var (t)
1825 && lookup_attribute ("omp declare target link", DECL_ATTRIBUTES (t)))
1827 *walk_subtrees = 0;
1828 return t;
1831 return NULL_TREE;
1834 unsigned
1835 pass_omp_target_link::execute (function *fun)
1837 basic_block bb;
1838 FOR_EACH_BB_FN (bb, fun)
1840 gimple_stmt_iterator gsi;
1841 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
1842 if (walk_gimple_stmt (&gsi, NULL, find_link_var_op, NULL))
1843 gimple_regimplify_operands (gsi_stmt (gsi), &gsi);
1846 return 0;
1849 } // anon namespace
1851 gimple_opt_pass *
1852 make_pass_omp_target_link (gcc::context *ctxt)
1854 return new pass_omp_target_link (ctxt);