[doc] Correct optimisation levels documentation for -fstore-merging
[official-gcc.git] / gcc / omp-offload.c
blob6ff6bc2eeb9557c463894ed4b862ff03f79ec8c4
1 /* Bits of OpenMP and OpenACC handling that is specific to device offloading
2 and a lowering pass for OpenACC device directives.
4 Copyright (C) 2005-2017 Free Software Foundation, Inc.
6 This file is part of GCC.
8 GCC is free software; you can redistribute it and/or modify it under
9 the terms of the GNU General Public License as published by the Free
10 Software Foundation; either version 3, or (at your option) any later
11 version.
13 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
14 WARRANTY; without even the implied warranty of MERCHANTABILITY or
15 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
16 for more details.
18 You should have received a copy of the GNU General Public License
19 along with GCC; see the file COPYING3. If not see
20 <http://www.gnu.org/licenses/>. */
22 #include "config.h"
23 #include "system.h"
24 #include "coretypes.h"
25 #include "backend.h"
26 #include "target.h"
27 #include "tree.h"
28 #include "gimple.h"
29 #include "tree-pass.h"
30 #include "ssa.h"
31 #include "cgraph.h"
32 #include "pretty-print.h"
33 #include "diagnostic-core.h"
34 #include "fold-const.h"
35 #include "internal-fn.h"
36 #include "gimplify.h"
37 #include "gimple-iterator.h"
38 #include "gimplify-me.h"
39 #include "gimple-walk.h"
40 #include "tree-cfg.h"
41 #include "tree-into-ssa.h"
42 #include "common/common-target.h"
43 #include "omp-general.h"
44 #include "omp-offload.h"
45 #include "lto-section-names.h"
46 #include "gomp-constants.h"
47 #include "gimple-pretty-print.h"
49 /* Describe the OpenACC looping structure of a function. The entire
50 function is held in a 'NULL' loop. */
52 struct oacc_loop
54 oacc_loop *parent; /* Containing loop. */
56 oacc_loop *child; /* First inner loop. */
58 oacc_loop *sibling; /* Next loop within same parent. */
60 location_t loc; /* Location of the loop start. */
62 gcall *marker; /* Initial head marker. */
64 gcall *heads[GOMP_DIM_MAX]; /* Head marker functions. */
65 gcall *tails[GOMP_DIM_MAX]; /* Tail marker functions. */
67 tree routine; /* Pseudo-loop enclosing a routine. */
69 unsigned mask; /* Partitioning mask. */
70 unsigned inner; /* Partitioning of inner loops. */
71 unsigned flags; /* Partitioning flags. */
72 unsigned ifns; /* Contained loop abstraction functions. */
73 tree chunk_size; /* Chunk size. */
74 gcall *head_end; /* Final marker of head sequence. */
77 /* Holds offload tables with decls. */
78 vec<tree, va_gc> *offload_funcs, *offload_vars;
80 /* Return level at which oacc routine may spawn a partitioned loop, or
81 -1 if it is not a routine (i.e. is an offload fn). */
83 static int
84 oacc_fn_attrib_level (tree attr)
86 tree pos = TREE_VALUE (attr);
88 if (!TREE_PURPOSE (pos))
89 return -1;
91 int ix = 0;
92 for (ix = 0; ix != GOMP_DIM_MAX;
93 ix++, pos = TREE_CHAIN (pos))
94 if (!integer_zerop (TREE_PURPOSE (pos)))
95 break;
97 return ix;
100 /* Helper function for omp_finish_file routine. Takes decls from V_DECLS and
101 adds their addresses and sizes to constructor-vector V_CTOR. */
103 static void
104 add_decls_addresses_to_decl_constructor (vec<tree, va_gc> *v_decls,
105 vec<constructor_elt, va_gc> *v_ctor)
107 unsigned len = vec_safe_length (v_decls);
108 for (unsigned i = 0; i < len; i++)
110 tree it = (*v_decls)[i];
111 bool is_var = VAR_P (it);
112 bool is_link_var
113 = is_var
114 #ifdef ACCEL_COMPILER
115 && DECL_HAS_VALUE_EXPR_P (it)
116 #endif
117 && lookup_attribute ("omp declare target link", DECL_ATTRIBUTES (it));
119 tree size = NULL_TREE;
120 if (is_var)
121 size = fold_convert (const_ptr_type_node, DECL_SIZE_UNIT (it));
123 tree addr;
124 if (!is_link_var)
125 addr = build_fold_addr_expr (it);
126 else
128 #ifdef ACCEL_COMPILER
129 /* For "omp declare target link" vars add address of the pointer to
130 the target table, instead of address of the var. */
131 tree value_expr = DECL_VALUE_EXPR (it);
132 tree link_ptr_decl = TREE_OPERAND (value_expr, 0);
133 varpool_node::finalize_decl (link_ptr_decl);
134 addr = build_fold_addr_expr (link_ptr_decl);
135 #else
136 addr = build_fold_addr_expr (it);
137 #endif
139 /* Most significant bit of the size marks "omp declare target link"
140 vars in host and target tables. */
141 unsigned HOST_WIDE_INT isize = tree_to_uhwi (size);
142 isize |= 1ULL << (int_size_in_bytes (const_ptr_type_node)
143 * BITS_PER_UNIT - 1);
144 size = wide_int_to_tree (const_ptr_type_node, isize);
147 CONSTRUCTOR_APPEND_ELT (v_ctor, NULL_TREE, addr);
148 if (is_var)
149 CONSTRUCTOR_APPEND_ELT (v_ctor, NULL_TREE, size);
153 /* Create new symbols containing (address, size) pairs for global variables,
154 marked with "omp declare target" attribute, as well as addresses for the
155 functions, which are outlined offloading regions. */
156 void
157 omp_finish_file (void)
159 unsigned num_funcs = vec_safe_length (offload_funcs);
160 unsigned num_vars = vec_safe_length (offload_vars);
162 if (num_funcs == 0 && num_vars == 0)
163 return;
165 if (targetm_common.have_named_sections)
167 vec<constructor_elt, va_gc> *v_f, *v_v;
168 vec_alloc (v_f, num_funcs);
169 vec_alloc (v_v, num_vars * 2);
171 add_decls_addresses_to_decl_constructor (offload_funcs, v_f);
172 add_decls_addresses_to_decl_constructor (offload_vars, v_v);
174 tree vars_decl_type = build_array_type_nelts (pointer_sized_int_node,
175 num_vars * 2);
176 tree funcs_decl_type = build_array_type_nelts (pointer_sized_int_node,
177 num_funcs);
178 SET_TYPE_ALIGN (vars_decl_type, TYPE_ALIGN (pointer_sized_int_node));
179 SET_TYPE_ALIGN (funcs_decl_type, TYPE_ALIGN (pointer_sized_int_node));
180 tree ctor_v = build_constructor (vars_decl_type, v_v);
181 tree ctor_f = build_constructor (funcs_decl_type, v_f);
182 TREE_CONSTANT (ctor_v) = TREE_CONSTANT (ctor_f) = 1;
183 TREE_STATIC (ctor_v) = TREE_STATIC (ctor_f) = 1;
184 tree funcs_decl = build_decl (UNKNOWN_LOCATION, VAR_DECL,
185 get_identifier (".offload_func_table"),
186 funcs_decl_type);
187 tree vars_decl = build_decl (UNKNOWN_LOCATION, VAR_DECL,
188 get_identifier (".offload_var_table"),
189 vars_decl_type);
190 TREE_STATIC (funcs_decl) = TREE_STATIC (vars_decl) = 1;
191 /* Do not align tables more than TYPE_ALIGN (pointer_sized_int_node),
192 otherwise a joint table in a binary will contain padding between
193 tables from multiple object files. */
194 DECL_USER_ALIGN (funcs_decl) = DECL_USER_ALIGN (vars_decl) = 1;
195 SET_DECL_ALIGN (funcs_decl, TYPE_ALIGN (funcs_decl_type));
196 SET_DECL_ALIGN (vars_decl, TYPE_ALIGN (vars_decl_type));
197 DECL_INITIAL (funcs_decl) = ctor_f;
198 DECL_INITIAL (vars_decl) = ctor_v;
199 set_decl_section_name (funcs_decl, OFFLOAD_FUNC_TABLE_SECTION_NAME);
200 set_decl_section_name (vars_decl, OFFLOAD_VAR_TABLE_SECTION_NAME);
202 varpool_node::finalize_decl (vars_decl);
203 varpool_node::finalize_decl (funcs_decl);
205 else
207 for (unsigned i = 0; i < num_funcs; i++)
209 tree it = (*offload_funcs)[i];
210 targetm.record_offload_symbol (it);
212 for (unsigned i = 0; i < num_vars; i++)
214 tree it = (*offload_vars)[i];
215 targetm.record_offload_symbol (it);
220 /* Find the number of threads (POS = false), or thread number (POS =
221 true) for an OpenACC region partitioned as MASK. Setup code
222 required for the calculation is added to SEQ. */
224 static tree
225 oacc_thread_numbers (bool pos, int mask, gimple_seq *seq)
227 tree res = pos ? NULL_TREE : build_int_cst (unsigned_type_node, 1);
228 unsigned ix;
230 /* Start at gang level, and examine relevant dimension indices. */
231 for (ix = GOMP_DIM_GANG; ix != GOMP_DIM_MAX; ix++)
232 if (GOMP_DIM_MASK (ix) & mask)
234 tree arg = build_int_cst (unsigned_type_node, ix);
236 if (res)
238 /* We had an outer index, so scale that by the size of
239 this dimension. */
240 tree n = create_tmp_var (integer_type_node);
241 gimple *call
242 = gimple_build_call_internal (IFN_GOACC_DIM_SIZE, 1, arg);
244 gimple_call_set_lhs (call, n);
245 gimple_seq_add_stmt (seq, call);
246 res = fold_build2 (MULT_EXPR, integer_type_node, res, n);
248 if (pos)
250 /* Determine index in this dimension. */
251 tree id = create_tmp_var (integer_type_node);
252 gimple *call = gimple_build_call_internal
253 (IFN_GOACC_DIM_POS, 1, arg);
255 gimple_call_set_lhs (call, id);
256 gimple_seq_add_stmt (seq, call);
257 if (res)
258 res = fold_build2 (PLUS_EXPR, integer_type_node, res, id);
259 else
260 res = id;
264 if (res == NULL_TREE)
265 res = integer_zero_node;
267 return res;
270 /* Transform IFN_GOACC_LOOP calls to actual code. See
271 expand_oacc_for for where these are generated. At the vector
272 level, we stride loops, such that each member of a warp will
273 operate on adjacent iterations. At the worker and gang level,
274 each gang/warp executes a set of contiguous iterations. Chunking
275 can override this such that each iteration engine executes a
276 contiguous chunk, and then moves on to stride to the next chunk. */
278 static void
279 oacc_xform_loop (gcall *call)
281 gimple_stmt_iterator gsi = gsi_for_stmt (call);
282 enum ifn_goacc_loop_kind code
283 = (enum ifn_goacc_loop_kind) TREE_INT_CST_LOW (gimple_call_arg (call, 0));
284 tree dir = gimple_call_arg (call, 1);
285 tree range = gimple_call_arg (call, 2);
286 tree step = gimple_call_arg (call, 3);
287 tree chunk_size = NULL_TREE;
288 unsigned mask = (unsigned) TREE_INT_CST_LOW (gimple_call_arg (call, 5));
289 tree lhs = gimple_call_lhs (call);
290 tree type = TREE_TYPE (lhs);
291 tree diff_type = TREE_TYPE (range);
292 tree r = NULL_TREE;
293 gimple_seq seq = NULL;
294 bool chunking = false, striding = true;
295 unsigned outer_mask = mask & (~mask + 1); // Outermost partitioning
296 unsigned inner_mask = mask & ~outer_mask; // Inner partitioning (if any)
298 #ifdef ACCEL_COMPILER
299 chunk_size = gimple_call_arg (call, 4);
300 if (integer_minus_onep (chunk_size) /* Force static allocation. */
301 || integer_zerop (chunk_size)) /* Default (also static). */
303 /* If we're at the gang level, we want each to execute a
304 contiguous run of iterations. Otherwise we want each element
305 to stride. */
306 striding = !(outer_mask & GOMP_DIM_MASK (GOMP_DIM_GANG));
307 chunking = false;
309 else
311 /* Chunk of size 1 is striding. */
312 striding = integer_onep (chunk_size);
313 chunking = !striding;
315 #endif
317 /* striding=true, chunking=true
318 -> invalid.
319 striding=true, chunking=false
320 -> chunks=1
321 striding=false,chunking=true
322 -> chunks=ceil (range/(chunksize*threads*step))
323 striding=false,chunking=false
324 -> chunk_size=ceil(range/(threads*step)),chunks=1 */
325 push_gimplify_context (true);
327 switch (code)
329 default: gcc_unreachable ();
331 case IFN_GOACC_LOOP_CHUNKS:
332 if (!chunking)
333 r = build_int_cst (type, 1);
334 else
336 /* chunk_max
337 = (range - dir) / (chunks * step * num_threads) + dir */
338 tree per = oacc_thread_numbers (false, mask, &seq);
339 per = fold_convert (type, per);
340 chunk_size = fold_convert (type, chunk_size);
341 per = fold_build2 (MULT_EXPR, type, per, chunk_size);
342 per = fold_build2 (MULT_EXPR, type, per, step);
343 r = build2 (MINUS_EXPR, type, range, dir);
344 r = build2 (PLUS_EXPR, type, r, per);
345 r = build2 (TRUNC_DIV_EXPR, type, r, per);
347 break;
349 case IFN_GOACC_LOOP_STEP:
351 /* If striding, step by the entire compute volume, otherwise
352 step by the inner volume. */
353 unsigned volume = striding ? mask : inner_mask;
355 r = oacc_thread_numbers (false, volume, &seq);
356 r = build2 (MULT_EXPR, type, fold_convert (type, r), step);
358 break;
360 case IFN_GOACC_LOOP_OFFSET:
361 if (striding)
363 r = oacc_thread_numbers (true, mask, &seq);
364 r = fold_convert (diff_type, r);
366 else
368 tree inner_size = oacc_thread_numbers (false, inner_mask, &seq);
369 tree outer_size = oacc_thread_numbers (false, outer_mask, &seq);
370 tree volume = fold_build2 (MULT_EXPR, TREE_TYPE (inner_size),
371 inner_size, outer_size);
373 volume = fold_convert (diff_type, volume);
374 if (chunking)
375 chunk_size = fold_convert (diff_type, chunk_size);
376 else
378 tree per = fold_build2 (MULT_EXPR, diff_type, volume, step);
380 chunk_size = build2 (MINUS_EXPR, diff_type, range, dir);
381 chunk_size = build2 (PLUS_EXPR, diff_type, chunk_size, per);
382 chunk_size = build2 (TRUNC_DIV_EXPR, diff_type, chunk_size, per);
385 tree span = build2 (MULT_EXPR, diff_type, chunk_size,
386 fold_convert (diff_type, inner_size));
387 r = oacc_thread_numbers (true, outer_mask, &seq);
388 r = fold_convert (diff_type, r);
389 r = build2 (MULT_EXPR, diff_type, r, span);
391 tree inner = oacc_thread_numbers (true, inner_mask, &seq);
392 inner = fold_convert (diff_type, inner);
393 r = fold_build2 (PLUS_EXPR, diff_type, r, inner);
395 if (chunking)
397 tree chunk = fold_convert (diff_type, gimple_call_arg (call, 6));
398 tree per
399 = fold_build2 (MULT_EXPR, diff_type, volume, chunk_size);
400 per = build2 (MULT_EXPR, diff_type, per, chunk);
402 r = build2 (PLUS_EXPR, diff_type, r, per);
405 r = fold_build2 (MULT_EXPR, diff_type, r, step);
406 if (type != diff_type)
407 r = fold_convert (type, r);
408 break;
410 case IFN_GOACC_LOOP_BOUND:
411 if (striding)
412 r = range;
413 else
415 tree inner_size = oacc_thread_numbers (false, inner_mask, &seq);
416 tree outer_size = oacc_thread_numbers (false, outer_mask, &seq);
417 tree volume = fold_build2 (MULT_EXPR, TREE_TYPE (inner_size),
418 inner_size, outer_size);
420 volume = fold_convert (diff_type, volume);
421 if (chunking)
422 chunk_size = fold_convert (diff_type, chunk_size);
423 else
425 tree per = fold_build2 (MULT_EXPR, diff_type, volume, step);
427 chunk_size = build2 (MINUS_EXPR, diff_type, range, dir);
428 chunk_size = build2 (PLUS_EXPR, diff_type, chunk_size, per);
429 chunk_size = build2 (TRUNC_DIV_EXPR, diff_type, chunk_size, per);
432 tree span = build2 (MULT_EXPR, diff_type, chunk_size,
433 fold_convert (diff_type, inner_size));
435 r = fold_build2 (MULT_EXPR, diff_type, span, step);
437 tree offset = gimple_call_arg (call, 6);
438 r = build2 (PLUS_EXPR, diff_type, r,
439 fold_convert (diff_type, offset));
440 r = build2 (integer_onep (dir) ? MIN_EXPR : MAX_EXPR,
441 diff_type, r, range);
443 if (diff_type != type)
444 r = fold_convert (type, r);
445 break;
448 gimplify_assign (lhs, r, &seq);
450 pop_gimplify_context (NULL);
452 gsi_replace_with_seq (&gsi, seq, true);
455 /* Default partitioned and minimum partitioned dimensions. */
457 static int oacc_default_dims[GOMP_DIM_MAX];
458 static int oacc_min_dims[GOMP_DIM_MAX];
460 /* Parse the default dimension parameter. This is a set of
461 :-separated optional compute dimensions. Each specified dimension
462 is a positive integer. When device type support is added, it is
463 planned to be a comma separated list of such compute dimensions,
464 with all but the first prefixed by the colon-terminated device
465 type. */
467 static void
468 oacc_parse_default_dims (const char *dims)
470 int ix;
472 for (ix = GOMP_DIM_MAX; ix--;)
474 oacc_default_dims[ix] = -1;
475 oacc_min_dims[ix] = 1;
478 #ifndef ACCEL_COMPILER
479 /* Cannot be overridden on the host. */
480 dims = NULL;
481 #endif
482 if (dims)
484 const char *pos = dims;
486 for (ix = 0; *pos && ix != GOMP_DIM_MAX; ix++)
488 if (ix)
490 if (*pos != ':')
491 goto malformed;
492 pos++;
495 if (*pos != ':')
497 long val;
498 const char *eptr;
500 errno = 0;
501 val = strtol (pos, CONST_CAST (char **, &eptr), 10);
502 if (errno || val <= 0 || (int) val != val)
503 goto malformed;
504 pos = eptr;
505 oacc_default_dims[ix] = (int) val;
508 if (*pos)
510 malformed:
511 error_at (UNKNOWN_LOCATION,
512 "-fopenacc-dim operand is malformed at '%s'", pos);
516 /* Allow the backend to validate the dimensions. */
517 targetm.goacc.validate_dims (NULL_TREE, oacc_default_dims, -1);
518 targetm.goacc.validate_dims (NULL_TREE, oacc_min_dims, -2);
521 /* Validate and update the dimensions for offloaded FN. ATTRS is the
522 raw attribute. DIMS is an array of dimensions, which is filled in.
523 LEVEL is the partitioning level of a routine, or -1 for an offload
524 region itself. USED is the mask of partitioned execution in the
525 function. */
527 static void
528 oacc_validate_dims (tree fn, tree attrs, int *dims, int level, unsigned used)
530 tree purpose[GOMP_DIM_MAX];
531 unsigned ix;
532 tree pos = TREE_VALUE (attrs);
533 bool is_kernel = oacc_fn_attrib_kernels_p (attrs);
535 /* Make sure the attribute creator attached the dimension
536 information. */
537 gcc_assert (pos);
539 for (ix = 0; ix != GOMP_DIM_MAX; ix++)
541 purpose[ix] = TREE_PURPOSE (pos);
542 tree val = TREE_VALUE (pos);
543 dims[ix] = val ? TREE_INT_CST_LOW (val) : -1;
544 pos = TREE_CHAIN (pos);
547 bool changed = targetm.goacc.validate_dims (fn, dims, level);
549 /* Default anything left to 1 or a partitioned default. */
550 for (ix = 0; ix != GOMP_DIM_MAX; ix++)
551 if (dims[ix] < 0)
553 /* The OpenACC spec says 'If the [num_gangs] clause is not
554 specified, an implementation-defined default will be used;
555 the default may depend on the code within the construct.'
556 (2.5.6). Thus an implementation is free to choose
557 non-unity default for a parallel region that doesn't have
558 any gang-partitioned loops. However, it appears that there
559 is a sufficient body of user code that expects non-gang
560 partitioned regions to not execute in gang-redundant mode.
561 So we (a) don't warn about the non-portability and (b) pick
562 the minimum permissible dimension size when there is no
563 partitioned execution. Otherwise we pick the global
564 default for the dimension, which the user can control. The
565 same wording and logic applies to num_workers and
566 vector_length, however the worker- or vector- single
567 execution doesn't have the same impact as gang-redundant
568 execution. (If the minimum gang-level partioning is not 1,
569 the target is probably too confusing.) */
570 dims[ix] = (used & GOMP_DIM_MASK (ix)
571 ? oacc_default_dims[ix] : oacc_min_dims[ix]);
572 changed = true;
575 if (changed)
577 /* Replace the attribute with new values. */
578 pos = NULL_TREE;
579 for (ix = GOMP_DIM_MAX; ix--;)
581 pos = tree_cons (purpose[ix],
582 build_int_cst (integer_type_node, dims[ix]),
583 pos);
584 if (is_kernel)
585 TREE_PUBLIC (pos) = 1;
587 oacc_replace_fn_attrib (fn, pos);
591 /* Create an empty OpenACC loop structure at LOC. */
593 static oacc_loop *
594 new_oacc_loop_raw (oacc_loop *parent, location_t loc)
596 oacc_loop *loop = XCNEW (oacc_loop);
598 loop->parent = parent;
599 loop->child = loop->sibling = NULL;
601 if (parent)
603 loop->sibling = parent->child;
604 parent->child = loop;
607 loop->loc = loc;
608 loop->marker = NULL;
609 memset (loop->heads, 0, sizeof (loop->heads));
610 memset (loop->tails, 0, sizeof (loop->tails));
611 loop->routine = NULL_TREE;
613 loop->mask = loop->flags = loop->inner = 0;
614 loop->ifns = 0;
615 loop->chunk_size = 0;
616 loop->head_end = NULL;
618 return loop;
621 /* Create an outermost, dummy OpenACC loop for offloaded function
622 DECL. */
624 static oacc_loop *
625 new_oacc_loop_outer (tree decl)
627 return new_oacc_loop_raw (NULL, DECL_SOURCE_LOCATION (decl));
630 /* Start a new OpenACC loop structure beginning at head marker HEAD.
631 Link into PARENT loop. Return the new loop. */
633 static oacc_loop *
634 new_oacc_loop (oacc_loop *parent, gcall *marker)
636 oacc_loop *loop = new_oacc_loop_raw (parent, gimple_location (marker));
638 loop->marker = marker;
640 /* TODO: This is where device_type flattening would occur for the loop
641 flags. */
643 loop->flags = TREE_INT_CST_LOW (gimple_call_arg (marker, 3));
645 tree chunk_size = integer_zero_node;
646 if (loop->flags & OLF_GANG_STATIC)
647 chunk_size = gimple_call_arg (marker, 4);
648 loop->chunk_size = chunk_size;
650 return loop;
653 /* Create a dummy loop encompassing a call to a openACC routine.
654 Extract the routine's partitioning requirements. */
656 static void
657 new_oacc_loop_routine (oacc_loop *parent, gcall *call, tree decl, tree attrs)
659 oacc_loop *loop = new_oacc_loop_raw (parent, gimple_location (call));
660 int level = oacc_fn_attrib_level (attrs);
662 gcc_assert (level >= 0);
664 loop->marker = call;
665 loop->routine = decl;
666 loop->mask = ((GOMP_DIM_MASK (GOMP_DIM_MAX) - 1)
667 ^ (GOMP_DIM_MASK (level) - 1));
670 /* Finish off the current OpenACC loop ending at tail marker TAIL.
671 Return the parent loop. */
673 static oacc_loop *
674 finish_oacc_loop (oacc_loop *loop)
676 /* If the loop has been collapsed, don't partition it. */
677 if (!loop->ifns)
678 loop->mask = loop->flags = 0;
679 return loop->parent;
682 /* Free all OpenACC loop structures within LOOP (inclusive). */
684 static void
685 free_oacc_loop (oacc_loop *loop)
687 if (loop->sibling)
688 free_oacc_loop (loop->sibling);
689 if (loop->child)
690 free_oacc_loop (loop->child);
692 free (loop);
695 /* Dump out the OpenACC loop head or tail beginning at FROM. */
697 static void
698 dump_oacc_loop_part (FILE *file, gcall *from, int depth,
699 const char *title, int level)
701 enum ifn_unique_kind kind
702 = (enum ifn_unique_kind) TREE_INT_CST_LOW (gimple_call_arg (from, 0));
704 fprintf (file, "%*s%s-%d:\n", depth * 2, "", title, level);
705 for (gimple_stmt_iterator gsi = gsi_for_stmt (from);;)
707 gimple *stmt = gsi_stmt (gsi);
709 if (gimple_call_internal_p (stmt, IFN_UNIQUE))
711 enum ifn_unique_kind k
712 = ((enum ifn_unique_kind) TREE_INT_CST_LOW
713 (gimple_call_arg (stmt, 0)));
715 if (k == kind && stmt != from)
716 break;
718 print_gimple_stmt (file, stmt, depth * 2 + 2, 0);
720 gsi_next (&gsi);
721 while (gsi_end_p (gsi))
722 gsi = gsi_start_bb (single_succ (gsi_bb (gsi)));
726 /* Dump OpenACC loops LOOP, its siblings and its children. */
728 static void
729 dump_oacc_loop (FILE *file, oacc_loop *loop, int depth)
731 int ix;
733 fprintf (file, "%*sLoop %x(%x) %s:%u\n", depth * 2, "",
734 loop->flags, loop->mask,
735 LOCATION_FILE (loop->loc), LOCATION_LINE (loop->loc));
737 if (loop->marker)
738 print_gimple_stmt (file, loop->marker, depth * 2, 0);
740 if (loop->routine)
741 fprintf (file, "%*sRoutine %s:%u:%s\n",
742 depth * 2, "", DECL_SOURCE_FILE (loop->routine),
743 DECL_SOURCE_LINE (loop->routine),
744 IDENTIFIER_POINTER (DECL_NAME (loop->routine)));
746 for (ix = GOMP_DIM_GANG; ix != GOMP_DIM_MAX; ix++)
747 if (loop->heads[ix])
748 dump_oacc_loop_part (file, loop->heads[ix], depth, "Head", ix);
749 for (ix = GOMP_DIM_MAX; ix--;)
750 if (loop->tails[ix])
751 dump_oacc_loop_part (file, loop->tails[ix], depth, "Tail", ix);
753 if (loop->child)
754 dump_oacc_loop (file, loop->child, depth + 1);
755 if (loop->sibling)
756 dump_oacc_loop (file, loop->sibling, depth);
759 void debug_oacc_loop (oacc_loop *);
761 /* Dump loops to stderr. */
763 DEBUG_FUNCTION void
764 debug_oacc_loop (oacc_loop *loop)
766 dump_oacc_loop (stderr, loop, 0);
769 /* DFS walk of basic blocks BB onwards, creating OpenACC loop
770 structures as we go. By construction these loops are properly
771 nested. */
773 static void
774 oacc_loop_discover_walk (oacc_loop *loop, basic_block bb)
776 int marker = 0;
777 int remaining = 0;
779 if (bb->flags & BB_VISITED)
780 return;
782 follow:
783 bb->flags |= BB_VISITED;
785 /* Scan for loop markers. */
786 for (gimple_stmt_iterator gsi = gsi_start_bb (bb); !gsi_end_p (gsi);
787 gsi_next (&gsi))
789 gimple *stmt = gsi_stmt (gsi);
791 if (!is_gimple_call (stmt))
792 continue;
794 gcall *call = as_a <gcall *> (stmt);
796 /* If this is a routine, make a dummy loop for it. */
797 if (tree decl = gimple_call_fndecl (call))
798 if (tree attrs = oacc_get_fn_attrib (decl))
800 gcc_assert (!marker);
801 new_oacc_loop_routine (loop, call, decl, attrs);
804 if (!gimple_call_internal_p (call))
805 continue;
807 switch (gimple_call_internal_fn (call))
809 default:
810 break;
812 case IFN_GOACC_LOOP:
813 /* Count the goacc loop abstraction fns, to determine if the
814 loop was collapsed already. */
815 loop->ifns++;
816 break;
818 case IFN_UNIQUE:
819 enum ifn_unique_kind kind
820 = (enum ifn_unique_kind) (TREE_INT_CST_LOW
821 (gimple_call_arg (call, 0)));
822 if (kind == IFN_UNIQUE_OACC_HEAD_MARK
823 || kind == IFN_UNIQUE_OACC_TAIL_MARK)
825 if (gimple_call_num_args (call) == 2)
827 gcc_assert (marker && !remaining);
828 marker = 0;
829 if (kind == IFN_UNIQUE_OACC_TAIL_MARK)
830 loop = finish_oacc_loop (loop);
831 else
832 loop->head_end = call;
834 else
836 int count = TREE_INT_CST_LOW (gimple_call_arg (call, 2));
838 if (!marker)
840 if (kind == IFN_UNIQUE_OACC_HEAD_MARK)
841 loop = new_oacc_loop (loop, call);
842 remaining = count;
844 gcc_assert (count == remaining);
845 if (remaining)
847 remaining--;
848 if (kind == IFN_UNIQUE_OACC_HEAD_MARK)
849 loop->heads[marker] = call;
850 else
851 loop->tails[remaining] = call;
853 marker++;
858 if (remaining || marker)
860 bb = single_succ (bb);
861 gcc_assert (single_pred_p (bb) && !(bb->flags & BB_VISITED));
862 goto follow;
865 /* Walk successor blocks. */
866 edge e;
867 edge_iterator ei;
869 FOR_EACH_EDGE (e, ei, bb->succs)
870 oacc_loop_discover_walk (loop, e->dest);
873 /* LOOP is the first sibling. Reverse the order in place and return
874 the new first sibling. Recurse to child loops. */
876 static oacc_loop *
877 oacc_loop_sibling_nreverse (oacc_loop *loop)
879 oacc_loop *last = NULL;
882 if (loop->child)
883 loop->child = oacc_loop_sibling_nreverse (loop->child);
885 oacc_loop *next = loop->sibling;
886 loop->sibling = last;
887 last = loop;
888 loop = next;
890 while (loop);
892 return last;
895 /* Discover the OpenACC loops marked up by HEAD and TAIL markers for
896 the current function. */
898 static oacc_loop *
899 oacc_loop_discovery ()
901 /* Clear basic block flags, in particular BB_VISITED which we're going to use
902 in the following. */
903 clear_bb_flags ();
905 oacc_loop *top = new_oacc_loop_outer (current_function_decl);
906 oacc_loop_discover_walk (top, ENTRY_BLOCK_PTR_FOR_FN (cfun));
908 /* The siblings were constructed in reverse order, reverse them so
909 that diagnostics come out in an unsurprising order. */
910 top = oacc_loop_sibling_nreverse (top);
912 return top;
915 /* Transform the abstract internal function markers starting at FROM
916 to be for partitioning level LEVEL. Stop when we meet another HEAD
917 or TAIL marker. */
919 static void
920 oacc_loop_xform_head_tail (gcall *from, int level)
922 enum ifn_unique_kind kind
923 = (enum ifn_unique_kind) TREE_INT_CST_LOW (gimple_call_arg (from, 0));
924 tree replacement = build_int_cst (unsigned_type_node, level);
926 for (gimple_stmt_iterator gsi = gsi_for_stmt (from);;)
928 gimple *stmt = gsi_stmt (gsi);
930 if (gimple_call_internal_p (stmt, IFN_UNIQUE))
932 enum ifn_unique_kind k
933 = ((enum ifn_unique_kind)
934 TREE_INT_CST_LOW (gimple_call_arg (stmt, 0)));
936 if (k == IFN_UNIQUE_OACC_FORK || k == IFN_UNIQUE_OACC_JOIN)
937 *gimple_call_arg_ptr (stmt, 2) = replacement;
938 else if (k == kind && stmt != from)
939 break;
941 else if (gimple_call_internal_p (stmt, IFN_GOACC_REDUCTION))
942 *gimple_call_arg_ptr (stmt, 3) = replacement;
944 gsi_next (&gsi);
945 while (gsi_end_p (gsi))
946 gsi = gsi_start_bb (single_succ (gsi_bb (gsi)));
950 /* Transform the IFN_GOACC_LOOP internal functions by providing the
951 determined partitioning mask and chunking argument. END_MARKER
952 points at the end IFN_HEAD_TAIL call intgroducing the loop. IFNS
953 is the number of IFN_GOACC_LOOP calls for the loop. MASK_ARG is
954 the replacement partitioning mask and CHUNK_ARG is the replacement
955 chunking arg. */
957 static void
958 oacc_loop_xform_loop (gcall *end_marker, unsigned ifns,
959 tree mask_arg, tree chunk_arg)
961 gimple_stmt_iterator gsi = gsi_for_stmt (end_marker);
963 gcc_checking_assert (ifns);
964 for (;;)
966 for (; !gsi_end_p (gsi); gsi_next (&gsi))
968 gimple *stmt = gsi_stmt (gsi);
970 if (!is_gimple_call (stmt))
971 continue;
973 gcall *call = as_a <gcall *> (stmt);
975 if (!gimple_call_internal_p (call))
976 continue;
978 if (gimple_call_internal_fn (call) != IFN_GOACC_LOOP)
979 continue;
981 *gimple_call_arg_ptr (call, 5) = mask_arg;
982 *gimple_call_arg_ptr (call, 4) = chunk_arg;
983 ifns--;
984 if (!ifns)
985 return;
988 /* The LOOP_BOUND ifn could be in the single successor
989 block. */
990 basic_block bb = single_succ (gsi_bb (gsi));
991 gsi = gsi_start_bb (bb);
995 /* Process the discovered OpenACC loops, setting the correct
996 partitioning level etc. */
998 static void
999 oacc_loop_process (oacc_loop *loop)
1001 if (loop->child)
1002 oacc_loop_process (loop->child);
1004 if (loop->mask && !loop->routine)
1006 int ix;
1007 unsigned mask = loop->mask;
1008 unsigned dim = GOMP_DIM_GANG;
1009 tree mask_arg = build_int_cst (unsigned_type_node, mask);
1010 tree chunk_arg = loop->chunk_size;
1012 oacc_loop_xform_loop (loop->head_end, loop->ifns, mask_arg, chunk_arg);
1014 for (ix = 0; ix != GOMP_DIM_MAX && mask; ix++)
1016 while (!(GOMP_DIM_MASK (dim) & mask))
1017 dim++;
1019 oacc_loop_xform_head_tail (loop->heads[ix], dim);
1020 oacc_loop_xform_head_tail (loop->tails[ix], dim);
1022 mask ^= GOMP_DIM_MASK (dim);
1026 if (loop->sibling)
1027 oacc_loop_process (loop->sibling);
1030 /* Walk the OpenACC loop heirarchy checking and assigning the
1031 programmer-specified partitionings. OUTER_MASK is the partitioning
1032 this loop is contained within. Return mask of partitioning
1033 encountered. If any auto loops are discovered, set GOMP_DIM_MAX
1034 bit. */
1036 static unsigned
1037 oacc_loop_fixed_partitions (oacc_loop *loop, unsigned outer_mask)
1039 unsigned this_mask = loop->mask;
1040 unsigned mask_all = 0;
1041 bool noisy = true;
1043 #ifdef ACCEL_COMPILER
1044 /* When device_type is supported, we want the device compiler to be
1045 noisy, if the loop parameters are device_type-specific. */
1046 noisy = false;
1047 #endif
1049 if (!loop->routine)
1051 bool auto_par = (loop->flags & OLF_AUTO) != 0;
1052 bool seq_par = (loop->flags & OLF_SEQ) != 0;
1054 this_mask = ((loop->flags >> OLF_DIM_BASE)
1055 & (GOMP_DIM_MASK (GOMP_DIM_MAX) - 1));
1057 if ((this_mask != 0) + auto_par + seq_par > 1)
1059 if (noisy)
1060 error_at (loop->loc,
1061 seq_par
1062 ? "%<seq%> overrides other OpenACC loop specifiers"
1063 : "%<auto%> conflicts with other OpenACC loop "
1064 "specifiers");
1065 auto_par = false;
1066 loop->flags &= ~OLF_AUTO;
1067 if (seq_par)
1069 loop->flags
1070 &= ~((GOMP_DIM_MASK (GOMP_DIM_MAX) - 1) << OLF_DIM_BASE);
1071 this_mask = 0;
1074 if (auto_par && (loop->flags & OLF_INDEPENDENT))
1075 mask_all |= GOMP_DIM_MASK (GOMP_DIM_MAX);
1078 if (this_mask & outer_mask)
1080 const oacc_loop *outer;
1081 for (outer = loop->parent; outer; outer = outer->parent)
1082 if (outer->mask & this_mask)
1083 break;
1085 if (noisy)
1087 if (outer)
1089 error_at (loop->loc,
1090 "%s uses same OpenACC parallelism as containing loop",
1091 loop->routine ? "routine call" : "inner loop");
1092 inform (outer->loc, "containing loop here");
1094 else
1095 error_at (loop->loc,
1096 "%s uses OpenACC parallelism disallowed by containing "
1097 "routine", loop->routine ? "routine call" : "loop");
1099 if (loop->routine)
1100 inform (DECL_SOURCE_LOCATION (loop->routine),
1101 "routine %qD declared here", loop->routine);
1103 this_mask &= ~outer_mask;
1105 else
1107 unsigned outermost = least_bit_hwi (this_mask);
1109 if (outermost && outermost <= outer_mask)
1111 if (noisy)
1113 error_at (loop->loc,
1114 "incorrectly nested OpenACC loop parallelism");
1116 const oacc_loop *outer;
1117 for (outer = loop->parent;
1118 outer->flags && outer->flags < outermost;
1119 outer = outer->parent)
1120 continue;
1121 inform (outer->loc, "containing loop here");
1124 this_mask &= ~outermost;
1128 loop->mask = this_mask;
1129 mask_all |= this_mask;
1131 if (loop->child)
1133 loop->inner = oacc_loop_fixed_partitions (loop->child,
1134 outer_mask | this_mask);
1135 mask_all |= loop->inner;
1138 if (loop->sibling)
1139 mask_all |= oacc_loop_fixed_partitions (loop->sibling, outer_mask);
1141 return mask_all;
/* Walk the OpenACC loop hierarchy to assign auto-partitioned loops.
   OUTER_MASK is the partitioning this loop is contained within.
   Return the cumulative partitioning used by this loop, siblings and
   children.  */

static unsigned
oacc_loop_auto_partitions (oacc_loop *loop, unsigned outer_mask)
{
  /* Only loops marked both 'auto' and 'independent' are candidates for
     automatic partition assignment.  */
  bool assign = (loop->flags & OLF_AUTO) && (loop->flags & OLF_INDEPENDENT);
  bool noisy = true;

#ifdef ACCEL_COMPILER
  /* When device_type is supported, we want the device compiler to be
     noisy, if the loop parameters are device_type-specific.  */
  noisy = false;
#endif

  if (assign && outer_mask < GOMP_DIM_MASK (GOMP_DIM_MAX - 1))
    {
      /* Allocate the outermost loop at the outermost available
	 level.  outer_mask is a contiguous low-bit mask, so +1 yields
	 the next (outer-most free) axis bit.  */
      unsigned this_mask = outer_mask + 1;

      /* Only take the axis if no inner loop already uses it.  */
      if (!(this_mask & loop->inner))
	loop->mask = this_mask;
    }

  if (loop->child)
    {
      unsigned child_mask = outer_mask | loop->mask;

      /* NOTE: GOMP_DIM_MASK (GOMP_DIM_MAX) is used as a marker bit,
	 telling descendants that an ancestor is (or may be)
	 partitioned.  */
      if (loop->mask || assign)
	child_mask |= GOMP_DIM_MASK (GOMP_DIM_MAX);

      loop->inner = oacc_loop_auto_partitions (loop->child, child_mask);
    }

  if (assign && !loop->mask)
    {
      /* Allocate the loop at the innermost available level.  */
      unsigned this_mask = 0;

      /* Determine the outermost partitioning used within this loop.  */
      this_mask = loop->inner | GOMP_DIM_MASK (GOMP_DIM_MAX);
      this_mask = least_bit_hwi (this_mask);

      /* Pick the partitioning just inside that one.  */
      this_mask >>= 1;

      /* And avoid picking one used by an outer loop.  */
      this_mask &= ~outer_mask;

      /* No axis left: the loop cannot be parallelized.  */
      if (!this_mask && noisy)
	warning_at (loop->loc, 0,
		    "insufficient partitioning available to parallelize loop");

      loop->mask = this_mask;
    }

  if (assign && dump_file)
    fprintf (dump_file, "Auto loop %s:%d assigned %d\n",
	     LOCATION_FILE (loop->loc), LOCATION_LINE (loop->loc),
	     loop->mask);

  /* Accumulate the partitioning used by this loop, its siblings and
     its children, for the caller.  */
  unsigned inner_mask = 0;

  if (loop->sibling)
    inner_mask |= oacc_loop_auto_partitions (loop->sibling, outer_mask);

  inner_mask |= loop->inner | loop->mask;

  return inner_mask;
}
1218 /* Walk the OpenACC loop heirarchy to check and assign partitioning
1219 axes. Return mask of partitioning. */
1221 static unsigned
1222 oacc_loop_partition (oacc_loop *loop, unsigned outer_mask)
1224 unsigned mask_all = oacc_loop_fixed_partitions (loop, outer_mask);
1226 if (mask_all & GOMP_DIM_MASK (GOMP_DIM_MAX))
1228 mask_all ^= GOMP_DIM_MASK (GOMP_DIM_MAX);
1229 mask_all |= oacc_loop_auto_partitions (loop, outer_mask);
1231 return mask_all;
1234 /* Default fork/join early expander. Delete the function calls if
1235 there is no RTL expander. */
1237 bool
1238 default_goacc_fork_join (gcall *ARG_UNUSED (call),
1239 const int *ARG_UNUSED (dims), bool is_fork)
1241 if (is_fork)
1242 return targetm.have_oacc_fork ();
1243 else
1244 return targetm.have_oacc_join ();
/* Default goacc.reduction early expander.

   LHS-opt = IFN_REDUCTION (KIND, RES_PTR, VAR, LEVEL, OP, OFFSET)
   If RES_PTR is not integer-zerop:
       SETUP - emit 'LHS = *RES_PTR', LHS = NULL
       TEARDOWN - emit '*RES_PTR = VAR'
   If LHS is not NULL
       emit 'LHS = VAR'   */

void
default_goacc_reduction (gcall *call)
{
  /* Arg 0 is the reduction kind (IFN_GOACC_REDUCTION_* code).  */
  unsigned code = (unsigned)TREE_INT_CST_LOW (gimple_call_arg (call, 0));
  gimple_stmt_iterator gsi = gsi_for_stmt (call);
  tree lhs = gimple_call_lhs (call);
  tree var = gimple_call_arg (call, 2);
  /* Replacement statements accumulated here, then spliced in place of
     the internal-fn call.  */
  gimple_seq seq = NULL;

  if (code == IFN_GOACC_REDUCTION_SETUP
      || code == IFN_GOACC_REDUCTION_TEARDOWN)
    {
      /* Setup and Teardown need to copy from/to the receiver object,
	 if there is one.  */
      tree ref_to_res = gimple_call_arg (call, 1);

      if (!integer_zerop (ref_to_res))
	{
	  tree dst = build_simple_mem_ref (ref_to_res);
	  tree src = var;

	  if (code == IFN_GOACC_REDUCTION_SETUP)
	    {
	      /* SETUP copies *RES_PTR into LHS; flip the direction and
		 clear LHS so the final copy below is skipped.  */
	      src = dst;
	      dst = lhs;
	      lhs = NULL;
	    }
	  gimple_seq_add_stmt (&seq, gimple_build_assign (dst, src));
	}
    }

  /* Copy VAR to LHS, if there is an LHS.  */
  if (lhs)
    gimple_seq_add_stmt (&seq, gimple_build_assign (lhs, var));

  /* Replace the call with SEQ (possibly empty, which deletes it).  */
  gsi_replace_with_seq (&gsi, seq, true);
}
/* Main entry point for oacc transformations which run on the device
   compiler after LTO, so we know what the target device is at this
   point (including the host fallback).  */

static unsigned int
execute_oacc_device_lower ()
{
  tree attrs = oacc_get_fn_attrib (current_function_decl);

  if (!attrs)
    /* Not an offloaded function.  */
    return 0;

  /* Parse the default dim argument exactly once.  The flag variable
     is repointed at itself afterwards as a "already parsed" marker.  */
  if ((const void *)flag_openacc_dims != &flag_openacc_dims)
    {
      oacc_parse_default_dims (flag_openacc_dims);
      flag_openacc_dims = (char *)&flag_openacc_dims;
    }

  /* Discover, partition and process the loops.  */
  oacc_loop *loops = oacc_loop_discovery ();
  int fn_level = oacc_fn_attrib_level (attrs);

  if (dump_file)
    fprintf (dump_file, oacc_fn_attrib_kernels_p (attrs)
	     ? "Function is kernels offload\n"
	     : fn_level < 0 ? "Function is parallel offload\n"
	     : "Function is routine level %d\n", fn_level);

  /* For a routine at level N, all outer axes (< N) are owned by the
     caller; mask them out of what this function may use.  */
  unsigned outer_mask = fn_level >= 0 ? GOMP_DIM_MASK (fn_level) - 1 : 0;
  unsigned used_mask = oacc_loop_partition (loops, outer_mask);
  int dims[GOMP_DIM_MAX];

  oacc_validate_dims (current_function_decl, attrs, dims, fn_level, used_mask);

  if (dump_file)
    {
      const char *comma = "Compute dimensions [";
      for (int ix = 0; ix != GOMP_DIM_MAX; ix++, comma = ", ")
	fprintf (dump_file, "%s%d", comma, dims[ix]);
      fprintf (dump_file, "]\n");
    }

  oacc_loop_process (loops);
  if (dump_file)
    {
      fprintf (dump_file, "OpenACC loops\n");
      dump_oacc_loop (dump_file, loops, 0);
      fprintf (dump_file, "\n");
    }

  /* Offloaded targets may introduce new basic blocks, which require
     dominance information to update SSA.  */
  calculate_dominance_info (CDI_DOMINATORS);

  /* Now lower internal loop functions to target-specific code
     sequences.  */
  basic_block bb;
  FOR_ALL_BB_FN (bb, cfun)
    for (gimple_stmt_iterator gsi = gsi_start_bb (bb); !gsi_end_p (gsi);)
      {
	gimple *stmt = gsi_stmt (gsi);
	if (!is_gimple_call (stmt))
	  {
	    gsi_next (&gsi);
	    continue;
	  }

	gcall *call = as_a <gcall *> (stmt);
	if (!gimple_call_internal_p (call))
	  {
	    gsi_next (&gsi);
	    continue;
	  }

	/* Rewind to allow rescan: the expanders below may replace CALL
	   with a sequence, and we want the iterator positioned so the
	   new statements are visited too.  */
	gsi_prev (&gsi);
	bool rescan = false, remove = false;
	enum internal_fn ifn_code = gimple_call_internal_fn (call);

	switch (ifn_code)
	  {
	  default: break;

	  case IFN_GOACC_LOOP:
	    oacc_xform_loop (call);
	    rescan = true;
	    break;

	  case IFN_GOACC_REDUCTION:
	    /* Mark the function for SSA renaming.  */
	    mark_virtual_operands_for_renaming (cfun);

	    /* If the level is -1, this ended up being an unused
	       axis.  Handle as a default.  */
	    if (integer_minus_onep (gimple_call_arg (call, 3)))
	      default_goacc_reduction (call);
	    else
	      targetm.goacc.reduction (call);
	    rescan = true;
	    break;

	  case IFN_UNIQUE:
	    {
	      enum ifn_unique_kind kind
		= ((enum ifn_unique_kind)
		   TREE_INT_CST_LOW (gimple_call_arg (call, 0)));

	      switch (kind)
		{
		default:
		  gcc_unreachable ();

		case IFN_UNIQUE_OACC_FORK:
		case IFN_UNIQUE_OACC_JOIN:
		  /* Arg 2 of -1 marks an unused axis; otherwise let the
		     target decide whether it has an RTL expander.  */
		  if (integer_minus_onep (gimple_call_arg (call, 2)))
		    remove = true;
		  else if (!targetm.goacc.fork_join
			   (call, dims, kind == IFN_UNIQUE_OACC_FORK))
		    remove = true;
		  break;

		case IFN_UNIQUE_OACC_HEAD_MARK:
		case IFN_UNIQUE_OACC_TAIL_MARK:
		  remove = true;
		  break;
		}
	      break;
	    }
	  }

	if (gsi_end_p (gsi))
	  /* We rewound past the beginning of the BB.  */
	  gsi = gsi_start_bb (bb);
	else
	  /* Undo the rewind.  */
	  gsi_next (&gsi);

	if (remove)
	  {
	    /* Detach the call's virtual def before deleting it.  */
	    if (gimple_vdef (call))
	      replace_uses_by (gimple_vdef (call), gimple_vuse (call));
	    if (gimple_call_lhs (call))
	      {
		/* Propagate the data dependency var.  */
		gimple *ass = gimple_build_assign (gimple_call_lhs (call),
						   gimple_call_arg (call, 1));
		gsi_replace (&gsi, ass, false);
	      }
	    else
	      gsi_remove (&gsi, true);
	  }
	else if (!rescan)
	  /* If not rescanning, advance over the call.  */
	  gsi_next (&gsi);
      }

  free_oacc_loop (loops);

  return 0;
}
1457 /* Default launch dimension validator. Force everything to 1. A
1458 backend that wants to provide larger dimensions must override this
1459 hook. */
1461 bool
1462 default_goacc_validate_dims (tree ARG_UNUSED (decl), int *dims,
1463 int ARG_UNUSED (fn_level))
1465 bool changed = false;
1467 for (unsigned ix = 0; ix != GOMP_DIM_MAX; ix++)
1469 if (dims[ix] != 1)
1471 dims[ix] = 1;
1472 changed = true;
1476 return changed;
/* Default dimension bound is unknown on accelerator and 1 on host.  */

int
default_goacc_dim_limit (int ARG_UNUSED (axis))
{
#ifdef ACCEL_COMPILER
  /* 0 means "no known bound" on the device compiler.  */
  return 0;
#else
  /* Host fallback executes everything single-threaded per axis.  */
  return 1;
#endif
}
namespace {

/* Pass descriptor for the OpenACC device lowering pass; see
   execute_oacc_device_lower above for what it does.  */

const pass_data pass_data_oacc_device_lower =
{
  GIMPLE_PASS, /* type */
  "oaccdevlow", /* name */
  OPTGROUP_OPENMP, /* optinfo_flags */
  TV_NONE, /* tv_id */
  PROP_cfg, /* properties_required */
  0 /* Possibly PROP_gimple_eomp.  */, /* properties_provided */
  0, /* properties_destroyed */
  0, /* todo_flags_start */
  TODO_update_ssa | TODO_cleanup_cfg, /* todo_flags_finish */
};

class pass_oacc_device_lower : public gimple_opt_pass
{
public:
  pass_oacc_device_lower (gcc::context *ctxt)
    : gimple_opt_pass (pass_data_oacc_device_lower, ctxt)
  {}

  /* opt_pass methods: */
  /* Only run when OpenACC is enabled.  */
  virtual bool gate (function *) { return flag_openacc; };

  virtual unsigned int execute (function *)
    {
      return execute_oacc_device_lower ();
    }

}; // class pass_oacc_device_lower

} // anon namespace

/* Factory function used by the pass manager.  */

gimple_opt_pass *
make_pass_oacc_device_lower (gcc::context *ctxt)
{
  return new pass_oacc_device_lower (ctxt);
}
/* Cleanup uses of SIMT placeholder internal functions: on non-SIMT targets,
   VF is 1 and LANE is 0; on SIMT targets, VF is folded to a constant, and
   LANE is kept to be expanded to RTL later on.  Also cleanup all other SIMT
   internal functions on non-SIMT targets, and likewise some SIMD internal
   functions on SIMT targets.  */

static unsigned int
execute_omp_device_lower ()
{
  /* SIMT vectorization factor: 1 when the target has no SIMT support.  */
  int vf = targetm.simt.vf ? targetm.simt.vf () : 1;
  basic_block bb;
  gimple_stmt_iterator gsi;
  FOR_EACH_BB_FN (bb, cfun)
    for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
      {
	gimple *stmt = gsi_stmt (gsi);
	if (!is_gimple_call (stmt) || !gimple_call_internal_p (stmt))
	  continue;
	/* RHS stays NULL_TREE for calls that must be kept for later
	   RTL expansion on SIMT targets.  */
	tree lhs = gimple_call_lhs (stmt), rhs = NULL_TREE;
	tree type = lhs ? TREE_TYPE (lhs) : integer_type_node;
	switch (gimple_call_internal_fn (stmt))
	  {
	  case IFN_GOMP_USE_SIMT:
	    /* Folds to 0/1 depending on SIMT availability.  */
	    rhs = vf == 1 ? integer_zero_node : integer_one_node;
	    break;
	  case IFN_GOMP_SIMT_LANE:
	  case IFN_GOMP_SIMT_LAST_LANE:
	    /* Lane is 0 without SIMT; kept for RTL expansion otherwise.  */
	    rhs = vf == 1 ? build_zero_cst (type) : NULL_TREE;
	    break;
	  case IFN_GOMP_SIMT_VF:
	    rhs = build_int_cst (type, vf);
	    break;
	  case IFN_GOMP_SIMT_ORDERED_PRED:
	    rhs = vf == 1 ? integer_zero_node : NULL_TREE;
	    /* The call carries a vdef; unlink it when the call goes
	       away (or its result is unused).  */
	    if (rhs || !lhs)
	      unlink_stmt_vdef (stmt);
	    break;
	  case IFN_GOMP_SIMT_VOTE_ANY:
	  case IFN_GOMP_SIMT_XCHG_BFLY:
	  case IFN_GOMP_SIMT_XCHG_IDX:
	    /* Without SIMT these are identity on their first arg.  */
	    rhs = vf == 1 ? gimple_call_arg (stmt, 0) : NULL_TREE;
	    break;
	  case IFN_GOMP_SIMD_LANE:
	  case IFN_GOMP_SIMD_LAST_LANE:
	    /* SIMD placeholders fold the opposite way: only on SIMT
	       targets (vf != 1).  */
	    rhs = vf != 1 ? build_zero_cst (type) : NULL_TREE;
	    break;
	  case IFN_GOMP_SIMD_VF:
	    rhs = vf != 1 ? build_one_cst (type) : NULL_TREE;
	    break;
	  default:
	    continue;
	  }
	if (lhs && !rhs)
	  continue;
	/* Replace the call with 'lhs = rhs', or a no-op if there is no
	   result to keep.  */
	stmt = lhs ? gimple_build_assign (lhs, rhs) : gimple_build_nop ();
	gsi_replace (&gsi, stmt, false);
      }
  if (vf != 1)
    /* SIMT regions handle vectorization themselves; drop the force
       flag so the vectorizer does not interfere.  */
    cfun->has_force_vectorize_loops = false;
  return 0;
}
namespace {

/* Pass descriptor for the OpenMP device lowering pass; see
   execute_omp_device_lower above.  */

const pass_data pass_data_omp_device_lower =
{
  GIMPLE_PASS, /* type */
  "ompdevlow", /* name */
  OPTGROUP_OPENMP, /* optinfo_flags */
  TV_NONE, /* tv_id */
  PROP_cfg, /* properties_required */
  PROP_gimple_lomp_dev, /* properties_provided */
  0, /* properties_destroyed */
  0, /* todo_flags_start */
  TODO_update_ssa, /* todo_flags_finish */
};

class pass_omp_device_lower : public gimple_opt_pass
{
public:
  pass_omp_device_lower (gcc::context *ctxt)
    : gimple_opt_pass (pass_data_omp_device_lower, ctxt)
  {}

  /* opt_pass methods: */
  /* Run only if device lowering has not already been performed.  */
  virtual bool gate (function *fun)
    {
      return !(fun->curr_properties & PROP_gimple_lomp_dev);
    }

  virtual unsigned int execute (function *)
    {
      return execute_omp_device_lower ();
    }

}; // class pass_omp_device_lower

} // anon namespace

/* Factory function used by the pass manager.  */

gimple_opt_pass *
make_pass_omp_device_lower (gcc::context *ctxt)
{
  return new pass_omp_device_lower (ctxt);
}
/* "omp declare target link" handling pass.  */

namespace {

const pass_data pass_data_omp_target_link =
{
  GIMPLE_PASS, /* type */
  "omptargetlink", /* name */
  OPTGROUP_OPENMP, /* optinfo_flags */
  TV_NONE, /* tv_id */
  PROP_ssa, /* properties_required */
  0, /* properties_provided */
  0, /* properties_destroyed */
  0, /* todo_flags_start */
  TODO_update_ssa, /* todo_flags_finish */
};

class pass_omp_target_link : public gimple_opt_pass
{
public:
  pass_omp_target_link (gcc::context *ctxt)
    : gimple_opt_pass (pass_data_omp_target_link, ctxt)
  {}

  /* opt_pass methods: */
  /* Only relevant in the offload (accelerator) compiler, and only for
     functions marked for offloading.  */
  virtual bool gate (function *fun)
    {
#ifdef ACCEL_COMPILER
      tree attrs = DECL_ATTRIBUTES (fun->decl);
      return lookup_attribute ("omp declare target", attrs)
	     || lookup_attribute ("omp target entrypoint", attrs);
#else
      (void) fun;
      return false;
#endif
    }

  virtual unsigned execute (function *);
};

/* Callback for walk_gimple_stmt used to scan for link var operands.
   Returns the first variable found that carries the "omp declare
   target link" attribute and has a value expr, or NULL_TREE.  */

static tree
find_link_var_op (tree *tp, int *walk_subtrees, void *)
{
  tree t = *tp;

  if (VAR_P (t) && DECL_HAS_VALUE_EXPR_P (t)
      && lookup_attribute ("omp declare target link", DECL_ATTRIBUTES (t)))
    {
      /* Found one; no need to look inside it.  */
      *walk_subtrees = 0;
      return t;
    }

  return NULL_TREE;
}

/* Re-gimplify every statement that references a link variable, so its
   value expr (the indirection through the link pointer) is expanded.  */

unsigned
pass_omp_target_link::execute (function *fun)
{
  basic_block bb;
  FOR_EACH_BB_FN (bb, fun)
    {
      gimple_stmt_iterator gsi;
      for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
	if (walk_gimple_stmt (&gsi, NULL, find_link_var_op, NULL))
	  gimple_regimplify_operands (gsi_stmt (gsi), &gsi);
    }

  return 0;
}

} // anon namespace

/* Factory function used by the pass manager.  */

gimple_opt_pass *
make_pass_omp_target_link (gcc::context *ctxt)
{
  return new pass_omp_target_link (ctxt);
}